spu: acquire_pc rewritten to be more precise in timing

- More accurate pauses may slightly reduce performance, but they make it more viable
  to allow more threads to participate without stuttering
This commit is contained in:
kd-11 2017-07-24 20:55:11 +03:00
parent dd19622823
commit 17c399d4e8
1 changed file with 26 additions and 9 deletions

View File

@ -33,6 +33,7 @@ bool operator ==(const u128& lhs, const u128& rhs)
#endif
extern u64 get_timebased_time();
extern u64 get_system_time();
extern thread_local u64 g_tls_fault_spu;
@ -63,24 +64,40 @@ namespace spu
namespace scheduler
{
std::array<std::atomic<u8>, 65536> atomic_instruction_table = {};
constexpr u32 native_jiffy_duration_us = 2000000;
constexpr u32 native_jiffy_duration_us = 1500; //About 1ms resolution with a half offset
void acquire_pc_address(u32 pc, u32 timeout_ms = 3)
{
const u8 max_concurrent_instructions = (u8)g_cfg.core.preferred_spu_threads;
const u32 pc_offset = pc >> 2;
if (timeout_ms > 0)
if (atomic_instruction_table[pc_offset].load(std::memory_order_consume) >= max_concurrent_instructions)
{
while (timeout_ms--)
if (timeout_ms > 0)
{
if (atomic_instruction_table[pc_offset].load(std::memory_order_consume) >= max_concurrent_instructions)
std::this_thread::sleep_for(1ms);
const auto timeout = timeout_ms * 1000u; //convert to microseconds
const auto start = get_system_time();
auto remaining = timeout;
while (atomic_instruction_table[pc_offset].load(std::memory_order_consume) >= max_concurrent_instructions)
{
if (remaining >= native_jiffy_duration_us)
std::this_thread::sleep_for(1ms);
else
std::this_thread::yield();
const auto now = get_system_time();
const auto elapsed = now - start;
if (elapsed > timeout) break;
remaining = timeout - elapsed;
}
}
else
{
//Slight pause if function is overburdened
thread_ctrl::wait_for(100);
}
}
else
{
std::this_thread::yield();
}
atomic_instruction_table[pc_offset]++;