mirror of https://github.com/RPCS3/rpcs3.git
mfc: Fix support for list transfer stall bit with partial support for out of order execution - Also give Sync commands a size so they are properly detected by queue checks
This commit is contained in:
parent
8b476b5bfa
commit
34e01ba3d8
|
@ -142,107 +142,142 @@ void mfc_thread::cpu_task()
|
|||
|
||||
if (queue_size)
|
||||
{
|
||||
auto& cmd = spu.mfc_queue[0];
|
||||
|
||||
if ((cmd.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK)) == MFC_PUTQLLUC_CMD)
|
||||
u32 fence_mask = 0; // Using this instead of stall_mask to avoid a possible race condition
|
||||
u32 barrier_mask = 0;
|
||||
bool first = true;
|
||||
for (u32 i = 0; i < spu.mfc_queue.size(); i++, first = false)
|
||||
{
|
||||
auto& data = vm::ps3::_ref<decltype(spu.rdata)>(cmd.eal);
|
||||
const auto to_write = spu._ref<decltype(spu.rdata)>(cmd.lsa & 0x3ffff);
|
||||
auto& cmd = spu.mfc_queue[i];
|
||||
|
||||
cmd.size = 0;
|
||||
no_updates = 0;
|
||||
// this check all revolves around a potential 'stalled list' in the queue, as it's the one thing that can currently cause out-of-order mfc list execution
|
||||
// a list with barrier hard blocks that tag until it's been dealt with
|
||||
// and a new command that has a fence can't be executed until the stalled list has been dealt with
|
||||
if ((cmd.size != 0) && ((barrier_mask & (1u << cmd.tag)) || ((cmd.cmd & MFC_FENCE_MASK) && ((1 << cmd.tag) & fence_mask))))
|
||||
continue;
|
||||
|
||||
vm::reservation_acquire(cmd.eal, 128);
|
||||
|
||||
// Store unconditionally
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
if ((cmd.cmd & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK)) == MFC_PUTQLLUC_CMD)
|
||||
{
|
||||
if (!vm::reader_lock{vm::try_to_lock})
|
||||
{
|
||||
_xabort(0);
|
||||
}
|
||||
auto& data = vm::ps3::_ref<decltype(spu.rdata)>(cmd.eal);
|
||||
const auto to_write = spu._ref<decltype(spu.rdata)>(cmd.lsa & 0x3ffff);
|
||||
|
||||
data = to_write;
|
||||
vm::reservation_update(cmd.eal, 128);
|
||||
vm::notify(cmd.eal, 128);
|
||||
_xend();
|
||||
}
|
||||
else
|
||||
{
|
||||
vm::writer_lock lock(0);
|
||||
data = to_write;
|
||||
vm::reservation_update(cmd.eal, 128);
|
||||
vm::notify(cmd.eal, 128);
|
||||
}
|
||||
}
|
||||
else if (cmd.cmd & MFC_LIST_MASK)
|
||||
{
|
||||
struct list_element
|
||||
{
|
||||
be_t<u16> sb; // Stall-and-Notify bit (0x8000)
|
||||
be_t<u16> ts; // List Transfer Size
|
||||
be_t<u32> ea; // External Address Low
|
||||
};
|
||||
|
||||
if (cmd.size && (spu.ch_stall_mask & (1u << cmd.tag)) == 0)
|
||||
{
|
||||
cmd.lsa &= 0x3fff0;
|
||||
|
||||
const list_element item = spu._ref<list_element>(cmd.eal & 0x3fff8);
|
||||
|
||||
const u32 size = item.ts;
|
||||
const u32 addr = item.ea;
|
||||
|
||||
if (size)
|
||||
{
|
||||
spu_mfc_cmd transfer;
|
||||
transfer.eal = addr;
|
||||
transfer.eah = 0;
|
||||
transfer.lsa = cmd.lsa | (addr & 0xf);
|
||||
transfer.tag = cmd.tag;
|
||||
transfer.cmd = MFC(cmd.cmd & ~MFC_LIST_MASK);
|
||||
transfer.size = size;
|
||||
|
||||
spu.do_dma_transfer(transfer);
|
||||
cmd.lsa += std::max<u32>(size, 16);
|
||||
}
|
||||
|
||||
cmd.eal += 8;
|
||||
cmd.size -= 8;
|
||||
cmd.size = 0;
|
||||
no_updates = 0;
|
||||
|
||||
if (item.sb & 0x8000)
|
||||
vm::reservation_acquire(cmd.eal, 128);
|
||||
|
||||
// Store unconditionally
|
||||
if (s_use_rtm && utils::transaction_enter())
|
||||
{
|
||||
spu.ch_stall_stat.push_or(spu, 1 << cmd.tag);
|
||||
|
||||
const u32 evt = spu.ch_event_stat.fetch_or(SPU_EVENT_SN);
|
||||
|
||||
if (evt & SPU_EVENT_WAITING)
|
||||
if (!vm::reader_lock{ vm::try_to_lock })
|
||||
{
|
||||
spu.notify();
|
||||
}
|
||||
else if (evt & SPU_EVENT_INTR_ENABLED)
|
||||
{
|
||||
spu.state += cpu_flag::suspend;
|
||||
_xabort(0);
|
||||
}
|
||||
|
||||
data = to_write;
|
||||
vm::reservation_update(cmd.eal, 128);
|
||||
vm::notify(cmd.eal, 128);
|
||||
_xend();
|
||||
}
|
||||
else
|
||||
{
|
||||
vm::writer_lock lock(0);
|
||||
data = to_write;
|
||||
vm::reservation_update(cmd.eal, 128);
|
||||
vm::notify(cmd.eal, 128);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (LIKELY(cmd.size))
|
||||
{
|
||||
spu.do_dma_transfer(cmd);
|
||||
cmd.size = 0;
|
||||
}
|
||||
else if (UNLIKELY((cmd.cmd & ~0xc) == MFC_BARRIER_CMD))
|
||||
{
|
||||
// TODO (MFC_BARRIER_CMD, MFC_EIEIO_CMD, MFC_SYNC_CMD)
|
||||
_mm_mfence();
|
||||
}
|
||||
else if (cmd.cmd & MFC_LIST_MASK)
|
||||
{
|
||||
struct list_element
|
||||
{
|
||||
be_t<u16> sb; // Stall-and-Notify bit (0x8000)
|
||||
be_t<u16> ts; // List Transfer Size
|
||||
be_t<u32> ea; // External Address Low
|
||||
};
|
||||
|
||||
if (!cmd.size)
|
||||
{
|
||||
spu.mfc_queue.end_pop();
|
||||
no_updates = 0;
|
||||
if (cmd.size && (spu.ch_stall_mask & (1u << cmd.tag)) == 0)
|
||||
{
|
||||
cmd.lsa &= 0x3fff0;
|
||||
|
||||
// try to get the whole list done in one go
|
||||
while (cmd.size != 0)
|
||||
{
|
||||
const list_element item = spu._ref<list_element>(cmd.eal & 0x3fff8);
|
||||
|
||||
const u32 size = item.ts;
|
||||
const u32 addr = item.ea;
|
||||
|
||||
if (size)
|
||||
{
|
||||
spu_mfc_cmd transfer;
|
||||
transfer.eal = addr;
|
||||
transfer.eah = 0;
|
||||
transfer.lsa = cmd.lsa | (addr & 0xf);
|
||||
transfer.tag = cmd.tag;
|
||||
transfer.cmd = MFC(cmd.cmd & ~MFC_LIST_MASK);
|
||||
transfer.size = size;
|
||||
|
||||
spu.do_dma_transfer(transfer);
|
||||
cmd.lsa += std::max<u32>(size, 16);
|
||||
}
|
||||
|
||||
cmd.eal += 8;
|
||||
cmd.size -= 8;
|
||||
no_updates = 0;
|
||||
|
||||
// dont stall for last 'item' in list
|
||||
if ((item.sb & 0x8000) && (cmd.size != 0))
|
||||
{
|
||||
spu.ch_stall_mask |= (1 << cmd.tag);
|
||||
spu.ch_stall_stat.push_or(spu, 1 << cmd.tag);
|
||||
|
||||
const u32 evt = spu.ch_event_stat.fetch_or(SPU_EVENT_SN);
|
||||
|
||||
if (evt & SPU_EVENT_WAITING)
|
||||
{
|
||||
spu.notify();
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cmd.size != 0 && (cmd.cmd & MFC_BARRIER_MASK))
|
||||
barrier_mask |= (1 << cmd.tag);
|
||||
else if (cmd.size != 0)
|
||||
fence_mask |= (1 << cmd.tag);
|
||||
}
|
||||
else if (UNLIKELY((cmd.cmd & ~0xc) == MFC_BARRIER_CMD))
|
||||
{
|
||||
// Raw barrier commands / sync commands are tag agnostic and hard sync the mfc list
|
||||
// Need to guarantee everything ahead of it has been processed before this
|
||||
if (first)
|
||||
cmd.size = 0;
|
||||
else
|
||||
break;
|
||||
}
|
||||
else if (LIKELY(cmd.size))
|
||||
{
|
||||
spu.do_dma_transfer(cmd);
|
||||
cmd.size = 0;
|
||||
}
|
||||
if (!cmd.size && first)
|
||||
{
|
||||
spu.mfc_queue.end_pop();
|
||||
no_updates = 0;
|
||||
break;
|
||||
}
|
||||
else if (!cmd.size && i == 1)
|
||||
{
|
||||
// nasty hack, shoving stalled list down one
|
||||
// this *works* from the idea that the only thing that could have been passed over in position 0 is a stalled list
|
||||
// todo: this can still create a situation where we say the mfc queue is full when it's actually not, which will cause a rough deadlock between spu and mfc
|
||||
// which causes a situation where the spu is waiting for the queue to open up but hasn't signaled the stall yet
|
||||
spu.mfc_queue[1] = spu.mfc_queue[0];
|
||||
spu.mfc_queue.end_pop();
|
||||
no_updates = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -252,26 +287,21 @@ void mfc_thread::cpu_task()
|
|||
{
|
||||
// Mask incomplete transfers
|
||||
u32 completed = spu.ch_tag_mask;
|
||||
|
||||
for (u32 i = 0; i < spu.mfc_queue.size(); i++)
|
||||
{
|
||||
const auto& _cmd = spu.mfc_queue[i];
|
||||
|
||||
if (_cmd.size)
|
||||
for (u32 i = 0; i < spu.mfc_queue.size(); i++)
|
||||
{
|
||||
if (spu.ch_tag_upd == 1)
|
||||
{
|
||||
const auto& _cmd = spu.mfc_queue[i];
|
||||
if (_cmd.size)
|
||||
completed &= ~(1u << _cmd.tag);
|
||||
}
|
||||
else
|
||||
{
|
||||
completed = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (completed && spu.ch_tag_upd.exchange(0))
|
||||
if (completed && spu.ch_tag_upd.compare_and_swap_test(1, 0))
|
||||
{
|
||||
spu.ch_tag_stat.push(spu, completed);
|
||||
no_updates = 0;
|
||||
}
|
||||
else if (completed && spu.ch_tag_mask == completed && spu.ch_tag_upd.compare_and_swap_test(2, 0))
|
||||
{
|
||||
spu.ch_tag_stat.push(spu, completed);
|
||||
no_updates = 0;
|
||||
|
@ -280,7 +310,6 @@ void mfc_thread::cpu_task()
|
|||
|
||||
test_state();
|
||||
}
|
||||
|
||||
if (no_updates++)
|
||||
{
|
||||
if (no_updates >= 3)
|
||||
|
|
|
@ -815,7 +815,7 @@ void SPUThread::process_mfc_cmd()
|
|||
case MFC_GETLB_CMD:
|
||||
case MFC_GETLF_CMD:
|
||||
{
|
||||
if (ch_mfc_cmd.size <= max_imm_dma_size && mfc_queue.size() == 0 && (ch_stall_mask & (1u << ch_mfc_cmd.tag)) == 0)
|
||||
if (ch_mfc_cmd.size <= max_imm_dma_size && mfc_queue.size() == 0)
|
||||
{
|
||||
vm::reader_lock lock(vm::try_to_lock);
|
||||
|
||||
|
@ -890,7 +890,7 @@ void SPUThread::process_mfc_cmd()
|
|||
case MFC_EIEIO_CMD:
|
||||
case MFC_SYNC_CMD:
|
||||
{
|
||||
ch_mfc_cmd.size = 0;
|
||||
ch_mfc_cmd.size = 1;
|
||||
|
||||
if (mfc_queue.size() == 0)
|
||||
{
|
||||
|
|
|
@ -283,7 +283,7 @@ struct cfg_root : cfg::node
|
|||
cfg::_bool bind_spu_cores{this, "Bind SPU threads to secondary cores"};
|
||||
cfg::_bool lower_spu_priority{this, "Lower SPU thread priority"};
|
||||
cfg::_bool spu_debug{this, "SPU Debug"};
|
||||
cfg::_int<32, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC
|
||||
cfg::_int<0, 16384> max_spu_immediate_write_size{this, "Maximum immediate DMA write size", 16384}; // Maximum size that an SPU thread can write directly without posting to MFC
|
||||
cfg::_int<0, 6> preferred_spu_threads{this, "Preferred SPU Threads", 0}; //Numnber of hardware threads dedicated to heavy simultaneous spu tasks
|
||||
cfg::_int<0, 16> spu_delay_penalty{this, "SPU delay penalty", 3}; //Number of milliseconds to block a thread if a virtual 'core' isn't free
|
||||
cfg::_bool spu_loop_detection{this, "SPU loop detection", true}; //Try to detect wait loops and trigger thread yield
|
||||
|
|
Loading…
Reference in New Issue