DMA: Template transfer functions

~20% speedup in FMV playback on a Ryzen 9 7950X3D. CPUs hate branches.
This commit is contained in:
Stenzek 2023-12-19 00:55:04 +10:00
parent e736998f1e
commit f5ddd7ba32
No known key found for this signature in database
3 changed files with 75 additions and 31 deletions

View File

@ -166,15 +166,20 @@ static void UpdateIRQ();
// returns false if the DMA should now be halted // returns false if the DMA should now be halted
static TickCount GetTransferSliceTicks(); static TickCount GetTransferSliceTicks();
static TickCount GetTransferHaltTicks(); static TickCount GetTransferHaltTicks();
static bool TransferChannel(Channel channel);
static void HaltTransfer(TickCount duration); static void HaltTransfer(TickCount duration);
static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late); static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late);
template<Channel channel>
static bool TransferChannel();
// from device -> memory // from device -> memory
static TickCount TransferDeviceToMemory(Channel channel, u32 address, u32 increment, u32 word_count); template<Channel channel>
static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_count);
// from memory -> device // from memory -> device
static TickCount TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count); template<Channel channel>
static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
// configuration // configuration
static TickCount s_max_slice_ticks = 1000; static TickCount s_max_slice_ticks = 1000;
@ -187,6 +192,17 @@ static TickCount s_halt_ticks_remaining = 0;
static std::array<ChannelState, NUM_CHANNELS> s_state; static std::array<ChannelState, NUM_CHANNELS> s_state;
static DPCR s_DPCR = {}; static DPCR s_DPCR = {};
static DICR s_DICR = {}; static DICR s_DICR = {};
static constexpr std::array<bool (*)(), NUM_CHANNELS> s_channel_transfer_functions = {{
&TransferChannel<Channel::MDECin>,
&TransferChannel<Channel::MDECout>,
&TransferChannel<Channel::GPU>,
&TransferChannel<Channel::CDROM>,
&TransferChannel<Channel::SPU>,
&TransferChannel<Channel::PIO>,
&TransferChannel<Channel::OTC>,
}};
}; // namespace DMA }; // namespace DMA
u32 DMA::GetAddressMask() u32 DMA::GetAddressMask()
@ -343,7 +359,7 @@ void DMA::WriteRegister(u32 offset, u32 value)
SetRequest(static_cast<Channel>(channel_index), state.channel_control.start_trigger); SetRequest(static_cast<Channel>(channel_index), state.channel_control.start_trigger);
if (CanTransferChannel(static_cast<Channel>(channel_index), ignore_halt)) if (CanTransferChannel(static_cast<Channel>(channel_index), ignore_halt))
TransferChannel(static_cast<Channel>(channel_index)); s_channel_transfer_functions[channel_index]();
return; return;
} }
@ -364,7 +380,7 @@ void DMA::WriteRegister(u32 offset, u32 value)
{ {
if (CanTransferChannel(static_cast<Channel>(i), false)) if (CanTransferChannel(static_cast<Channel>(i), false))
{ {
if (!TransferChannel(static_cast<Channel>(i))) if (!s_channel_transfer_functions[i]())
break; break;
} }
} }
@ -397,7 +413,7 @@ void DMA::SetRequest(Channel channel, bool request)
cs.request = request; cs.request = request;
if (CanTransferChannel(channel, false)) if (CanTransferChannel(channel, false))
TransferChannel(channel); s_channel_transfer_functions[static_cast<u32>(channel)]();
} }
void DMA::SetMaxSliceTicks(TickCount ticks) void DMA::SetMaxSliceTicks(TickCount ticks)
@ -410,7 +426,7 @@ void DMA::SetHaltTicks(TickCount ticks)
s_halt_ticks = ticks; s_halt_ticks = ticks;
} }
bool DMA::CanTransferChannel(Channel channel, bool ignore_halt) ALWAYS_INLINE_RELEASE bool DMA::CanTransferChannel(Channel channel, bool ignore_halt)
{ {
if (!s_DPCR.GetMasterEnable(channel)) if (!s_DPCR.GetMasterEnable(channel))
return false; return false;
@ -468,7 +484,8 @@ TickCount DMA::GetTransferHaltTicks()
return Pad::IsTransmitting() ? HALT_TICKS_WHEN_TRANSMITTING_PAD : s_halt_ticks; return Pad::IsTransmitting() ? HALT_TICKS_WHEN_TRANSMITTING_PAD : s_halt_ticks;
} }
bool DMA::TransferChannel(Channel channel) template<DMA::Channel channel>
bool DMA::TransferChannel()
{ {
ChannelState& cs = s_state[static_cast<u32>(channel)]; ChannelState& cs = s_state[static_cast<u32>(channel)];
const u32 mask = GetAddressMask(); const u32 mask = GetAddressMask();
@ -490,9 +507,9 @@ bool DMA::TransferChannel(Channel channel)
TickCount used_ticks; TickCount used_ticks;
if (copy_to_device) if (copy_to_device)
used_ticks = TransferMemoryToDevice(channel, current_address & mask, increment, word_count); used_ticks = TransferMemoryToDevice<channel>(current_address & mask, increment, word_count);
else else
used_ticks = TransferDeviceToMemory(channel, current_address & mask, increment, word_count); used_ticks = TransferDeviceToMemory<channel>(current_address & mask, increment, word_count);
CPU::AddPendingTicks(used_ticks); CPU::AddPendingTicks(used_ticks);
} }
@ -528,7 +545,7 @@ bool DMA::TransferChannel(Channel channel)
remaining_ticks -= 5; remaining_ticks -= 5;
const TickCount block_ticks = const TickCount block_ticks =
TransferMemoryToDevice(channel, (current_address + sizeof(header)) & mask, 4, word_count); TransferMemoryToDevice<channel>((current_address + sizeof(header)) & mask, 4, word_count);
CPU::AddPendingTicks(block_ticks); CPU::AddPendingTicks(block_ticks);
remaining_ticks -= block_ticks; remaining_ticks -= block_ticks;
} }
@ -574,7 +591,7 @@ bool DMA::TransferChannel(Channel channel)
{ {
blocks_remaining--; blocks_remaining--;
const TickCount ticks = TransferMemoryToDevice(channel, current_address & mask, increment, block_size); const TickCount ticks = TransferMemoryToDevice<channel>(current_address & mask, increment, block_size);
CPU::AddPendingTicks(ticks); CPU::AddPendingTicks(ticks);
ticks_remaining -= ticks; ticks_remaining -= ticks;
@ -587,7 +604,7 @@ bool DMA::TransferChannel(Channel channel)
{ {
blocks_remaining--; blocks_remaining--;
const TickCount ticks = TransferDeviceToMemory(channel, current_address & mask, increment, block_size); const TickCount ticks = TransferDeviceToMemory<channel>(current_address & mask, increment, block_size);
CPU::AddPendingTicks(ticks); CPU::AddPendingTicks(ticks);
ticks_remaining -= ticks; ticks_remaining -= ticks;
@ -655,7 +672,7 @@ void DMA::UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late)
{ {
if (CanTransferChannel(static_cast<Channel>(i), false)) if (CanTransferChannel(static_cast<Channel>(i), false))
{ {
if (!TransferChannel(static_cast<Channel>(i))) if (!s_channel_transfer_functions[i]())
return; return;
} }
} }
@ -664,23 +681,26 @@ void DMA::UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late)
s_halt_ticks_remaining = 0; s_halt_ticks_remaining = 0;
} }
TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count) template<DMA::Channel channel>
TickCount DMA::TransferMemoryToDevice(u32 address, u32 increment, u32 word_count)
{ {
const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address); const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address);
const u32 mask = GetAddressMask(); const u32 mask = GetAddressMask();
if (channel != Channel::GPU && if constexpr (channel != Channel::GPU)
(static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address))
{ {
// Use temp buffer if it's wrapping around if (static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address)
if (s_transfer_buffer.size() < word_count)
s_transfer_buffer.resize(word_count);
src_pointer = s_transfer_buffer.data();
u8* ram_pointer = Bus::g_ram;
for (u32 i = 0; i < word_count; i++)
{ {
std::memcpy(&s_transfer_buffer[i], &ram_pointer[address], sizeof(u32)); // Use temp buffer if it's wrapping around
address = (address + increment) & mask; if (s_transfer_buffer.size() < word_count)
s_transfer_buffer.resize(word_count);
src_pointer = s_transfer_buffer.data();
u8* ram_pointer = Bus::g_ram;
for (u32 i = 0; i < word_count; i++)
{
std::memcpy(&s_transfer_buffer[i], &ram_pointer[address], sizeof(u32));
address = (address + increment) & mask;
}
} }
} }
@ -722,11 +742,12 @@ TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 incremen
return Bus::GetDMARAMTickCount(word_count); return Bus::GetDMARAMTickCount(word_count);
} }
TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 increment, u32 word_count) template<DMA::Channel channel>
TickCount DMA::TransferDeviceToMemory(u32 address, u32 increment, u32 word_count)
{ {
const u32 mask = GetAddressMask(); const u32 mask = GetAddressMask();
if (channel == Channel::OTC) if constexpr (channel == Channel::OTC)
{ {
// clear ordering table // clear ordering table
u8* ram_pointer = Bus::g_ram; u8* ram_pointer = Bus::g_ram;
@ -868,3 +889,26 @@ void DMA::DrawDebugStateWindow()
ImGui::Columns(1); ImGui::Columns(1);
ImGui::End(); ImGui::End();
} }
// Instantiate channel functions.
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::MDECin>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::MDECout>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::GPU>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::CDROM>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::SPU>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::PIO>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::OTC>();

View File

@ -248,7 +248,7 @@ void MDEC::WriteRegister(u32 offset, u32 value)
void MDEC::DMARead(u32* words, u32 word_count) void MDEC::DMARead(u32* words, u32 word_count)
{ {
if (s_data_out_fifo.GetSize() < word_count) if (s_data_out_fifo.GetSize() < word_count) [[unlikely]]
{ {
Log_WarningPrintf("Insufficient data in output FIFO (requested %u, have %u)", word_count, Log_WarningPrintf("Insufficient data in output FIFO (requested %u, have %u)", word_count,
s_data_out_fifo.GetSize()); s_data_out_fifo.GetSize());
@ -269,7 +269,7 @@ void MDEC::DMARead(u32* words, u32 word_count)
void MDEC::DMAWrite(const u32* words, u32 word_count) void MDEC::DMAWrite(const u32* words, u32 word_count)
{ {
if (s_data_in_fifo.GetSpace() < (word_count * 2)) if (s_data_in_fifo.GetSpace() < (word_count * 2)) [[unlikely]]
{ {
Log_WarningPrintf("Input FIFO overflow (writing %u, space %u)", word_count * 2, s_data_in_fifo.GetSpace()); Log_WarningPrintf("Input FIFO overflow (writing %u, space %u)", word_count * 2, s_data_in_fifo.GetSpace());
} }

View File

@ -1445,7 +1445,7 @@ void SPU::DMAWrite(const u32* words, u32 word_count)
const u32 words_to_transfer = std::min(s_transfer_fifo.GetSpace(), halfword_count); const u32 words_to_transfer = std::min(s_transfer_fifo.GetSpace(), halfword_count);
s_transfer_fifo.PushRange(halfwords, words_to_transfer); s_transfer_fifo.PushRange(halfwords, words_to_transfer);
if (words_to_transfer != halfword_count) if (words_to_transfer != halfword_count) [[unlikely]]
Log_WarningPrintf("Transfer FIFO overflow, dropping %u halfwords", halfword_count - words_to_transfer); Log_WarningPrintf("Transfer FIFO overflow, dropping %u halfwords", halfword_count - words_to_transfer);
UpdateDMARequest(); UpdateDMARequest();