DMA: Template transfer functions
~20% speedup in FMV playback on a Ryzen 9 7950X3D. CPUs hate branches.
This commit is contained in:
parent
e736998f1e
commit
f5ddd7ba32
100
src/core/dma.cpp
100
src/core/dma.cpp
|
@ -166,15 +166,20 @@ static void UpdateIRQ();
|
||||||
// returns false if the DMA should now be halted
|
// returns false if the DMA should now be halted
|
||||||
static TickCount GetTransferSliceTicks();
|
static TickCount GetTransferSliceTicks();
|
||||||
static TickCount GetTransferHaltTicks();
|
static TickCount GetTransferHaltTicks();
|
||||||
static bool TransferChannel(Channel channel);
|
|
||||||
static void HaltTransfer(TickCount duration);
|
static void HaltTransfer(TickCount duration);
|
||||||
static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late);
|
static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late);
|
||||||
|
|
||||||
|
// Runs the DMA transfer for `channel`, which is fixed at compile time.
// Returns false if the DMA should now be halted; callers that loop over channels stop on false.
template<Channel channel>
|
||||||
|
static bool TransferChannel();
|
||||||
|
|
||||||
// from device -> memory
|
// from device -> memory
|
||||||
static TickCount TransferDeviceToMemory(Channel channel, u32 address, u32 increment, u32 word_count);
|
template<Channel channel>
|
||||||
|
static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_count);
|
||||||
|
|
||||||
// from memory -> device
|
// from memory -> device
|
||||||
static TickCount TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count);
|
template<Channel channel>
|
||||||
|
static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);
|
||||||
|
|
||||||
// configuration
|
// configuration
|
||||||
static TickCount s_max_slice_ticks = 1000;
|
static TickCount s_max_slice_ticks = 1000;
|
||||||
|
@ -187,6 +192,17 @@ static TickCount s_halt_ticks_remaining = 0;
|
||||||
static std::array<ChannelState, NUM_CHANNELS> s_state;
|
static std::array<ChannelState, NUM_CHANNELS> s_state;
|
||||||
static DPCR s_DPCR = {};
|
static DPCR s_DPCR = {};
|
||||||
static DICR s_DICR = {};
|
static DICR s_DICR = {};
|
||||||
|
|
||||||
|
// Dispatch table from a runtime channel index to the matching TransferChannel<channel> instantiation.
// Callers index it with the channel number (e.g. static_cast<u32>(channel)), so the entries must stay in
// the same order as the Channel enum's numeric values.
// NOTE(review): the enum definition is not visible here -- confirm the order matches.
static constexpr std::array<bool (*)(), NUM_CHANNELS> s_channel_transfer_functions = {{
|
||||||
|
&TransferChannel<Channel::MDECin>,
|
||||||
|
&TransferChannel<Channel::MDECout>,
|
||||||
|
&TransferChannel<Channel::GPU>,
|
||||||
|
&TransferChannel<Channel::CDROM>,
|
||||||
|
&TransferChannel<Channel::SPU>,
|
||||||
|
&TransferChannel<Channel::PIO>,
|
||||||
|
&TransferChannel<Channel::OTC>,
|
||||||
|
}};
|
||||||
|
|
||||||
}; // namespace DMA
|
}; // namespace DMA
|
||||||
|
|
||||||
u32 DMA::GetAddressMask()
|
u32 DMA::GetAddressMask()
|
||||||
|
@ -343,7 +359,7 @@ void DMA::WriteRegister(u32 offset, u32 value)
|
||||||
SetRequest(static_cast<Channel>(channel_index), state.channel_control.start_trigger);
|
SetRequest(static_cast<Channel>(channel_index), state.channel_control.start_trigger);
|
||||||
|
|
||||||
if (CanTransferChannel(static_cast<Channel>(channel_index), ignore_halt))
|
if (CanTransferChannel(static_cast<Channel>(channel_index), ignore_halt))
|
||||||
TransferChannel(static_cast<Channel>(channel_index));
|
s_channel_transfer_functions[channel_index]();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -364,7 +380,7 @@ void DMA::WriteRegister(u32 offset, u32 value)
|
||||||
{
|
{
|
||||||
if (CanTransferChannel(static_cast<Channel>(i), false))
|
if (CanTransferChannel(static_cast<Channel>(i), false))
|
||||||
{
|
{
|
||||||
if (!TransferChannel(static_cast<Channel>(i)))
|
if (!s_channel_transfer_functions[i]())
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -397,7 +413,7 @@ void DMA::SetRequest(Channel channel, bool request)
|
||||||
|
|
||||||
cs.request = request;
|
cs.request = request;
|
||||||
if (CanTransferChannel(channel, false))
|
if (CanTransferChannel(channel, false))
|
||||||
TransferChannel(channel);
|
s_channel_transfer_functions[static_cast<u32>(channel)]();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DMA::SetMaxSliceTicks(TickCount ticks)
|
void DMA::SetMaxSliceTicks(TickCount ticks)
|
||||||
|
@ -410,7 +426,7 @@ void DMA::SetHaltTicks(TickCount ticks)
|
||||||
s_halt_ticks = ticks;
|
s_halt_ticks = ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DMA::CanTransferChannel(Channel channel, bool ignore_halt)
|
ALWAYS_INLINE_RELEASE bool DMA::CanTransferChannel(Channel channel, bool ignore_halt)
|
||||||
{
|
{
|
||||||
if (!s_DPCR.GetMasterEnable(channel))
|
if (!s_DPCR.GetMasterEnable(channel))
|
||||||
return false;
|
return false;
|
||||||
|
@ -468,7 +484,8 @@ TickCount DMA::GetTransferHaltTicks()
|
||||||
return Pad::IsTransmitting() ? HALT_TICKS_WHEN_TRANSMITTING_PAD : s_halt_ticks;
|
return Pad::IsTransmitting() ? HALT_TICKS_WHEN_TRANSMITTING_PAD : s_halt_ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DMA::TransferChannel(Channel channel)
|
template<DMA::Channel channel>
|
||||||
|
bool DMA::TransferChannel()
|
||||||
{
|
{
|
||||||
ChannelState& cs = s_state[static_cast<u32>(channel)];
|
ChannelState& cs = s_state[static_cast<u32>(channel)];
|
||||||
const u32 mask = GetAddressMask();
|
const u32 mask = GetAddressMask();
|
||||||
|
@ -490,9 +507,9 @@ bool DMA::TransferChannel(Channel channel)
|
||||||
|
|
||||||
TickCount used_ticks;
|
TickCount used_ticks;
|
||||||
if (copy_to_device)
|
if (copy_to_device)
|
||||||
used_ticks = TransferMemoryToDevice(channel, current_address & mask, increment, word_count);
|
used_ticks = TransferMemoryToDevice<channel>(current_address & mask, increment, word_count);
|
||||||
else
|
else
|
||||||
used_ticks = TransferDeviceToMemory(channel, current_address & mask, increment, word_count);
|
used_ticks = TransferDeviceToMemory<channel>(current_address & mask, increment, word_count);
|
||||||
|
|
||||||
CPU::AddPendingTicks(used_ticks);
|
CPU::AddPendingTicks(used_ticks);
|
||||||
}
|
}
|
||||||
|
@ -528,7 +545,7 @@ bool DMA::TransferChannel(Channel channel)
|
||||||
remaining_ticks -= 5;
|
remaining_ticks -= 5;
|
||||||
|
|
||||||
const TickCount block_ticks =
|
const TickCount block_ticks =
|
||||||
TransferMemoryToDevice(channel, (current_address + sizeof(header)) & mask, 4, word_count);
|
TransferMemoryToDevice<channel>((current_address + sizeof(header)) & mask, 4, word_count);
|
||||||
CPU::AddPendingTicks(block_ticks);
|
CPU::AddPendingTicks(block_ticks);
|
||||||
remaining_ticks -= block_ticks;
|
remaining_ticks -= block_ticks;
|
||||||
}
|
}
|
||||||
|
@ -574,7 +591,7 @@ bool DMA::TransferChannel(Channel channel)
|
||||||
{
|
{
|
||||||
blocks_remaining--;
|
blocks_remaining--;
|
||||||
|
|
||||||
const TickCount ticks = TransferMemoryToDevice(channel, current_address & mask, increment, block_size);
|
const TickCount ticks = TransferMemoryToDevice<channel>(current_address & mask, increment, block_size);
|
||||||
CPU::AddPendingTicks(ticks);
|
CPU::AddPendingTicks(ticks);
|
||||||
ticks_remaining -= ticks;
|
ticks_remaining -= ticks;
|
||||||
|
|
||||||
|
@ -587,7 +604,7 @@ bool DMA::TransferChannel(Channel channel)
|
||||||
{
|
{
|
||||||
blocks_remaining--;
|
blocks_remaining--;
|
||||||
|
|
||||||
const TickCount ticks = TransferDeviceToMemory(channel, current_address & mask, increment, block_size);
|
const TickCount ticks = TransferDeviceToMemory<channel>(current_address & mask, increment, block_size);
|
||||||
CPU::AddPendingTicks(ticks);
|
CPU::AddPendingTicks(ticks);
|
||||||
ticks_remaining -= ticks;
|
ticks_remaining -= ticks;
|
||||||
|
|
||||||
|
@ -655,7 +672,7 @@ void DMA::UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late)
|
||||||
{
|
{
|
||||||
if (CanTransferChannel(static_cast<Channel>(i), false))
|
if (CanTransferChannel(static_cast<Channel>(i), false))
|
||||||
{
|
{
|
||||||
if (!TransferChannel(static_cast<Channel>(i)))
|
if (!s_channel_transfer_functions[i]())
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -664,23 +681,26 @@ void DMA::UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late)
|
||||||
s_halt_ticks_remaining = 0;
|
s_halt_ticks_remaining = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 increment, u32 word_count)
|
template<DMA::Channel channel>
|
||||||
|
TickCount DMA::TransferMemoryToDevice(u32 address, u32 increment, u32 word_count)
|
||||||
{
|
{
|
||||||
const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address);
|
const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address);
|
||||||
const u32 mask = GetAddressMask();
|
const u32 mask = GetAddressMask();
|
||||||
if (channel != Channel::GPU &&
|
if constexpr (channel != Channel::GPU)
|
||||||
(static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address))
|
|
||||||
{
|
{
|
||||||
// Use temp buffer if it's wrapping around
|
if (static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address)
|
||||||
if (s_transfer_buffer.size() < word_count)
|
|
||||||
s_transfer_buffer.resize(word_count);
|
|
||||||
src_pointer = s_transfer_buffer.data();
|
|
||||||
|
|
||||||
u8* ram_pointer = Bus::g_ram;
|
|
||||||
for (u32 i = 0; i < word_count; i++)
|
|
||||||
{
|
{
|
||||||
std::memcpy(&s_transfer_buffer[i], &ram_pointer[address], sizeof(u32));
|
// Use temp buffer if it's wrapping around
|
||||||
address = (address + increment) & mask;
|
if (s_transfer_buffer.size() < word_count)
|
||||||
|
s_transfer_buffer.resize(word_count);
|
||||||
|
src_pointer = s_transfer_buffer.data();
|
||||||
|
|
||||||
|
u8* ram_pointer = Bus::g_ram;
|
||||||
|
for (u32 i = 0; i < word_count; i++)
|
||||||
|
{
|
||||||
|
std::memcpy(&s_transfer_buffer[i], &ram_pointer[address], sizeof(u32));
|
||||||
|
address = (address + increment) & mask;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -722,11 +742,12 @@ TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 incremen
|
||||||
return Bus::GetDMARAMTickCount(word_count);
|
return Bus::GetDMARAMTickCount(word_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 increment, u32 word_count)
|
template<DMA::Channel channel>
|
||||||
|
TickCount DMA::TransferDeviceToMemory(u32 address, u32 increment, u32 word_count)
|
||||||
{
|
{
|
||||||
const u32 mask = GetAddressMask();
|
const u32 mask = GetAddressMask();
|
||||||
|
|
||||||
if (channel == Channel::OTC)
|
if constexpr (channel == Channel::OTC)
|
||||||
{
|
{
|
||||||
// clear ordering table
|
// clear ordering table
|
||||||
u8* ram_pointer = Bus::g_ram;
|
u8* ram_pointer = Bus::g_ram;
|
||||||
|
@ -868,3 +889,26 @@ void DMA::DrawDebugStateWindow()
|
||||||
ImGui::Columns(1);
|
ImGui::Columns(1);
|
||||||
ImGui::End();
|
ImGui::End();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Explicitly instantiate TransferDeviceToMemory, TransferMemoryToDevice and TransferChannel for every DMA channel.
|
||||||
|
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template bool DMA::TransferChannel<DMA::Channel::MDECin>();
|
||||||
|
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template bool DMA::TransferChannel<DMA::Channel::MDECout>();
|
||||||
|
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template bool DMA::TransferChannel<DMA::Channel::GPU>();
|
||||||
|
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template bool DMA::TransferChannel<DMA::Channel::CDROM>();
|
||||||
|
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template bool DMA::TransferChannel<DMA::Channel::SPU>();
|
||||||
|
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template bool DMA::TransferChannel<DMA::Channel::PIO>();
|
||||||
|
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count);
|
||||||
|
template bool DMA::TransferChannel<DMA::Channel::OTC>();
|
||||||
|
|
|
@ -248,7 +248,7 @@ void MDEC::WriteRegister(u32 offset, u32 value)
|
||||||
|
|
||||||
void MDEC::DMARead(u32* words, u32 word_count)
|
void MDEC::DMARead(u32* words, u32 word_count)
|
||||||
{
|
{
|
||||||
if (s_data_out_fifo.GetSize() < word_count)
|
if (s_data_out_fifo.GetSize() < word_count) [[unlikely]]
|
||||||
{
|
{
|
||||||
Log_WarningPrintf("Insufficient data in output FIFO (requested %u, have %u)", word_count,
|
Log_WarningPrintf("Insufficient data in output FIFO (requested %u, have %u)", word_count,
|
||||||
s_data_out_fifo.GetSize());
|
s_data_out_fifo.GetSize());
|
||||||
|
@ -269,7 +269,7 @@ void MDEC::DMARead(u32* words, u32 word_count)
|
||||||
|
|
||||||
void MDEC::DMAWrite(const u32* words, u32 word_count)
|
void MDEC::DMAWrite(const u32* words, u32 word_count)
|
||||||
{
|
{
|
||||||
if (s_data_in_fifo.GetSpace() < (word_count * 2))
|
if (s_data_in_fifo.GetSpace() < (word_count * 2)) [[unlikely]]
|
||||||
{
|
{
|
||||||
Log_WarningPrintf("Input FIFO overflow (writing %u, space %u)", word_count * 2, s_data_in_fifo.GetSpace());
|
Log_WarningPrintf("Input FIFO overflow (writing %u, space %u)", word_count * 2, s_data_in_fifo.GetSpace());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1445,7 +1445,7 @@ void SPU::DMAWrite(const u32* words, u32 word_count)
|
||||||
const u32 words_to_transfer = std::min(s_transfer_fifo.GetSpace(), halfword_count);
|
const u32 words_to_transfer = std::min(s_transfer_fifo.GetSpace(), halfword_count);
|
||||||
s_transfer_fifo.PushRange(halfwords, words_to_transfer);
|
s_transfer_fifo.PushRange(halfwords, words_to_transfer);
|
||||||
|
|
||||||
if (words_to_transfer != halfword_count)
|
if (words_to_transfer != halfword_count) [[unlikely]]
|
||||||
Log_WarningPrintf("Transfer FIFO overflow, dropping %u halfwords", halfword_count - words_to_transfer);
|
Log_WarningPrintf("Transfer FIFO overflow, dropping %u halfwords", halfword_count - words_to_transfer);
|
||||||
|
|
||||||
UpdateDMARequest();
|
UpdateDMARequest();
|
||||||
|
|
Loading…
Reference in New Issue