DMA: Halt linked list transfers during pad transfer

Plenty of games seem to suffer from this issue where they have a linked list
DMA going while polling the controller. Having a large slice size causes the
serial transfer to complete before the silly busy wait in the BIOS poll routine
returns, resulting in it thinking that the controller is disconnected. Some
games are very sensitive to this (e.g. Newman Haas Racing), to the point that
even using a slice size of 1 is insufficient for avoiding the race, probably
due to the linked list layout.

Therefore, without major refactoring to ensure the CPU runs every DMA block,
and the associated performance penalty, we just halt the DMA until the serial
transfers have completed. To reduce the chances of this significantly affecting
timing, we add accumulate the ticks that have been "lost", and allow them to be
"used up" when the transfer does happen.
This commit is contained in:
Stenzek 2024-04-07 18:52:30 +10:00
parent 8bdeb8bdac
commit 8a1b4d8bee
No known key found for this signature in database
1 changed files with 34 additions and 38 deletions

View File

@ -45,6 +45,10 @@ static constexpr PhysicalMemoryAddress BASE_ADDRESS_MASK = UINT32_C(0x00FFFFFF);
static constexpr PhysicalMemoryAddress TRANSFER_ADDRESS_MASK = UINT32_C(0x00FFFFFC);
static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFFFFF);
static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
static constexpr TickCount HALT_TICKS_WHEN_TRANSMITTING_PAD = 100;
struct ChannelState
{
u32 base_address = 0;
@ -172,10 +176,6 @@ static bool CanTransferChannel(Channel channel, bool ignore_halt);
static bool IsTransferHalted();
static void UpdateIRQ();
// returns false if the DMA should now be halted
static TickCount GetTransferSliceTicks();
static TickCount GetTransferHaltTicks();
static void HaltTransfer(TickCount duration);
static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late);
@ -543,36 +543,6 @@ ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState&
}
}
// Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
// controller. Using a too-large slice size will result in the serial timing being off, and the game thinking
// the controller is disconnected. So we don't hurt performance too much for the general case, we reduce this
// to equal CPU and DMA time when the controller is transferring, but otherwise leave it at the higher size.
enum : TickCount
{
SLICE_SIZE_WHEN_TRANSMITTING_PAD = 100,
HALT_TICKS_WHEN_TRANSMITTING_PAD = 100,
LINKED_LIST_HEADER_READ_TICKS = 10,
LINKED_LIST_BLOCK_SETUP_TICKS = 5,
};
TickCount DMA::GetTransferSliceTicks()
{
#ifdef _DEBUG
if (Pad::IsTransmitting())
{
Log_DebugPrintf("DMA transfer while transmitting pad - using lower slice size of %u vs %u",
SLICE_SIZE_WHEN_TRANSMITTING_PAD, s_max_slice_ticks);
}
#endif
return Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : s_max_slice_ticks;
}
TickCount DMA::GetTransferHaltTicks()
{
return Pad::IsTransmitting() ? HALT_TICKS_WHEN_TRANSMITTING_PAD : s_halt_ticks;
}
template<DMA::Channel channel>
bool DMA::TransferChannel()
{
@ -616,13 +586,36 @@ bool DMA::TransferChannel()
return true;
}
if constexpr (channel == Channel::GPU)
{
// Plenty of games seem to suffer from this issue where they have a linked list DMA going while polling the
// controller. Having a large slice size causes the serial transfer to complete before the silly busy wait
// in the BIOS poll routine returns, resulting in it thinking that the controller is disconnected. Some games
// are very sensitive to this (e.g. Newman Haas Racing), to the point that even using a slice size of 1 is
// insufficient for avoiding the race, probably due to the linked list layout.
//
// Therefore, without major refactoring to ensure the CPU runs every DMA block, and the associated performance
// penalty, we just halt the DMA until the serial transfers have completed. To reduce the chances of this
// significantly affecting timing, we add accumulate the ticks that have been "lost", and allow them to be
// "used up" when the transfer does happen.
//
if (Pad::IsTransmitting())
{
Log_DebugFmt("DMA transfer while transmitting pad - {} ticks are buffered", -s_halt_ticks_remaining);
if (!s_unhalt_event->IsActive())
s_unhalt_event->SetIntervalAndSchedule(HALT_TICKS_WHEN_TRANSMITTING_PAD);
return false;
}
}
Log_DebugFmt("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);
// Prove to the compiler that nothing's going to modify these.
const u8* const ram_ptr = Bus::g_ram;
const u32 mask = Bus::g_ram_mask;
TickCount remaining_ticks = GetTransferSliceTicks();
const TickCount slice_ticks = s_max_slice_ticks + -s_halt_ticks_remaining;
TickCount remaining_ticks = slice_ticks;
while (cs.request && remaining_ticks > 0)
{
u32 header;
@ -665,8 +658,11 @@ bool DMA::TransferChannel()
cs.base_address = current_address;
if (cs.request)
{
// don't actually delay the transfer for the buffered ticks, this variable is dual-purposed.
s_halt_ticks_remaining = std::max(s_halt_ticks_remaining, 0);
// stall the transfer for a bit if we ran for too long
HaltTransfer(GetTransferHaltTicks());
HaltTransfer(s_halt_ticks);
return false;
}
else
@ -685,7 +681,7 @@ bool DMA::TransferChannel()
const u32 block_size = cs.block_control.request.GetBlockSize();
u32 blocks_remaining = cs.block_control.request.GetBlockCount();
TickCount ticks_remaining = GetTransferSliceTicks();
TickCount ticks_remaining = s_max_slice_ticks;
if (copy_to_device)
{
@ -740,7 +736,7 @@ bool DMA::TransferChannel()
{
// we got halted
if (!s_unhalt_event->IsActive())
HaltTransfer(GetTransferHaltTicks());
HaltTransfer(s_halt_ticks);
return false;
}