DC fix/hack: 1) now a GP-watchdog thread on core 2 locks CPU in gatherpipe (TODO better). 2) Video_SendFifoData send full fifo to GP (should be faster by avoiding the decoder to stall).
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1178 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
dc123aeebf
commit
ab1774b28a
|
@ -26,22 +26,32 @@
|
|||
// - Animal Crossing: PEfinish at start but there's a bug...
|
||||
// There's tons of HiWmk/LoWmk ping pong -> Another sync fashion?
|
||||
|
||||
// *What I guess (thx to asynchronous DualCore mode):
|
||||
// PPC have a frame-finish watchdog. Handled by system timming stuff like the decrementer.
|
||||
// (DualCore mode): I have observed, after ZTP logos, a fifo-recovery start when DECREMENTER_EXCEPTION is throwned.
|
||||
// The frame setting (by GP) took too much time and didn't finish properly due to this watchdog.
|
||||
// Faster GX plugins required, indeed :p
|
||||
|
||||
// * BPs are needed for some game GP/CPU sync.
|
||||
// But it could slowdown (MP1 at least) because our GP in DC is faster than "expected" in some area.
|
||||
// eg: in movie-menus in MP1, BP are reached quickly.
|
||||
// The bad thing is that involve too much PPC work (int ack, lock GP, reset BP, new BP addr, unlock BP...) hence the slowdown.
|
||||
// Anyway, emulation is more accurate like this and it emulate some sort of better load balancing.
|
||||
// Anyway, emulation should more accurate like this and it emulate some sort of better load balancing.
|
||||
// Eather way in those area a more accurate GP timing could be done by slowing down the GP or something less stupid.
|
||||
// Not functional and not used atm (breaks MP2).
|
||||
|
||||
// * funny, in revs before those with this note, BP irq wasn't cleared (a bug indeed) and MP1 menus was faster.
|
||||
// BP irq was raised and ack just once but never cleared. However it's sufficient for MP1 to work.
|
||||
// I see a possible ugly hack there :p
|
||||
// This hack is used atm. Known BPs handling doesn't work well (btw, BP irq clearing might be done by CPIntEnable raising edge).
|
||||
// The hack seems to be responsible of the movie stutering in MP1 menus.
|
||||
|
||||
// TODO (mb2):
|
||||
// * raise watermark Ov/Un irq: DONE but not commited because useless(?) and it slowdown DC.
|
||||
// TODO: commit #ifdef'ed?... if I'm begged :p
|
||||
// * raise ReadIdle/CmdIdle irq and observe behaviour of MP1 & ZTP (at least)
|
||||
// * find who command a complete fifo flush (could be some sort of timeout + wait for ReadIdle irq)
|
||||
// * raise watermark Ov/Un irq: POINTLESS since emulated GP timings can't be accuratly set.
|
||||
// Only 3 choises IMHO for a correct emulated load balancing in DC mode:
|
||||
// - make our own GP watchdog hack that can lock CPU if GP too slow.
|
||||
// - hack directly something in PPC timings (dunno how)
|
||||
// - boost GP so we can consider it as infinitly fast compared to CPU.
|
||||
// * raise ReadIdle/CmdIdle flags and observe behaviour of MP1 & ZTP (at least)
|
||||
// * investigate VI and PI for fixing the DC ZTP bug.
|
||||
// * Clean useless comments and debug stuff in Read16, Write16, GatherPipeBursted when sync will be fixed for DC
|
||||
// * (reminder) do the same in:
|
||||
|
@ -160,6 +170,16 @@ inline u16 ReadHigh (u32 _reg) {return (u16)(_reg >> 16);}
|
|||
|
||||
int et_UpdateInterrupts;
|
||||
|
||||
// for GP watchdog hack
|
||||
void IncrementGPWDToken()
|
||||
{
|
||||
#ifdef WIN32
|
||||
InterlockedIncrement((LONG*)&fifo.Fake_GPWDToken);
|
||||
#else
|
||||
//TODO
|
||||
#endif
|
||||
}
|
||||
|
||||
void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
|
||||
{
|
||||
UpdateInterrupts();
|
||||
|
@ -180,17 +200,14 @@ void Init()
|
|||
|
||||
m_tokenReg = 0;
|
||||
|
||||
fifo.bFF_GPReadEnable = FALSE;
|
||||
fifo.bFF_GPLinkEnable = FALSE;
|
||||
fifo.bFF_BPEnable = FALSE;
|
||||
|
||||
fifo.bFF_GPReadEnable = FALSE;
|
||||
fifo.bFF_GPLinkEnable = FALSE;
|
||||
fifo.bFF_BPEnable = FALSE;
|
||||
// for GP watchdog hack
|
||||
fifo.Fake_GPWDInterrupt = FALSE;
|
||||
fifo.Fake_GPWDToken = 0;
|
||||
et_UpdateInterrupts = CoreTiming::RegisterEvent("UpdateInterrupts", UpdateInterrupts_Wrapper);
|
||||
|
||||
#ifdef _WIN32
|
||||
//InitializeCriticalSection(&fifo.sync);
|
||||
#else
|
||||
// fifo.sync = new Common::CriticalSection(0);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Shutdown()
|
||||
|
@ -250,6 +267,9 @@ void Read16(u16& _rReturnValue, const u32 _Address)
|
|||
case FIFO_BP_LO: _rReturnValue = ReadLow (fifo.CPBreakpoint); return;
|
||||
case FIFO_BP_HI: _rReturnValue = ReadHigh(fifo.CPBreakpoint); return;
|
||||
|
||||
// case 0x42: // first metric reg (I guess) read in case of "fifo unknown state"
|
||||
// Crash();
|
||||
// return;
|
||||
|
||||
// case 0x64:
|
||||
// return 4; //Number of clocks per vertex.. todo: calculate properly
|
||||
|
@ -313,11 +333,6 @@ void Write16(const u16 _Value, const u32 _Address)
|
|||
ct++;
|
||||
if (ct) {LOG(COMMANDPROCESSOR, "(Write16): %lu cycles for nothing :[ ", ct);}
|
||||
}
|
||||
#ifdef _WIN32
|
||||
//EnterCriticalSection(&fifo.sync);
|
||||
#else
|
||||
// fifo.sync->Enter();
|
||||
#endif
|
||||
}
|
||||
|
||||
switch (_Address & 0xFFF)
|
||||
|
@ -451,30 +466,18 @@ void Write16(const u16 _Value, const u32 _Address)
|
|||
case FIFO_RW_DISTANCE_LO:
|
||||
LOG(COMMANDPROCESSOR,"try to write to %s : %04x",((_Address & 0xFFF) == FIFO_RW_DISTANCE_HI) ? "FIFO_RW_DISTANCE_HI" : "FIFO_RW_DISTANCE_LO", _Value);
|
||||
_dbg_assert_msg_(COMMANDPROCESSOR, _Value==0, "WTF? attempt to overwrite fifo CPReadWriteDistance with a value(%04x) != 0 ",_Value);
|
||||
// TODO: break this hack
|
||||
// hack: if fifo is not empty and readable then spin until flushed
|
||||
// sleeping here is not a perf issue since GXSetFifo shouldn't be called too much often.
|
||||
while((fifo.CPReadWriteDistance>0) && (fifo.bFF_GPReadEnable==1))
|
||||
Common::SleepCurrentThread(1);
|
||||
_dbg_assert_msg_(COMMANDPROCESSOR, !(fifo.CPReadWriteDistance!=0 && fifo.bFF_GPReadEnable==0), "Is that bad? WARNING: reset a not empty and not readable fifo.\n CPReadWriteDistance: %08x | fifo.bFF_GPReadEnable: %i",fifo.CPReadWriteDistance,fifo.bFF_GPReadEnable);
|
||||
|
||||
// This SHOULD NOT be a problem to overwrite here unless this msg appears:
|
||||
_dbg_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance==0, "Is that bad? WARNING: reset a non empty fifo.\n CPReadWriteDistance: %08x | fifo.bFF_GPReadEnable: %i",fifo.CPReadWriteDistance,fifo.bFF_GPReadEnable);
|
||||
//_dbg_assert_msg_(COMMANDPROCESSOR, !(fifo.CPReadWriteDistance!=0 && fifo.bFF_GPReadEnable==0), "Is that bad? WARNING: reset a not empty and not readable fifo.\n CPReadWriteDistance: %08x | fifo.bFF_GPReadEnable: %i",fifo.CPReadWriteDistance,fifo.bFF_GPReadEnable);
|
||||
|
||||
#ifdef _WIN32
|
||||
InterlockedExchange((LONG*)&fifo.CPReadWriteDistance, 0);
|
||||
//InterlockedExchange((LONG*)&fifo.CPReadWriteDistance, 0);
|
||||
#else
|
||||
Common::InterlockedExchange((int*)&fifo.CPReadWriteDistance, 0);
|
||||
//Common::InterlockedExchange((int*)&fifo.CPReadWriteDistance, 0);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
// (mb2) very dangerous to UpdateFifoRegister() that way in DC when there no criticalsections anymore
|
||||
// ... completely useless btw :p
|
||||
// update the registers and run the fifo
|
||||
//UpdateFifoRegister();
|
||||
//if (Core::g_CoreStartupParameter.bUseDualCore)
|
||||
#ifdef _WIN32
|
||||
//LeaveCriticalSection(&fifo.sync);
|
||||
#else
|
||||
// fifo.sync->Leave();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Read32(u32& _rReturnValue, const u32 _Address)
|
||||
|
@ -496,6 +499,17 @@ void GatherPipeBursted()
|
|||
|
||||
if (Core::g_CoreStartupParameter.bUseDualCore)
|
||||
{
|
||||
// for GP watchdog hack
|
||||
if (fifo.Fake_GPWDInterrupt)
|
||||
{
|
||||
// Wait for GPU to catch up
|
||||
while (fifo.CPReadWriteDistance > 0) // TOCHECK: more checks could be needed
|
||||
;
|
||||
InterlockedExchange((LONG*)&fifo.Fake_GPWDInterrupt, 0);
|
||||
LOGV(COMMANDPROCESSOR, 2, "!!! Fake_GPWDInterrupt raised. Fake_GPWDToken %i",fifo.Fake_GPWDToken);
|
||||
}
|
||||
|
||||
|
||||
// update the fifo-pointer
|
||||
fifo.CPWritePointer += GPFifo::GATHER_PIPE_SIZE;
|
||||
if (fifo.CPWritePointer >= fifo.CPEnd)
|
||||
|
@ -506,25 +520,13 @@ void GatherPipeBursted()
|
|||
Common::InterlockedExchangeAdd((int*)&fifo.CPReadWriteDistance, GPFifo::GATHER_PIPE_SIZE);
|
||||
#endif
|
||||
|
||||
// Wait for GPU to catch up
|
||||
#if 0
|
||||
// High watermark overflow handling: hacked way
|
||||
u32 ct=0;
|
||||
while (!(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) && fifo.CPReadWriteDistance > (s32)fifo.CPHiWatermark)
|
||||
{
|
||||
ct++;
|
||||
// dunno if others threads (like the audio thread) really need a forced context switch here
|
||||
//Common::SleepCurrentThread(1);
|
||||
}
|
||||
if (ct) {LOG(COMMANDPROCESSOR, "(GatherPipeBursted): %lu cycles for nothing :[ ", ct);}
|
||||
#else
|
||||
// High watermark overflow handling: less hacked way
|
||||
// High watermark overflow handling (hacked way)
|
||||
u32 ct=0;
|
||||
if (fifo.CPReadWriteDistance > (s32)fifo.CPHiWatermark)
|
||||
{
|
||||
// we should raise an Ov interrupt for an accurate fifo emulation and let PPC deal with it.
|
||||
// But it slowdown things because of if(interrupt blah blah){} blocks for each 32B fifo transactions.
|
||||
// CPU would be a bit more loaded too by interrupt handling...
|
||||
// CPU would be a bit more loaded too by its interrupt handling...
|
||||
// Eather way, CPU would have the ability to resume another thread.
|
||||
// To be clear: this spin loop is like a critical section spin loop in the emulated GX thread hence "hacked way"
|
||||
|
||||
|
@ -538,6 +540,7 @@ void GatherPipeBursted()
|
|||
// - CPU can write to fifo
|
||||
// - disable Underflow interrupt
|
||||
|
||||
// Wait for GPU to catch up
|
||||
//while (!(fifo.bFF_BPEnable && fifo.bFF_Breakpoint) && fifo.CPReadWriteDistance > (s32)fifo.CPLoWatermark)
|
||||
while (fifo.CPReadWriteDistance > (s32)fifo.CPLoWatermark)
|
||||
{
|
||||
|
@ -545,21 +548,13 @@ void GatherPipeBursted()
|
|||
// dunno if others threads (like the audio thread) really need a forced context switch here
|
||||
//Common::SleepCurrentThread(1);
|
||||
}
|
||||
if (ct) {LOG(COMMANDPROCESSOR, "(GatherPipeBursted): %lu cycles for nothing :[ ", ct);}
|
||||
if (ct) {LOG(COMMANDPROCESSOR, "(GatherPipeBursted): %lu cycles for nothing :[", ct);}
|
||||
/**/
|
||||
}
|
||||
#endif
|
||||
// check if we are in sync
|
||||
_assert_msg_(COMMANDPROCESSOR, fifo.CPWritePointer == CPeripheralInterface::Fifo_CPUWritePointer, "FIFOs linked but out of sync");
|
||||
_assert_msg_(COMMANDPROCESSOR, fifo.CPBase == CPeripheralInterface::Fifo_CPUBase, "FIFOs linked but out of sync");
|
||||
_assert_msg_(COMMANDPROCESSOR, fifo.CPEnd == CPeripheralInterface::Fifo_CPUEnd, "FIFOs linked but out of sync");
|
||||
|
||||
// no not from CPU. Only GP can throw BP irq
|
||||
/*if (fifo.bFF_Breakpoint)
|
||||
{
|
||||
m_CPStatusReg.Breakpoint = 1;
|
||||
UpdateInterrupts();
|
||||
}/**/
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -606,7 +601,7 @@ void CatchUpGPU()
|
|||
// We are going to do FP math on the main thread so have to save the current state
|
||||
SaveSSEState();
|
||||
LoadDefaultSSEState();
|
||||
PluginVideo::Video_SendFifoData(ptr);
|
||||
PluginVideo::Video_SendFifoData(ptr,32);
|
||||
LoadSSEState();
|
||||
|
||||
fifo.CPReadWriteDistance -= 32;
|
||||
|
|
|
@ -79,6 +79,9 @@ void UpdateInterruptsFromVideoPlugin();
|
|||
|
||||
bool AllowIdleSkipping();
|
||||
|
||||
// for GP watchdog hack
|
||||
void IncrementGPWDToken();
|
||||
|
||||
} // end of namespace CommandProcessor
|
||||
|
||||
#endif
|
||||
|
|
|
@ -177,6 +177,7 @@ void SetFinish_OnMainThread(u64 userdata, int cyclesLate)
|
|||
{
|
||||
g_bSignalFinishInterrupt = 1;
|
||||
UpdateInterrupts();
|
||||
CommandProcessor::IncrementGPWDToken();
|
||||
}
|
||||
|
||||
// SetToken
|
||||
|
|
|
@ -38,7 +38,7 @@ typedef void (__cdecl* TDllDebugger)(HWND);
|
|||
typedef void (__cdecl* TVideo_Initialize)(SVideoInitialize*);
|
||||
typedef void (__cdecl* TVideo_Prepare)();
|
||||
typedef void (__cdecl* TVideo_Shutdown)();
|
||||
typedef void (__cdecl* TVideo_SendFifoData)(u8*);
|
||||
typedef void (__cdecl* TVideo_SendFifoData)(u8*,u32);
|
||||
typedef void (__cdecl* TVideo_UpdateXFB)(u8*, u32, u32, s32);
|
||||
typedef BOOL (__cdecl* TVideo_Screenshot)(TCHAR*);
|
||||
typedef void (__cdecl* TVideo_EnterLoop)();
|
||||
|
|
|
@ -67,12 +67,9 @@ u8* FAKE_GetFifoEndPtr()
|
|||
return &videoBuffer[size];
|
||||
}
|
||||
|
||||
void Video_SendFifoData(u8* _uData)
|
||||
void Video_SendFifoData(u8* _uData, u32 len)
|
||||
{
|
||||
// TODO (mb2): unrolled loop faster than memcpy here?
|
||||
memcpy(videoBuffer + size, _uData, 32);
|
||||
size += 32;
|
||||
if (size + 32 >= FIFO_SIZE)
|
||||
if (size + len >= FIFO_SIZE)
|
||||
{
|
||||
int pos = (int)(g_pVideoData-videoBuffer);
|
||||
if (size-pos > pos)
|
||||
|
@ -83,12 +80,38 @@ void Video_SendFifoData(u8* _uData)
|
|||
size -= pos;
|
||||
g_pVideoData = FAKE_GetFifoStartPtr();
|
||||
}
|
||||
memcpy(videoBuffer + size, _uData, len);
|
||||
size += len;
|
||||
OpcodeDecoder_Run();
|
||||
}
|
||||
|
||||
// for GP watchdog hack
|
||||
THREAD_RETURN GPWatchdogThread(void *pArg)
|
||||
{
|
||||
const SCPFifoStruct &_fifo = *(SCPFifoStruct*)pArg;
|
||||
u32 token = 0;
|
||||
|
||||
Common::SetCurrentThreadName("GPWatchdogThread");
|
||||
Common::Thread::SetCurrentThreadAffinity(2); //Force to second core
|
||||
//int i=30*1000/16;
|
||||
while (1)
|
||||
{
|
||||
Common::SleepCurrentThread(8); // about 1s/60/2 (less than twice a frame should be enough)
|
||||
//if (!_fifo.bFF_GPReadIdle)
|
||||
|
||||
// TODO (mb2): FIX this !!!
|
||||
//if (token == _fifo.Fake_GPWDToken)
|
||||
{
|
||||
InterlockedExchange((LONG*)&_fifo.Fake_GPWDInterrupt, 1);
|
||||
//__Log(LogTypes::VIDEO,"!!! Watchdog hit",_fifo.CPReadWriteDistance);
|
||||
}
|
||||
token = _fifo.Fake_GPWDToken;
|
||||
//i--;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//TODO - turn inside out, have the "reader" ask for bytes instead
|
||||
// See Core.cpp for threading idea
|
||||
void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
|
||||
{
|
||||
SCPFifoStruct &_fifo = *video_initialize.pCPFifo;
|
||||
|
@ -97,6 +120,10 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
|
|||
if (hEventOnIdle==NULL) PanicAlert("Fifo_EnterLoop() -> EventOnIdle NULL");
|
||||
#endif
|
||||
|
||||
// for GP watchdog hack
|
||||
Common::Thread *watchdogThread = NULL;
|
||||
watchdogThread = new Common::Thread(GPWatchdogThread, (void*)&_fifo);
|
||||
|
||||
#ifdef _WIN32
|
||||
// TODO(ector): Don't peek so often!
|
||||
while (video_initialize.pPeekMessages())
|
||||
|
@ -123,7 +150,7 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
|
|||
#if defined(THREAD_VIDEO_WAKEUP_ONIDLE) && defined(_WIN32)
|
||||
while(_fifo.CPReadWriteDistance > 0)
|
||||
#else
|
||||
while(_fifo.bFF_GPReadEnable && (_fifo.CPReadWriteDistance > 0) )// && count)
|
||||
while(_fifo.bFF_GPReadEnable && (_fifo.CPReadWriteDistance > 0) )
|
||||
#endif
|
||||
{
|
||||
// check if we are on a breakpoint
|
||||
|
@ -144,28 +171,27 @@ void Fifo_EnterLoop(const SVideoInitialize &video_initialize)
|
|||
|
||||
// read the data and send it to the VideoPlugin
|
||||
u8 *uData = video_initialize.pGetMemoryPointer(_fifo.CPReadPointer);
|
||||
#ifdef _WIN32
|
||||
//EnterCriticalSection(&_fifo.sync);
|
||||
#else
|
||||
// _fifo.sync->Enter();
|
||||
#endif
|
||||
Video_SendFifoData(uData);
|
||||
// increase the ReadPtr
|
||||
u32 readPtr = _fifo.CPReadPointer+32;
|
||||
if (readPtr >= _fifo.CPEnd)
|
||||
readPtr = _fifo.CPBase;
|
||||
|
||||
u32 dist = _fifo.CPReadWriteDistance;
|
||||
u32 readPtr = _fifo.CPReadPointer;
|
||||
if ( (dist+readPtr) > _fifo.CPEnd) // TODO: better
|
||||
{
|
||||
dist =_fifo.CPEnd - readPtr;
|
||||
readPtr = _fifo.CPBase;
|
||||
}
|
||||
else
|
||||
readPtr += dist;
|
||||
|
||||
Video_SendFifoData(uData, dist);
|
||||
#ifdef _WIN32
|
||||
InterlockedExchange((LONG*)&_fifo.CPReadPointer, readPtr);
|
||||
InterlockedExchangeAdd((LONG*)&_fifo.CPReadWriteDistance, -32);
|
||||
//LeaveCriticalSection(&_fifo.sync);
|
||||
InterlockedExchangeAdd((LONG*)&_fifo.CPReadWriteDistance, -dist);
|
||||
#else
|
||||
Common::InterlockedExchange((int*)&_fifo.CPReadPointer, readPtr);
|
||||
Common::InterlockedExchangeAdd((int*)&_fifo.CPReadWriteDistance, -32);
|
||||
// _fifo.sync->Leave();
|
||||
Common::InterlockedExchangeAdd((int*)&_fifo.CPReadWriteDistance, -dist);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#if defined(THREAD_VIDEO_WAKEUP_ONIDLE) && defined(_WIN32)
|
||||
CloseHandle(hEventOnIdle);
|
||||
|
|
|
@ -39,11 +39,10 @@ typedef struct
|
|||
volatile BOOL bFF_BPEnable;
|
||||
volatile BOOL bFF_GPLinkEnable;
|
||||
volatile BOOL bFF_Breakpoint;
|
||||
#ifdef _WIN32
|
||||
// CRITICAL_SECTION sync;
|
||||
#else
|
||||
Common::CriticalSection *sync;
|
||||
#endif
|
||||
|
||||
// for GP watchdog hack
|
||||
volatile BOOL Fake_GPWDInterrupt;
|
||||
volatile u32 Fake_GPWDToken; // cicular incrementer
|
||||
} SCPFifoStruct;
|
||||
|
||||
typedef struct
|
||||
|
@ -137,7 +136,7 @@ EXPORT void CALL Video_Shutdown(void);
|
|||
// input: a data-byte (i know we have to optimize this ;-))
|
||||
// output: none
|
||||
//
|
||||
EXPORT void CALL Video_SendFifoData(u8* _uData);
|
||||
EXPORT void CALL Video_SendFifoData(u8* _uData, u32 len);
|
||||
|
||||
// __________________________________________________________________________________________________
|
||||
// Function: Video_UpdateXFB
|
||||
|
|
Loading…
Reference in New Issue