Merge pull request #885 from comex/gpu-determinism
GPU determinism (apparently it is ready for merge)
This commit is contained in:
commit
fbabc03b3f
|
@ -55,10 +55,24 @@ struct ConfigCache
|
||||||
unsigned int framelimit, frameSkip;
|
unsigned int framelimit, frameSkip;
|
||||||
TEXIDevices m_EXIDevice[MAX_EXI_CHANNELS];
|
TEXIDevices m_EXIDevice[MAX_EXI_CHANNELS];
|
||||||
std::string strBackend, sBackend;
|
std::string strBackend, sBackend;
|
||||||
|
std::string m_strGPUDeterminismMode;
|
||||||
bool bSetFramelimit, bSetEXIDevice[MAX_EXI_CHANNELS], bSetVolume, bSetPads[MAX_SI_CHANNELS], bSetWiimoteSource[MAX_BBMOTES], bSetFrameSkip;
|
bool bSetFramelimit, bSetEXIDevice[MAX_EXI_CHANNELS], bSetVolume, bSetPads[MAX_SI_CHANNELS], bSetWiimoteSource[MAX_BBMOTES], bSetFrameSkip;
|
||||||
};
|
};
|
||||||
static ConfigCache config_cache;
|
static ConfigCache config_cache;
|
||||||
|
|
||||||
|
// Converts the textual GPU determinism setting into its GPUDeterminismMode
// enumerator.
//
// mode: setting string; compared exactly (case-sensitive) against "auto",
//       "none" and "fake-completion".
// Returns the matching enumerator. Any other string is logged and treated
// as GPU_DETERMINISM_AUTO.
static GPUDeterminismMode ParseGPUDeterminismMode(const std::string& mode)
|
||||||
|
{
|
||||||
|
if (mode == "auto")
|
||||||
|
return GPU_DETERMINISM_AUTO;
|
||||||
|
if (mode == "none")
|
||||||
|
return GPU_DETERMINISM_NONE;
|
||||||
|
if (mode == "fake-completion")
|
||||||
|
return GPU_DETERMINISM_FAKE_COMPLETION;
|
||||||
|
|
||||||
|
// Unrecognized value: report it, then fall back to the automatic mode
// instead of failing.
NOTICE_LOG(BOOT, "Unknown GPU determinism mode %s", mode.c_str());
|
||||||
|
return GPU_DETERMINISM_AUTO;
|
||||||
|
}
|
||||||
|
|
||||||
// Boot the ISO or file
|
// Boot the ISO or file
|
||||||
bool BootCore(const std::string& _rFilename)
|
bool BootCore(const std::string& _rFilename)
|
||||||
{
|
{
|
||||||
|
@ -109,6 +123,7 @@ bool BootCore(const std::string& _rFilename)
|
||||||
config_cache.bMergeBlocks = StartUp.bMergeBlocks;
|
config_cache.bMergeBlocks = StartUp.bMergeBlocks;
|
||||||
config_cache.bDSPHLE = StartUp.bDSPHLE;
|
config_cache.bDSPHLE = StartUp.bDSPHLE;
|
||||||
config_cache.strBackend = StartUp.m_strVideoBackend;
|
config_cache.strBackend = StartUp.m_strVideoBackend;
|
||||||
|
config_cache.m_strGPUDeterminismMode = StartUp.m_strGPUDeterminismMode;
|
||||||
config_cache.m_EnableJIT = SConfig::GetInstance().m_DSPEnableJIT;
|
config_cache.m_EnableJIT = SConfig::GetInstance().m_DSPEnableJIT;
|
||||||
config_cache.bDSPThread = StartUp.bDSPThread;
|
config_cache.bDSPThread = StartUp.bDSPThread;
|
||||||
config_cache.Volume = SConfig::GetInstance().m_Volume;
|
config_cache.Volume = SConfig::GetInstance().m_Volume;
|
||||||
|
@ -168,6 +183,8 @@ bool BootCore(const std::string& _rFilename)
|
||||||
dsp_section->Get("EnableJIT", &SConfig::GetInstance().m_DSPEnableJIT, SConfig::GetInstance().m_DSPEnableJIT);
|
dsp_section->Get("EnableJIT", &SConfig::GetInstance().m_DSPEnableJIT, SConfig::GetInstance().m_DSPEnableJIT);
|
||||||
dsp_section->Get("Backend", &SConfig::GetInstance().sBackend, SConfig::GetInstance().sBackend);
|
dsp_section->Get("Backend", &SConfig::GetInstance().sBackend, SConfig::GetInstance().sBackend);
|
||||||
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
|
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
|
||||||
|
core_section->Get("GPUDeterminismMode", &StartUp.m_strGPUDeterminismMode, StartUp.m_strGPUDeterminismMode);
|
||||||
|
StartUp.m_GPUDeterminismMode = ParseGPUDeterminismMode(StartUp.m_strGPUDeterminismMode);
|
||||||
|
|
||||||
for (unsigned int i = 0; i < MAX_SI_CHANNELS; ++i)
|
for (unsigned int i = 0; i < MAX_SI_CHANNELS; ++i)
|
||||||
{
|
{
|
||||||
|
@ -277,6 +294,7 @@ void Stop()
|
||||||
StartUp.bDSPHLE = config_cache.bDSPHLE;
|
StartUp.bDSPHLE = config_cache.bDSPHLE;
|
||||||
StartUp.bDSPThread = config_cache.bDSPThread;
|
StartUp.bDSPThread = config_cache.bDSPThread;
|
||||||
StartUp.m_strVideoBackend = config_cache.strBackend;
|
StartUp.m_strVideoBackend = config_cache.strBackend;
|
||||||
|
StartUp.m_strGPUDeterminismMode = config_cache.m_strGPUDeterminismMode;
|
||||||
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
|
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
|
||||||
StartUp.bHLE_BS2 = config_cache.bHLE_BS2;
|
StartUp.bHLE_BS2 = config_cache.bHLE_BS2;
|
||||||
SConfig::GetInstance().sBackend = config_cache.sBackend;
|
SConfig::GetInstance().sBackend = config_cache.sBackend;
|
||||||
|
|
|
@ -317,6 +317,7 @@ void SConfig::SaveCoreSettings(IniFile& ini)
|
||||||
core->Set("FrameLimit", m_Framelimit);
|
core->Set("FrameLimit", m_Framelimit);
|
||||||
core->Set("FrameSkip", m_FrameSkip);
|
core->Set("FrameSkip", m_FrameSkip);
|
||||||
core->Set("GFXBackend", m_LocalCoreStartupParameter.m_strVideoBackend);
|
core->Set("GFXBackend", m_LocalCoreStartupParameter.m_strVideoBackend);
|
||||||
|
core->Set("GPUDeterminismMode", m_LocalCoreStartupParameter.m_strGPUDeterminismMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SConfig::SaveMovieSettings(IniFile& ini)
|
void SConfig::SaveMovieSettings(IniFile& ini)
|
||||||
|
@ -542,6 +543,7 @@ void SConfig::LoadCoreSettings(IniFile& ini)
|
||||||
core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default
|
core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default
|
||||||
core->Get("FrameSkip", &m_FrameSkip, 0);
|
core->Get("FrameSkip", &m_FrameSkip, 0);
|
||||||
core->Get("GFXBackend", &m_LocalCoreStartupParameter.m_strVideoBackend, "");
|
core->Get("GFXBackend", &m_LocalCoreStartupParameter.m_strVideoBackend, "");
|
||||||
|
core->Get("GPUDeterminismMode", &m_LocalCoreStartupParameter.m_strGPUDeterminismMode, "auto");
|
||||||
}
|
}
|
||||||
|
|
||||||
void SConfig::LoadMovieSettings(IniFile& ini)
|
void SConfig::LoadMovieSettings(IniFile& ini)
|
||||||
|
|
|
@ -48,6 +48,7 @@
|
||||||
#include "Core/HW/VideoInterface.h"
|
#include "Core/HW/VideoInterface.h"
|
||||||
#include "Core/HW/Wiimote.h"
|
#include "Core/HW/Wiimote.h"
|
||||||
#include "Core/IPC_HLE/WII_IPC_HLE_Device_usb.h"
|
#include "Core/IPC_HLE/WII_IPC_HLE_Device_usb.h"
|
||||||
|
#include "Core/IPC_HLE/WII_Socket.h"
|
||||||
#include "Core/PowerPC/PowerPC.h"
|
#include "Core/PowerPC/PowerPC.h"
|
||||||
|
|
||||||
#ifdef USE_GDBSTUB
|
#ifdef USE_GDBSTUB
|
||||||
|
@ -65,6 +66,8 @@ bool g_aspect_wide;
|
||||||
namespace Core
|
namespace Core
|
||||||
{
|
{
|
||||||
|
|
||||||
|
bool g_want_determinism;
|
||||||
|
|
||||||
// Declarations and definitions
|
// Declarations and definitions
|
||||||
static Common::Timer s_timer;
|
static Common::Timer s_timer;
|
||||||
static volatile u32 s_drawn_frame = 0;
|
static volatile u32 s_drawn_frame = 0;
|
||||||
|
@ -177,6 +180,8 @@ bool Init()
|
||||||
s_emu_thread.join();
|
s_emu_thread.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Core::UpdateWantDeterminism(/*initial*/ true);
|
||||||
|
|
||||||
INFO_LOG(OSREPORT, "Starting core = %s mode",
|
INFO_LOG(OSREPORT, "Starting core = %s mode",
|
||||||
_CoreParameter.bWii ? "Wii" : "GameCube");
|
_CoreParameter.bWii ? "Wii" : "GameCube");
|
||||||
INFO_LOG(OSREPORT, "CPU Thread separate = %s",
|
INFO_LOG(OSREPORT, "CPU Thread separate = %s",
|
||||||
|
@ -564,6 +569,9 @@ void RequestRefreshInfo()
|
||||||
|
|
||||||
bool PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
bool PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
||||||
{
|
{
|
||||||
|
if (!IsRunning())
|
||||||
|
return true;
|
||||||
|
|
||||||
// let's support recursive locking to simplify things on the caller's side,
|
// let's support recursive locking to simplify things on the caller's side,
|
||||||
// and let's do it at this outer level in case the individual systems don't support it.
|
// and let's do it at this outer level in case the individual systems don't support it.
|
||||||
if (doLock ? s_pause_and_lock_depth++ : --s_pause_and_lock_depth)
|
if (doLock ? s_pause_and_lock_depth++ : --s_pause_and_lock_depth)
|
||||||
|
@ -702,4 +710,27 @@ void SetOnStoppedCallback(StoppedCallbackFunc callback)
|
||||||
s_on_stopped_callback = callback;
|
s_on_stopped_callback = callback;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Recomputes g_want_determinism and notifies the subsystems that depend on it.
//
// Determinism is wanted while movie input is being played back or recorded,
// or while NetPlay is running.
//
// initial: when true, the notifications below are sent even if the computed
//          value equals the current g_want_determinism.
void UpdateWantDeterminism(bool initial)
|
||||||
|
{
|
||||||
|
// For now, this value is not itself configurable. Instead, individual
|
||||||
|
// settings that depend on it, such as GPU determinism mode, should have
|
||||||
|
// override options for testing.
|
||||||
|
bool new_want_determinism =
|
||||||
|
Movie::IsPlayingInput() ||
|
||||||
|
Movie::IsRecordingInput() ||
|
||||||
|
NetPlay::IsNetPlayRunning();
|
||||||
|
if (new_want_determinism != g_want_determinism || initial)
|
||||||
|
{
|
||||||
|
WARN_LOG(COMMON, "Want determinism <- %s", new_want_determinism ? "true" : "false");
|
||||||
|
|
||||||
|
// Take the pause lock before changing the flag; the result is handed back
// as unpauseOnUnlock when the lock is released below.
bool was_unpaused = Core::PauseAndLock(true);
|
||||||
|
|
||||||
|
g_want_determinism = new_want_determinism;
|
||||||
|
// Forward the new value to the Wii socket manager and the video backend.
WiiSockMan::GetInstance().UpdateWantDeterminism(new_want_determinism);
|
||||||
|
g_video_backend->UpdateWantDeterminism(new_want_determinism);
|
||||||
|
|
||||||
|
Core::PauseAndLock(false, was_unpaused);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // Core
|
} // Core
|
||||||
|
|
|
@ -23,6 +23,8 @@ extern bool g_aspect_wide;
|
||||||
namespace Core
|
namespace Core
|
||||||
{
|
{
|
||||||
|
|
||||||
|
extern bool g_want_determinism;
|
||||||
|
|
||||||
bool GetIsFramelimiterTempDisabled();
|
bool GetIsFramelimiterTempDisabled();
|
||||||
void SetIsFramelimiterTempDisabled(bool disable);
|
void SetIsFramelimiterTempDisabled(bool disable);
|
||||||
|
|
||||||
|
@ -79,4 +81,7 @@ bool PauseAndLock(bool doLock, bool unpauseOnUnlock=true);
|
||||||
typedef void(*StoppedCallbackFunc)(void);
|
typedef void(*StoppedCallbackFunc)(void);
|
||||||
void SetOnStoppedCallback(StoppedCallbackFunc callback);
|
void SetOnStoppedCallback(StoppedCallbackFunc callback);
|
||||||
|
|
||||||
|
// Run on the GUI thread when the factors change.
|
||||||
|
void UpdateWantDeterminism(bool initial = false);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -97,6 +97,15 @@ enum Hotkey
|
||||||
NUM_HOTKEYS,
|
NUM_HOTKEYS,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// GPU determinism modes. ParseGPUDeterminismMode produces one of these from
// the "GPUDeterminismMode" setting string.
enum GPUDeterminismMode
|
||||||
|
{
|
||||||
|
// Setting string "auto"; also the result for unrecognized strings.
GPU_DETERMINISM_AUTO,
|
||||||
|
// Setting string "none".
GPU_DETERMINISM_NONE,
|
||||||
|
// Setting string "fake-completion". Apart from AUTO and NONE, this is
|
||||||
|
// currently the only mode; at least one more will probably be added later.
|
||||||
|
GPU_DETERMINISM_FAKE_COMPLETION,
|
||||||
|
};
|
||||||
|
|
||||||
struct SCoreStartupParameter
|
struct SCoreStartupParameter
|
||||||
{
|
{
|
||||||
// Settings
|
// Settings
|
||||||
|
@ -200,6 +209,10 @@ struct SCoreStartupParameter
|
||||||
EBootType m_BootType;
|
EBootType m_BootType;
|
||||||
|
|
||||||
std::string m_strVideoBackend;
|
std::string m_strVideoBackend;
|
||||||
|
std::string m_strGPUDeterminismMode;
|
||||||
|
|
||||||
|
// set based on the string version
|
||||||
|
GPUDeterminismMode m_GPUDeterminismMode;
|
||||||
|
|
||||||
// files
|
// files
|
||||||
std::string m_strFilename;
|
std::string m_strFilename;
|
||||||
|
|
|
@ -331,7 +331,7 @@ bool Wiimote::Step()
|
||||||
m_rumble->controls[0]->control_ref->State(m_rumble_on);
|
m_rumble->controls[0]->control_ref->State(m_rumble_on);
|
||||||
|
|
||||||
// when a movie is active, this button status update is disabled (moved), because movies only record data reports.
|
// when a movie is active, this button status update is disabled (moved), because movies only record data reports.
|
||||||
if (!(Movie::IsMovieActive()) || NetPlay::IsNetPlayRunning())
|
if (!Core::g_want_determinism)
|
||||||
{
|
{
|
||||||
UpdateButtonsStatus();
|
UpdateButtonsStatus();
|
||||||
}
|
}
|
||||||
|
@ -385,7 +385,7 @@ void Wiimote::UpdateButtonsStatus()
|
||||||
void Wiimote::GetCoreData(u8* const data)
|
void Wiimote::GetCoreData(u8* const data)
|
||||||
{
|
{
|
||||||
// when a movie is active, the button update happens here instead of Wiimote::Step, to avoid potential desync issues.
|
// when a movie is active, the button update happens here instead of Wiimote::Step, to avoid potential desync issues.
|
||||||
if (Movie::IsMovieActive() || NetPlay::IsNetPlayRunning())
|
if (Core::g_want_determinism)
|
||||||
{
|
{
|
||||||
UpdateButtonsStatus();
|
UpdateButtonsStatus();
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,8 +4,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#include "Core/Movie.h"
|
#include "Core/Core.h"
|
||||||
#include "Core/NetPlayProto.h"
|
|
||||||
#include "Core/IPC_HLE/WII_IPC_HLE.h"
|
#include "Core/IPC_HLE/WII_IPC_HLE.h"
|
||||||
#include "Core/IPC_HLE/WII_IPC_HLE_Device.h"
|
#include "Core/IPC_HLE/WII_IPC_HLE_Device.h"
|
||||||
#include "Core/IPC_HLE/WII_Socket.h" // No Wii socket support while using NetPlay or TAS
|
#include "Core/IPC_HLE/WII_Socket.h" // No Wii socket support while using NetPlay or TAS
|
||||||
|
@ -559,9 +558,7 @@ void WiiSockMan::AddSocket(s32 fd)
|
||||||
|
|
||||||
s32 WiiSockMan::NewSocket(s32 af, s32 type, s32 protocol)
|
s32 WiiSockMan::NewSocket(s32 af, s32 type, s32 protocol)
|
||||||
{
|
{
|
||||||
if (NetPlay::IsNetPlayRunning() ||
|
if (Core::g_want_determinism)
|
||||||
Movie::IsRecordingInput() ||
|
|
||||||
Movie::IsPlayingInput())
|
|
||||||
{
|
{
|
||||||
return SO_ENOMEM;
|
return SO_ENOMEM;
|
||||||
}
|
}
|
||||||
|
@ -664,5 +661,12 @@ void WiiSockMan::Convert(sockaddr_in const & from, WiiSockAddrIn& to, s32 addrle
|
||||||
to.len = addrlen;
|
to.len = addrlen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reacts to a change of the determinism requirement.
//
// want: new value of the flag. When true, Clean() is called; when false
//       nothing happens.
void WiiSockMan::UpdateWantDeterminism(bool want)
|
||||||
|
{
|
||||||
|
// If determinism is now wanted (e.g. movie recording started), kill existing sockets.
|
||||||
|
if (want)
|
||||||
|
Clean();
|
||||||
|
}
|
||||||
|
|
||||||
#undef ERRORCODE
|
#undef ERRORCODE
|
||||||
#undef EITHER
|
#undef EITHER
|
||||||
|
|
|
@ -242,6 +242,8 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void UpdateWantDeterminism(bool want);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
WiiSockMan() = default;
|
WiiSockMan() = default;
|
||||||
|
|
||||||
|
|
|
@ -437,6 +437,8 @@ bool BeginRecordingInput(int controllers)
|
||||||
if (s_playMode != MODE_NONE || controllers == 0)
|
if (s_playMode != MODE_NONE || controllers == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
bool was_unpaused = Core::PauseAndLock(true);
|
||||||
|
|
||||||
s_numPads = controllers;
|
s_numPads = controllers;
|
||||||
g_currentFrame = g_totalFrames = 0;
|
g_currentFrame = g_totalFrames = 0;
|
||||||
g_currentLagCount = s_totalLagCount = 0;
|
g_currentLagCount = s_totalLagCount = 0;
|
||||||
|
@ -487,6 +489,10 @@ bool BeginRecordingInput(int controllers)
|
||||||
|
|
||||||
s_currentByte = s_totalBytes = 0;
|
s_currentByte = s_totalBytes = 0;
|
||||||
|
|
||||||
|
Core::UpdateWantDeterminism();
|
||||||
|
|
||||||
|
Core::PauseAndLock(false, was_unpaused);
|
||||||
|
|
||||||
Core::DisplayMessage("Starting movie recording", 2000);
|
Core::DisplayMessage("Starting movie recording", 2000);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -764,6 +770,8 @@ bool PlayInput(const std::string& filename)
|
||||||
|
|
||||||
s_playMode = MODE_PLAYING;
|
s_playMode = MODE_PLAYING;
|
||||||
|
|
||||||
|
Core::UpdateWantDeterminism();
|
||||||
|
|
||||||
s_totalBytes = g_recordfd.GetSize() - 256;
|
s_totalBytes = g_recordfd.GetSize() - 256;
|
||||||
EnsureTmpInputSize((size_t)s_totalBytes);
|
EnsureTmpInputSize((size_t)s_totalBytes);
|
||||||
g_recordfd.ReadArray(tmpInput, (size_t)s_totalBytes);
|
g_recordfd.ReadArray(tmpInput, (size_t)s_totalBytes);
|
||||||
|
@ -1097,6 +1105,7 @@ void EndPlayInput(bool cont)
|
||||||
s_rerecords = 0;
|
s_rerecords = 0;
|
||||||
s_currentByte = 0;
|
s_currentByte = 0;
|
||||||
s_playMode = MODE_NONE;
|
s_playMode = MODE_NONE;
|
||||||
|
Core::UpdateWantDeterminism();
|
||||||
Core::DisplayMessage("Movie End.", 2000);
|
Core::DisplayMessage("Movie End.", 2000);
|
||||||
s_bRecordingFromSaveState = false;
|
s_bRecordingFromSaveState = false;
|
||||||
// we don't clear these things because otherwise we can't resume playback if we load a movie state later
|
// we don't clear these things because otherwise we can't resume playback if we load a movie state later
|
||||||
|
|
|
@ -13,46 +13,46 @@ void SWLoadCPReg(u32 sub_cmd, u32 value)
|
||||||
switch (sub_cmd & 0xF0)
|
switch (sub_cmd & 0xF0)
|
||||||
{
|
{
|
||||||
case 0x30:
|
case 0x30:
|
||||||
MatrixIndexA.Hex = value;
|
g_main_cp_state.matrix_index_a.Hex = value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x40:
|
case 0x40:
|
||||||
MatrixIndexB.Hex = value;
|
g_main_cp_state.matrix_index_b.Hex = value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x50:
|
case 0x50:
|
||||||
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits
|
g_main_cp_state.vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
|
||||||
g_VtxDesc.Hex |= value;
|
g_main_cp_state.vtx_desc.Hex |= value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x60:
|
case 0x60:
|
||||||
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
g_main_cp_state.vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
||||||
g_VtxDesc.Hex |= (u64)value << 17;
|
g_main_cp_state.vtx_desc.Hex |= (u64)value << 17;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x70:
|
case 0x70:
|
||||||
_assert_((sub_cmd & 0x0F) < 8);
|
_assert_((sub_cmd & 0x0F) < 8);
|
||||||
g_VtxAttr[sub_cmd & 7].g0.Hex = value;
|
g_main_cp_state.vtx_attr[sub_cmd & 7].g0.Hex = value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x80:
|
case 0x80:
|
||||||
_assert_((sub_cmd & 0x0F) < 8);
|
_assert_((sub_cmd & 0x0F) < 8);
|
||||||
g_VtxAttr[sub_cmd & 7].g1.Hex = value;
|
g_main_cp_state.vtx_attr[sub_cmd & 7].g1.Hex = value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x90:
|
case 0x90:
|
||||||
_assert_((sub_cmd & 0x0F) < 8);
|
_assert_((sub_cmd & 0x0F) < 8);
|
||||||
g_VtxAttr[sub_cmd & 7].g2.Hex = value;
|
g_main_cp_state.vtx_attr[sub_cmd & 7].g2.Hex = value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Pointers to vertex arrays in GC RAM
|
// Pointers to vertex arrays in GC RAM
|
||||||
case 0xA0:
|
case 0xA0:
|
||||||
arraybases[sub_cmd & 0xF] = value;
|
g_main_cp_state.array_bases[sub_cmd & 0xF] = value;
|
||||||
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
|
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0xB0:
|
case 0xB0:
|
||||||
arraystrides[sub_cmd & 0xF] = value & 0xFF;
|
g_main_cp_state.array_strides[sub_cmd & 0xF] = value & 0xFF;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ static void DecodePrimitiveStream(u32 iBufferSize)
|
||||||
{
|
{
|
||||||
while (streamSize > 0 && iBufferSize >= vertexSize)
|
while (streamSize > 0 && iBufferSize >= vertexSize)
|
||||||
{
|
{
|
||||||
g_pVideoData += vertexSize;
|
g_video_buffer_read_ptr += vertexSize;
|
||||||
iBufferSize -= vertexSize;
|
iBufferSize -= vertexSize;
|
||||||
streamSize--;
|
streamSize--;
|
||||||
}
|
}
|
||||||
|
@ -94,26 +94,26 @@ static void ReadXFData(u32 iBufferSize)
|
||||||
|
|
||||||
static void ExecuteDisplayList(u32 addr, u32 count)
|
static void ExecuteDisplayList(u32 addr, u32 count)
|
||||||
{
|
{
|
||||||
u8 *videoDataSave = g_pVideoData;
|
u8 *videoDataSave = g_video_buffer_read_ptr;
|
||||||
|
|
||||||
u8 *dlStart = Memory::GetPointer(addr);
|
u8 *dlStart = Memory::GetPointer(addr);
|
||||||
|
|
||||||
g_pVideoData = dlStart;
|
g_video_buffer_read_ptr = dlStart;
|
||||||
|
|
||||||
while (OpcodeDecoder::CommandRunnable(count))
|
while (OpcodeDecoder::CommandRunnable(count))
|
||||||
{
|
{
|
||||||
OpcodeDecoder::Run(count);
|
OpcodeDecoder::Run(count);
|
||||||
|
|
||||||
// if data was read by the opcode decoder then the video data pointer changed
|
// if data was read by the opcode decoder then the video data pointer changed
|
||||||
u32 readCount = (u32)(g_pVideoData - dlStart);
|
u32 readCount = (u32)(g_video_buffer_read_ptr - dlStart);
|
||||||
dlStart = g_pVideoData;
|
dlStart = g_video_buffer_read_ptr;
|
||||||
|
|
||||||
_assert_msg_(VIDEO, count >= readCount, "Display list underrun");
|
_assert_msg_(VIDEO, count >= readCount, "Display list underrun");
|
||||||
|
|
||||||
count -= readCount;
|
count -= readCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
g_pVideoData = videoDataSave;
|
g_video_buffer_read_ptr = videoDataSave;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void DecodeStandard(u32 bufferSize)
|
static void DecodeStandard(u32 bufferSize)
|
||||||
|
|
|
@ -57,7 +57,7 @@ void DoState(PointerWrap &p)
|
||||||
p.Do(interruptWaiting);
|
p.Do(interruptWaiting);
|
||||||
|
|
||||||
// Is this right?
|
// Is this right?
|
||||||
p.DoArray(g_pVideoData,writePos);
|
p.DoArray(g_video_buffer_read_ptr,writePos);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
|
static void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
|
||||||
|
@ -95,7 +95,7 @@ void Init()
|
||||||
interruptSet = false;
|
interruptSet = false;
|
||||||
interruptWaiting = false;
|
interruptWaiting = false;
|
||||||
|
|
||||||
g_pVideoData = nullptr;
|
g_video_buffer_read_ptr = nullptr;
|
||||||
g_bSkipCurrentFrame = false;
|
g_bSkipCurrentFrame = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,7 +311,7 @@ bool RunBuffer()
|
||||||
|
|
||||||
_dbg_assert_(COMMANDPROCESSOR, writePos >= readPos);
|
_dbg_assert_(COMMANDPROCESSOR, writePos >= readPos);
|
||||||
|
|
||||||
g_pVideoData = &commandBuffer[readPos];
|
g_video_buffer_read_ptr = &commandBuffer[readPos];
|
||||||
|
|
||||||
u32 availableBytes = writePos - readPos;
|
u32 availableBytes = writePos - readPos;
|
||||||
|
|
||||||
|
@ -322,7 +322,7 @@ bool RunBuffer()
|
||||||
OpcodeDecoder::Run(availableBytes);
|
OpcodeDecoder::Run(availableBytes);
|
||||||
|
|
||||||
// if data was read by the opcode decoder then the video data pointer changed
|
// if data was read by the opcode decoder then the video data pointer changed
|
||||||
readPos = (u32)(g_pVideoData - &commandBuffer[0]);
|
readPos = (u32)(g_video_buffer_read_ptr - &commandBuffer[0]);
|
||||||
_dbg_assert_(VIDEO, writePos >= readPos);
|
_dbg_assert_(VIDEO, writePos >= readPos);
|
||||||
availableBytes = writePos - readPos;
|
availableBytes = writePos - readPos;
|
||||||
}
|
}
|
||||||
|
|
|
@ -39,7 +39,7 @@ SWVertexLoader::~SWVertexLoader()
|
||||||
|
|
||||||
void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
||||||
{
|
{
|
||||||
m_CurrentVat = &g_VtxAttr[attributeIndex];
|
m_CurrentVat = &g_main_cp_state.vtx_attr[attributeIndex];
|
||||||
|
|
||||||
posScale = 1.0f / float(1 << m_CurrentVat->g0.PosFrac);
|
posScale = 1.0f / float(1 << m_CurrentVat->g0.PosFrac);
|
||||||
tcScale[0] = 1.0f / float(1 << m_CurrentVat->g0.Tex0Frac);
|
tcScale[0] = 1.0f / float(1 << m_CurrentVat->g0.Tex0Frac);
|
||||||
|
@ -53,20 +53,20 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
||||||
|
|
||||||
//TexMtx
|
//TexMtx
|
||||||
const u64 tmDesc[8] = {
|
const u64 tmDesc[8] = {
|
||||||
g_VtxDesc.Tex0MatIdx, g_VtxDesc.Tex1MatIdx, g_VtxDesc.Tex2MatIdx, g_VtxDesc.Tex3MatIdx,
|
g_main_cp_state.vtx_desc.Tex0MatIdx, g_main_cp_state.vtx_desc.Tex1MatIdx, g_main_cp_state.vtx_desc.Tex2MatIdx, g_main_cp_state.vtx_desc.Tex3MatIdx,
|
||||||
g_VtxDesc.Tex4MatIdx, g_VtxDesc.Tex5MatIdx, g_VtxDesc.Tex6MatIdx, g_VtxDesc.Tex7MatIdx
|
g_main_cp_state.vtx_desc.Tex4MatIdx, g_main_cp_state.vtx_desc.Tex5MatIdx, g_main_cp_state.vtx_desc.Tex6MatIdx, g_main_cp_state.vtx_desc.Tex7MatIdx
|
||||||
};
|
};
|
||||||
|
|
||||||
// Colors
|
// Colors
|
||||||
const u64 colDesc[2] = {g_VtxDesc.Color0, g_VtxDesc.Color1};
|
const u64 colDesc[2] = {g_main_cp_state.vtx_desc.Color0, g_main_cp_state.vtx_desc.Color1};
|
||||||
colElements[0] = m_CurrentVat->g0.Color0Elements;
|
colElements[0] = m_CurrentVat->g0.Color0Elements;
|
||||||
colElements[1] = m_CurrentVat->g0.Color1Elements;
|
colElements[1] = m_CurrentVat->g0.Color1Elements;
|
||||||
const u32 colComp[2] = {m_CurrentVat->g0.Color0Comp, m_CurrentVat->g0.Color1Comp};
|
const u32 colComp[2] = {m_CurrentVat->g0.Color0Comp, m_CurrentVat->g0.Color1Comp};
|
||||||
|
|
||||||
// TextureCoord
|
// TextureCoord
|
||||||
const u64 tcDesc[8] = {
|
const u64 tcDesc[8] = {
|
||||||
g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord, g_VtxDesc.Tex2Coord, g_VtxDesc.Tex3Coord,
|
g_main_cp_state.vtx_desc.Tex0Coord, g_main_cp_state.vtx_desc.Tex1Coord, g_main_cp_state.vtx_desc.Tex2Coord, g_main_cp_state.vtx_desc.Tex3Coord,
|
||||||
g_VtxDesc.Tex4Coord, g_VtxDesc.Tex5Coord, g_VtxDesc.Tex6Coord, g_VtxDesc.Tex7Coord
|
g_main_cp_state.vtx_desc.Tex4Coord, g_main_cp_state.vtx_desc.Tex5Coord, g_main_cp_state.vtx_desc.Tex6Coord, g_main_cp_state.vtx_desc.Tex7Coord
|
||||||
};
|
};
|
||||||
const u32 tcElements[8] = {
|
const u32 tcElements[8] = {
|
||||||
m_CurrentVat->g0.Tex0CoordElements, m_CurrentVat->g1.Tex1CoordElements, m_CurrentVat->g1.Tex2CoordElements,
|
m_CurrentVat->g0.Tex0CoordElements, m_CurrentVat->g1.Tex1CoordElements, m_CurrentVat->g1.Tex2CoordElements,
|
||||||
|
@ -89,15 +89,15 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
||||||
|
|
||||||
// Reset vertex
|
// Reset vertex
|
||||||
// matrix index from xf regs or cp memory?
|
// matrix index from xf regs or cp memory?
|
||||||
if (xfmem.MatrixIndexA.PosNormalMtxIdx != MatrixIndexA.PosNormalMtxIdx ||
|
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
|
||||||
xfmem.MatrixIndexA.Tex0MtxIdx != MatrixIndexA.Tex0MtxIdx ||
|
xfmem.MatrixIndexA.Tex0MtxIdx != g_main_cp_state.matrix_index_a.Tex0MtxIdx ||
|
||||||
xfmem.MatrixIndexA.Tex1MtxIdx != MatrixIndexA.Tex1MtxIdx ||
|
xfmem.MatrixIndexA.Tex1MtxIdx != g_main_cp_state.matrix_index_a.Tex1MtxIdx ||
|
||||||
xfmem.MatrixIndexA.Tex2MtxIdx != MatrixIndexA.Tex2MtxIdx ||
|
xfmem.MatrixIndexA.Tex2MtxIdx != g_main_cp_state.matrix_index_a.Tex2MtxIdx ||
|
||||||
xfmem.MatrixIndexA.Tex3MtxIdx != MatrixIndexA.Tex3MtxIdx ||
|
xfmem.MatrixIndexA.Tex3MtxIdx != g_main_cp_state.matrix_index_a.Tex3MtxIdx ||
|
||||||
xfmem.MatrixIndexB.Tex4MtxIdx != MatrixIndexB.Tex4MtxIdx ||
|
xfmem.MatrixIndexB.Tex4MtxIdx != g_main_cp_state.matrix_index_b.Tex4MtxIdx ||
|
||||||
xfmem.MatrixIndexB.Tex5MtxIdx != MatrixIndexB.Tex5MtxIdx ||
|
xfmem.MatrixIndexB.Tex5MtxIdx != g_main_cp_state.matrix_index_b.Tex5MtxIdx ||
|
||||||
xfmem.MatrixIndexB.Tex6MtxIdx != MatrixIndexB.Tex6MtxIdx ||
|
xfmem.MatrixIndexB.Tex6MtxIdx != g_main_cp_state.matrix_index_b.Tex6MtxIdx ||
|
||||||
xfmem.MatrixIndexB.Tex7MtxIdx != MatrixIndexB.Tex7MtxIdx)
|
xfmem.MatrixIndexB.Tex7MtxIdx != g_main_cp_state.matrix_index_b.Tex7MtxIdx)
|
||||||
{
|
{
|
||||||
WARN_LOG(VIDEO, "Matrix indices don't match");
|
WARN_LOG(VIDEO, "Matrix indices don't match");
|
||||||
|
|
||||||
|
@ -118,18 +118,18 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
||||||
m_Vertex.texMtx[6] = xfmem.MatrixIndexB.Tex6MtxIdx;
|
m_Vertex.texMtx[6] = xfmem.MatrixIndexB.Tex6MtxIdx;
|
||||||
m_Vertex.texMtx[7] = xfmem.MatrixIndexB.Tex7MtxIdx;
|
m_Vertex.texMtx[7] = xfmem.MatrixIndexB.Tex7MtxIdx;
|
||||||
#else
|
#else
|
||||||
m_Vertex.posMtx = MatrixIndexA.PosNormalMtxIdx;
|
m_Vertex.posMtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
|
||||||
m_Vertex.texMtx[0] = MatrixIndexA.Tex0MtxIdx;
|
m_Vertex.texMtx[0] = g_main_cp_state.matrix_index_a.Tex0MtxIdx;
|
||||||
m_Vertex.texMtx[1] = MatrixIndexA.Tex1MtxIdx;
|
m_Vertex.texMtx[1] = g_main_cp_state.matrix_index_a.Tex1MtxIdx;
|
||||||
m_Vertex.texMtx[2] = MatrixIndexA.Tex2MtxIdx;
|
m_Vertex.texMtx[2] = g_main_cp_state.matrix_index_a.Tex2MtxIdx;
|
||||||
m_Vertex.texMtx[3] = MatrixIndexA.Tex3MtxIdx;
|
m_Vertex.texMtx[3] = g_main_cp_state.matrix_index_a.Tex3MtxIdx;
|
||||||
m_Vertex.texMtx[4] = MatrixIndexB.Tex4MtxIdx;
|
m_Vertex.texMtx[4] = g_main_cp_state.matrix_index_b.Tex4MtxIdx;
|
||||||
m_Vertex.texMtx[5] = MatrixIndexB.Tex5MtxIdx;
|
m_Vertex.texMtx[5] = g_main_cp_state.matrix_index_b.Tex5MtxIdx;
|
||||||
m_Vertex.texMtx[6] = MatrixIndexB.Tex6MtxIdx;
|
m_Vertex.texMtx[6] = g_main_cp_state.matrix_index_b.Tex6MtxIdx;
|
||||||
m_Vertex.texMtx[7] = MatrixIndexB.Tex7MtxIdx;
|
m_Vertex.texMtx[7] = g_main_cp_state.matrix_index_b.Tex7MtxIdx;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (g_VtxDesc.PosMatIdx != NOT_PRESENT)
|
if (g_main_cp_state.vtx_desc.PosMatIdx != NOT_PRESENT)
|
||||||
{
|
{
|
||||||
AddAttributeLoader(LoadPosMtx);
|
AddAttributeLoader(LoadPosMtx);
|
||||||
m_VertexSize++;
|
m_VertexSize++;
|
||||||
|
@ -145,17 +145,17 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write vertex position loader
|
// Write vertex position loader
|
||||||
m_positionLoader = VertexLoader_Position::GetFunction(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
m_positionLoader = VertexLoader_Position::GetFunction(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
||||||
m_VertexSize += VertexLoader_Position::GetSize(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
m_VertexSize += VertexLoader_Position::GetSize(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
||||||
AddAttributeLoader(LoadPosition);
|
AddAttributeLoader(LoadPosition);
|
||||||
|
|
||||||
// Normals
|
// Normals
|
||||||
if (g_VtxDesc.Normal != NOT_PRESENT)
|
if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
|
||||||
{
|
{
|
||||||
m_VertexSize += VertexLoader_Normal::GetSize(g_VtxDesc.Normal,
|
m_VertexSize += VertexLoader_Normal::GetSize(g_main_cp_state.vtx_desc.Normal,
|
||||||
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
|
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
|
||||||
|
|
||||||
m_normalLoader = VertexLoader_Normal::GetFunction(g_VtxDesc.Normal,
|
m_normalLoader = VertexLoader_Normal::GetFunction(g_main_cp_state.vtx_desc.Normal,
|
||||||
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
|
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
|
||||||
|
|
||||||
if (m_normalLoader == nullptr)
|
if (m_normalLoader == nullptr)
|
||||||
|
@ -234,8 +234,8 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
||||||
|
|
||||||
// special case if only pos and tex coord 0 and tex coord input is AB11
|
// special case if only pos and tex coord 0 and tex coord input is AB11
|
||||||
m_TexGenSpecialCase =
|
m_TexGenSpecialCase =
|
||||||
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0
|
((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
|
||||||
(g_VtxDesc.Tex0Coord != NOT_PRESENT) &&
|
(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
|
||||||
(xfmem.texMtxInfo[0].projection == XF_TEXPROJ_ST);
|
(xfmem.texMtxInfo[0].projection == XF_TEXPROJ_ST);
|
||||||
|
|
||||||
m_SetupUnit->Init(primitiveType);
|
m_SetupUnit->Init(primitiveType);
|
||||||
|
@ -252,7 +252,7 @@ void SWVertexLoader::LoadVertex()
|
||||||
// transform input data
|
// transform input data
|
||||||
TransformUnit::TransformPosition(&m_Vertex, outVertex);
|
TransformUnit::TransformPosition(&m_Vertex, outVertex);
|
||||||
|
|
||||||
if (g_VtxDesc.Normal != NOT_PRESENT)
|
if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
|
||||||
{
|
{
|
||||||
TransformUnit::TransformNormal(&m_Vertex, m_CurrentVat->g0.NormalElements, outVertex);
|
TransformUnit::TransformNormal(&m_Vertex, m_CurrentVat->g0.NormalElements, outVertex);
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,14 +116,7 @@ void VideoSoftware::DoState(PointerWrap& p)
|
||||||
p.DoPOD(swstats);
|
p.DoPOD(swstats);
|
||||||
|
|
||||||
// CP Memory
|
// CP Memory
|
||||||
p.DoArray(arraybases, 16);
|
DoCPState(p);
|
||||||
p.DoArray(arraystrides, 16);
|
|
||||||
p.Do(MatrixIndexA);
|
|
||||||
p.Do(MatrixIndexB);
|
|
||||||
p.Do(g_VtxDesc.Hex);
|
|
||||||
p.DoArray(g_VtxAttr, 8);
|
|
||||||
p.DoMarker("CP Memory");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void VideoSoftware::CheckInvalidState()
|
void VideoSoftware::CheckInvalidState()
|
||||||
|
|
|
@ -74,7 +74,7 @@ void SWLoadIndexedXF(u32 val, int array)
|
||||||
int size = ((val >> 12) & 0xF) + 1;
|
int size = ((val >> 12) & 0xF) + 1;
|
||||||
//load stuff from array to address in xf mem
|
//load stuff from array to address in xf mem
|
||||||
|
|
||||||
u32 *pData = (u32*)Memory::GetPointer(arraybases[array] + arraystrides[array]*index);
|
u32 *pData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] + g_main_cp_state.array_strides[array]*index);
|
||||||
|
|
||||||
// byteswap data
|
// byteswap data
|
||||||
u32 buffer[16];
|
u32 buffer[16];
|
||||||
|
|
|
@ -1085,5 +1085,6 @@ struct BPMemory
|
||||||
extern BPMemory bpmem;
|
extern BPMemory bpmem;
|
||||||
|
|
||||||
void LoadBPReg(u32 value0);
|
void LoadBPReg(u32 value0);
|
||||||
|
void LoadBPRegPreprocess(u32 value0);
|
||||||
|
|
||||||
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc);
|
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc);
|
||||||
|
|
|
@ -173,7 +173,8 @@ static void BPWritten(const BPCmd& bp)
|
||||||
switch (bp.newvalue & 0xFF)
|
switch (bp.newvalue & 0xFF)
|
||||||
{
|
{
|
||||||
case 0x02:
|
case 0x02:
|
||||||
PixelEngine::SetFinish(); // may generate interrupt
|
if (!g_use_deterministic_gpu_thread)
|
||||||
|
PixelEngine::SetFinish(); // may generate interrupt
|
||||||
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
|
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -183,11 +184,13 @@ static void BPWritten(const BPCmd& bp)
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
|
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
|
||||||
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
|
if (!g_use_deterministic_gpu_thread)
|
||||||
|
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
|
||||||
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
|
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
|
||||||
return;
|
return;
|
||||||
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
|
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
|
||||||
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
|
if (!g_use_deterministic_gpu_thread)
|
||||||
|
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
|
||||||
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
|
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -685,6 +688,26 @@ void LoadBPReg(u32 value0)
|
||||||
BPWritten(bp);
|
BPWritten(bp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handles the small subset of BP register writes that touch the Pixel Engine:
// decodes the raw command word and forwards draw-done and token writes to
// PixelEngine. Any other register number falls through the switch untouched.
//
// value0: register number in bits 31..24, 24-bit payload in bits 23..0.
//
// BPWritten skips these same PixelEngine calls when
// g_use_deterministic_gpu_thread is set.
// NOTE(review): presumably this function is what issues them instead, from the
// CPU-side preprocess pass - confirm against the caller.
void LoadBPRegPreprocess(u32 value0)
|
||||||
|
{
|
||||||
|
// Top byte selects the BP register.
int regNum = value0 >> 24;
|
||||||
|
// masking could hypothetically be a problem
|
||||||
|
// Payload is taken as-is; no BP write mask is applied in this function.
u32 newval = value0 & 0xffffff;
|
||||||
|
switch (regNum)
|
||||||
|
{
|
||||||
|
case BPMEM_SETDRAWDONE:
|
||||||
|
// Only a low byte of 0x02 triggers the finish; other values do nothing here.
if ((newval & 0xff) == 0x02)
|
||||||
|
PixelEngine::SetFinish();
|
||||||
|
break;
|
||||||
|
case BPMEM_PE_TOKEN_ID:
|
||||||
|
// Low 16 bits are the token; second argument is false for the non-INT register.
PixelEngine::SetToken(newval & 0xffff, false);
|
||||||
|
break;
|
||||||
|
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
|
||||||
|
// Same token extraction, second argument true for the INT register.
PixelEngine::SetToken(newval & 0xffff, true);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
|
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
|
||||||
{
|
{
|
||||||
const char* no_yes[2] = { "No", "Yes" };
|
const char* no_yes[2] = { "No", "Yes" };
|
||||||
|
|
|
@ -7,5 +7,4 @@
|
||||||
#include "VideoCommon/BPMemory.h"
|
#include "VideoCommon/BPMemory.h"
|
||||||
|
|
||||||
void BPInit();
|
void BPInit();
|
||||||
void LoadBPReg(u32 value0);
|
|
||||||
void BPReload();
|
void BPReload();
|
||||||
|
|
|
@ -2,17 +2,32 @@
|
||||||
// Licensed under GPLv2
|
// Licensed under GPLv2
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "Common/ChunkFile.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "VideoCommon/CPMemory.h"
|
#include "VideoCommon/CPMemory.h"
|
||||||
|
|
||||||
// CP state
|
// CP state
|
||||||
u8 *cached_arraybases[16];
|
u8 *cached_arraybases[16];
|
||||||
|
|
||||||
// STATE_TO_SAVE
|
CPState g_main_cp_state;
|
||||||
u32 arraybases[16];
|
CPState g_preprocess_cp_state;
|
||||||
u32 arraystrides[16];
|
|
||||||
TMatrixIndexA MatrixIndexA;
|
void DoCPState(PointerWrap& p)
|
||||||
TMatrixIndexB MatrixIndexB;
|
{
|
||||||
TVtxDesc g_VtxDesc;
|
// We don't save g_preprocess_cp_state separately because the GPU should be
|
||||||
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
|
// synced around state save/load.
|
||||||
VAT g_VtxAttr[8];
|
p.DoArray(g_main_cp_state.array_bases, 16);
|
||||||
|
p.DoArray(g_main_cp_state.array_strides, 16);
|
||||||
|
p.Do(g_main_cp_state.matrix_index_a);
|
||||||
|
p.Do(g_main_cp_state.matrix_index_b);
|
||||||
|
p.Do(g_main_cp_state.vtx_desc.Hex);
|
||||||
|
p.DoArray(g_main_cp_state.vtx_attr, 8);
|
||||||
|
p.DoMarker("CP Memory");
|
||||||
|
if (p.mode == PointerWrap::MODE_READ)
|
||||||
|
CopyPreprocessCPStateFromMain();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Overwrites g_preprocess_cp_state with a byte-for-byte copy of
// g_main_cp_state. Because the copy covers sizeof(CPState), it includes the
// attr_dirty bitfield and the raw vertex_loaders pointers, not just the
// fields that DoCPState serializes.
// DoCPState calls this after a savestate load (PointerWrap::MODE_READ).
void CopyPreprocessCPStateFromMain()
|
||||||
|
{
|
||||||
|
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
|
||||||
|
}
|
||||||
|
|
|
@ -231,12 +231,6 @@ union TMatrixIndexB
|
||||||
|
|
||||||
#pragma pack()
|
#pragma pack()
|
||||||
|
|
||||||
extern u32 arraybases[16];
|
|
||||||
extern u8 *cached_arraybases[16];
|
|
||||||
extern u32 arraystrides[16];
|
|
||||||
extern TMatrixIndexA MatrixIndexA;
|
|
||||||
extern TMatrixIndexB MatrixIndexB;
|
|
||||||
|
|
||||||
struct VAT
|
struct VAT
|
||||||
{
|
{
|
||||||
UVAT_group0 g0;
|
UVAT_group0 g0;
|
||||||
|
@ -244,11 +238,37 @@ struct VAT
|
||||||
UVAT_group2 g2;
|
UVAT_group2 g2;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern TVtxDesc g_VtxDesc;
|
class VertexLoader;
|
||||||
extern VAT g_VtxAttr[8];
|
|
||||||
|
// STATE_TO_SAVE
|
||||||
|
// Bundles the command-processor state that used to live in separate globals
// (arraybases, arraystrides, MatrixIndexA/B, g_VtxDesc, g_VtxAttr) so that two
// independent copies can exist: g_main_cp_state and g_preprocess_cp_state.
// DoCPState saves/loads array_bases, array_strides, matrix_index_a,
// matrix_index_b, vtx_desc.Hex and vtx_attr; attr_dirty and vertex_loaders are
// not serialized there.
struct CPState final
|
||||||
|
{
|
||||||
|
// Per-array base values, 16 slots (SWLoadIndexedXF adds stride*index to these).
u32 array_bases[16];
|
||||||
|
// Per-array strides, 16 slots, indexed the same way as array_bases.
u32 array_strides[16];
|
||||||
|
TMatrixIndexA matrix_index_a;
|
||||||
|
TMatrixIndexB matrix_index_b;
|
||||||
|
TVtxDesc vtx_desc;
|
||||||
|
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
|
||||||
|
VAT vtx_attr[8];
|
||||||
|
|
||||||
|
// Attributes that actually belong to VertexLoaderManager:
|
||||||
|
int attr_dirty; // bitfield
|
||||||
|
// Non-owning from this struct's point of view as far as this header shows;
// copied as raw pointers by CopyPreprocessCPStateFromMain.
// NOTE(review): ownership/lifetime is managed elsewhere - confirm in VertexLoaderManager.
VertexLoader* vertex_loaders[8];
|
||||||
|
};
|
||||||
|
|
||||||
|
class PointerWrap;
|
||||||
|
|
||||||
|
extern void DoCPState(PointerWrap& p);
|
||||||
|
|
||||||
|
extern void CopyPreprocessCPStateFromMain();
|
||||||
|
|
||||||
|
extern CPState g_main_cp_state;
|
||||||
|
extern CPState g_preprocess_cp_state;
|
||||||
|
|
||||||
|
extern u8 *cached_arraybases[16];
|
||||||
|
|
||||||
// Might move this into its own file later.
|
// Might move this into its own file later.
|
||||||
void LoadCPReg(u32 SubCmd, u32 Value);
|
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);
|
||||||
|
|
||||||
// Fills memory with data from CP regs
|
// Fills memory with data from CP regs
|
||||||
void FillCPMemoryArray(u32 *memory);
|
void FillCPMemoryArray(u32 *memory);
|
||||||
|
|
|
@ -77,7 +77,7 @@ void DoState(PointerWrap &p)
|
||||||
p.Do(interruptFinishWaiting);
|
p.Do(interruptFinishWaiting);
|
||||||
}
|
}
|
||||||
|
|
||||||
UNUSED static inline void WriteLow(volatile u32& _reg, u16 lowbits)
|
static inline void WriteLow(volatile u32& _reg, u16 lowbits)
|
||||||
{
|
{
|
||||||
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
|
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
|
||||||
}
|
}
|
||||||
|
@ -159,9 +159,8 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
||||||
{ FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true },
|
{ FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true },
|
||||||
{ FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer) },
|
{ FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer) },
|
||||||
// FIFO_READ_POINTER has different code for single/dual core.
|
// FIFO_READ_POINTER has different code for single/dual core.
|
||||||
{ FIFO_BP_LO, MMIO::Utils::LowPart(&fifo.CPBreakpoint), false, true },
|
|
||||||
{ FIFO_BP_HI, MMIO::Utils::HighPart(&fifo.CPBreakpoint) },
|
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto& mapped_var : directly_mapped_vars)
|
for (auto& mapped_var : directly_mapped_vars)
|
||||||
{
|
{
|
||||||
u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF;
|
u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF;
|
||||||
|
@ -173,6 +172,19 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mmio->Register(base | FIFO_BP_LO,
|
||||||
|
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPBreakpoint)),
|
||||||
|
MMIO::ComplexWrite<u16>([](u32, u16 val) {
|
||||||
|
WriteLow(fifo.CPBreakpoint, val & 0xffe0);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
mmio->Register(base | FIFO_BP_HI,
|
||||||
|
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPBreakpoint)),
|
||||||
|
MMIO::ComplexWrite<u16>([](u32, u16 val) {
|
||||||
|
WriteHigh(fifo.CPBreakpoint, val);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
// Timing and metrics MMIOs are stubbed with fixed values.
|
// Timing and metrics MMIOs are stubbed with fixed values.
|
||||||
struct {
|
struct {
|
||||||
u32 addr;
|
u32 addr;
|
||||||
|
@ -216,8 +228,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
||||||
UCPCtrlReg tmp(val);
|
UCPCtrlReg tmp(val);
|
||||||
m_CPCtrlReg.Hex = tmp.Hex;
|
m_CPCtrlReg.Hex = tmp.Hex;
|
||||||
SetCpControlRegister();
|
SetCpControlRegister();
|
||||||
if (!IsOnThread())
|
RunGpu();
|
||||||
RunGpu();
|
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -227,8 +238,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
||||||
UCPClearReg tmp(val);
|
UCPClearReg tmp(val);
|
||||||
m_CPClearReg.Hex = tmp.Hex;
|
m_CPClearReg.Hex = tmp.Hex;
|
||||||
SetCpClearRegister();
|
SetCpClearRegister();
|
||||||
if (!IsOnThread())
|
RunGpu();
|
||||||
RunGpu();
|
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -260,6 +270,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
||||||
: MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)),
|
: MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)),
|
||||||
MMIO::ComplexWrite<u16>([](u32, u16 val) {
|
MMIO::ComplexWrite<u16>([](u32, u16 val) {
|
||||||
WriteHigh(fifo.CPReadWriteDistance, val);
|
WriteHigh(fifo.CPReadWriteDistance, val);
|
||||||
|
SyncGPU(SYNC_GPU_OTHER);
|
||||||
if (fifo.CPReadWriteDistance == 0)
|
if (fifo.CPReadWriteDistance == 0)
|
||||||
{
|
{
|
||||||
GPFifo::ResetGatherPipe();
|
GPFifo::ResetGatherPipe();
|
||||||
|
@ -269,8 +280,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
||||||
{
|
{
|
||||||
ResetVideoBuffer();
|
ResetVideoBuffer();
|
||||||
}
|
}
|
||||||
if (!IsOnThread())
|
RunGpu();
|
||||||
RunGpu();
|
|
||||||
})
|
})
|
||||||
);
|
);
|
||||||
mmio->Register(base | FIFO_READ_POINTER_LO,
|
mmio->Register(base | FIFO_READ_POINTER_LO,
|
||||||
|
@ -298,11 +308,7 @@ void STACKALIGN GatherPipeBursted()
|
||||||
// if we aren't linked, we don't care about gather pipe data
|
// if we aren't linked, we don't care about gather pipe data
|
||||||
if (!m_CPCtrlReg.GPLinkEnable)
|
if (!m_CPCtrlReg.GPLinkEnable)
|
||||||
{
|
{
|
||||||
if (!IsOnThread())
|
if (IsOnThread() && !g_use_deterministic_gpu_thread)
|
||||||
{
|
|
||||||
RunGpu();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
|
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
|
||||||
// Fix Pokemon XD in DC mode.
|
// Fix Pokemon XD in DC mode.
|
||||||
|
@ -313,6 +319,10 @@ void STACKALIGN GatherPipeBursted()
|
||||||
ProcessFifoAllDistance();
|
ProcessFifoAllDistance();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
RunGpu();
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -327,8 +337,7 @@ void STACKALIGN GatherPipeBursted()
|
||||||
|
|
||||||
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
|
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
|
||||||
|
|
||||||
if (!IsOnThread())
|
RunGpu();
|
||||||
RunGpu();
|
|
||||||
|
|
||||||
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
|
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
|
||||||
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
|
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
|
||||||
|
@ -358,7 +367,8 @@ void UpdateInterrupts(u64 userdata)
|
||||||
|
|
||||||
void UpdateInterruptsFromVideoBackend(u64 userdata)
|
void UpdateInterruptsFromVideoBackend(u64 userdata)
|
||||||
{
|
{
|
||||||
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
|
if (!g_use_deterministic_gpu_thread)
|
||||||
|
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SetCPStatusFromGPU()
|
void SetCPStatusFromGPU()
|
||||||
|
|
|
@ -16,6 +16,7 @@ namespace CommandProcessor
|
||||||
{
|
{
|
||||||
|
|
||||||
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
|
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
|
||||||
|
|
||||||
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
|
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
|
||||||
extern volatile bool interruptSet;
|
extern volatile bool interruptSet;
|
||||||
extern volatile bool interruptWaiting;
|
extern volatile bool interruptWaiting;
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
#include "VideoCommon/VertexManagerBase.h"
|
#include "VideoCommon/VertexManagerBase.h"
|
||||||
|
|
||||||
extern u8* g_pVideoData;
|
extern u8* g_video_buffer_read_ptr;
|
||||||
|
|
||||||
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
|
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
|
||||||
#include <tmmintrin.h>
|
#include <tmmintrin.h>
|
||||||
|
@ -14,20 +14,20 @@ extern u8* g_pVideoData;
|
||||||
|
|
||||||
__forceinline void DataSkip(u32 skip)
|
__forceinline void DataSkip(u32 skip)
|
||||||
{
|
{
|
||||||
g_pVideoData += skip;
|
g_video_buffer_read_ptr += skip;
|
||||||
}
|
}
|
||||||
|
|
||||||
// probably unnecessary
|
// probably unnecessary
|
||||||
template <int count>
|
template <int count>
|
||||||
__forceinline void DataSkip()
|
__forceinline void DataSkip()
|
||||||
{
|
{
|
||||||
g_pVideoData += count;
|
g_video_buffer_read_ptr += count;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__forceinline T DataPeek(int _uOffset)
|
__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
|
||||||
{
|
{
|
||||||
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(g_pVideoData + _uOffset));
|
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(*bufp + _uOffset));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,18 +48,18 @@ __forceinline u32 DataPeek32(int _uOffset)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__forceinline T DataRead()
|
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
|
||||||
{
|
{
|
||||||
auto const result = DataPeek<T>(0);
|
auto const result = DataPeek<T>(0, bufp);
|
||||||
DataSkip<sizeof(T)>();
|
*bufp += sizeof(T);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
class DataReader
|
class DataReader
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
inline DataReader() : buffer(g_pVideoData), offset(0) {}
|
inline DataReader() : buffer(g_video_buffer_read_ptr), offset(0) {}
|
||||||
inline ~DataReader() { g_pVideoData += offset; }
|
inline ~DataReader() { g_video_buffer_read_ptr += offset; }
|
||||||
template <typename T> inline T Read()
|
template <typename T> inline T Read()
|
||||||
{
|
{
|
||||||
const T result = Common::FromBigEndian(*(T*)(buffer + offset));
|
const T result = Common::FromBigEndian(*(T*)(buffer + offset));
|
||||||
|
@ -94,14 +94,14 @@ __forceinline u32 DataReadU32()
|
||||||
|
|
||||||
__forceinline u32 DataReadU32Unswapped()
|
__forceinline u32 DataReadU32Unswapped()
|
||||||
{
|
{
|
||||||
u32 tmp = *(u32*)g_pVideoData;
|
u32 tmp = *(u32*)g_video_buffer_read_ptr;
|
||||||
g_pVideoData += 4;
|
g_video_buffer_read_ptr += 4;
|
||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline u8* DataGetPosition()
|
__forceinline u8* DataGetPosition()
|
||||||
{
|
{
|
||||||
return g_pVideoData;
|
return g_video_buffer_read_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
|
@ -11,32 +11,63 @@
|
||||||
#include "Core/ConfigManager.h"
|
#include "Core/ConfigManager.h"
|
||||||
#include "Core/Core.h"
|
#include "Core/Core.h"
|
||||||
#include "Core/CoreTiming.h"
|
#include "Core/CoreTiming.h"
|
||||||
|
#include "Core/NetPlayProto.h"
|
||||||
#include "Core/HW/Memmap.h"
|
#include "Core/HW/Memmap.h"
|
||||||
|
|
||||||
#include "VideoCommon/CommandProcessor.h"
|
#include "VideoCommon/CommandProcessor.h"
|
||||||
|
#include "VideoCommon/CPMemory.h"
|
||||||
#include "VideoCommon/DataReader.h"
|
#include "VideoCommon/DataReader.h"
|
||||||
#include "VideoCommon/Fifo.h"
|
#include "VideoCommon/Fifo.h"
|
||||||
#include "VideoCommon/OpcodeDecoding.h"
|
#include "VideoCommon/OpcodeDecoding.h"
|
||||||
#include "VideoCommon/PixelEngine.h"
|
#include "VideoCommon/PixelEngine.h"
|
||||||
|
#include "VideoCommon/VertexLoaderManager.h"
|
||||||
#include "VideoCommon/VideoConfig.h"
|
#include "VideoCommon/VideoConfig.h"
|
||||||
|
|
||||||
bool g_bSkipCurrentFrame = false;
|
bool g_bSkipCurrentFrame = false;
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
static volatile bool GpuRunningState = false;
|
static volatile bool GpuRunningState = false;
|
||||||
static volatile bool EmuRunningState = false;
|
static volatile bool EmuRunningState = false;
|
||||||
static std::mutex m_csHWVidOccupied;
|
static std::mutex m_csHWVidOccupied;
|
||||||
|
|
||||||
|
// Most of this array is unlikely to be faulted in...
|
||||||
|
static u8 s_fifo_aux_data[FIFO_SIZE];
|
||||||
|
static u8* s_fifo_aux_write_ptr;
|
||||||
|
static u8* s_fifo_aux_read_ptr;
|
||||||
|
|
||||||
|
bool g_use_deterministic_gpu_thread;
|
||||||
|
|
||||||
// STATE_TO_SAVE
|
// STATE_TO_SAVE
|
||||||
static u8 *videoBuffer;
|
static std::mutex s_video_buffer_lock;
|
||||||
static int size = 0;
|
static std::condition_variable s_video_buffer_cond;
|
||||||
} // namespace
|
static u8* s_video_buffer;
|
||||||
|
u8* g_video_buffer_read_ptr;
|
||||||
|
static std::atomic<u8*> s_video_buffer_write_ptr;
|
||||||
|
static std::atomic<u8*> s_video_buffer_seen_ptr;
|
||||||
|
u8* g_video_buffer_pp_read_ptr;
|
||||||
|
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
|
||||||
|
// write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode,
|
||||||
|
// things get a bit more complicated:
|
||||||
|
// - The seen_ptr is written by the GPU thread, and points to what it's already
|
||||||
|
// processed as much of as possible - in the case of a partial command which
|
||||||
|
// caused it to stop, not the same as the read ptr. It's written by the GPU,
|
||||||
|
// under the lock, and updating the cond.
|
||||||
|
// - The write_ptr is written by the CPU thread after it copies data from the
|
||||||
|
// FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
|
||||||
|
// polls, it's just atomic.
|
||||||
|
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
|
||||||
|
|
||||||
void Fifo_DoState(PointerWrap &p)
|
void Fifo_DoState(PointerWrap &p)
|
||||||
{
|
{
|
||||||
p.DoArray(videoBuffer, FIFO_SIZE);
|
p.DoArray(s_video_buffer, FIFO_SIZE);
|
||||||
p.Do(size);
|
u8* write_ptr = s_video_buffer_write_ptr;
|
||||||
p.DoPointer(g_pVideoData, videoBuffer);
|
p.DoPointer(write_ptr, s_video_buffer);
|
||||||
|
s_video_buffer_write_ptr = write_ptr;
|
||||||
|
p.DoPointer(g_video_buffer_read_ptr, s_video_buffer);
|
||||||
|
if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread)
|
||||||
|
{
|
||||||
|
// We're good and paused, right?
|
||||||
|
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
|
||||||
|
}
|
||||||
p.Do(g_bSkipCurrentFrame);
|
p.Do(g_bSkipCurrentFrame);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,6 +75,7 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
||||||
{
|
{
|
||||||
if (doLock)
|
if (doLock)
|
||||||
{
|
{
|
||||||
|
SyncGPU(SYNC_GPU_OTHER);
|
||||||
EmulatorState(false);
|
EmulatorState(false);
|
||||||
if (!Core::IsGPUThread())
|
if (!Core::IsGPUThread())
|
||||||
m_csHWVidOccupied.lock();
|
m_csHWVidOccupied.lock();
|
||||||
|
@ -61,8 +93,8 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
||||||
|
|
||||||
void Fifo_Init()
|
void Fifo_Init()
|
||||||
{
|
{
|
||||||
videoBuffer = (u8*)AllocateMemoryPages(FIFO_SIZE);
|
s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE);
|
||||||
size = 0;
|
ResetVideoBuffer();
|
||||||
GpuRunningState = false;
|
GpuRunningState = false;
|
||||||
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
||||||
}
|
}
|
||||||
|
@ -70,18 +102,24 @@ void Fifo_Init()
|
||||||
void Fifo_Shutdown()
|
void Fifo_Shutdown()
|
||||||
{
|
{
|
||||||
if (GpuRunningState) PanicAlert("Fifo shutting down while active");
|
if (GpuRunningState) PanicAlert("Fifo shutting down while active");
|
||||||
FreeMemoryPages(videoBuffer, FIFO_SIZE);
|
FreeMemoryPages(s_video_buffer, FIFO_SIZE);
|
||||||
videoBuffer = nullptr;
|
s_video_buffer = nullptr;
|
||||||
|
s_video_buffer_write_ptr = nullptr;
|
||||||
|
g_video_buffer_pp_read_ptr = nullptr;
|
||||||
|
g_video_buffer_read_ptr = nullptr;
|
||||||
|
s_video_buffer_seen_ptr = nullptr;
|
||||||
|
s_fifo_aux_write_ptr = nullptr;
|
||||||
|
s_fifo_aux_read_ptr = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8* GetVideoBufferStartPtr()
|
u8* GetVideoBufferStartPtr()
|
||||||
{
|
{
|
||||||
return videoBuffer;
|
return s_video_buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
u8* GetVideoBufferEndPtr()
|
u8* GetVideoBufferEndPtr()
|
||||||
{
|
{
|
||||||
return &videoBuffer[size];
|
return s_video_buffer_write_ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Fifo_SetRendering(bool enabled)
|
void Fifo_SetRendering(bool enabled)
|
||||||
|
@ -107,30 +145,123 @@ void EmulatorState(bool running)
|
||||||
EmuRunningState = running;
|
EmuRunningState = running;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Waits until the GPU thread has consumed everything written to the video
// buffer so far, then compacts the aux FIFO and (optionally) the video buffer.
//
// Does nothing unless both g_use_deterministic_gpu_thread and GpuRunningState
// are set. Returns early, without compacting, if GpuRunningState drops to
// false while waiting.
//
// reason:            not read anywhere in this body.
// may_move_read_ptr: when true, the unprocessed tail of the video buffer is
//                    moved to the start of s_video_buffer and the write, pp_read,
//                    read and seen pointers are rebased; it also enables the
//                    "aux fifo not synced" sanity check.
//
// NOTE(review): presumably intended to be called from the CPU thread only - confirm.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
|
||||||
|
{
|
||||||
|
if (g_use_deterministic_gpu_thread && GpuRunningState)
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lk(s_video_buffer_lock);
|
||||||
|
// Snapshot of the write pointer; the wait below compares seen_ptr against this value.
u8* write_ptr = s_video_buffer_write_ptr;
|
||||||
|
// RunGpuLoop updates s_video_buffer_seen_ptr under the same lock and notifies this cond.
s_video_buffer_cond.wait(lk, [&]() {
|
||||||
|
return !GpuRunningState || s_video_buffer_seen_ptr == write_ptr;
|
||||||
|
});
|
||||||
|
// Woken because the GPU stopped rather than caught up: leave all buffers untouched.
if (!GpuRunningState)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Opportunistically reset FIFOs so we don't wrap around.
|
||||||
|
// Diagnostic only: execution continues into the compaction below even after the alert.
if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
|
||||||
|
PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);
|
||||||
|
|
||||||
|
// Aux FIFO compaction happens regardless of may_move_read_ptr: slide the
// unread bytes to the front, then rebase write before resetting read
// (the write adjustment uses the old read pointer).
memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
|
||||||
|
s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
|
||||||
|
s_fifo_aux_read_ptr = s_fifo_aux_data;
|
||||||
|
|
||||||
|
if (may_move_read_ptr)
|
||||||
|
{
|
||||||
|
// what's left over in the buffer
|
||||||
|
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
|
||||||
|
|
||||||
|
memmove(s_video_buffer, g_video_buffer_pp_read_ptr, size);
|
||||||
|
// This change always decreases the pointers. We write seen_ptr
|
||||||
|
// after write_ptr here, and read it before in RunGpuLoop, so
|
||||||
|
// 'write_ptr > seen_ptr' there cannot become spuriously true.
|
||||||
|
s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
|
||||||
|
g_video_buffer_pp_read_ptr = s_video_buffer;
|
||||||
|
g_video_buffer_read_ptr = s_video_buffer;
|
||||||
|
// Do not reorder: seen_ptr must be stored last (see the comment above).
s_video_buffer_seen_ptr = write_ptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Appends `size` bytes from `ptr` to the aux FIFO at s_fifo_aux_write_ptr.
//
// If the bytes do not fit between the write pointer and the end of
// s_fifo_aux_data, SyncGPU is called once (without moving the video-buffer
// read pointer) so that it can compact the aux FIFO, and the space check is
// repeated. If the data still does not fit, a PanicAlert is raised and the
// data is dropped: nothing is copied and the write pointer is unchanged.
//
// ptr:  source bytes to copy.
// size: number of bytes to copy.
void PushFifoAuxBuffer(void* ptr, size_t size)
|
||||||
|
{
|
||||||
|
// Free space is measured from the write pointer to the end of the backing array.
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
|
||||||
|
{
|
||||||
|
// SyncGPU only waits/compacts when g_use_deterministic_gpu_thread and GpuRunningState are set.
SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
|
||||||
|
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
|
||||||
|
{
|
||||||
|
// That will sync us up to the last 32 bytes, so this short region
|
||||||
|
// of FIFO would have to point to a 2MB display list or something.
|
||||||
|
PanicAlert("absurdly large aux buffer");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
memcpy(s_fifo_aux_write_ptr, ptr, size);
|
||||||
|
s_fifo_aux_write_ptr += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consumes `size` bytes from the aux FIFO: returns the current read pointer
// and advances it by `size`. No data is copied, and there is no check that
// `size` bytes are actually available before s_fifo_aux_write_ptr.
//
// NOTE(review): the returned pointer aliases the aux FIFO storage, which
// SyncGPU may memmove later - presumably callers must finish using it before
// the next sync; confirm against callers.
void* PopFifoAuxBuffer(size_t size)
|
||||||
|
{
|
||||||
|
void* ret = s_fifo_aux_read_ptr;
|
||||||
|
s_fifo_aux_read_ptr += size;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
// Description: RunGpuLoop() sends data through this function.
|
// Description: RunGpuLoop() sends data through this function.
|
||||||
void ReadDataFromFifo(u8* _uData, u32 len)
|
static void ReadDataFromFifo(u8* _uData, u32 len)
|
||||||
{
|
{
|
||||||
if (size + len >= FIFO_SIZE)
|
if (len > (s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
|
||||||
{
|
{
|
||||||
int pos = (int)(g_pVideoData - videoBuffer);
|
size_t size = s_video_buffer_write_ptr - g_video_buffer_read_ptr;
|
||||||
size -= pos;
|
if (len > FIFO_SIZE - size)
|
||||||
if (size + len > FIFO_SIZE)
|
|
||||||
{
|
{
|
||||||
PanicAlert("FIFO out of bounds (size = %i, len = %i at %08x)", size, len, pos);
|
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
memmove(&videoBuffer[0], &videoBuffer[pos], size);
|
memmove(s_video_buffer, g_video_buffer_read_ptr, size);
|
||||||
g_pVideoData = videoBuffer;
|
s_video_buffer_write_ptr = s_video_buffer + size;
|
||||||
|
g_video_buffer_read_ptr = s_video_buffer;
|
||||||
}
|
}
|
||||||
// Copy new video instructions to videoBuffer for future use in rendering the new picture
|
// Copy new video instructions to s_video_buffer for future use in rendering the new picture
|
||||||
memcpy(videoBuffer + size, _uData, len);
|
memcpy(s_video_buffer_write_ptr, _uData, len);
|
||||||
size += len;
|
s_video_buffer_write_ptr += len;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The deterministic_gpu_thread version.
|
||||||
|
static void ReadDataFromFifoOnCPU(u8* _uData, u32 len)
|
||||||
|
{
|
||||||
|
u8 *write_ptr = s_video_buffer_write_ptr;
|
||||||
|
if (len > (s_video_buffer + FIFO_SIZE - write_ptr))
|
||||||
|
{
|
||||||
|
// We can't wrap around while the GPU is working on the data.
|
||||||
|
// This should be very rare due to the reset in SyncGPU.
|
||||||
|
SyncGPU(SYNC_GPU_WRAPAROUND);
|
||||||
|
if (g_video_buffer_pp_read_ptr != g_video_buffer_read_ptr)
|
||||||
|
{
|
||||||
|
PanicAlert("desynced read pointers");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
write_ptr = s_video_buffer_write_ptr;
|
||||||
|
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
|
||||||
|
if (len > FIFO_SIZE - size)
|
||||||
|
{
|
||||||
|
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
memcpy(write_ptr, _uData, len);
|
||||||
|
OpcodeDecoder_Preprocess(write_ptr + len);
|
||||||
|
// This would have to be locked if the GPU thread didn't spin.
|
||||||
|
s_video_buffer_write_ptr = write_ptr + len;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ResetVideoBuffer()
|
void ResetVideoBuffer()
|
||||||
{
|
{
|
||||||
g_pVideoData = videoBuffer;
|
g_video_buffer_read_ptr = s_video_buffer;
|
||||||
size = 0;
|
s_video_buffer_write_ptr = s_video_buffer;
|
||||||
|
s_video_buffer_seen_ptr = s_video_buffer;
|
||||||
|
g_video_buffer_pp_read_ptr = s_video_buffer;
|
||||||
|
s_fifo_aux_write_ptr = s_fifo_aux_data;
|
||||||
|
s_fifo_aux_read_ptr = s_fifo_aux_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -148,53 +279,75 @@ void RunGpuLoop()
|
||||||
g_video_backend->PeekMessages();
|
g_video_backend->PeekMessages();
|
||||||
|
|
||||||
VideoFifo_CheckAsyncRequest();
|
VideoFifo_CheckAsyncRequest();
|
||||||
|
if (g_use_deterministic_gpu_thread)
|
||||||
CommandProcessor::SetCPStatusFromGPU();
|
|
||||||
|
|
||||||
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
|
||||||
|
|
||||||
// check if we are able to run this buffer
|
|
||||||
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
|
||||||
{
|
{
|
||||||
fifo.isGpuReadingData = true;
|
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
|
||||||
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
|
u8* seen_ptr = s_video_buffer_seen_ptr;
|
||||||
|
u8* write_ptr = s_video_buffer_write_ptr;
|
||||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
|
// See comment in SyncGPU
|
||||||
|
if (write_ptr > seen_ptr)
|
||||||
{
|
{
|
||||||
u32 readPtr = fifo.CPReadPointer;
|
OpcodeDecoder_Run(write_ptr);
|
||||||
u8 *uData = Memory::GetPointer(readPtr);
|
|
||||||
|
|
||||||
if (readPtr == fifo.CPEnd)
|
{
|
||||||
readPtr = fifo.CPBase;
|
std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
|
||||||
else
|
s_video_buffer_seen_ptr = write_ptr;
|
||||||
readPtr += 32;
|
s_video_buffer_cond.notify_all();
|
||||||
|
}
|
||||||
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
|
|
||||||
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
|
|
||||||
|
|
||||||
ReadDataFromFifo(uData, 32);
|
|
||||||
|
|
||||||
cyclesExecuted = OpcodeDecoder_Run(GetVideoBufferEndPtr());
|
|
||||||
|
|
||||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
|
|
||||||
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
|
|
||||||
|
|
||||||
Common::AtomicStore(fifo.CPReadPointer, readPtr);
|
|
||||||
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
|
|
||||||
if ((GetVideoBufferEndPtr() - g_pVideoData) == 0)
|
|
||||||
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
CommandProcessor::SetCPStatusFromGPU();
|
CommandProcessor::SetCPStatusFromGPU();
|
||||||
|
|
||||||
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
|
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
||||||
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
|
|
||||||
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
|
|
||||||
VideoFifo_CheckAsyncRequest();
|
|
||||||
CommandProcessor::isPossibleWaitingSetDrawDone = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
fifo.isGpuReadingData = false;
|
// check if we are able to run this buffer
|
||||||
|
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||||
|
{
|
||||||
|
fifo.isGpuReadingData = true;
|
||||||
|
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
|
||||||
|
|
||||||
|
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
|
||||||
|
{
|
||||||
|
u32 readPtr = fifo.CPReadPointer;
|
||||||
|
u8 *uData = Memory::GetPointer(readPtr);
|
||||||
|
|
||||||
|
if (readPtr == fifo.CPEnd)
|
||||||
|
readPtr = fifo.CPBase;
|
||||||
|
else
|
||||||
|
readPtr += 32;
|
||||||
|
|
||||||
|
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
|
||||||
|
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
|
||||||
|
|
||||||
|
ReadDataFromFifo(uData, 32);
|
||||||
|
|
||||||
|
u8* write_ptr = s_video_buffer_write_ptr;
|
||||||
|
|
||||||
|
cyclesExecuted = OpcodeDecoder_Run(write_ptr);
|
||||||
|
|
||||||
|
|
||||||
|
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
|
||||||
|
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
|
||||||
|
|
||||||
|
Common::AtomicStore(fifo.CPReadPointer, readPtr);
|
||||||
|
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
|
||||||
|
if ((write_ptr - g_video_buffer_read_ptr) == 0)
|
||||||
|
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
|
||||||
|
}
|
||||||
|
|
||||||
|
CommandProcessor::SetCPStatusFromGPU();
|
||||||
|
|
||||||
|
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
|
||||||
|
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
|
||||||
|
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
|
||||||
|
VideoFifo_CheckAsyncRequest();
|
||||||
|
CommandProcessor::isPossibleWaitingSetDrawDone = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
fifo.isGpuReadingData = false;
|
||||||
|
}
|
||||||
|
|
||||||
if (EmuRunningState)
|
if (EmuRunningState)
|
||||||
{
|
{
|
||||||
|
@ -217,6 +370,8 @@ void RunGpuLoop()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// wake up SyncGPU if we were interrupted
|
||||||
|
s_video_buffer_cond.notify_all();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -228,16 +383,27 @@ bool AtBreakpoint()
|
||||||
|
|
||||||
void RunGpu()
|
void RunGpu()
|
||||||
{
|
{
|
||||||
|
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread &&
|
||||||
|
!g_use_deterministic_gpu_thread)
|
||||||
|
return;
|
||||||
|
|
||||||
SCPFifoStruct &fifo = CommandProcessor::fifo;
|
SCPFifoStruct &fifo = CommandProcessor::fifo;
|
||||||
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
|
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
|
||||||
{
|
{
|
||||||
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
|
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
|
||||||
|
|
||||||
FPURoundMode::SaveSIMDState();
|
if (g_use_deterministic_gpu_thread)
|
||||||
FPURoundMode::LoadDefaultSIMDState();
|
{
|
||||||
ReadDataFromFifo(uData, 32);
|
ReadDataFromFifoOnCPU(uData, 32);
|
||||||
OpcodeDecoder_Run(GetVideoBufferEndPtr());
|
}
|
||||||
FPURoundMode::LoadSIMDState();
|
else
|
||||||
|
{
|
||||||
|
FPURoundMode::SaveSIMDState();
|
||||||
|
FPURoundMode::LoadDefaultSIMDState();
|
||||||
|
ReadDataFromFifo(uData, 32);
|
||||||
|
OpcodeDecoder_Run(s_video_buffer_write_ptr);
|
||||||
|
FPURoundMode::LoadSIMDState();
|
||||||
|
}
|
||||||
|
|
||||||
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
|
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
|
||||||
|
|
||||||
|
@ -250,3 +416,45 @@ void RunGpu()
|
||||||
}
|
}
|
||||||
CommandProcessor::SetCPStatusFromGPU();
|
CommandProcessor::SetCPStatusFromGPU();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Fifo_UpdateWantDeterminism(bool want)
|
||||||
|
{
|
||||||
|
// We are paused (or not running at all yet) and have m_csHWVidOccupied, so
|
||||||
|
// it should be safe to change this.
|
||||||
|
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
|
||||||
|
bool gpu_thread;
|
||||||
|
switch (param.m_GPUDeterminismMode)
|
||||||
|
{
|
||||||
|
case GPU_DETERMINISM_AUTO:
|
||||||
|
gpu_thread = want;
|
||||||
|
|
||||||
|
// Hack: For now movies are an exception to this being on (but not
|
||||||
|
// to wanting determinism in general). Once vertex arrays are
|
||||||
|
// fixed, there should be no reason to want this off for movies by
|
||||||
|
// default, so this can be removed.
|
||||||
|
if (!NetPlay::IsNetPlayRunning())
|
||||||
|
gpu_thread = false;
|
||||||
|
|
||||||
|
break;
|
||||||
|
case GPU_DETERMINISM_NONE:
|
||||||
|
gpu_thread = false;
|
||||||
|
break;
|
||||||
|
case GPU_DETERMINISM_FAKE_COMPLETION:
|
||||||
|
gpu_thread = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
gpu_thread = gpu_thread && SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
|
||||||
|
|
||||||
|
if (g_use_deterministic_gpu_thread != gpu_thread)
|
||||||
|
{
|
||||||
|
g_use_deterministic_gpu_thread = gpu_thread;
|
||||||
|
if (gpu_thread)
|
||||||
|
{
|
||||||
|
// These haven't been updated in non-deterministic mode.
|
||||||
|
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
|
||||||
|
CopyPreprocessCPStateFromMain();
|
||||||
|
VertexLoaderManager::MarkAllDirty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -13,6 +13,11 @@ class PointerWrap;
|
||||||
|
|
||||||
extern bool g_bSkipCurrentFrame;
|
extern bool g_bSkipCurrentFrame;
|
||||||
|
|
||||||
|
// This could be in SCoreStartupParameter, but it depends on multiple settings
|
||||||
|
// and can change at runtime.
|
||||||
|
extern bool g_use_deterministic_gpu_thread;
|
||||||
|
extern std::atomic<u8*> g_video_buffer_write_ptr_xthread;
|
||||||
|
extern u8* g_video_buffer_pp_read_ptr;
|
||||||
|
|
||||||
void Fifo_Init();
|
void Fifo_Init();
|
||||||
void Fifo_Shutdown();
|
void Fifo_Shutdown();
|
||||||
|
@ -22,8 +27,23 @@ u8* GetVideoBufferEndPtr();
|
||||||
|
|
||||||
void Fifo_DoState(PointerWrap &f);
|
void Fifo_DoState(PointerWrap &f);
|
||||||
void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock);
|
void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock);
|
||||||
|
void Fifo_UpdateWantDeterminism(bool want);
|
||||||
|
|
||||||
void ReadDataFromFifo(u8* _uData, u32 len);
|
// Used for diagnostics.
|
||||||
|
enum SyncGPUReason {
|
||||||
|
SYNC_GPU_NONE,
|
||||||
|
SYNC_GPU_OTHER,
|
||||||
|
SYNC_GPU_WRAPAROUND,
|
||||||
|
SYNC_GPU_EFB_POKE,
|
||||||
|
SYNC_GPU_PERFQUERY,
|
||||||
|
SYNC_GPU_SWAP,
|
||||||
|
SYNC_GPU_AUX_SPACE,
|
||||||
|
};
|
||||||
|
// In g_use_deterministic_gpu_thread mode, waits for the GPU to be done with pending work.
|
||||||
|
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
|
||||||
|
|
||||||
|
void PushFifoAuxBuffer(void* ptr, size_t size);
|
||||||
|
void* PopFifoAuxBuffer(size_t size);
|
||||||
|
|
||||||
void RunGpu();
|
void RunGpu();
|
||||||
void RunGpuLoop();
|
void RunGpuLoop();
|
||||||
|
|
|
@ -118,6 +118,7 @@ void VideoBackendHardware::Video_EndField()
|
||||||
{
|
{
|
||||||
if (s_BackendInitialized)
|
if (s_BackendInitialized)
|
||||||
{
|
{
|
||||||
|
SyncGPU(SYNC_GPU_SWAP);
|
||||||
s_swapRequested.Set();
|
s_swapRequested.Set();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -153,6 +154,8 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32
|
||||||
{
|
{
|
||||||
if (s_BackendInitialized && g_ActiveConfig.bEFBAccessEnable)
|
if (s_BackendInitialized && g_ActiveConfig.bEFBAccessEnable)
|
||||||
{
|
{
|
||||||
|
SyncGPU(SYNC_GPU_EFB_POKE);
|
||||||
|
|
||||||
s_accessEFBArgs.type = type;
|
s_accessEFBArgs.type = type;
|
||||||
s_accessEFBArgs.x = x;
|
s_accessEFBArgs.x = x;
|
||||||
s_accessEFBArgs.y = y;
|
s_accessEFBArgs.y = y;
|
||||||
|
@ -194,6 +197,8 @@ u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SyncGPU(SYNC_GPU_PERFQUERY);
|
||||||
|
|
||||||
// TODO: Is this check sane?
|
// TODO: Is this check sane?
|
||||||
if (!g_perf_query->IsFlushed())
|
if (!g_perf_query->IsFlushed())
|
||||||
{
|
{
|
||||||
|
@ -304,3 +309,8 @@ void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)
|
||||||
CommandProcessor::RegisterMMIO(mmio, base);
|
CommandProcessor::RegisterMMIO(mmio, base);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VideoBackendHardware::UpdateWantDeterminism(bool want)
|
||||||
|
{
|
||||||
|
Fifo_UpdateWantDeterminism(want);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/Hash.h"
|
||||||
|
|
||||||
// m_components
|
// m_components
|
||||||
enum
|
enum
|
||||||
|
@ -87,6 +87,20 @@ struct PortableVertexDeclaration
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
namespace std
|
||||||
|
{
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct hash<PortableVertexDeclaration>
|
||||||
|
{
|
||||||
|
size_t operator()(const PortableVertexDeclaration& decl) const
|
||||||
|
{
|
||||||
|
return HashFletcher((u8 *) &decl, sizeof(decl));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// The implementation of this class is specific for GL/DX, so NativeVertexFormat.cpp
|
// The implementation of this class is specific for GL/DX, so NativeVertexFormat.cpp
|
||||||
// is in the respective backend, not here in VideoCommon.
|
// is in the respective backend, not here in VideoCommon.
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "VideoCommon/DataReader.h"
|
#include "VideoCommon/DataReader.h"
|
||||||
#include "VideoCommon/Fifo.h"
|
#include "VideoCommon/Fifo.h"
|
||||||
#include "VideoCommon/OpcodeDecoding.h"
|
#include "VideoCommon/OpcodeDecoding.h"
|
||||||
|
#include "VideoCommon/PixelEngine.h"
|
||||||
#include "VideoCommon/Statistics.h"
|
#include "VideoCommon/Statistics.h"
|
||||||
#include "VideoCommon/VertexLoaderManager.h"
|
#include "VideoCommon/VertexLoaderManager.h"
|
||||||
#include "VideoCommon/VideoCommon.h"
|
#include "VideoCommon/VideoCommon.h"
|
||||||
|
@ -31,25 +32,29 @@
|
||||||
#include "VideoCommon/XFMemory.h"
|
#include "VideoCommon/XFMemory.h"
|
||||||
|
|
||||||
|
|
||||||
u8* g_pVideoData = nullptr;
|
|
||||||
bool g_bRecordFifoData = false;
|
bool g_bRecordFifoData = false;
|
||||||
|
|
||||||
static u32 InterpretDisplayList(u32 address, u32 size)
|
static u32 InterpretDisplayList(u32 address, u32 size)
|
||||||
{
|
{
|
||||||
u8* old_pVideoData = g_pVideoData;
|
u8* old_pVideoData = g_video_buffer_read_ptr;
|
||||||
u8* startAddress = Memory::GetPointer(address);
|
u8* startAddress;
|
||||||
|
|
||||||
|
if (g_use_deterministic_gpu_thread)
|
||||||
|
startAddress = (u8*) PopFifoAuxBuffer(size);
|
||||||
|
else
|
||||||
|
startAddress = Memory::GetPointer(address);
|
||||||
|
|
||||||
u32 cycles = 0;
|
u32 cycles = 0;
|
||||||
|
|
||||||
// Avoid the crash if Memory::GetPointer failed ..
|
// Avoid the crash if Memory::GetPointer failed ..
|
||||||
if (startAddress != nullptr)
|
if (startAddress != nullptr)
|
||||||
{
|
{
|
||||||
g_pVideoData = startAddress;
|
g_video_buffer_read_ptr = startAddress;
|
||||||
|
|
||||||
// temporarily swap dl and non-dl (small "hack" for the stats)
|
// temporarily swap dl and non-dl (small "hack" for the stats)
|
||||||
Statistics::SwapDL();
|
Statistics::SwapDL();
|
||||||
|
|
||||||
u8 *end = g_pVideoData + size;
|
u8 *end = g_video_buffer_read_ptr + size;
|
||||||
cycles = OpcodeDecoder_Run(end);
|
cycles = OpcodeDecoder_Run(end);
|
||||||
INCSTAT(stats.thisFrame.numDListsCalled);
|
INCSTAT(stats.thisFrame.numDListsCalled);
|
||||||
|
|
||||||
|
@ -58,16 +63,34 @@ static u32 InterpretDisplayList(u32 address, u32 size)
|
||||||
}
|
}
|
||||||
|
|
||||||
// reset to the old pointer
|
// reset to the old pointer
|
||||||
g_pVideoData = old_pVideoData;
|
g_video_buffer_read_ptr = old_pVideoData;
|
||||||
|
|
||||||
return cycles;
|
return cycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void InterpretDisplayListPreprocess(u32 address, u32 size)
|
||||||
|
{
|
||||||
|
u8* old_read_ptr = g_video_buffer_pp_read_ptr;
|
||||||
|
u8* startAddress = Memory::GetPointer(address);
|
||||||
|
|
||||||
|
PushFifoAuxBuffer(startAddress, size);
|
||||||
|
|
||||||
|
if (startAddress != nullptr)
|
||||||
|
{
|
||||||
|
g_video_buffer_pp_read_ptr = startAddress;
|
||||||
|
|
||||||
|
u8 *end = startAddress + size;
|
||||||
|
OpcodeDecoder_Preprocess(end);
|
||||||
|
}
|
||||||
|
|
||||||
|
g_video_buffer_pp_read_ptr = old_read_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
||||||
{
|
{
|
||||||
// TODO(Omega): Maybe dump FIFO to file on this error
|
// TODO(Omega): Maybe dump FIFO to file on this error
|
||||||
std::string temp = StringFromFormat(
|
std::string temp = StringFromFormat(
|
||||||
"GFX FIFO: Unknown Opcode (0x%x @ %p).\n"
|
"GFX FIFO: Unknown Opcode (0x%x @ %p, preprocessing=%s).\n"
|
||||||
"This means one of the following:\n"
|
"This means one of the following:\n"
|
||||||
"* The emulated GPU got desynced, disabling dual core can help\n"
|
"* The emulated GPU got desynced, disabling dual core can help\n"
|
||||||
"* Command stream corrupted by some spurious memory bug\n"
|
"* Command stream corrupted by some spurious memory bug\n"
|
||||||
|
@ -75,7 +98,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
||||||
"* Some other sort of bug\n\n"
|
"* Some other sort of bug\n\n"
|
||||||
"Dolphin will now likely crash or hang. Enjoy." ,
|
"Dolphin will now likely crash or hang. Enjoy." ,
|
||||||
cmd_byte,
|
cmd_byte,
|
||||||
buffer);
|
buffer,
|
||||||
|
preprocess ? "yes" : "no");
|
||||||
Host_SysMessage(temp.c_str());
|
Host_SysMessage(temp.c_str());
|
||||||
INFO_LOG(VIDEO, "%s", temp.c_str());
|
INFO_LOG(VIDEO, "%s", temp.c_str());
|
||||||
{
|
{
|
||||||
|
@ -105,14 +129,16 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool is_preprocess, u8** bufp>
|
||||||
static u32 Decode(u8* end)
|
static u32 Decode(u8* end)
|
||||||
{
|
{
|
||||||
u8 *opcodeStart = g_pVideoData;
|
u8 *opcodeStart = *bufp;
|
||||||
if (g_pVideoData == end)
|
if (*bufp == end)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
u8 cmd_byte = DataReadU8();
|
u8 cmd_byte = DataRead<u8>(bufp);
|
||||||
u32 cycles;
|
u32 cycles;
|
||||||
|
int refarray;
|
||||||
switch (cmd_byte)
|
switch (cmd_byte)
|
||||||
{
|
{
|
||||||
case GX_NOP:
|
case GX_NOP:
|
||||||
|
@ -121,64 +147,72 @@ static u32 Decode(u8* end)
|
||||||
|
|
||||||
case GX_LOAD_CP_REG: //0x08
|
case GX_LOAD_CP_REG: //0x08
|
||||||
{
|
{
|
||||||
if (end - g_pVideoData < 1 + 4)
|
if (end - *bufp < 1 + 4)
|
||||||
return 0;
|
return 0;
|
||||||
cycles = 12;
|
cycles = 12;
|
||||||
u8 sub_cmd = DataReadU8();
|
u8 sub_cmd = DataRead<u8>(bufp);
|
||||||
u32 value = DataReadU32();
|
u32 value = DataRead<u32>(bufp);
|
||||||
LoadCPReg(sub_cmd, value);
|
LoadCPReg(sub_cmd, value, is_preprocess);
|
||||||
INCSTAT(stats.thisFrame.numCPLoads);
|
if (!is_preprocess)
|
||||||
|
INCSTAT(stats.thisFrame.numCPLoads);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_LOAD_XF_REG:
|
case GX_LOAD_XF_REG:
|
||||||
{
|
{
|
||||||
if (end - g_pVideoData < 4)
|
if (end - *bufp < 4)
|
||||||
return 0;
|
return 0;
|
||||||
u32 Cmd2 = DataReadU32();
|
u32 Cmd2 = DataRead<u32>(bufp);
|
||||||
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||||
if ((size_t) (end - g_pVideoData) < transfer_size * sizeof(u32))
|
if ((size_t) (end - *bufp) < transfer_size * sizeof(u32))
|
||||||
return 0;
|
return 0;
|
||||||
cycles = 18 + 6 * transfer_size;
|
cycles = 18 + 6 * transfer_size;
|
||||||
u32 xf_address = Cmd2 & 0xFFFF;
|
if (!is_preprocess)
|
||||||
LoadXFReg(transfer_size, xf_address);
|
{
|
||||||
|
u32 xf_address = Cmd2 & 0xFFFF;
|
||||||
|
LoadXFReg(transfer_size, xf_address);
|
||||||
|
|
||||||
INCSTAT(stats.thisFrame.numXFLoads);
|
INCSTAT(stats.thisFrame.numXFLoads);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*bufp += transfer_size * sizeof(u32);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_LOAD_INDX_A: //used for position matrices
|
case GX_LOAD_INDX_A: //used for position matrices
|
||||||
if (end - g_pVideoData < 4)
|
refarray = 0xC;
|
||||||
return 0;
|
goto load_indx;
|
||||||
cycles = 6;
|
|
||||||
LoadIndexedXF(DataReadU32(), 0xC);
|
|
||||||
break;
|
|
||||||
case GX_LOAD_INDX_B: //used for normal matrices
|
case GX_LOAD_INDX_B: //used for normal matrices
|
||||||
if (end - g_pVideoData < 4)
|
refarray = 0xD;
|
||||||
return 0;
|
goto load_indx;
|
||||||
cycles = 6;
|
|
||||||
LoadIndexedXF(DataReadU32(), 0xD);
|
|
||||||
break;
|
|
||||||
case GX_LOAD_INDX_C: //used for postmatrices
|
case GX_LOAD_INDX_C: //used for postmatrices
|
||||||
if (end - g_pVideoData < 4)
|
refarray = 0xE;
|
||||||
return 0;
|
goto load_indx;
|
||||||
cycles = 6;
|
|
||||||
LoadIndexedXF(DataReadU32(), 0xE);
|
|
||||||
break;
|
|
||||||
case GX_LOAD_INDX_D: //used for lights
|
case GX_LOAD_INDX_D: //used for lights
|
||||||
if (end - g_pVideoData < 4)
|
refarray = 0xF;
|
||||||
|
goto load_indx;
|
||||||
|
load_indx:
|
||||||
|
if (end - *bufp < 4)
|
||||||
return 0;
|
return 0;
|
||||||
cycles = 6;
|
cycles = 6;
|
||||||
LoadIndexedXF(DataReadU32(), 0xF);
|
if (is_preprocess)
|
||||||
|
PreprocessIndexedXF(DataRead<u32>(bufp), refarray);
|
||||||
|
else
|
||||||
|
LoadIndexedXF(DataRead<u32>(bufp), refarray);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GX_CMD_CALL_DL:
|
case GX_CMD_CALL_DL:
|
||||||
{
|
{
|
||||||
if (end - g_pVideoData < 8)
|
if (end - *bufp < 8)
|
||||||
return 0;
|
return 0;
|
||||||
u32 address = DataReadU32();
|
u32 address = DataRead<u32>(bufp);
|
||||||
u32 count = DataReadU32();
|
u32 count = DataRead<u32>(bufp);
|
||||||
cycles = 6 + InterpretDisplayList(address, count);
|
if (is_preprocess)
|
||||||
|
InterpretDisplayListPreprocess(address, count);
|
||||||
|
else
|
||||||
|
cycles = 6 + InterpretDisplayList(address, count);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -196,12 +230,19 @@ static u32 Decode(u8* end)
|
||||||
// In skipped_frame case: We have to let BP writes through because they set
|
// In skipped_frame case: We have to let BP writes through because they set
|
||||||
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
|
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
|
||||||
{
|
{
|
||||||
if (end - g_pVideoData < 4)
|
if (end - *bufp < 4)
|
||||||
return 0;
|
return 0;
|
||||||
cycles = 12;
|
cycles = 12;
|
||||||
u32 bp_cmd = DataReadU32();
|
u32 bp_cmd = DataRead<u32>(bufp);
|
||||||
LoadBPReg(bp_cmd);
|
if (is_preprocess)
|
||||||
INCSTAT(stats.thisFrame.numBPLoads);
|
{
|
||||||
|
LoadBPRegPreprocess(bp_cmd);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
LoadBPReg(bp_cmd);
|
||||||
|
INCSTAT(stats.thisFrame.numBPLoads);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -211,38 +252,48 @@ static u32 Decode(u8* end)
|
||||||
{
|
{
|
||||||
cycles = 1600;
|
cycles = 1600;
|
||||||
// load vertices
|
// load vertices
|
||||||
if (end - g_pVideoData < 2)
|
if (end - *bufp < 2)
|
||||||
return 0;
|
return 0;
|
||||||
u16 numVertices = DataReadU16();
|
u16 num_vertices = DataRead<u16>(bufp);
|
||||||
|
|
||||||
if (!VertexLoaderManager::RunVertices(
|
if (is_preprocess)
|
||||||
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
|
||||||
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
|
||||||
numVertices,
|
|
||||||
end - g_pVideoData,
|
|
||||||
g_bSkipCurrentFrame))
|
|
||||||
{
|
{
|
||||||
return 0;
|
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
|
||||||
|
if ((size_t) (end - *bufp) < size)
|
||||||
|
return 0;
|
||||||
|
*bufp += size;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (!VertexLoaderManager::RunVertices(
|
||||||
|
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||||
|
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||||
|
num_vertices,
|
||||||
|
end - *bufp,
|
||||||
|
g_bSkipCurrentFrame))
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
UnknownOpcode(cmd_byte, opcodeStart, false);
|
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
|
||||||
cycles = 1;
|
cycles = 1;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Display lists get added directly into the FIFO stream
|
// Display lists get added directly into the FIFO stream
|
||||||
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
||||||
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart));
|
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart));
|
||||||
|
|
||||||
return cycles;
|
// In is_preprocess mode, we don't actually care about cycles, at least for
|
||||||
|
// now... make sure the compiler realizes that.
|
||||||
|
return is_preprocess ? 1 : cycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
void OpcodeDecoder_Init()
|
void OpcodeDecoder_Init()
|
||||||
{
|
{
|
||||||
g_pVideoData = GetVideoBufferStartPtr();
|
g_video_buffer_read_ptr = GetVideoBufferStartPtr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -255,14 +306,28 @@ u32 OpcodeDecoder_Run(u8* end)
|
||||||
u32 totalCycles = 0;
|
u32 totalCycles = 0;
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
u8* old = g_pVideoData;
|
u8* old = g_video_buffer_read_ptr;
|
||||||
u32 cycles = Decode(end);
|
u32 cycles = Decode</*is_preprocess*/ false, &g_video_buffer_read_ptr>(end);
|
||||||
if (cycles == 0)
|
if (cycles == 0)
|
||||||
{
|
{
|
||||||
g_pVideoData = old;
|
g_video_buffer_read_ptr = old;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
totalCycles += cycles;
|
totalCycles += cycles;
|
||||||
}
|
}
|
||||||
return totalCycles;
|
return totalCycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OpcodeDecoder_Preprocess(u8 *end)
|
||||||
|
{
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
u8* old = g_video_buffer_pp_read_ptr;
|
||||||
|
u32 cycles = Decode</*is_preprocess*/ true, &g_video_buffer_pp_read_ptr>(end);
|
||||||
|
if (cycles == 0)
|
||||||
|
{
|
||||||
|
g_video_buffer_pp_read_ptr = old;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -39,3 +39,4 @@ extern bool g_bRecordFifoData;
|
||||||
void OpcodeDecoder_Init();
|
void OpcodeDecoder_Init();
|
||||||
void OpcodeDecoder_Shutdown();
|
void OpcodeDecoder_Shutdown();
|
||||||
u32 OpcodeDecoder_Run(u8* end);
|
u32 OpcodeDecoder_Run(u8* end);
|
||||||
|
void OpcodeDecoder_Preprocess(u8* write_ptr);
|
||||||
|
|
|
@ -33,14 +33,11 @@
|
||||||
|
|
||||||
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
|
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
|
||||||
// when decoding each vertex.
|
// when decoding each vertex.
|
||||||
static u8 s_curposmtx = MatrixIndexA.PosNormalMtxIdx;
|
static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
|
||||||
static u8 s_curtexmtx[8];
|
static u8 s_curtexmtx[8];
|
||||||
static int s_texmtxwrite = 0;
|
static int s_texmtxwrite = 0;
|
||||||
static int s_texmtxread = 0;
|
static int s_texmtxread = 0;
|
||||||
|
|
||||||
static int loop_counter;
|
|
||||||
|
|
||||||
|
|
||||||
// Vertex loaders read these. Although the scale ones should be baked into the shader.
|
// Vertex loaders read these. Although the scale ones should be baked into the shader.
|
||||||
int tcIndex;
|
int tcIndex;
|
||||||
int colIndex;
|
int colIndex;
|
||||||
|
@ -90,7 +87,7 @@ static void LOADERDECL PosMtx_Write()
|
||||||
DataWrite<u8>(0);
|
DataWrite<u8>(0);
|
||||||
|
|
||||||
// Resetting current position matrix to default is needed for bbox to behave
|
// Resetting current position matrix to default is needed for bbox to behave
|
||||||
s_curposmtx = (u8) MatrixIndexA.PosNormalMtxIdx;
|
s_curposmtx = (u8) g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void LOADERDECL UpdateBoundingBoxPrepare()
|
static void LOADERDECL UpdateBoundingBoxPrepare()
|
||||||
|
@ -548,7 +545,7 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
||||||
m_compiledCode = nullptr;
|
m_compiledCode = nullptr;
|
||||||
m_numLoadedVertices = 0;
|
m_numLoadedVertices = 0;
|
||||||
m_VertexSize = 0;
|
m_VertexSize = 0;
|
||||||
loop_counter = 0;
|
m_native_vertex_format = nullptr;
|
||||||
VertexLoader_Normal::Init();
|
VertexLoader_Normal::Init();
|
||||||
VertexLoader_Position::Init();
|
VertexLoader_Position::Init();
|
||||||
VertexLoader_TextCoord::Init();
|
VertexLoader_TextCoord::Init();
|
||||||
|
@ -584,8 +581,11 @@ void VertexLoader::CompileVertexTranslator()
|
||||||
PanicAlert("Trying to recompile a vertex translator");
|
PanicAlert("Trying to recompile a vertex translator");
|
||||||
|
|
||||||
m_compiledCode = GetCodePtr();
|
m_compiledCode = GetCodePtr();
|
||||||
// We don't use any callee saved registers or anything but RAX.
|
// We only use RAX (caller saved) and RBX (callee saved).
|
||||||
ABI_PushRegistersAndAdjustStack(0, 8);
|
ABI_PushRegistersAndAdjustStack(1 << RBX, 8);
|
||||||
|
|
||||||
|
// save count
|
||||||
|
MOV(64, R(RBX), R(ABI_PARAM1));
|
||||||
|
|
||||||
// Start loop here
|
// Start loop here
|
||||||
const u8 *loop_start = GetCodePtr();
|
const u8 *loop_start = GetCodePtr();
|
||||||
|
@ -842,11 +842,10 @@ void VertexLoader::CompileVertexTranslator()
|
||||||
|
|
||||||
#ifdef USE_VERTEX_LOADER_JIT
|
#ifdef USE_VERTEX_LOADER_JIT
|
||||||
// End loop here
|
// End loop here
|
||||||
MOV(64, R(RAX), Imm64((u64)&loop_counter));
|
SUB(64, R(RBX), Imm8(1));
|
||||||
SUB(32, MatR(RAX), Imm8(1));
|
|
||||||
|
|
||||||
J_CC(CC_NZ, loop_start);
|
J_CC(CC_NZ, loop_start);
|
||||||
ABI_PopRegistersAndAdjustStack(0, 8);
|
ABI_PopRegistersAndAdjustStack(1 << RBX, 8);
|
||||||
RET();
|
RET();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -912,8 +911,7 @@ void VertexLoader::ConvertVertices ( int count )
|
||||||
#ifdef USE_VERTEX_LOADER_JIT
|
#ifdef USE_VERTEX_LOADER_JIT
|
||||||
if (count > 0)
|
if (count > 0)
|
||||||
{
|
{
|
||||||
loop_counter = count;
|
((void (*)(int))(void*)m_compiledCode)(count);
|
||||||
((void (*)())(void*)m_compiledCode)();
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for (int s = 0; s < count; s++)
|
for (int s = 0; s < count; s++)
|
||||||
|
@ -1035,3 +1033,22 @@ void VertexLoader::AppendToString(std::string *dest) const
|
||||||
}
|
}
|
||||||
dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices));
|
dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NativeVertexFormat* VertexLoader::GetNativeVertexFormat()
|
||||||
|
{
|
||||||
|
if (m_native_vertex_format)
|
||||||
|
return m_native_vertex_format;
|
||||||
|
auto& native = s_native_vertex_map[m_native_vtx_decl];
|
||||||
|
if (!native)
|
||||||
|
{
|
||||||
|
auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat();
|
||||||
|
native = std::unique_ptr<NativeVertexFormat>(raw_pointer);
|
||||||
|
native->Initialize(m_native_vtx_decl);
|
||||||
|
native->m_components = m_native_components;
|
||||||
|
}
|
||||||
|
m_native_vertex_format = native.get();
|
||||||
|
return native.get();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> VertexLoader::s_native_vertex_map;
|
||||||
|
|
|
@ -8,7 +8,9 @@
|
||||||
// Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt
|
// Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "Common/x64Emitter.h"
|
#include "Common/x64Emitter.h"
|
||||||
|
@ -114,6 +116,9 @@ public:
|
||||||
void AppendToString(std::string *dest) const;
|
void AppendToString(std::string *dest) const;
|
||||||
int GetNumLoadedVerts() const { return m_numLoadedVertices; }
|
int GetNumLoadedVerts() const { return m_numLoadedVertices; }
|
||||||
|
|
||||||
|
NativeVertexFormat* GetNativeVertexFormat();
|
||||||
|
static void ClearNativeVertexFormatCache() { s_native_vertex_map.clear(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
|
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
|
||||||
|
|
||||||
|
@ -135,6 +140,9 @@ private:
|
||||||
|
|
||||||
int m_numLoadedVertices;
|
int m_numLoadedVertices;
|
||||||
|
|
||||||
|
NativeVertexFormat* m_native_vertex_format;
|
||||||
|
static std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> s_native_vertex_map;
|
||||||
|
|
||||||
void SetVAT(const VAT& vat);
|
void SetVAT(const VAT& vat);
|
||||||
|
|
||||||
void CompileVertexTranslator();
|
void CompileVertexTranslator();
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -20,13 +21,8 @@
|
||||||
#include "VideoCommon/VertexShaderManager.h"
|
#include "VideoCommon/VertexShaderManager.h"
|
||||||
#include "VideoCommon/VideoCommon.h"
|
#include "VideoCommon/VideoCommon.h"
|
||||||
|
|
||||||
static int s_attr_dirty; // bitfield
|
|
||||||
|
|
||||||
static NativeVertexFormat* s_current_vtx_fmt;
|
static NativeVertexFormat* s_current_vtx_fmt;
|
||||||
|
|
||||||
typedef std::pair<VertexLoader*, NativeVertexFormat*> VertexLoaderCacheItem;
|
|
||||||
static VertexLoaderCacheItem s_VertexLoaders[8];
|
|
||||||
|
|
||||||
namespace std
|
namespace std
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -41,35 +37,30 @@ struct hash<VertexLoaderUID>
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef std::unordered_map<VertexLoaderUID, VertexLoaderCacheItem> VertexLoaderMap;
|
typedef std::unordered_map<VertexLoaderUID, std::unique_ptr<VertexLoader>> VertexLoaderMap;
|
||||||
typedef std::map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> NativeVertexLoaderMap;
|
|
||||||
|
|
||||||
namespace VertexLoaderManager
|
namespace VertexLoaderManager
|
||||||
{
|
{
|
||||||
|
|
||||||
static VertexLoaderMap s_VertexLoaderMap;
|
static std::mutex s_vertex_loader_map_lock;
|
||||||
static NativeVertexLoaderMap s_native_vertex_map;
|
static VertexLoaderMap s_vertex_loader_map;
|
||||||
// TODO - change into array of pointers. Keep a map of all seen so far.
|
// TODO - change into array of pointers. Keep a map of all seen so far.
|
||||||
|
|
||||||
void Init()
|
void Init()
|
||||||
{
|
{
|
||||||
MarkAllDirty();
|
MarkAllDirty();
|
||||||
for (auto& map_entry : s_VertexLoaders)
|
for (auto& map_entry : g_main_cp_state.vertex_loaders)
|
||||||
{
|
map_entry = nullptr;
|
||||||
map_entry.first = nullptr;
|
for (auto& map_entry : g_preprocess_cp_state.vertex_loaders)
|
||||||
map_entry.second = nullptr;
|
map_entry = nullptr;
|
||||||
}
|
|
||||||
RecomputeCachedArraybases();
|
RecomputeCachedArraybases();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Shutdown()
|
void Shutdown()
|
||||||
{
|
{
|
||||||
for (auto& map_entry : s_VertexLoaderMap)
|
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||||
{
|
s_vertex_loader_map.clear();
|
||||||
delete map_entry.second.first;
|
VertexLoader::ClearNativeVertexFormatCache();
|
||||||
}
|
|
||||||
s_VertexLoaderMap.clear();
|
|
||||||
s_native_vertex_map.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
|
@ -87,14 +78,15 @@ struct entry
|
||||||
|
|
||||||
void AppendListToString(std::string *dest)
|
void AppendListToString(std::string *dest)
|
||||||
{
|
{
|
||||||
|
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||||
std::vector<entry> entries;
|
std::vector<entry> entries;
|
||||||
|
|
||||||
size_t total_size = 0;
|
size_t total_size = 0;
|
||||||
for (const auto& map_entry : s_VertexLoaderMap)
|
for (const auto& map_entry : s_vertex_loader_map)
|
||||||
{
|
{
|
||||||
entry e;
|
entry e;
|
||||||
map_entry.second.first->AppendToString(&e.text);
|
map_entry.second->AppendToString(&e.text);
|
||||||
e.num_verts = map_entry.second.first->GetNumLoadedVerts();
|
e.num_verts = map_entry.second->GetNumLoadedVerts();
|
||||||
entries.push_back(e);
|
entries.push_back(e);
|
||||||
total_size += e.text.size() + 1;
|
total_size += e.text.size() + 1;
|
||||||
}
|
}
|
||||||
|
@ -108,57 +100,46 @@ void AppendListToString(std::string *dest)
|
||||||
|
|
||||||
void MarkAllDirty()
|
void MarkAllDirty()
|
||||||
{
|
{
|
||||||
s_attr_dirty = 0xff;
|
g_main_cp_state.attr_dirty = 0xff;
|
||||||
|
g_preprocess_cp_state.attr_dirty = 0xff;
|
||||||
}
|
}
|
||||||
|
|
||||||
static NativeVertexFormat* GetNativeVertexFormat(const PortableVertexDeclaration& format,
|
static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state)
|
||||||
u32 components)
|
|
||||||
{
|
{
|
||||||
auto& native = s_native_vertex_map[format];
|
VertexLoader* loader;
|
||||||
if (!native)
|
if ((state->attr_dirty >> vtx_attr_group) & 1)
|
||||||
{
|
{
|
||||||
auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat();
|
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
|
||||||
native = std::unique_ptr<NativeVertexFormat>(raw_pointer);
|
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||||
native->Initialize(format);
|
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
|
||||||
native->m_components = components;
|
if (iter != s_vertex_loader_map.end())
|
||||||
}
|
|
||||||
return native.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
static VertexLoaderCacheItem RefreshLoader(int vtx_attr_group)
|
|
||||||
{
|
|
||||||
if ((s_attr_dirty >> vtx_attr_group) & 1)
|
|
||||||
{
|
|
||||||
VertexLoaderUID uid(g_VtxDesc, g_VtxAttr[vtx_attr_group]);
|
|
||||||
VertexLoaderMap::iterator iter = s_VertexLoaderMap.find(uid);
|
|
||||||
if (iter != s_VertexLoaderMap.end())
|
|
||||||
{
|
{
|
||||||
s_VertexLoaders[vtx_attr_group] = iter->second;
|
loader = iter->second.get();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VertexLoader* loader = new VertexLoader(g_VtxDesc, g_VtxAttr[vtx_attr_group]);
|
loader = new VertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
|
||||||
|
s_vertex_loader_map[uid] = std::unique_ptr<VertexLoader>(loader);
|
||||||
NativeVertexFormat* vtx_fmt = GetNativeVertexFormat(
|
|
||||||
loader->GetNativeVertexDeclaration(),
|
|
||||||
loader->GetNativeComponents());
|
|
||||||
|
|
||||||
s_VertexLoaderMap[uid] = std::make_pair(loader, vtx_fmt);
|
|
||||||
s_VertexLoaders[vtx_attr_group] = std::make_pair(loader, vtx_fmt);
|
|
||||||
INCSTAT(stats.numVertexLoaders);
|
INCSTAT(stats.numVertexLoaders);
|
||||||
}
|
}
|
||||||
|
state->vertex_loaders[vtx_attr_group] = loader;
|
||||||
|
state->attr_dirty &= ~(1 << vtx_attr_group);
|
||||||
|
} else {
|
||||||
|
loader = state->vertex_loaders[vtx_attr_group];
|
||||||
}
|
}
|
||||||
s_attr_dirty &= ~(1 << vtx_attr_group);
|
return loader;
|
||||||
return s_VertexLoaders[vtx_attr_group];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
|
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
|
||||||
{
|
{
|
||||||
if (!count)
|
if (!count)
|
||||||
return true;
|
return true;
|
||||||
auto loader = RefreshLoader(vtx_attr_group);
|
|
||||||
|
|
||||||
size_t size = count * loader.first->GetVertexSize();
|
CPState* state = &g_main_cp_state;
|
||||||
|
|
||||||
|
VertexLoader* loader = RefreshLoader(vtx_attr_group, state);
|
||||||
|
|
||||||
|
size_t size = count * loader->GetVertexSize();
|
||||||
if (buf_size < size)
|
if (buf_size < size)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -169,15 +150,17 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NativeVertexFormat* native = loader->GetNativeVertexFormat();
|
||||||
|
|
||||||
// If the native vertex format changed, force a flush.
|
// If the native vertex format changed, force a flush.
|
||||||
if (loader.second != s_current_vtx_fmt)
|
if (native != s_current_vtx_fmt)
|
||||||
VertexManager::Flush();
|
VertexManager::Flush();
|
||||||
s_current_vtx_fmt = loader.second;
|
s_current_vtx_fmt = native;
|
||||||
|
|
||||||
VertexManager::PrepareForAdditionalData(primitive, count,
|
VertexManager::PrepareForAdditionalData(primitive, count,
|
||||||
loader.first->GetNativeVertexDeclaration().stride);
|
loader->GetNativeVertexDeclaration().stride);
|
||||||
|
|
||||||
loader.first->RunVertices(g_VtxAttr[vtx_attr_group], primitive, count);
|
loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);
|
||||||
|
|
||||||
IndexGenerator::AddIndices(primitive, count);
|
IndexGenerator::AddIndices(primitive, count);
|
||||||
|
|
||||||
|
@ -186,9 +169,9 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
int GetVertexSize(int vtx_attr_group)
|
int GetVertexSize(int vtx_attr_group, bool preprocess)
|
||||||
{
|
{
|
||||||
return RefreshLoader(vtx_attr_group).first->GetVertexSize();
|
return RefreshLoader(vtx_attr_group, preprocess ? &g_preprocess_cp_state : &g_main_cp_state)->GetVertexSize();
|
||||||
}
|
}
|
||||||
|
|
||||||
NativeVertexFormat* GetCurrentVertexFormat()
|
NativeVertexFormat* GetCurrentVertexFormat()
|
||||||
|
@ -198,78 +181,83 @@ NativeVertexFormat* GetCurrentVertexFormat()
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void LoadCPReg(u32 sub_cmd, u32 value)
|
void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
|
||||||
{
|
{
|
||||||
|
bool update_global_state = !is_preprocess;
|
||||||
|
CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
|
||||||
switch (sub_cmd & 0xF0)
|
switch (sub_cmd & 0xF0)
|
||||||
{
|
{
|
||||||
case 0x30:
|
case 0x30:
|
||||||
VertexShaderManager::SetTexMatrixChangedA(value);
|
if (update_global_state)
|
||||||
|
VertexShaderManager::SetTexMatrixChangedA(value);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x40:
|
case 0x40:
|
||||||
VertexShaderManager::SetTexMatrixChangedB(value);
|
if (update_global_state)
|
||||||
|
VertexShaderManager::SetTexMatrixChangedB(value);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x50:
|
case 0x50:
|
||||||
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits
|
state->vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
|
||||||
g_VtxDesc.Hex |= value;
|
state->vtx_desc.Hex |= value;
|
||||||
s_attr_dirty = 0xFF;
|
state->attr_dirty = 0xFF;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x60:
|
case 0x60:
|
||||||
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
state->vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
||||||
g_VtxDesc.Hex |= (u64)value << 17;
|
state->vtx_desc.Hex |= (u64)value << 17;
|
||||||
s_attr_dirty = 0xFF;
|
state->attr_dirty = 0xFF;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x70:
|
case 0x70:
|
||||||
_assert_((sub_cmd & 0x0F) < 8);
|
_assert_((sub_cmd & 0x0F) < 8);
|
||||||
g_VtxAttr[sub_cmd & 7].g0.Hex = value;
|
state->vtx_attr[sub_cmd & 7].g0.Hex = value;
|
||||||
s_attr_dirty |= 1 << (sub_cmd & 7);
|
state->attr_dirty |= 1 << (sub_cmd & 7);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x80:
|
case 0x80:
|
||||||
_assert_((sub_cmd & 0x0F) < 8);
|
_assert_((sub_cmd & 0x0F) < 8);
|
||||||
g_VtxAttr[sub_cmd & 7].g1.Hex = value;
|
state->vtx_attr[sub_cmd & 7].g1.Hex = value;
|
||||||
s_attr_dirty |= 1 << (sub_cmd & 7);
|
state->attr_dirty |= 1 << (sub_cmd & 7);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0x90:
|
case 0x90:
|
||||||
_assert_((sub_cmd & 0x0F) < 8);
|
_assert_((sub_cmd & 0x0F) < 8);
|
||||||
g_VtxAttr[sub_cmd & 7].g2.Hex = value;
|
state->vtx_attr[sub_cmd & 7].g2.Hex = value;
|
||||||
s_attr_dirty |= 1 << (sub_cmd & 7);
|
state->attr_dirty |= 1 << (sub_cmd & 7);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Pointers to vertex arrays in GC RAM
|
// Pointers to vertex arrays in GC RAM
|
||||||
case 0xA0:
|
case 0xA0:
|
||||||
arraybases[sub_cmd & 0xF] = value;
|
state->array_bases[sub_cmd & 0xF] = value;
|
||||||
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
|
if (update_global_state)
|
||||||
|
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 0xB0:
|
case 0xB0:
|
||||||
arraystrides[sub_cmd & 0xF] = value & 0xFF;
|
state->array_strides[sub_cmd & 0xF] = value & 0xFF;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void FillCPMemoryArray(u32 *memory)
|
void FillCPMemoryArray(u32 *memory)
|
||||||
{
|
{
|
||||||
memory[0x30] = MatrixIndexA.Hex;
|
memory[0x30] = g_main_cp_state.matrix_index_a.Hex;
|
||||||
memory[0x40] = MatrixIndexB.Hex;
|
memory[0x40] = g_main_cp_state.matrix_index_b.Hex;
|
||||||
memory[0x50] = (u32)g_VtxDesc.Hex;
|
memory[0x50] = (u32)g_main_cp_state.vtx_desc.Hex;
|
||||||
memory[0x60] = (u32)(g_VtxDesc.Hex >> 17);
|
memory[0x60] = (u32)(g_main_cp_state.vtx_desc.Hex >> 17);
|
||||||
|
|
||||||
for (int i = 0; i < 8; ++i)
|
for (int i = 0; i < 8; ++i)
|
||||||
{
|
{
|
||||||
memory[0x70 + i] = g_VtxAttr[i].g0.Hex;
|
memory[0x70 + i] = g_main_cp_state.vtx_attr[i].g0.Hex;
|
||||||
memory[0x80 + i] = g_VtxAttr[i].g1.Hex;
|
memory[0x80 + i] = g_main_cp_state.vtx_attr[i].g1.Hex;
|
||||||
memory[0x90 + i] = g_VtxAttr[i].g2.Hex;
|
memory[0x90 + i] = g_main_cp_state.vtx_attr[i].g2.Hex;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 16; ++i)
|
for (int i = 0; i < 16; ++i)
|
||||||
{
|
{
|
||||||
memory[0xA0 + i] = arraybases[i];
|
memory[0xA0 + i] = g_main_cp_state.array_bases[i];
|
||||||
memory[0xB0 + i] = arraystrides[i];
|
memory[0xB0 + i] = g_main_cp_state.array_strides[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -277,6 +265,6 @@ void RecomputeCachedArraybases()
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 16; i++)
|
for (int i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
cached_arraybases[i] = Memory::GetPointer(arraybases[i]);
|
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,7 +16,7 @@ namespace VertexLoaderManager
|
||||||
|
|
||||||
void MarkAllDirty();
|
void MarkAllDirty();
|
||||||
|
|
||||||
int GetVertexSize(int vtx_attr_group);
|
int GetVertexSize(int vtx_attr_group, bool preprocess);
|
||||||
// Returns false if buf_size is insufficient.
|
// Returns false if buf_size is insufficient.
|
||||||
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false);
|
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false);
|
||||||
|
|
||||||
|
|
|
@ -117,7 +117,7 @@ template <typename I>
|
||||||
void Color_ReadIndex_16b_565()
|
void Color_ReadIndex_16b_565()
|
||||||
{
|
{
|
||||||
auto const Index = DataRead<I>();
|
auto const Index = DataRead<I>();
|
||||||
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
|
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])));
|
||||||
_SetCol565(val);
|
_SetCol565(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -125,7 +125,7 @@ template <typename I>
|
||||||
void Color_ReadIndex_24b_888()
|
void Color_ReadIndex_24b_888()
|
||||||
{
|
{
|
||||||
auto const Index = DataRead<I>();
|
auto const Index = DataRead<I>();
|
||||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
|
||||||
_SetCol(_Read24(iAddress));
|
_SetCol(_Read24(iAddress));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,7 +133,7 @@ template <typename I>
|
||||||
void Color_ReadIndex_32b_888x()
|
void Color_ReadIndex_32b_888x()
|
||||||
{
|
{
|
||||||
auto const Index = DataRead<I>();
|
auto const Index = DataRead<I>();
|
||||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
|
||||||
_SetCol(_Read24(iAddress));
|
_SetCol(_Read24(iAddress));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,7 +141,7 @@ template <typename I>
|
||||||
void Color_ReadIndex_16b_4444()
|
void Color_ReadIndex_16b_4444()
|
||||||
{
|
{
|
||||||
auto const Index = DataRead<I>();
|
auto const Index = DataRead<I>();
|
||||||
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
|
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]));
|
||||||
_SetCol4444(val);
|
_SetCol4444(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,7 +149,7 @@ template <typename I>
|
||||||
void Color_ReadIndex_24b_6666()
|
void Color_ReadIndex_24b_6666()
|
||||||
{
|
{
|
||||||
auto const Index = DataRead<I>();
|
auto const Index = DataRead<I>();
|
||||||
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
|
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1;
|
||||||
u32 val = Common::swap32(pData);
|
u32 val = Common::swap32(pData);
|
||||||
_SetCol6666(val);
|
_SetCol6666(val);
|
||||||
}
|
}
|
||||||
|
@ -158,7 +158,7 @@ template <typename I>
|
||||||
void Color_ReadIndex_32b_8888()
|
void Color_ReadIndex_32b_8888()
|
||||||
{
|
{
|
||||||
auto const Index = DataRead<I>();
|
auto const Index = DataRead<I>();
|
||||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
|
||||||
_SetCol(_Read32(iAddress));
|
_SetCol(_Read32(iAddress));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -80,7 +80,7 @@ __forceinline void Normal_Index_Offset()
|
||||||
|
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL]
|
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL]
|
||||||
+ (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
|
+ (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
|
||||||
ReadIndirect<T, N * 3>(data);
|
ReadIndirect<T, N * 3>(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -91,7 +91,7 @@ void LOADERDECL Pos_ReadIndex()
|
||||||
static_assert(N <= 3, "N > 3 is not sane!");
|
static_assert(N <= 3, "N > 3 is not sane!");
|
||||||
|
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
|
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
||||||
auto const scale = posScale;
|
auto const scale = posScale;
|
||||||
DataWriter dst;
|
DataWriter dst;
|
||||||
|
|
||||||
|
@ -109,7 +109,7 @@ template <typename I, bool three>
|
||||||
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
|
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
|
||||||
{
|
{
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
|
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
||||||
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
|
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
|
||||||
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
|
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
|
||||||
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||||
|
|
|
@ -73,7 +73,7 @@ void LOADERDECL TexCoord_ReadIndex()
|
||||||
|
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
|
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
|
||||||
+ (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
|
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex]));
|
||||||
auto const scale = tcScale[tcIndex];
|
auto const scale = tcScale[tcIndex];
|
||||||
DataWriter dst;
|
DataWriter dst;
|
||||||
|
|
||||||
|
@ -94,7 +94,7 @@ void LOADERDECL TexCoord_ReadIndex_Short2_SSE4()
|
||||||
|
|
||||||
// Heavy in ZWW
|
// Heavy in ZWW
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
|
||||||
const __m128i a = _mm_cvtsi32_si128(*pData);
|
const __m128i a = _mm_cvtsi32_si128(*pData);
|
||||||
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
|
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
|
||||||
const __m128i c = _mm_cvtepi16_epi32(b);
|
const __m128i c = _mm_cvtepi16_epi32(b);
|
||||||
|
@ -117,7 +117,7 @@ void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3()
|
||||||
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
||||||
|
|
||||||
auto const index = DataRead<I>();
|
auto const index = DataRead<I>();
|
||||||
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
|
||||||
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
|
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
|
||||||
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
|
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
|
||||||
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||||
|
|
|
@ -245,8 +245,8 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
|
||||||
// donko - this has caused problems in some games. removed for now.
|
// donko - this has caused problems in some games. removed for now.
|
||||||
bool texGenSpecialCase = false;
|
bool texGenSpecialCase = false;
|
||||||
/*bool texGenSpecialCase =
|
/*bool texGenSpecialCase =
|
||||||
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0
|
((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
|
||||||
(g_VtxDesc.Tex0Coord != NOT_PRESENT) &&
|
(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
|
||||||
(xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
|
(xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
|
@ -329,8 +329,8 @@ void VertexShaderManager::SetConstants()
|
||||||
{
|
{
|
||||||
bPosNormalMatrixChanged = false;
|
bPosNormalMatrixChanged = false;
|
||||||
|
|
||||||
const float *pos = (const float *)xfmem.posMatrices + MatrixIndexA.PosNormalMtxIdx * 4;
|
const float *pos = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
|
||||||
const float *norm = (const float *)xfmem.normalMatrices + 3 * (MatrixIndexA.PosNormalMtxIdx & 31);
|
const float *norm = (const float *)xfmem.normalMatrices + 3 * (g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31);
|
||||||
|
|
||||||
memcpy(constants.posnormalmatrix, pos, 3*16);
|
memcpy(constants.posnormalmatrix, pos, 3*16);
|
||||||
memcpy(constants.posnormalmatrix[3], norm, 12);
|
memcpy(constants.posnormalmatrix[3], norm, 12);
|
||||||
|
@ -344,10 +344,10 @@ void VertexShaderManager::SetConstants()
|
||||||
bTexMatricesChanged[0] = false;
|
bTexMatricesChanged[0] = false;
|
||||||
const float *fptrs[] =
|
const float *fptrs[] =
|
||||||
{
|
{
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex0MtxIdx * 4],
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex0MtxIdx * 4],
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex1MtxIdx * 4],
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex1MtxIdx * 4],
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex2MtxIdx * 4],
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex2MtxIdx * 4],
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex3MtxIdx * 4]
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex3MtxIdx * 4]
|
||||||
};
|
};
|
||||||
|
|
||||||
for (int i = 0; i < 4; ++i)
|
for (int i = 0; i < 4; ++i)
|
||||||
|
@ -361,10 +361,10 @@ void VertexShaderManager::SetConstants()
|
||||||
{
|
{
|
||||||
bTexMatricesChanged[1] = false;
|
bTexMatricesChanged[1] = false;
|
||||||
const float *fptrs[] = {
|
const float *fptrs[] = {
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex4MtxIdx * 4],
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex4MtxIdx * 4],
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex5MtxIdx * 4],
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex5MtxIdx * 4],
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex6MtxIdx * 4],
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex6MtxIdx * 4],
|
||||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex7MtxIdx * 4]
|
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex7MtxIdx * 4]
|
||||||
};
|
};
|
||||||
|
|
||||||
for (int i = 0; i < 4; ++i)
|
for (int i = 0; i < 4; ++i)
|
||||||
|
@ -536,26 +536,26 @@ void VertexShaderManager::SetConstants()
|
||||||
|
|
||||||
void VertexShaderManager::InvalidateXFRange(int start, int end)
|
void VertexShaderManager::InvalidateXFRange(int start, int end)
|
||||||
{
|
{
|
||||||
if (((u32)start >= (u32)MatrixIndexA.PosNormalMtxIdx * 4 &&
|
if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 &&
|
||||||
(u32)start < (u32)MatrixIndexA.PosNormalMtxIdx * 4 + 12) ||
|
(u32)start < (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 + 12) ||
|
||||||
((u32)start >= XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 &&
|
((u32)start >= XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 &&
|
||||||
(u32)start < XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 + 9))
|
(u32)start < XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 + 9))
|
||||||
{
|
{
|
||||||
bPosNormalMatrixChanged = true;
|
bPosNormalMatrixChanged = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (((u32)start >= (u32)MatrixIndexA.Tex0MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex0MtxIdx*4+12) ||
|
if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4+12) ||
|
||||||
((u32)start >= (u32)MatrixIndexA.Tex1MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex1MtxIdx*4+12) ||
|
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4+12) ||
|
||||||
((u32)start >= (u32)MatrixIndexA.Tex2MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex2MtxIdx*4+12) ||
|
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4+12) ||
|
||||||
((u32)start >= (u32)MatrixIndexA.Tex3MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex3MtxIdx*4+12))
|
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4+12))
|
||||||
{
|
{
|
||||||
bTexMatricesChanged[0] = true;
|
bTexMatricesChanged[0] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (((u32)start >= (u32)MatrixIndexB.Tex4MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex4MtxIdx*4+12) ||
|
if (((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4+12) ||
|
||||||
((u32)start >= (u32)MatrixIndexB.Tex5MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex5MtxIdx*4+12) ||
|
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4+12) ||
|
||||||
((u32)start >= (u32)MatrixIndexB.Tex6MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex6MtxIdx*4+12) ||
|
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4+12) ||
|
||||||
((u32)start >= (u32)MatrixIndexB.Tex7MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex7MtxIdx*4+12))
|
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4+12))
|
||||||
{
|
{
|
||||||
bTexMatricesChanged[1] = true;
|
bTexMatricesChanged[1] = true;
|
||||||
}
|
}
|
||||||
|
@ -628,23 +628,23 @@ void VertexShaderManager::InvalidateXFRange(int start, int end)
|
||||||
|
|
||||||
void VertexShaderManager::SetTexMatrixChangedA(u32 Value)
|
void VertexShaderManager::SetTexMatrixChangedA(u32 Value)
|
||||||
{
|
{
|
||||||
if (MatrixIndexA.Hex != Value)
|
if (g_main_cp_state.matrix_index_a.Hex != Value)
|
||||||
{
|
{
|
||||||
VertexManager::Flush();
|
VertexManager::Flush();
|
||||||
if (MatrixIndexA.PosNormalMtxIdx != (Value&0x3f))
|
if (g_main_cp_state.matrix_index_a.PosNormalMtxIdx != (Value&0x3f))
|
||||||
bPosNormalMatrixChanged = true;
|
bPosNormalMatrixChanged = true;
|
||||||
bTexMatricesChanged[0] = true;
|
bTexMatricesChanged[0] = true;
|
||||||
MatrixIndexA.Hex = Value;
|
g_main_cp_state.matrix_index_a.Hex = Value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VertexShaderManager::SetTexMatrixChangedB(u32 Value)
|
void VertexShaderManager::SetTexMatrixChangedB(u32 Value)
|
||||||
{
|
{
|
||||||
if (MatrixIndexB.Hex != Value)
|
if (g_main_cp_state.matrix_index_b.Hex != Value)
|
||||||
{
|
{
|
||||||
VertexManager::Flush();
|
VertexManager::Flush();
|
||||||
bTexMatricesChanged[1] = true;
|
bTexMatricesChanged[1] = true;
|
||||||
MatrixIndexB.Hex = Value;
|
g_main_cp_state.matrix_index_b.Hex = Value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -116,6 +116,8 @@ public:
|
||||||
virtual void DoState(PointerWrap &p) = 0;
|
virtual void DoState(PointerWrap &p) = 0;
|
||||||
|
|
||||||
virtual void CheckInvalidState() = 0;
|
virtual void CheckInvalidState() = 0;
|
||||||
|
|
||||||
|
virtual void UpdateWantDeterminism(bool want) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
extern std::vector<VideoBackend*> g_available_video_backends;
|
extern std::vector<VideoBackend*> g_available_video_backends;
|
||||||
|
@ -151,6 +153,8 @@ class VideoBackendHardware : public VideoBackend
|
||||||
void PauseAndLock(bool doLock, bool unpauseOnUnlock=true) override;
|
void PauseAndLock(bool doLock, bool unpauseOnUnlock=true) override;
|
||||||
void DoState(PointerWrap &p) override;
|
void DoState(PointerWrap &p) override;
|
||||||
|
|
||||||
|
void UpdateWantDeterminism(bool want) override;
|
||||||
|
|
||||||
bool m_invalid;
|
bool m_invalid;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -22,13 +22,7 @@ static void DoState(PointerWrap &p)
|
||||||
p.DoMarker("BP Memory");
|
p.DoMarker("BP Memory");
|
||||||
|
|
||||||
// CP Memory
|
// CP Memory
|
||||||
p.DoArray(arraybases, 16);
|
DoCPState(p);
|
||||||
p.DoArray(arraystrides, 16);
|
|
||||||
p.Do(MatrixIndexA);
|
|
||||||
p.Do(MatrixIndexB);
|
|
||||||
p.Do(g_VtxDesc.Hex);
|
|
||||||
p.DoArray(g_VtxAttr, 8);
|
|
||||||
p.DoMarker("CP Memory");
|
|
||||||
|
|
||||||
// XF Memory
|
// XF Memory
|
||||||
p.Do(xfmem);
|
p.Do(xfmem);
|
||||||
|
@ -73,11 +67,7 @@ void VideoCommon_RunLoop(bool enable)
|
||||||
|
|
||||||
void VideoCommon_Init()
|
void VideoCommon_Init()
|
||||||
{
|
{
|
||||||
memset(arraybases, 0, sizeof(arraybases));
|
memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));
|
||||||
memset(arraystrides, 0, sizeof(arraystrides));
|
memset(&g_preprocess_cp_state, 0, sizeof(g_preprocess_cp_state));
|
||||||
memset(&MatrixIndexA, 0, sizeof(MatrixIndexA));
|
|
||||||
memset(&MatrixIndexB, 0, sizeof(MatrixIndexB));
|
|
||||||
memset(&g_VtxDesc, 0, sizeof(g_VtxDesc));
|
|
||||||
memset(g_VtxAttr, 0, sizeof(g_VtxAttr));
|
|
||||||
memset(texMem, 0, TMEM_SIZE);
|
memset(texMem, 0, TMEM_SIZE);
|
||||||
}
|
}
|
||||||
|
|
|
@ -275,3 +275,4 @@ extern XFMemory xfmem;
|
||||||
|
|
||||||
void LoadXFReg(u32 transferSize, u32 address);
|
void LoadXFReg(u32 transferSize, u32 address);
|
||||||
void LoadIndexedXF(u32 val, int array);
|
void LoadIndexedXF(u32 val, int array);
|
||||||
|
void PreprocessIndexedXF(u32 val, int refarray);
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "Core/HW/Memmap.h"
|
#include "Core/HW/Memmap.h"
|
||||||
#include "VideoCommon/CPMemory.h"
|
#include "VideoCommon/CPMemory.h"
|
||||||
#include "VideoCommon/DataReader.h"
|
#include "VideoCommon/DataReader.h"
|
||||||
|
#include "VideoCommon/Fifo.h"
|
||||||
#include "VideoCommon/PixelShaderManager.h"
|
#include "VideoCommon/PixelShaderManager.h"
|
||||||
#include "VideoCommon/VertexManagerBase.h"
|
#include "VideoCommon/VertexManagerBase.h"
|
||||||
#include "VideoCommon/VertexShaderManager.h"
|
#include "VideoCommon/VertexShaderManager.h"
|
||||||
|
@ -252,7 +253,15 @@ void LoadIndexedXF(u32 val, int refarray)
|
||||||
//load stuff from array to address in xf mem
|
//load stuff from array to address in xf mem
|
||||||
|
|
||||||
u32* currData = (u32*)(&xfmem) + address;
|
u32* currData = (u32*)(&xfmem) + address;
|
||||||
u32* newData = (u32*)Memory::GetPointer(arraybases[refarray] + arraystrides[refarray] * index);
|
u32* newData;
|
||||||
|
if (g_use_deterministic_gpu_thread)
|
||||||
|
{
|
||||||
|
newData = (u32*)PopFifoAuxBuffer(size * sizeof(u32));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] + g_main_cp_state.array_strides[refarray] * index);
|
||||||
|
}
|
||||||
bool changed = false;
|
bool changed = false;
|
||||||
for (int i = 0; i < size; ++i)
|
for (int i = 0; i < size; ++i)
|
||||||
{
|
{
|
||||||
|
@ -269,3 +278,14 @@ void LoadIndexedXF(u32 val, int refarray)
|
||||||
currData[i] = Common::swap32(newData[i]);
|
currData[i] = Common::swap32(newData[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PreprocessIndexedXF(u32 val, int refarray)
|
||||||
|
{
|
||||||
|
int index = val >> 16;
|
||||||
|
int size = ((val >> 12) & 0xF) + 1;
|
||||||
|
|
||||||
|
u32* new_data = (u32*)Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] + g_preprocess_cp_state.array_strides[refarray] * index);
|
||||||
|
|
||||||
|
size_t buf_size = size * sizeof(u32);
|
||||||
|
PushFifoAuxBuffer(new_data, buf_size);
|
||||||
|
}
|
||||||
|
|
|
@ -74,7 +74,7 @@ protected:
|
||||||
|
|
||||||
void ResetPointers()
|
void ResetPointers()
|
||||||
{
|
{
|
||||||
g_pVideoData = &input_memory[0];
|
g_video_buffer_read_ptr = &input_memory[0];
|
||||||
VertexManager::s_pCurBufferPointer = &output_memory[0];
|
VertexManager::s_pCurBufferPointer = &output_memory[0];
|
||||||
m_input_pos = m_output_pos = 0;
|
m_input_pos = m_output_pos = 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue