Merge pull request #885 from comex/gpu-determinism

GPU determinism (apparently it is ready for merge)
This commit is contained in:
comex 2014-09-28 21:59:27 -04:00
commit fbabc03b3f
44 changed files with 911 additions and 419 deletions

View File

@ -55,10 +55,24 @@ struct ConfigCache
unsigned int framelimit, frameSkip; unsigned int framelimit, frameSkip;
TEXIDevices m_EXIDevice[MAX_EXI_CHANNELS]; TEXIDevices m_EXIDevice[MAX_EXI_CHANNELS];
std::string strBackend, sBackend; std::string strBackend, sBackend;
std::string m_strGPUDeterminismMode;
bool bSetFramelimit, bSetEXIDevice[MAX_EXI_CHANNELS], bSetVolume, bSetPads[MAX_SI_CHANNELS], bSetWiimoteSource[MAX_BBMOTES], bSetFrameSkip; bool bSetFramelimit, bSetEXIDevice[MAX_EXI_CHANNELS], bSetVolume, bSetPads[MAX_SI_CHANNELS], bSetWiimoteSource[MAX_BBMOTES], bSetFrameSkip;
}; };
static ConfigCache config_cache; static ConfigCache config_cache;
// Translates the textual "GPUDeterminismMode" setting into its enum value.
// A string that matches none of the known names is logged and treated as
// GPU_DETERMINISM_AUTO.
static GPUDeterminismMode ParseGPUDeterminismMode(const std::string& mode)
{
	struct ModeName
	{
		const char* name;
		GPUDeterminismMode value;
	};

	// Recognized spellings, checked in order.
	static const ModeName known_modes[] = {
		{ "auto",            GPU_DETERMINISM_AUTO },
		{ "none",            GPU_DETERMINISM_NONE },
		{ "fake-completion", GPU_DETERMINISM_FAKE_COMPLETION },
	};

	for (const ModeName& entry : known_modes)
	{
		if (mode == entry.name)
			return entry.value;
	}

	NOTICE_LOG(BOOT, "Unknown GPU determinism mode %s", mode.c_str());
	return GPU_DETERMINISM_AUTO;
}
// Boot the ISO or file // Boot the ISO or file
bool BootCore(const std::string& _rFilename) bool BootCore(const std::string& _rFilename)
{ {
@ -109,6 +123,7 @@ bool BootCore(const std::string& _rFilename)
config_cache.bMergeBlocks = StartUp.bMergeBlocks; config_cache.bMergeBlocks = StartUp.bMergeBlocks;
config_cache.bDSPHLE = StartUp.bDSPHLE; config_cache.bDSPHLE = StartUp.bDSPHLE;
config_cache.strBackend = StartUp.m_strVideoBackend; config_cache.strBackend = StartUp.m_strVideoBackend;
config_cache.m_strGPUDeterminismMode = StartUp.m_strGPUDeterminismMode;
config_cache.m_EnableJIT = SConfig::GetInstance().m_DSPEnableJIT; config_cache.m_EnableJIT = SConfig::GetInstance().m_DSPEnableJIT;
config_cache.bDSPThread = StartUp.bDSPThread; config_cache.bDSPThread = StartUp.bDSPThread;
config_cache.Volume = SConfig::GetInstance().m_Volume; config_cache.Volume = SConfig::GetInstance().m_Volume;
@ -168,6 +183,8 @@ bool BootCore(const std::string& _rFilename)
dsp_section->Get("EnableJIT", &SConfig::GetInstance().m_DSPEnableJIT, SConfig::GetInstance().m_DSPEnableJIT); dsp_section->Get("EnableJIT", &SConfig::GetInstance().m_DSPEnableJIT, SConfig::GetInstance().m_DSPEnableJIT);
dsp_section->Get("Backend", &SConfig::GetInstance().sBackend, SConfig::GetInstance().sBackend); dsp_section->Get("Backend", &SConfig::GetInstance().sBackend, SConfig::GetInstance().sBackend);
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend); VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
core_section->Get("GPUDeterminismMode", &StartUp.m_strGPUDeterminismMode, StartUp.m_strGPUDeterminismMode);
StartUp.m_GPUDeterminismMode = ParseGPUDeterminismMode(StartUp.m_strGPUDeterminismMode);
for (unsigned int i = 0; i < MAX_SI_CHANNELS; ++i) for (unsigned int i = 0; i < MAX_SI_CHANNELS; ++i)
{ {
@ -277,6 +294,7 @@ void Stop()
StartUp.bDSPHLE = config_cache.bDSPHLE; StartUp.bDSPHLE = config_cache.bDSPHLE;
StartUp.bDSPThread = config_cache.bDSPThread; StartUp.bDSPThread = config_cache.bDSPThread;
StartUp.m_strVideoBackend = config_cache.strBackend; StartUp.m_strVideoBackend = config_cache.strBackend;
StartUp.m_strGPUDeterminismMode = config_cache.m_strGPUDeterminismMode;
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend); VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
StartUp.bHLE_BS2 = config_cache.bHLE_BS2; StartUp.bHLE_BS2 = config_cache.bHLE_BS2;
SConfig::GetInstance().sBackend = config_cache.sBackend; SConfig::GetInstance().sBackend = config_cache.sBackend;

View File

@ -317,6 +317,7 @@ void SConfig::SaveCoreSettings(IniFile& ini)
core->Set("FrameLimit", m_Framelimit); core->Set("FrameLimit", m_Framelimit);
core->Set("FrameSkip", m_FrameSkip); core->Set("FrameSkip", m_FrameSkip);
core->Set("GFXBackend", m_LocalCoreStartupParameter.m_strVideoBackend); core->Set("GFXBackend", m_LocalCoreStartupParameter.m_strVideoBackend);
core->Set("GPUDeterminismMode", m_LocalCoreStartupParameter.m_strGPUDeterminismMode);
} }
void SConfig::SaveMovieSettings(IniFile& ini) void SConfig::SaveMovieSettings(IniFile& ini)
@ -542,6 +543,7 @@ void SConfig::LoadCoreSettings(IniFile& ini)
core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default
core->Get("FrameSkip", &m_FrameSkip, 0); core->Get("FrameSkip", &m_FrameSkip, 0);
core->Get("GFXBackend", &m_LocalCoreStartupParameter.m_strVideoBackend, ""); core->Get("GFXBackend", &m_LocalCoreStartupParameter.m_strVideoBackend, "");
core->Get("GPUDeterminismMode", &m_LocalCoreStartupParameter.m_strGPUDeterminismMode, "auto");
} }
void SConfig::LoadMovieSettings(IniFile& ini) void SConfig::LoadMovieSettings(IniFile& ini)

View File

@ -48,6 +48,7 @@
#include "Core/HW/VideoInterface.h" #include "Core/HW/VideoInterface.h"
#include "Core/HW/Wiimote.h" #include "Core/HW/Wiimote.h"
#include "Core/IPC_HLE/WII_IPC_HLE_Device_usb.h" #include "Core/IPC_HLE/WII_IPC_HLE_Device_usb.h"
#include "Core/IPC_HLE/WII_Socket.h"
#include "Core/PowerPC/PowerPC.h" #include "Core/PowerPC/PowerPC.h"
#ifdef USE_GDBSTUB #ifdef USE_GDBSTUB
@ -65,6 +66,8 @@ bool g_aspect_wide;
namespace Core namespace Core
{ {
bool g_want_determinism;
// Declarations and definitions // Declarations and definitions
static Common::Timer s_timer; static Common::Timer s_timer;
static volatile u32 s_drawn_frame = 0; static volatile u32 s_drawn_frame = 0;
@ -177,6 +180,8 @@ bool Init()
s_emu_thread.join(); s_emu_thread.join();
} }
Core::UpdateWantDeterminism(/*initial*/ true);
INFO_LOG(OSREPORT, "Starting core = %s mode", INFO_LOG(OSREPORT, "Starting core = %s mode",
_CoreParameter.bWii ? "Wii" : "GameCube"); _CoreParameter.bWii ? "Wii" : "GameCube");
INFO_LOG(OSREPORT, "CPU Thread separate = %s", INFO_LOG(OSREPORT, "CPU Thread separate = %s",
@ -564,6 +569,9 @@ void RequestRefreshInfo()
bool PauseAndLock(bool doLock, bool unpauseOnUnlock) bool PauseAndLock(bool doLock, bool unpauseOnUnlock)
{ {
if (!IsRunning())
return true;
// let's support recursive locking to simplify things on the caller's side, // let's support recursive locking to simplify things on the caller's side,
// and let's do it at this outer level in case the individual systems don't support it. // and let's do it at this outer level in case the individual systems don't support it.
if (doLock ? s_pause_and_lock_depth++ : --s_pause_and_lock_depth) if (doLock ? s_pause_and_lock_depth++ : --s_pause_and_lock_depth)
@ -702,4 +710,27 @@ void SetOnStoppedCallback(StoppedCallbackFunc callback)
s_on_stopped_callback = callback; s_on_stopped_callback = callback;
} }
void UpdateWantDeterminism(bool initial)
{
// For now, this value is not itself configurable. Instead, individual
// settings that depend on it, such as GPU determinism mode. should have
// override options for testing,
bool new_want_determinism =
Movie::IsPlayingInput() ||
Movie::IsRecordingInput() ||
NetPlay::IsNetPlayRunning();
if (new_want_determinism != g_want_determinism || initial)
{
WARN_LOG(COMMON, "Want determinism <- %s", new_want_determinism ? "true" : "false");
bool was_unpaused = Core::PauseAndLock(true);
g_want_determinism = new_want_determinism;
WiiSockMan::GetInstance().UpdateWantDeterminism(new_want_determinism);
g_video_backend->UpdateWantDeterminism(new_want_determinism);
Core::PauseAndLock(false, was_unpaused);
}
}
} // Core } // Core

View File

@ -23,6 +23,8 @@ extern bool g_aspect_wide;
namespace Core namespace Core
{ {
extern bool g_want_determinism;
bool GetIsFramelimiterTempDisabled(); bool GetIsFramelimiterTempDisabled();
void SetIsFramelimiterTempDisabled(bool disable); void SetIsFramelimiterTempDisabled(bool disable);
@ -79,4 +81,7 @@ bool PauseAndLock(bool doLock, bool unpauseOnUnlock=true);
typedef void(*StoppedCallbackFunc)(void); typedef void(*StoppedCallbackFunc)(void);
void SetOnStoppedCallback(StoppedCallbackFunc callback); void SetOnStoppedCallback(StoppedCallbackFunc callback);
// Run on the GUI thread whenever one of the factors that decide g_want_determinism (movie playback/recording, NetPlay) changes.
void UpdateWantDeterminism(bool initial = false);
} // namespace } // namespace

View File

@ -97,6 +97,15 @@ enum Hotkey
NUM_HOTKEYS, NUM_HOTKEYS,
}; };
// Parsed form of the "GPUDeterminismMode" core setting; the string stored in
// SCoreStartupParameter::m_strGPUDeterminismMode is converted to one of these
// values by ParseGPUDeterminismMode.
enum GPUDeterminismMode
{
// Spelled "auto" in the config; also the result for unrecognized strings.
GPU_DETERMINISM_AUTO,
// Spelled "none" in the config.
GPU_DETERMINISM_NONE,
// Spelled "fake-completion" in the config.
// This is currently the only mode. There will probably be at least
// one more at some point.
GPU_DETERMINISM_FAKE_COMPLETION,
};
struct SCoreStartupParameter struct SCoreStartupParameter
{ {
// Settings // Settings
@ -200,6 +209,10 @@ struct SCoreStartupParameter
EBootType m_BootType; EBootType m_BootType;
std::string m_strVideoBackend; std::string m_strVideoBackend;
std::string m_strGPUDeterminismMode;
// set based on the string version
GPUDeterminismMode m_GPUDeterminismMode;
// files // files
std::string m_strFilename; std::string m_strFilename;

View File

@ -331,7 +331,7 @@ bool Wiimote::Step()
m_rumble->controls[0]->control_ref->State(m_rumble_on); m_rumble->controls[0]->control_ref->State(m_rumble_on);
// when a movie is active, this button status update is disabled (moved), because movies only record data reports. // when a movie is active, this button status update is disabled (moved), because movies only record data reports.
if (!(Movie::IsMovieActive()) || NetPlay::IsNetPlayRunning()) if (!Core::g_want_determinism)
{ {
UpdateButtonsStatus(); UpdateButtonsStatus();
} }
@ -385,7 +385,7 @@ void Wiimote::UpdateButtonsStatus()
void Wiimote::GetCoreData(u8* const data) void Wiimote::GetCoreData(u8* const data)
{ {
// when a movie is active, the button update happens here instead of Wiimote::Step, to avoid potential desync issues. // when a movie is active, the button update happens here instead of Wiimote::Step, to avoid potential desync issues.
if (Movie::IsMovieActive() || NetPlay::IsNetPlayRunning()) if (Core::g_want_determinism)
{ {
UpdateButtonsStatus(); UpdateButtonsStatus();
} }

View File

@ -4,8 +4,7 @@
#include <algorithm> #include <algorithm>
#include "Core/Movie.h" #include "Core/Core.h"
#include "Core/NetPlayProto.h"
#include "Core/IPC_HLE/WII_IPC_HLE.h" #include "Core/IPC_HLE/WII_IPC_HLE.h"
#include "Core/IPC_HLE/WII_IPC_HLE_Device.h" #include "Core/IPC_HLE/WII_IPC_HLE_Device.h"
#include "Core/IPC_HLE/WII_Socket.h" // No Wii socket support while using NetPlay or TAS #include "Core/IPC_HLE/WII_Socket.h" // No Wii socket support while using NetPlay or TAS
@ -559,9 +558,7 @@ void WiiSockMan::AddSocket(s32 fd)
s32 WiiSockMan::NewSocket(s32 af, s32 type, s32 protocol) s32 WiiSockMan::NewSocket(s32 af, s32 type, s32 protocol)
{ {
if (NetPlay::IsNetPlayRunning() || if (Core::g_want_determinism)
Movie::IsRecordingInput() ||
Movie::IsPlayingInput())
{ {
return SO_ENOMEM; return SO_ENOMEM;
} }
@ -664,5 +661,12 @@ void WiiSockMan::Convert(sockaddr_in const & from, WiiSockAddrIn& to, s32 addrle
to.len = addrlen; to.len = addrlen;
} }
// Reacts to a change of the determinism requirement.
// `want` is the new value of the requirement.
void WiiSockMan::UpdateWantDeterminism(bool want)
{
	// Dropping the requirement needs no action here.
	if (!want)
		return;

	// Determinism was just requested (e.g. movie recording started): kill
	// the existing sockets.  NewSocket() refuses to create new ones while
	// determinism is wanted.
	Clean();
}
#undef ERRORCODE #undef ERRORCODE
#undef EITHER #undef EITHER

View File

@ -242,6 +242,8 @@ public:
} }
} }
void UpdateWantDeterminism(bool want);
private: private:
WiiSockMan() = default; WiiSockMan() = default;

View File

@ -437,6 +437,8 @@ bool BeginRecordingInput(int controllers)
if (s_playMode != MODE_NONE || controllers == 0) if (s_playMode != MODE_NONE || controllers == 0)
return false; return false;
bool was_unpaused = Core::PauseAndLock(true);
s_numPads = controllers; s_numPads = controllers;
g_currentFrame = g_totalFrames = 0; g_currentFrame = g_totalFrames = 0;
g_currentLagCount = s_totalLagCount = 0; g_currentLagCount = s_totalLagCount = 0;
@ -487,6 +489,10 @@ bool BeginRecordingInput(int controllers)
s_currentByte = s_totalBytes = 0; s_currentByte = s_totalBytes = 0;
Core::UpdateWantDeterminism();
Core::PauseAndLock(false, was_unpaused);
Core::DisplayMessage("Starting movie recording", 2000); Core::DisplayMessage("Starting movie recording", 2000);
return true; return true;
} }
@ -764,6 +770,8 @@ bool PlayInput(const std::string& filename)
s_playMode = MODE_PLAYING; s_playMode = MODE_PLAYING;
Core::UpdateWantDeterminism();
s_totalBytes = g_recordfd.GetSize() - 256; s_totalBytes = g_recordfd.GetSize() - 256;
EnsureTmpInputSize((size_t)s_totalBytes); EnsureTmpInputSize((size_t)s_totalBytes);
g_recordfd.ReadArray(tmpInput, (size_t)s_totalBytes); g_recordfd.ReadArray(tmpInput, (size_t)s_totalBytes);
@ -1097,6 +1105,7 @@ void EndPlayInput(bool cont)
s_rerecords = 0; s_rerecords = 0;
s_currentByte = 0; s_currentByte = 0;
s_playMode = MODE_NONE; s_playMode = MODE_NONE;
Core::UpdateWantDeterminism();
Core::DisplayMessage("Movie End.", 2000); Core::DisplayMessage("Movie End.", 2000);
s_bRecordingFromSaveState = false; s_bRecordingFromSaveState = false;
// we don't clear these things because otherwise we can't resume playback if we load a movie state later // we don't clear these things because otherwise we can't resume playback if we load a movie state later

View File

@ -13,46 +13,46 @@ void SWLoadCPReg(u32 sub_cmd, u32 value)
switch (sub_cmd & 0xF0) switch (sub_cmd & 0xF0)
{ {
case 0x30: case 0x30:
MatrixIndexA.Hex = value; g_main_cp_state.matrix_index_a.Hex = value;
break; break;
case 0x40: case 0x40:
MatrixIndexB.Hex = value; g_main_cp_state.matrix_index_b.Hex = value;
break; break;
case 0x50: case 0x50:
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits g_main_cp_state.vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
g_VtxDesc.Hex |= value; g_main_cp_state.vtx_desc.Hex |= value;
break; break;
case 0x60: case 0x60:
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits g_main_cp_state.vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
g_VtxDesc.Hex |= (u64)value << 17; g_main_cp_state.vtx_desc.Hex |= (u64)value << 17;
break; break;
case 0x70: case 0x70:
_assert_((sub_cmd & 0x0F) < 8); _assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g0.Hex = value; g_main_cp_state.vtx_attr[sub_cmd & 7].g0.Hex = value;
break; break;
case 0x80: case 0x80:
_assert_((sub_cmd & 0x0F) < 8); _assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g1.Hex = value; g_main_cp_state.vtx_attr[sub_cmd & 7].g1.Hex = value;
break; break;
case 0x90: case 0x90:
_assert_((sub_cmd & 0x0F) < 8); _assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g2.Hex = value; g_main_cp_state.vtx_attr[sub_cmd & 7].g2.Hex = value;
break; break;
// Pointers to vertex arrays in GC RAM // Pointers to vertex arrays in GC RAM
case 0xA0: case 0xA0:
arraybases[sub_cmd & 0xF] = value; g_main_cp_state.array_bases[sub_cmd & 0xF] = value;
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value); cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
break; break;
case 0xB0: case 0xB0:
arraystrides[sub_cmd & 0xF] = value & 0xFF; g_main_cp_state.array_strides[sub_cmd & 0xF] = value & 0xFF;
break; break;
} }
} }

View File

@ -57,7 +57,7 @@ static void DecodePrimitiveStream(u32 iBufferSize)
{ {
while (streamSize > 0 && iBufferSize >= vertexSize) while (streamSize > 0 && iBufferSize >= vertexSize)
{ {
g_pVideoData += vertexSize; g_video_buffer_read_ptr += vertexSize;
iBufferSize -= vertexSize; iBufferSize -= vertexSize;
streamSize--; streamSize--;
} }
@ -94,26 +94,26 @@ static void ReadXFData(u32 iBufferSize)
static void ExecuteDisplayList(u32 addr, u32 count) static void ExecuteDisplayList(u32 addr, u32 count)
{ {
u8 *videoDataSave = g_pVideoData; u8 *videoDataSave = g_video_buffer_read_ptr;
u8 *dlStart = Memory::GetPointer(addr); u8 *dlStart = Memory::GetPointer(addr);
g_pVideoData = dlStart; g_video_buffer_read_ptr = dlStart;
while (OpcodeDecoder::CommandRunnable(count)) while (OpcodeDecoder::CommandRunnable(count))
{ {
OpcodeDecoder::Run(count); OpcodeDecoder::Run(count);
// if data was read by the opcode decoder then the video data pointer changed // if data was read by the opcode decoder then the video data pointer changed
u32 readCount = (u32)(g_pVideoData - dlStart); u32 readCount = (u32)(g_video_buffer_read_ptr - dlStart);
dlStart = g_pVideoData; dlStart = g_video_buffer_read_ptr;
_assert_msg_(VIDEO, count >= readCount, "Display list underrun"); _assert_msg_(VIDEO, count >= readCount, "Display list underrun");
count -= readCount; count -= readCount;
} }
g_pVideoData = videoDataSave; g_video_buffer_read_ptr = videoDataSave;
} }
static void DecodeStandard(u32 bufferSize) static void DecodeStandard(u32 bufferSize)

View File

@ -57,7 +57,7 @@ void DoState(PointerWrap &p)
p.Do(interruptWaiting); p.Do(interruptWaiting);
// Is this right? // Is this right?
p.DoArray(g_pVideoData,writePos); p.DoArray(g_video_buffer_read_ptr,writePos);
} }
static void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate) static void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
@ -95,7 +95,7 @@ void Init()
interruptSet = false; interruptSet = false;
interruptWaiting = false; interruptWaiting = false;
g_pVideoData = nullptr; g_video_buffer_read_ptr = nullptr;
g_bSkipCurrentFrame = false; g_bSkipCurrentFrame = false;
} }
@ -311,7 +311,7 @@ bool RunBuffer()
_dbg_assert_(COMMANDPROCESSOR, writePos >= readPos); _dbg_assert_(COMMANDPROCESSOR, writePos >= readPos);
g_pVideoData = &commandBuffer[readPos]; g_video_buffer_read_ptr = &commandBuffer[readPos];
u32 availableBytes = writePos - readPos; u32 availableBytes = writePos - readPos;
@ -322,7 +322,7 @@ bool RunBuffer()
OpcodeDecoder::Run(availableBytes); OpcodeDecoder::Run(availableBytes);
// if data was read by the opcode decoder then the video data pointer changed // if data was read by the opcode decoder then the video data pointer changed
readPos = (u32)(g_pVideoData - &commandBuffer[0]); readPos = (u32)(g_video_buffer_read_ptr - &commandBuffer[0]);
_dbg_assert_(VIDEO, writePos >= readPos); _dbg_assert_(VIDEO, writePos >= readPos);
availableBytes = writePos - readPos; availableBytes = writePos - readPos;
} }

View File

@ -39,7 +39,7 @@ SWVertexLoader::~SWVertexLoader()
void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType) void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
{ {
m_CurrentVat = &g_VtxAttr[attributeIndex]; m_CurrentVat = &g_main_cp_state.vtx_attr[attributeIndex];
posScale = 1.0f / float(1 << m_CurrentVat->g0.PosFrac); posScale = 1.0f / float(1 << m_CurrentVat->g0.PosFrac);
tcScale[0] = 1.0f / float(1 << m_CurrentVat->g0.Tex0Frac); tcScale[0] = 1.0f / float(1 << m_CurrentVat->g0.Tex0Frac);
@ -53,20 +53,20 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
//TexMtx //TexMtx
const u64 tmDesc[8] = { const u64 tmDesc[8] = {
g_VtxDesc.Tex0MatIdx, g_VtxDesc.Tex1MatIdx, g_VtxDesc.Tex2MatIdx, g_VtxDesc.Tex3MatIdx, g_main_cp_state.vtx_desc.Tex0MatIdx, g_main_cp_state.vtx_desc.Tex1MatIdx, g_main_cp_state.vtx_desc.Tex2MatIdx, g_main_cp_state.vtx_desc.Tex3MatIdx,
g_VtxDesc.Tex4MatIdx, g_VtxDesc.Tex5MatIdx, g_VtxDesc.Tex6MatIdx, g_VtxDesc.Tex7MatIdx g_main_cp_state.vtx_desc.Tex4MatIdx, g_main_cp_state.vtx_desc.Tex5MatIdx, g_main_cp_state.vtx_desc.Tex6MatIdx, g_main_cp_state.vtx_desc.Tex7MatIdx
}; };
// Colors // Colors
const u64 colDesc[2] = {g_VtxDesc.Color0, g_VtxDesc.Color1}; const u64 colDesc[2] = {g_main_cp_state.vtx_desc.Color0, g_main_cp_state.vtx_desc.Color1};
colElements[0] = m_CurrentVat->g0.Color0Elements; colElements[0] = m_CurrentVat->g0.Color0Elements;
colElements[1] = m_CurrentVat->g0.Color1Elements; colElements[1] = m_CurrentVat->g0.Color1Elements;
const u32 colComp[2] = {m_CurrentVat->g0.Color0Comp, m_CurrentVat->g0.Color1Comp}; const u32 colComp[2] = {m_CurrentVat->g0.Color0Comp, m_CurrentVat->g0.Color1Comp};
// TextureCoord // TextureCoord
const u64 tcDesc[8] = { const u64 tcDesc[8] = {
g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord, g_VtxDesc.Tex2Coord, g_VtxDesc.Tex3Coord, g_main_cp_state.vtx_desc.Tex0Coord, g_main_cp_state.vtx_desc.Tex1Coord, g_main_cp_state.vtx_desc.Tex2Coord, g_main_cp_state.vtx_desc.Tex3Coord,
g_VtxDesc.Tex4Coord, g_VtxDesc.Tex5Coord, g_VtxDesc.Tex6Coord, g_VtxDesc.Tex7Coord g_main_cp_state.vtx_desc.Tex4Coord, g_main_cp_state.vtx_desc.Tex5Coord, g_main_cp_state.vtx_desc.Tex6Coord, g_main_cp_state.vtx_desc.Tex7Coord
}; };
const u32 tcElements[8] = { const u32 tcElements[8] = {
m_CurrentVat->g0.Tex0CoordElements, m_CurrentVat->g1.Tex1CoordElements, m_CurrentVat->g1.Tex2CoordElements, m_CurrentVat->g0.Tex0CoordElements, m_CurrentVat->g1.Tex1CoordElements, m_CurrentVat->g1.Tex2CoordElements,
@ -89,15 +89,15 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
// Reset vertex // Reset vertex
// matrix index from xf regs or cp memory? // matrix index from xf regs or cp memory?
if (xfmem.MatrixIndexA.PosNormalMtxIdx != MatrixIndexA.PosNormalMtxIdx || if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
xfmem.MatrixIndexA.Tex0MtxIdx != MatrixIndexA.Tex0MtxIdx || xfmem.MatrixIndexA.Tex0MtxIdx != g_main_cp_state.matrix_index_a.Tex0MtxIdx ||
xfmem.MatrixIndexA.Tex1MtxIdx != MatrixIndexA.Tex1MtxIdx || xfmem.MatrixIndexA.Tex1MtxIdx != g_main_cp_state.matrix_index_a.Tex1MtxIdx ||
xfmem.MatrixIndexA.Tex2MtxIdx != MatrixIndexA.Tex2MtxIdx || xfmem.MatrixIndexA.Tex2MtxIdx != g_main_cp_state.matrix_index_a.Tex2MtxIdx ||
xfmem.MatrixIndexA.Tex3MtxIdx != MatrixIndexA.Tex3MtxIdx || xfmem.MatrixIndexA.Tex3MtxIdx != g_main_cp_state.matrix_index_a.Tex3MtxIdx ||
xfmem.MatrixIndexB.Tex4MtxIdx != MatrixIndexB.Tex4MtxIdx || xfmem.MatrixIndexB.Tex4MtxIdx != g_main_cp_state.matrix_index_b.Tex4MtxIdx ||
xfmem.MatrixIndexB.Tex5MtxIdx != MatrixIndexB.Tex5MtxIdx || xfmem.MatrixIndexB.Tex5MtxIdx != g_main_cp_state.matrix_index_b.Tex5MtxIdx ||
xfmem.MatrixIndexB.Tex6MtxIdx != MatrixIndexB.Tex6MtxIdx || xfmem.MatrixIndexB.Tex6MtxIdx != g_main_cp_state.matrix_index_b.Tex6MtxIdx ||
xfmem.MatrixIndexB.Tex7MtxIdx != MatrixIndexB.Tex7MtxIdx) xfmem.MatrixIndexB.Tex7MtxIdx != g_main_cp_state.matrix_index_b.Tex7MtxIdx)
{ {
WARN_LOG(VIDEO, "Matrix indices don't match"); WARN_LOG(VIDEO, "Matrix indices don't match");
@ -118,18 +118,18 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
m_Vertex.texMtx[6] = xfmem.MatrixIndexB.Tex6MtxIdx; m_Vertex.texMtx[6] = xfmem.MatrixIndexB.Tex6MtxIdx;
m_Vertex.texMtx[7] = xfmem.MatrixIndexB.Tex7MtxIdx; m_Vertex.texMtx[7] = xfmem.MatrixIndexB.Tex7MtxIdx;
#else #else
m_Vertex.posMtx = MatrixIndexA.PosNormalMtxIdx; m_Vertex.posMtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
m_Vertex.texMtx[0] = MatrixIndexA.Tex0MtxIdx; m_Vertex.texMtx[0] = g_main_cp_state.matrix_index_a.Tex0MtxIdx;
m_Vertex.texMtx[1] = MatrixIndexA.Tex1MtxIdx; m_Vertex.texMtx[1] = g_main_cp_state.matrix_index_a.Tex1MtxIdx;
m_Vertex.texMtx[2] = MatrixIndexA.Tex2MtxIdx; m_Vertex.texMtx[2] = g_main_cp_state.matrix_index_a.Tex2MtxIdx;
m_Vertex.texMtx[3] = MatrixIndexA.Tex3MtxIdx; m_Vertex.texMtx[3] = g_main_cp_state.matrix_index_a.Tex3MtxIdx;
m_Vertex.texMtx[4] = MatrixIndexB.Tex4MtxIdx; m_Vertex.texMtx[4] = g_main_cp_state.matrix_index_b.Tex4MtxIdx;
m_Vertex.texMtx[5] = MatrixIndexB.Tex5MtxIdx; m_Vertex.texMtx[5] = g_main_cp_state.matrix_index_b.Tex5MtxIdx;
m_Vertex.texMtx[6] = MatrixIndexB.Tex6MtxIdx; m_Vertex.texMtx[6] = g_main_cp_state.matrix_index_b.Tex6MtxIdx;
m_Vertex.texMtx[7] = MatrixIndexB.Tex7MtxIdx; m_Vertex.texMtx[7] = g_main_cp_state.matrix_index_b.Tex7MtxIdx;
#endif #endif
if (g_VtxDesc.PosMatIdx != NOT_PRESENT) if (g_main_cp_state.vtx_desc.PosMatIdx != NOT_PRESENT)
{ {
AddAttributeLoader(LoadPosMtx); AddAttributeLoader(LoadPosMtx);
m_VertexSize++; m_VertexSize++;
@ -145,17 +145,17 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
} }
// Write vertex position loader // Write vertex position loader
m_positionLoader = VertexLoader_Position::GetFunction(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements); m_positionLoader = VertexLoader_Position::GetFunction(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
m_VertexSize += VertexLoader_Position::GetSize(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements); m_VertexSize += VertexLoader_Position::GetSize(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
AddAttributeLoader(LoadPosition); AddAttributeLoader(LoadPosition);
// Normals // Normals
if (g_VtxDesc.Normal != NOT_PRESENT) if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
{ {
m_VertexSize += VertexLoader_Normal::GetSize(g_VtxDesc.Normal, m_VertexSize += VertexLoader_Normal::GetSize(g_main_cp_state.vtx_desc.Normal,
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3); m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
m_normalLoader = VertexLoader_Normal::GetFunction(g_VtxDesc.Normal, m_normalLoader = VertexLoader_Normal::GetFunction(g_main_cp_state.vtx_desc.Normal,
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3); m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
if (m_normalLoader == nullptr) if (m_normalLoader == nullptr)
@ -234,8 +234,8 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
// special case if only pos and tex coord 0 and tex coord input is AB11 // special case if only pos and tex coord 0 and tex coord input is AB11
m_TexGenSpecialCase = m_TexGenSpecialCase =
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0 ((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
(g_VtxDesc.Tex0Coord != NOT_PRESENT) && (g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
(xfmem.texMtxInfo[0].projection == XF_TEXPROJ_ST); (xfmem.texMtxInfo[0].projection == XF_TEXPROJ_ST);
m_SetupUnit->Init(primitiveType); m_SetupUnit->Init(primitiveType);
@ -252,7 +252,7 @@ void SWVertexLoader::LoadVertex()
// transform input data // transform input data
TransformUnit::TransformPosition(&m_Vertex, outVertex); TransformUnit::TransformPosition(&m_Vertex, outVertex);
if (g_VtxDesc.Normal != NOT_PRESENT) if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
{ {
TransformUnit::TransformNormal(&m_Vertex, m_CurrentVat->g0.NormalElements, outVertex); TransformUnit::TransformNormal(&m_Vertex, m_CurrentVat->g0.NormalElements, outVertex);
} }

View File

@ -116,14 +116,7 @@ void VideoSoftware::DoState(PointerWrap& p)
p.DoPOD(swstats); p.DoPOD(swstats);
// CP Memory // CP Memory
p.DoArray(arraybases, 16); DoCPState(p);
p.DoArray(arraystrides, 16);
p.Do(MatrixIndexA);
p.Do(MatrixIndexB);
p.Do(g_VtxDesc.Hex);
p.DoArray(g_VtxAttr, 8);
p.DoMarker("CP Memory");
} }
void VideoSoftware::CheckInvalidState() void VideoSoftware::CheckInvalidState()

View File

@ -74,7 +74,7 @@ void SWLoadIndexedXF(u32 val, int array)
int size = ((val >> 12) & 0xF) + 1; int size = ((val >> 12) & 0xF) + 1;
//load stuff from array to address in xf mem //load stuff from array to address in xf mem
u32 *pData = (u32*)Memory::GetPointer(arraybases[array] + arraystrides[array]*index); u32 *pData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] + g_main_cp_state.array_strides[array]*index);
// byteswap data // byteswap data
u32 buffer[16]; u32 buffer[16];

View File

@ -1085,5 +1085,6 @@ struct BPMemory
extern BPMemory bpmem; extern BPMemory bpmem;
void LoadBPReg(u32 value0); void LoadBPReg(u32 value0);
void LoadBPRegPreprocess(u32 value0);
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc); void GetBPRegInfo(const u8* data, std::string* name, std::string* desc);

View File

@ -173,7 +173,8 @@ static void BPWritten(const BPCmd& bp)
switch (bp.newvalue & 0xFF) switch (bp.newvalue & 0xFF)
{ {
case 0x02: case 0x02:
PixelEngine::SetFinish(); // may generate interrupt if (!g_use_deterministic_gpu_thread)
PixelEngine::SetFinish(); // may generate interrupt
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF)); DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
return; return;
@ -183,11 +184,13 @@ static void BPWritten(const BPCmd& bp)
} }
return; return;
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false); if (!g_use_deterministic_gpu_thread)
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF)); DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
return; return;
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true); if (!g_use_deterministic_gpu_thread)
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF)); DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
return; return;
@ -685,6 +688,26 @@ void LoadBPReg(u32 value0)
BPWritten(bp); BPWritten(bp);
} }
// Preprocess-time handling of a BP register write.
// Only the draw-done and PE-token registers are acted upon; these are the
// PixelEngine notifications that BPWritten skips when
// g_use_deterministic_gpu_thread is set.  All other registers are ignored.
// value0: raw BP command word (register number in the top byte, value in
//         the low 24 bits).
void LoadBPRegPreprocess(u32 value0)
{
	const int reg = value0 >> 24;
	// masking could hypothetically be a problem
	const u32 payload = value0 & 0xffffff;

	if (reg == BPMEM_SETDRAWDONE)
	{
		// Only the value 0x02 in the low byte signals a finish.
		if ((payload & 0xff) == 0x02)
			PixelEngine::SetFinish();
	}
	else if (reg == BPMEM_PE_TOKEN_ID)
	{
		PixelEngine::SetToken(payload & 0xffff, false);
	}
	else if (reg == BPMEM_PE_TOKEN_INT_ID)
	{
		// Pixel Engine Interrupt Token ID
		PixelEngine::SetToken(payload & 0xffff, true);
	}
}
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc) void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
{ {
const char* no_yes[2] = { "No", "Yes" }; const char* no_yes[2] = { "No", "Yes" };

View File

@ -7,5 +7,4 @@
#include "VideoCommon/BPMemory.h" #include "VideoCommon/BPMemory.h"
void BPInit(); void BPInit();
void LoadBPReg(u32 value0);
void BPReload(); void BPReload();

View File

@ -2,17 +2,32 @@
// Licensed under GPLv2 // Licensed under GPLv2
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
// CP state // CP state
u8 *cached_arraybases[16]; u8 *cached_arraybases[16];
// STATE_TO_SAVE CPState g_main_cp_state;
u32 arraybases[16]; CPState g_preprocess_cp_state;
u32 arraystrides[16];
TMatrixIndexA MatrixIndexA; void DoCPState(PointerWrap& p)
TMatrixIndexB MatrixIndexB; {
TVtxDesc g_VtxDesc; // We don't save g_preprocess_cp_state separately because the GPU should be
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed. // synced around state save/load.
VAT g_VtxAttr[8]; p.DoArray(g_main_cp_state.array_bases, 16);
p.DoArray(g_main_cp_state.array_strides, 16);
p.Do(g_main_cp_state.matrix_index_a);
p.Do(g_main_cp_state.matrix_index_b);
p.Do(g_main_cp_state.vtx_desc.Hex);
p.DoArray(g_main_cp_state.vtx_attr, 8);
p.DoMarker("CP Memory");
if (p.mode == PointerWrap::MODE_READ)
CopyPreprocessCPStateFromMain();
}
void CopyPreprocessCPStateFromMain()
{
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
}

View File

@ -231,12 +231,6 @@ union TMatrixIndexB
#pragma pack() #pragma pack()
extern u32 arraybases[16];
extern u8 *cached_arraybases[16];
extern u32 arraystrides[16];
extern TMatrixIndexA MatrixIndexA;
extern TMatrixIndexB MatrixIndexB;
struct VAT struct VAT
{ {
UVAT_group0 g0; UVAT_group0 g0;
@ -244,11 +238,37 @@ struct VAT
UVAT_group2 g2; UVAT_group2 g2;
}; };
extern TVtxDesc g_VtxDesc; class VertexLoader;
extern VAT g_VtxAttr[8];
// STATE_TO_SAVE
// Bundles the CP register state that used to live in separate globals.
// Two instances exist: g_main_cp_state and g_preprocess_cp_state.
// DoCPState serializes array_bases, array_strides, the two matrix indices,
// vtx_desc.Hex and vtx_attr of g_main_cp_state; attr_dirty and vertex_loaders
// are not part of what it saves.
struct CPState final
{
// Addresses of the vertex arrays in emulated RAM (SWLoadCPReg writes these
// for sub-command group 0xA0).
u32 array_bases[16];
// Per-array strides (SWLoadCPReg stores only the low 8 bits of the value,
// group 0xB0).
u32 array_strides[16];
// Matrix index registers (groups 0x30 and 0x40 in SWLoadCPReg).
TMatrixIndexA matrix_index_a;
TMatrixIndexB matrix_index_b;
// Vertex descriptor; SWLoadCPReg fills the low 17 bits from group 0x50 and
// the bits above them from group 0x60.
TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[8];
// Attributes that actually belong to VertexLoaderManager:
int attr_dirty; // bitfield
VertexLoader* vertex_loaders[8];
};
class PointerWrap;
extern void DoCPState(PointerWrap& p);
extern void CopyPreprocessCPStateFromMain();
extern CPState g_main_cp_state;
extern CPState g_preprocess_cp_state;
extern u8 *cached_arraybases[16];
// Might move this into its own file later. // Might move this into its own file later.
void LoadCPReg(u32 SubCmd, u32 Value); void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);
// Fills memory with data from CP regs // Fills memory with data from CP regs
void FillCPMemoryArray(u32 *memory); void FillCPMemoryArray(u32 *memory);

View File

@ -77,7 +77,7 @@ void DoState(PointerWrap &p)
p.Do(interruptFinishWaiting); p.Do(interruptFinishWaiting);
} }
UNUSED static inline void WriteLow(volatile u32& _reg, u16 lowbits) static inline void WriteLow(volatile u32& _reg, u16 lowbits)
{ {
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits); Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
} }
@ -159,9 +159,8 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{ FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true }, { FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true },
{ FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer) }, { FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer) },
// FIFO_READ_POINTER has different code for single/dual core. // FIFO_READ_POINTER has different code for single/dual core.
{ FIFO_BP_LO, MMIO::Utils::LowPart(&fifo.CPBreakpoint), false, true },
{ FIFO_BP_HI, MMIO::Utils::HighPart(&fifo.CPBreakpoint) },
}; };
for (auto& mapped_var : directly_mapped_vars) for (auto& mapped_var : directly_mapped_vars)
{ {
u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF; u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF;
@ -173,6 +172,19 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
); );
} }
mmio->Register(base | FIFO_BP_LO,
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteLow(fifo.CPBreakpoint, val & 0xffe0);
})
);
mmio->Register(base | FIFO_BP_HI,
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPBreakpoint, val);
})
);
// Timing and metrics MMIOs are stubbed with fixed values. // Timing and metrics MMIOs are stubbed with fixed values.
struct { struct {
u32 addr; u32 addr;
@ -216,8 +228,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
UCPCtrlReg tmp(val); UCPCtrlReg tmp(val);
m_CPCtrlReg.Hex = tmp.Hex; m_CPCtrlReg.Hex = tmp.Hex;
SetCpControlRegister(); SetCpControlRegister();
if (!IsOnThread()) RunGpu();
RunGpu();
}) })
); );
@ -227,8 +238,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
UCPClearReg tmp(val); UCPClearReg tmp(val);
m_CPClearReg.Hex = tmp.Hex; m_CPClearReg.Hex = tmp.Hex;
SetCpClearRegister(); SetCpClearRegister();
if (!IsOnThread()) RunGpu();
RunGpu();
}) })
); );
@ -260,6 +270,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
: MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)), : MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)),
MMIO::ComplexWrite<u16>([](u32, u16 val) { MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPReadWriteDistance, val); WriteHigh(fifo.CPReadWriteDistance, val);
SyncGPU(SYNC_GPU_OTHER);
if (fifo.CPReadWriteDistance == 0) if (fifo.CPReadWriteDistance == 0)
{ {
GPFifo::ResetGatherPipe(); GPFifo::ResetGatherPipe();
@ -269,8 +280,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{ {
ResetVideoBuffer(); ResetVideoBuffer();
} }
if (!IsOnThread()) RunGpu();
RunGpu();
}) })
); );
mmio->Register(base | FIFO_READ_POINTER_LO, mmio->Register(base | FIFO_READ_POINTER_LO,
@ -298,11 +308,7 @@ void STACKALIGN GatherPipeBursted()
// if we aren't linked, we don't care about gather pipe data // if we aren't linked, we don't care about gather pipe data
if (!m_CPCtrlReg.GPLinkEnable) if (!m_CPCtrlReg.GPLinkEnable)
{ {
if (!IsOnThread()) if (IsOnThread() && !g_use_deterministic_gpu_thread)
{
RunGpu();
}
else
{ {
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU. // In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
// Fix Pokemon XD in DC mode. // Fix Pokemon XD in DC mode.
@ -313,6 +319,10 @@ void STACKALIGN GatherPipeBursted()
ProcessFifoAllDistance(); ProcessFifoAllDistance();
} }
} }
else
{
RunGpu();
}
return; return;
} }
@ -327,8 +337,7 @@ void STACKALIGN GatherPipeBursted()
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE); Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
if (!IsOnThread()) RunGpu();
RunGpu();
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase, _assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!"); "FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
@ -358,7 +367,8 @@ void UpdateInterrupts(u64 userdata)
void UpdateInterruptsFromVideoBackend(u64 userdata) void UpdateInterruptsFromVideoBackend(u64 userdata)
{ {
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata); if (!g_use_deterministic_gpu_thread)
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
} }
void SetCPStatusFromGPU() void SetCPStatusFromGPU()

View File

@ -16,6 +16,7 @@ namespace CommandProcessor
{ {
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread. extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread. extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
extern volatile bool interruptSet; extern volatile bool interruptSet;
extern volatile bool interruptWaiting; extern volatile bool interruptWaiting;

View File

@ -6,7 +6,7 @@
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
extern u8* g_pVideoData; extern u8* g_video_buffer_read_ptr;
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__) #if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
#include <tmmintrin.h> #include <tmmintrin.h>
@ -14,20 +14,20 @@ extern u8* g_pVideoData;
__forceinline void DataSkip(u32 skip) __forceinline void DataSkip(u32 skip)
{ {
g_pVideoData += skip; g_video_buffer_read_ptr += skip;
} }
// probably unnecessary // probably unnecessary
template <int count> template <int count>
__forceinline void DataSkip() __forceinline void DataSkip()
{ {
g_pVideoData += count; g_video_buffer_read_ptr += count;
} }
template <typename T> template <typename T>
__forceinline T DataPeek(int _uOffset) __forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
{ {
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(g_pVideoData + _uOffset)); auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(*bufp + _uOffset));
return result; return result;
} }
@ -48,18 +48,18 @@ __forceinline u32 DataPeek32(int _uOffset)
} }
template <typename T> template <typename T>
__forceinline T DataRead() __forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
{ {
auto const result = DataPeek<T>(0); auto const result = DataPeek<T>(0, bufp);
DataSkip<sizeof(T)>(); *bufp += sizeof(T);
return result; return result;
} }
class DataReader class DataReader
{ {
public: public:
inline DataReader() : buffer(g_pVideoData), offset(0) {} inline DataReader() : buffer(g_video_buffer_read_ptr), offset(0) {}
inline ~DataReader() { g_pVideoData += offset; } inline ~DataReader() { g_video_buffer_read_ptr += offset; }
template <typename T> inline T Read() template <typename T> inline T Read()
{ {
const T result = Common::FromBigEndian(*(T*)(buffer + offset)); const T result = Common::FromBigEndian(*(T*)(buffer + offset));
@ -94,14 +94,14 @@ __forceinline u32 DataReadU32()
__forceinline u32 DataReadU32Unswapped() __forceinline u32 DataReadU32Unswapped()
{ {
u32 tmp = *(u32*)g_pVideoData; u32 tmp = *(u32*)g_video_buffer_read_ptr;
g_pVideoData += 4; g_video_buffer_read_ptr += 4;
return tmp; return tmp;
} }
__forceinline u8* DataGetPosition() __forceinline u8* DataGetPosition()
{ {
return g_pVideoData; return g_video_buffer_read_ptr;
} }
template <typename T> template <typename T>

View File

@ -11,32 +11,63 @@
#include "Core/ConfigManager.h" #include "Core/ConfigManager.h"
#include "Core/Core.h" #include "Core/Core.h"
#include "Core/CoreTiming.h" #include "Core/CoreTiming.h"
#include "Core/NetPlayProto.h"
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "VideoCommon/CommandProcessor.h" #include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h" #include "VideoCommon/Fifo.h"
#include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelEngine.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
bool g_bSkipCurrentFrame = false; bool g_bSkipCurrentFrame = false;
namespace
{
static volatile bool GpuRunningState = false; static volatile bool GpuRunningState = false;
static volatile bool EmuRunningState = false; static volatile bool EmuRunningState = false;
static std::mutex m_csHWVidOccupied; static std::mutex m_csHWVidOccupied;
// Most of this array is unlikely to be faulted in...
static u8 s_fifo_aux_data[FIFO_SIZE];
static u8* s_fifo_aux_write_ptr;
static u8* s_fifo_aux_read_ptr;
bool g_use_deterministic_gpu_thread;
// STATE_TO_SAVE // STATE_TO_SAVE
static u8 *videoBuffer; static std::mutex s_video_buffer_lock;
static int size = 0; static std::condition_variable s_video_buffer_cond;
} // namespace static u8* s_video_buffer;
u8* g_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr;
static std::atomic<u8*> s_video_buffer_seen_ptr;
u8* g_video_buffer_pp_read_ptr;
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
// write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode,
// things get a bit more complicated:
// - The seen_ptr marks how far the GPU thread has gotten after processing as
// much of the buffer as it can. If a partial command caused it to stop, the
// seen_ptr is not the same as the read_ptr. It's written by the GPU thread,
// under the lock, which then notifies the cond.
// - The write_ptr is written by the CPU thread after it copies data from the
// FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
void Fifo_DoState(PointerWrap &p) void Fifo_DoState(PointerWrap &p)
{ {
p.DoArray(videoBuffer, FIFO_SIZE); p.DoArray(s_video_buffer, FIFO_SIZE);
p.Do(size); u8* write_ptr = s_video_buffer_write_ptr;
p.DoPointer(g_pVideoData, videoBuffer); p.DoPointer(write_ptr, s_video_buffer);
s_video_buffer_write_ptr = write_ptr;
p.DoPointer(g_video_buffer_read_ptr, s_video_buffer);
if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread)
{
// We're good and paused, right?
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
}
p.Do(g_bSkipCurrentFrame); p.Do(g_bSkipCurrentFrame);
} }
@ -44,6 +75,7 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
{ {
if (doLock) if (doLock)
{ {
SyncGPU(SYNC_GPU_OTHER);
EmulatorState(false); EmulatorState(false);
if (!Core::IsGPUThread()) if (!Core::IsGPUThread())
m_csHWVidOccupied.lock(); m_csHWVidOccupied.lock();
@ -61,8 +93,8 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
void Fifo_Init() void Fifo_Init()
{ {
videoBuffer = (u8*)AllocateMemoryPages(FIFO_SIZE); s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE);
size = 0; ResetVideoBuffer();
GpuRunningState = false; GpuRunningState = false;
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin); Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
} }
@ -70,18 +102,24 @@ void Fifo_Init()
void Fifo_Shutdown() void Fifo_Shutdown()
{ {
if (GpuRunningState) PanicAlert("Fifo shutting down while active"); if (GpuRunningState) PanicAlert("Fifo shutting down while active");
FreeMemoryPages(videoBuffer, FIFO_SIZE); FreeMemoryPages(s_video_buffer, FIFO_SIZE);
videoBuffer = nullptr; s_video_buffer = nullptr;
s_video_buffer_write_ptr = nullptr;
g_video_buffer_pp_read_ptr = nullptr;
g_video_buffer_read_ptr = nullptr;
s_video_buffer_seen_ptr = nullptr;
s_fifo_aux_write_ptr = nullptr;
s_fifo_aux_read_ptr = nullptr;
} }
u8* GetVideoBufferStartPtr() u8* GetVideoBufferStartPtr()
{ {
return videoBuffer; return s_video_buffer;
} }
u8* GetVideoBufferEndPtr() u8* GetVideoBufferEndPtr()
{ {
return &videoBuffer[size]; return s_video_buffer_write_ptr;
} }
void Fifo_SetRendering(bool enabled) void Fifo_SetRendering(bool enabled)
@ -107,30 +145,123 @@ void EmulatorState(bool running)
EmuRunningState = running; EmuRunningState = running;
} }
// Waits for the GPU thread to catch up with the video buffer.
//
// Does nothing unless g_use_deterministic_gpu_thread and GpuRunningState are
// both set. Otherwise it blocks on s_video_buffer_cond until
// s_video_buffer_seen_ptr equals the write pointer sampled on entry, or until
// GpuRunningState becomes false (in which case it returns without touching
// any buffer).
//
// reason: not read by this function body.
// may_move_read_ptr: when true, the unread tail of the video buffer is moved
// to the start of s_video_buffer and the read, pp_read, write and seen
// pointers are rebased accordingly. When false, only the aux FIFO is
// compacted and the video buffer pointers are left alone.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
if (g_use_deterministic_gpu_thread && GpuRunningState)
{
std::unique_lock<std::mutex> lk(s_video_buffer_lock);
// Snapshot the write pointer; the wait below targets this value.
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_cond.wait(lk, [&]() {
return !GpuRunningState || s_video_buffer_seen_ptr == write_ptr;
});
// Woken because the GPU loop stopped rather than because it caught up.
if (!GpuRunningState)
return;
// Opportunistically reset FIFOs so we don't wrap around.
// With may_move_read_ptr set, leftover aux data is reported via PanicAlert,
// but execution continues into the compaction below either way.
if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);
// Slide any unread aux bytes to the front of s_fifo_aux_data and rebase
// both aux pointers by the same distance.
memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
s_fifo_aux_read_ptr = s_fifo_aux_data;
if (may_move_read_ptr)
{
// what's left over in the buffer
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
memmove(s_video_buffer, g_video_buffer_pp_read_ptr, size);
// This change always decreases the pointers. We write seen_ptr
// after write_ptr here, and read it before in RunGpuLoop, so
// 'write_ptr > seen_ptr' there cannot become spuriously true.
s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
g_video_buffer_pp_read_ptr = s_video_buffer;
g_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_seen_ptr = write_ptr;
}
}
}
// Appends `size` bytes to the aux FIFO (s_fifo_aux_data) and advances
// s_fifo_aux_write_ptr by `size`.
//
// ptr:  source bytes. May be null (InterpretDisplayListPreprocess forwards the
//       result of Memory::GetPointer without checking it first). In that case
//       `size` zero bytes are stored instead, so the aux stream keeps the same
//       length as the PopFifoAuxBuffer(size) that consumes it and memcpy is
//       never handed a null source.
//       NOTE(review): the consumer then decodes zero bytes; presumably those
//       are GX_NOP - confirm against the opcode table.
// size: number of bytes to append.
//
// If the data does not fit, the GPU is synced once (which compacts the aux
// FIFO) and the space check is repeated. If it still does not fit, a
// PanicAlert is raised and nothing is appended.
void PushFifoAuxBuffer(void* ptr, size_t size)
{
	if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
	{
		SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
		if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
		{
			// That will sync us up to the last 32 bytes, so this short region
			// of FIFO would have to point to a 2MB display list or something.
			PanicAlert("absurdly large aux buffer");
			return;
		}
	}

	// Passing a null source to memcpy is undefined behaviour, so substitute
	// zeroes when the caller could not resolve the address.
	if (ptr != nullptr)
		memcpy(s_fifo_aux_write_ptr, ptr, size);
	else
		memset(s_fifo_aux_write_ptr, 0, size);
	s_fifo_aux_write_ptr += size;
}
void* PopFifoAuxBuffer(size_t size)
{
void* ret = s_fifo_aux_read_ptr;
s_fifo_aux_read_ptr += size;
return ret;
}
// Description: RunGpuLoop() sends data through this function. // Description: RunGpuLoop() sends data through this function.
void ReadDataFromFifo(u8* _uData, u32 len) static void ReadDataFromFifo(u8* _uData, u32 len)
{ {
if (size + len >= FIFO_SIZE) if (len > (s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
{ {
int pos = (int)(g_pVideoData - videoBuffer); size_t size = s_video_buffer_write_ptr - g_video_buffer_read_ptr;
size -= pos; if (len > FIFO_SIZE - size)
if (size + len > FIFO_SIZE)
{ {
PanicAlert("FIFO out of bounds (size = %i, len = %i at %08x)", size, len, pos); PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
return;
} }
memmove(&videoBuffer[0], &videoBuffer[pos], size); memmove(s_video_buffer, g_video_buffer_read_ptr, size);
g_pVideoData = videoBuffer; s_video_buffer_write_ptr = s_video_buffer + size;
g_video_buffer_read_ptr = s_video_buffer;
} }
// Copy new video instructions to videoBuffer for future use in rendering the new picture // Copy new video instructions to s_video_buffer for future use in rendering the new picture
memcpy(videoBuffer + size, _uData, len); memcpy(s_video_buffer_write_ptr, _uData, len);
size += len; s_video_buffer_write_ptr += len;
}
// The deterministic_gpu_thread version.
// Copies `len` bytes from _uData to the current write position of
// s_video_buffer, runs OpcodeDecoder_Preprocess up to the new end of data,
// and only then publishes the advanced write pointer.
// If the bytes do not fit before the end of the buffer, SyncGPU is called
// first so the buffer can be compacted; the copy is abandoned (after a
// PanicAlert) if the read pointers disagree or the data still does not fit.
static void ReadDataFromFifoOnCPU(u8* _uData, u32 len)
{
u8 *write_ptr = s_video_buffer_write_ptr;
if (len > (s_video_buffer + FIFO_SIZE - write_ptr))
{
// We can't wrap around while the GPU is working on the data.
// This should be very rare due to the reset in SyncGPU.
SyncGPU(SYNC_GPU_WRAPAROUND);
if (g_video_buffer_pp_read_ptr != g_video_buffer_read_ptr)
{
PanicAlert("desynced read pointers");
return;
}
// Reload: SyncGPU may have rebased the write pointer.
write_ptr = s_video_buffer_write_ptr;
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
// NOTE(review): this checks total occupancy, not the room left after
// write_ptr. The two agree only if SyncGPU actually compacted the buffer;
// SyncGPU returns early without compacting when GpuRunningState is false.
// Confirm that case cannot reach here with a nearly full buffer.
if (len > FIFO_SIZE - size)
{
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
return;
}
}
memcpy(write_ptr, _uData, len);
OpcodeDecoder_Preprocess(write_ptr + len);
// This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len;
}
void ResetVideoBuffer() void ResetVideoBuffer()
{ {
g_pVideoData = videoBuffer; g_video_buffer_read_ptr = s_video_buffer;
size = 0; s_video_buffer_write_ptr = s_video_buffer;
s_video_buffer_seen_ptr = s_video_buffer;
g_video_buffer_pp_read_ptr = s_video_buffer;
s_fifo_aux_write_ptr = s_fifo_aux_data;
s_fifo_aux_read_ptr = s_fifo_aux_data;
} }
@ -148,53 +279,75 @@ void RunGpuLoop()
g_video_backend->PeekMessages(); g_video_backend->PeekMessages();
VideoFifo_CheckAsyncRequest(); VideoFifo_CheckAsyncRequest();
if (g_use_deterministic_gpu_thread)
CommandProcessor::SetCPStatusFromGPU();
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
// check if we are able to run this buffer
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{ {
fifo.isGpuReadingData = true; // All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false; u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin) // See comment in SyncGPU
if (write_ptr > seen_ptr)
{ {
u32 readPtr = fifo.CPReadPointer; OpcodeDecoder_Run(write_ptr);
u8 *uData = Memory::GetPointer(readPtr);
if (readPtr == fifo.CPEnd) {
readPtr = fifo.CPBase; std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
else s_video_buffer_seen_ptr = write_ptr;
readPtr += 32; s_video_buffer_cond.notify_all();
}
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
ReadDataFromFifo(uData, 32);
cyclesExecuted = OpcodeDecoder_Run(GetVideoBufferEndPtr());
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((GetVideoBufferEndPtr() - g_pVideoData) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
} }
}
else
{
CommandProcessor::SetCPStatusFromGPU(); CommandProcessor::SetCPStatusFromGPU();
// This call is pretty important in DualCore mode and must be called in the FIFO Loop. Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
VideoFifo_CheckAsyncRequest();
CommandProcessor::isPossibleWaitingSetDrawDone = false;
}
fifo.isGpuReadingData = false; // check if we are able to run this buffer
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
fifo.isGpuReadingData = true;
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
{
u32 readPtr = fifo.CPReadPointer;
u8 *uData = Memory::GetPointer(readPtr);
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
ReadDataFromFifo(uData, 32);
u8* write_ptr = s_video_buffer_write_ptr;
cyclesExecuted = OpcodeDecoder_Run(write_ptr);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - g_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
}
CommandProcessor::SetCPStatusFromGPU();
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
VideoFifo_CheckAsyncRequest();
CommandProcessor::isPossibleWaitingSetDrawDone = false;
}
fifo.isGpuReadingData = false;
}
if (EmuRunningState) if (EmuRunningState)
{ {
@ -217,6 +370,8 @@ void RunGpuLoop()
} }
} }
} }
// wake up SyncGPU if we were interrupted
s_video_buffer_cond.notify_all();
} }
@ -228,16 +383,27 @@ bool AtBreakpoint()
void RunGpu() void RunGpu()
{ {
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread &&
!g_use_deterministic_gpu_thread)
return;
SCPFifoStruct &fifo = CommandProcessor::fifo; SCPFifoStruct &fifo = CommandProcessor::fifo;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() ) while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
{ {
u8 *uData = Memory::GetPointer(fifo.CPReadPointer); u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
FPURoundMode::SaveSIMDState(); if (g_use_deterministic_gpu_thread)
FPURoundMode::LoadDefaultSIMDState(); {
ReadDataFromFifo(uData, 32); ReadDataFromFifoOnCPU(uData, 32);
OpcodeDecoder_Run(GetVideoBufferEndPtr()); }
FPURoundMode::LoadSIMDState(); else
{
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(uData, 32);
OpcodeDecoder_Run(s_video_buffer_write_ptr);
FPURoundMode::LoadSIMDState();
}
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base"); //DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
@ -250,3 +416,45 @@ void RunGpu()
} }
CommandProcessor::SetCPStatusFromGPU(); CommandProcessor::SetCPStatusFromGPU();
} }
// Recomputes g_use_deterministic_gpu_thread from the configured GPU
// determinism mode and the caller's `want` flag.
//
// want: whether determinism is currently wanted. It is only consulted in
//       GPU_DETERMINISM_AUTO mode.
//
// The deterministic GPU thread is never enabled unless bCPUThread is set.
// When the flag switches on, the pointers and CP state that are not
// maintained in non-deterministic mode are resynchronised from the main ones.
void Fifo_UpdateWantDeterminism(bool want)
{
	// We are paused (or not running at all yet) and have m_csHWVidOccupied, so
	// it should be safe to change this.
	const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;

	// Default to off so an unexpected mode value can never leave this
	// uninitialised when it is read below.
	bool gpu_thread = false;
	switch (param.m_GPUDeterminismMode)
	{
	case GPU_DETERMINISM_AUTO:
		gpu_thread = want;
		// Hack: For now movies are an exception to this being on (but not
		// to wanting determinism in general). Once vertex arrays are
		// fixed, there should be no reason to want this off for movies by
		// default, so this can be removed.
		if (!NetPlay::IsNetPlayRunning())
			gpu_thread = false;
		break;
	case GPU_DETERMINISM_NONE:
		gpu_thread = false;
		break;
	case GPU_DETERMINISM_FAKE_COMPLETION:
		gpu_thread = true;
		break;
	}

	gpu_thread = gpu_thread && param.bCPUThread;

	if (g_use_deterministic_gpu_thread != gpu_thread)
	{
		g_use_deterministic_gpu_thread = gpu_thread;
		if (gpu_thread)
		{
			// These haven't been updated in non-deterministic mode.
			s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
			CopyPreprocessCPStateFromMain();
			VertexLoaderManager::MarkAllDirty();
		}
	}
}

View File

@ -13,6 +13,11 @@ class PointerWrap;
extern bool g_bSkipCurrentFrame; extern bool g_bSkipCurrentFrame;
// This could be in SCoreStartupParameter, but it depends on multiple settings
// and can change at runtime.
extern bool g_use_deterministic_gpu_thread;
extern std::atomic<u8*> g_video_buffer_write_ptr_xthread;
extern u8* g_video_buffer_pp_read_ptr;
void Fifo_Init(); void Fifo_Init();
void Fifo_Shutdown(); void Fifo_Shutdown();
@ -22,8 +27,23 @@ u8* GetVideoBufferEndPtr();
void Fifo_DoState(PointerWrap &f); void Fifo_DoState(PointerWrap &f);
void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock); void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock);
void Fifo_UpdateWantDeterminism(bool want);
void ReadDataFromFifo(u8* _uData, u32 len); // Used for diagnostics.
// Tag passed to SyncGPU() identifying why the caller is syncing.
enum SyncGPUReason {
// No call site is visible in this part of the code.
SYNC_GPU_NONE,
// Generic reason; passed by Fifo_PauseAndLock and by the CP
// read/write-distance MMIO write handler.
SYNC_GPU_OTHER,
// Passed by ReadDataFromFifoOnCPU when new data does not fit before the end
// of the video buffer.
SYNC_GPU_WRAPAROUND,
// Passed by VideoBackendHardware::Video_AccessEFB.
SYNC_GPU_EFB_POKE,
// Passed by VideoBackendHardware::Video_GetQueryResult.
SYNC_GPU_PERFQUERY,
// Passed by VideoBackendHardware::Video_EndField.
SYNC_GPU_SWAP,
// Passed by PushFifoAuxBuffer when the aux FIFO lacks room.
SYNC_GPU_AUX_SPACE,
};
// In g_use_deterministic_gpu_thread mode, waits for the GPU to be done with pending work.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
void PushFifoAuxBuffer(void* ptr, size_t size);
void* PopFifoAuxBuffer(size_t size);
void RunGpu(); void RunGpu();
void RunGpuLoop(); void RunGpuLoop();

View File

@ -118,6 +118,7 @@ void VideoBackendHardware::Video_EndField()
{ {
if (s_BackendInitialized) if (s_BackendInitialized)
{ {
SyncGPU(SYNC_GPU_SWAP);
s_swapRequested.Set(); s_swapRequested.Set();
} }
} }
@ -153,6 +154,8 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32
{ {
if (s_BackendInitialized && g_ActiveConfig.bEFBAccessEnable) if (s_BackendInitialized && g_ActiveConfig.bEFBAccessEnable)
{ {
SyncGPU(SYNC_GPU_EFB_POKE);
s_accessEFBArgs.type = type; s_accessEFBArgs.type = type;
s_accessEFBArgs.x = x; s_accessEFBArgs.x = x;
s_accessEFBArgs.y = y; s_accessEFBArgs.y = y;
@ -194,6 +197,8 @@ u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type)
return 0; return 0;
} }
SyncGPU(SYNC_GPU_PERFQUERY);
// TODO: Is this check sane? // TODO: Is this check sane?
if (!g_perf_query->IsFlushed()) if (!g_perf_query->IsFlushed())
{ {
@ -304,3 +309,8 @@ void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)
CommandProcessor::RegisterMMIO(mmio, base); CommandProcessor::RegisterMMIO(mmio, base);
} }
// Backend entry point for determinism changes: forwards `want` unchanged to
// Fifo_UpdateWantDeterminism().
void VideoBackendHardware::UpdateWantDeterminism(bool want)
{
Fifo_UpdateWantDeterminism(want);
}

View File

@ -4,7 +4,7 @@
#pragma once #pragma once
#include "Common/CommonTypes.h" #include "Common/Hash.h"
// m_components // m_components
enum enum
@ -87,6 +87,20 @@ struct PortableVertexDeclaration
} }
}; };
// std::hash specialisation for PortableVertexDeclaration, so the type can be
// used as a key in unordered standard containers.
namespace std
{
template <>
struct hash<PortableVertexDeclaration>
{
// Hashes the raw object representation (all sizeof(decl) bytes) with
// HashFletcher.
// NOTE(review): if the struct contains padding bytes, they take part in the
// hash as well - confirm instances are fully initialised before hashing.
size_t operator()(const PortableVertexDeclaration& decl) const
{
return HashFletcher((u8 *) &decl, sizeof(decl));
}
};
}
// The implementation of this class is specific for GL/DX, so NativeVertexFormat.cpp // The implementation of this class is specific for GL/DX, so NativeVertexFormat.cpp
// is in the respective backend, not here in VideoCommon. // is in the respective backend, not here in VideoCommon.

View File

@ -24,6 +24,7 @@
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h" #include "VideoCommon/Fifo.h"
#include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/Statistics.h" #include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
@ -31,25 +32,29 @@
#include "VideoCommon/XFMemory.h" #include "VideoCommon/XFMemory.h"
u8* g_pVideoData = nullptr;
bool g_bRecordFifoData = false; bool g_bRecordFifoData = false;
static u32 InterpretDisplayList(u32 address, u32 size) static u32 InterpretDisplayList(u32 address, u32 size)
{ {
u8* old_pVideoData = g_pVideoData; u8* old_pVideoData = g_video_buffer_read_ptr;
u8* startAddress = Memory::GetPointer(address); u8* startAddress;
if (g_use_deterministic_gpu_thread)
startAddress = (u8*) PopFifoAuxBuffer(size);
else
startAddress = Memory::GetPointer(address);
u32 cycles = 0; u32 cycles = 0;
// Avoid the crash if Memory::GetPointer failed .. // Avoid the crash if Memory::GetPointer failed ..
if (startAddress != nullptr) if (startAddress != nullptr)
{ {
g_pVideoData = startAddress; g_video_buffer_read_ptr = startAddress;
// temporarily swap dl and non-dl (small "hack" for the stats) // temporarily swap dl and non-dl (small "hack" for the stats)
Statistics::SwapDL(); Statistics::SwapDL();
u8 *end = g_pVideoData + size; u8 *end = g_video_buffer_read_ptr + size;
cycles = OpcodeDecoder_Run(end); cycles = OpcodeDecoder_Run(end);
INCSTAT(stats.thisFrame.numDListsCalled); INCSTAT(stats.thisFrame.numDListsCalled);
@ -58,16 +63,34 @@ static u32 InterpretDisplayList(u32 address, u32 size)
} }
// reset to the old pointer // reset to the old pointer
g_pVideoData = old_pVideoData; g_video_buffer_read_ptr = old_pVideoData;
return cycles; return cycles;
} }
// Preprocessing counterpart of InterpretDisplayList.
// Copies the display list bytes at `address` into the aux FIFO, then runs the
// preprocessing decoder over the list in place, with
// g_video_buffer_pp_read_ptr temporarily redirected to it.
// Note that the aux push happens before the null check on the resolved
// pointer, i.e. it is issued even when Memory::GetPointer returned null.
static void InterpretDisplayListPreprocess(u32 address, u32 size)
{
	u8* const saved_pp_read_ptr = g_video_buffer_pp_read_ptr;
	u8* const list_begin = Memory::GetPointer(address);

	PushFifoAuxBuffer(list_begin, size);

	if (list_begin)
	{
		// Point the preprocess reader at the list and decode up to its end.
		g_video_buffer_pp_read_ptr = list_begin;
		OpcodeDecoder_Preprocess(list_begin + size);
	}

	// Always put the preprocess read pointer back where it was on entry.
	g_video_buffer_pp_read_ptr = saved_pp_read_ptr;
}
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess) static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
{ {
// TODO(Omega): Maybe dump FIFO to file on this error // TODO(Omega): Maybe dump FIFO to file on this error
std::string temp = StringFromFormat( std::string temp = StringFromFormat(
"GFX FIFO: Unknown Opcode (0x%x @ %p).\n" "GFX FIFO: Unknown Opcode (0x%x @ %p, preprocessing=%s).\n"
"This means one of the following:\n" "This means one of the following:\n"
"* The emulated GPU got desynced, disabling dual core can help\n" "* The emulated GPU got desynced, disabling dual core can help\n"
"* Command stream corrupted by some spurious memory bug\n" "* Command stream corrupted by some spurious memory bug\n"
@ -75,7 +98,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
"* Some other sort of bug\n\n" "* Some other sort of bug\n\n"
"Dolphin will now likely crash or hang. Enjoy." , "Dolphin will now likely crash or hang. Enjoy." ,
cmd_byte, cmd_byte,
buffer); buffer,
preprocess ? "yes" : "no");
Host_SysMessage(temp.c_str()); Host_SysMessage(temp.c_str());
INFO_LOG(VIDEO, "%s", temp.c_str()); INFO_LOG(VIDEO, "%s", temp.c_str());
{ {
@ -105,14 +129,16 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
} }
} }
template <bool is_preprocess, u8** bufp>
static u32 Decode(u8* end) static u32 Decode(u8* end)
{ {
u8 *opcodeStart = g_pVideoData; u8 *opcodeStart = *bufp;
if (g_pVideoData == end) if (*bufp == end)
return 0; return 0;
u8 cmd_byte = DataReadU8(); u8 cmd_byte = DataRead<u8>(bufp);
u32 cycles; u32 cycles;
int refarray;
switch (cmd_byte) switch (cmd_byte)
{ {
case GX_NOP: case GX_NOP:
@ -121,64 +147,72 @@ static u32 Decode(u8* end)
case GX_LOAD_CP_REG: //0x08 case GX_LOAD_CP_REG: //0x08
{ {
if (end - g_pVideoData < 1 + 4) if (end - *bufp < 1 + 4)
return 0; return 0;
cycles = 12; cycles = 12;
u8 sub_cmd = DataReadU8(); u8 sub_cmd = DataRead<u8>(bufp);
u32 value = DataReadU32(); u32 value = DataRead<u32>(bufp);
LoadCPReg(sub_cmd, value); LoadCPReg(sub_cmd, value, is_preprocess);
INCSTAT(stats.thisFrame.numCPLoads); if (!is_preprocess)
INCSTAT(stats.thisFrame.numCPLoads);
} }
break; break;
case GX_LOAD_XF_REG: case GX_LOAD_XF_REG:
{ {
if (end - g_pVideoData < 4) if (end - *bufp < 4)
return 0; return 0;
u32 Cmd2 = DataReadU32(); u32 Cmd2 = DataRead<u32>(bufp);
int transfer_size = ((Cmd2 >> 16) & 15) + 1; int transfer_size = ((Cmd2 >> 16) & 15) + 1;
if ((size_t) (end - g_pVideoData) < transfer_size * sizeof(u32)) if ((size_t) (end - *bufp) < transfer_size * sizeof(u32))
return 0; return 0;
cycles = 18 + 6 * transfer_size; cycles = 18 + 6 * transfer_size;
u32 xf_address = Cmd2 & 0xFFFF; if (!is_preprocess)
LoadXFReg(transfer_size, xf_address); {
u32 xf_address = Cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address);
INCSTAT(stats.thisFrame.numXFLoads); INCSTAT(stats.thisFrame.numXFLoads);
}
else
{
*bufp += transfer_size * sizeof(u32);
}
} }
break; break;
case GX_LOAD_INDX_A: //used for position matrices case GX_LOAD_INDX_A: //used for position matrices
if (end - g_pVideoData < 4) refarray = 0xC;
return 0; goto load_indx;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xC);
break;
case GX_LOAD_INDX_B: //used for normal matrices case GX_LOAD_INDX_B: //used for normal matrices
if (end - g_pVideoData < 4) refarray = 0xD;
return 0; goto load_indx;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xD);
break;
case GX_LOAD_INDX_C: //used for postmatrices case GX_LOAD_INDX_C: //used for postmatrices
if (end - g_pVideoData < 4) refarray = 0xE;
return 0; goto load_indx;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xE);
break;
case GX_LOAD_INDX_D: //used for lights case GX_LOAD_INDX_D: //used for lights
if (end - g_pVideoData < 4) refarray = 0xF;
goto load_indx;
load_indx:
if (end - *bufp < 4)
return 0; return 0;
cycles = 6; cycles = 6;
LoadIndexedXF(DataReadU32(), 0xF); if (is_preprocess)
PreprocessIndexedXF(DataRead<u32>(bufp), refarray);
else
LoadIndexedXF(DataRead<u32>(bufp), refarray);
break; break;
case GX_CMD_CALL_DL: case GX_CMD_CALL_DL:
{ {
if (end - g_pVideoData < 8) if (end - *bufp < 8)
return 0; return 0;
u32 address = DataReadU32(); u32 address = DataRead<u32>(bufp);
u32 count = DataReadU32(); u32 count = DataRead<u32>(bufp);
cycles = 6 + InterpretDisplayList(address, count); if (is_preprocess)
InterpretDisplayListPreprocess(address, count);
else
cycles = 6 + InterpretDisplayList(address, count);
} }
break; break;
@ -196,12 +230,19 @@ static u32 Decode(u8* end)
// In skipped_frame case: We have to let BP writes through because they set // In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead. // tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{ {
if (end - g_pVideoData < 4) if (end - *bufp < 4)
return 0; return 0;
cycles = 12; cycles = 12;
u32 bp_cmd = DataReadU32(); u32 bp_cmd = DataRead<u32>(bufp);
LoadBPReg(bp_cmd); if (is_preprocess)
INCSTAT(stats.thisFrame.numBPLoads); {
LoadBPRegPreprocess(bp_cmd);
}
else
{
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
}
} }
break; break;
@ -211,38 +252,48 @@ static u32 Decode(u8* end)
{ {
cycles = 1600; cycles = 1600;
// load vertices // load vertices
if (end - g_pVideoData < 2) if (end - *bufp < 2)
return 0; return 0;
u16 numVertices = DataReadU16(); u16 num_vertices = DataRead<u16>(bufp);
if (!VertexLoaderManager::RunVertices( if (is_preprocess)
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices,
end - g_pVideoData,
g_bSkipCurrentFrame))
{ {
return 0; size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
if ((size_t) (end - *bufp) < size)
return 0;
*bufp += size;
}
else
{
if (!VertexLoaderManager::RunVertices(
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
num_vertices,
end - *bufp,
g_bSkipCurrentFrame))
return 0;
} }
} }
else else
{ {
UnknownOpcode(cmd_byte, opcodeStart, false); UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
cycles = 1; cycles = 1;
} }
break; break;
} }
// Display lists get added directly into the FIFO stream // Display lists get added directly into the FIFO stream
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL) if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart)); FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart));
return cycles; // In is_preprocess mode, we don't actually care about cycles, at least for
// now... make sure the compiler realizes that.
return is_preprocess ? 1 : cycles;
} }
void OpcodeDecoder_Init() void OpcodeDecoder_Init()
{ {
g_pVideoData = GetVideoBufferStartPtr(); g_video_buffer_read_ptr = GetVideoBufferStartPtr();
} }
@ -255,14 +306,28 @@ u32 OpcodeDecoder_Run(u8* end)
u32 totalCycles = 0; u32 totalCycles = 0;
while (true) while (true)
{ {
u8* old = g_pVideoData; u8* old = g_video_buffer_read_ptr;
u32 cycles = Decode(end); u32 cycles = Decode</*is_preprocess*/ false, &g_video_buffer_read_ptr>(end);
if (cycles == 0) if (cycles == 0)
{ {
g_pVideoData = old; g_video_buffer_read_ptr = old;
break; break;
} }
totalCycles += cycles; totalCycles += cycles;
} }
return totalCycles; return totalCycles;
} }
// Runs the decoder in preprocess mode over the preprocess read pointer,
// one command at a time, until Decode reports 0 (which it does when it
// cannot consume a complete command before `end`).
//
// On exit g_video_buffer_pp_read_ptr is rewound to the position it had
// before the Decode call that returned 0.
void OpcodeDecoder_Preprocess(u8 *end)
{
	// Position just before the command currently being attempted.
	u8* command_start = g_video_buffer_pp_read_ptr;

	while (Decode</*is_preprocess*/ true, &g_video_buffer_pp_read_ptr>(end) != 0)
		command_start = g_video_buffer_pp_read_ptr;

	// Undo whatever the failed attempt consumed.
	g_video_buffer_pp_read_ptr = command_start;
}

View File

@ -39,3 +39,4 @@ extern bool g_bRecordFifoData;
void OpcodeDecoder_Init(); void OpcodeDecoder_Init();
void OpcodeDecoder_Shutdown(); void OpcodeDecoder_Shutdown();
u32 OpcodeDecoder_Run(u8* end); u32 OpcodeDecoder_Run(u8* end);
// Preprocess-mode counterpart of OpcodeDecoder_Run. The argument is the
// end-of-data pointer up to which commands are decoded (the definition names
// this parameter `end`).
void OpcodeDecoder_Preprocess(u8* write_ptr);

View File

@ -33,14 +33,11 @@
// Matrix components are first in GC format but later in PC format - we need to store it temporarily // Matrix components are first in GC format but later in PC format - we need to store it temporarily
// when decoding each vertex. // when decoding each vertex.
static u8 s_curposmtx = MatrixIndexA.PosNormalMtxIdx; static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
static u8 s_curtexmtx[8]; static u8 s_curtexmtx[8];
static int s_texmtxwrite = 0; static int s_texmtxwrite = 0;
static int s_texmtxread = 0; static int s_texmtxread = 0;
static int loop_counter;
// Vertex loaders read these. Although the scale ones should be baked into the shader. // Vertex loaders read these. Although the scale ones should be baked into the shader.
int tcIndex; int tcIndex;
int colIndex; int colIndex;
@ -90,7 +87,7 @@ static void LOADERDECL PosMtx_Write()
DataWrite<u8>(0); DataWrite<u8>(0);
// Resetting current position matrix to default is needed for bbox to behave // Resetting current position matrix to default is needed for bbox to behave
s_curposmtx = (u8) MatrixIndexA.PosNormalMtxIdx; s_curposmtx = (u8) g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
} }
static void LOADERDECL UpdateBoundingBoxPrepare() static void LOADERDECL UpdateBoundingBoxPrepare()
@ -548,7 +545,7 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
m_compiledCode = nullptr; m_compiledCode = nullptr;
m_numLoadedVertices = 0; m_numLoadedVertices = 0;
m_VertexSize = 0; m_VertexSize = 0;
loop_counter = 0; m_native_vertex_format = nullptr;
VertexLoader_Normal::Init(); VertexLoader_Normal::Init();
VertexLoader_Position::Init(); VertexLoader_Position::Init();
VertexLoader_TextCoord::Init(); VertexLoader_TextCoord::Init();
@ -584,8 +581,11 @@ void VertexLoader::CompileVertexTranslator()
PanicAlert("Trying to recompile a vertex translator"); PanicAlert("Trying to recompile a vertex translator");
m_compiledCode = GetCodePtr(); m_compiledCode = GetCodePtr();
// We don't use any callee saved registers or anything but RAX. // We only use RAX (caller saved) and RBX (callee saved).
ABI_PushRegistersAndAdjustStack(0, 8); ABI_PushRegistersAndAdjustStack(1 << RBX, 8);
// save count
MOV(64, R(RBX), R(ABI_PARAM1));
// Start loop here // Start loop here
const u8 *loop_start = GetCodePtr(); const u8 *loop_start = GetCodePtr();
@ -842,11 +842,10 @@ void VertexLoader::CompileVertexTranslator()
#ifdef USE_VERTEX_LOADER_JIT #ifdef USE_VERTEX_LOADER_JIT
// End loop here // End loop here
MOV(64, R(RAX), Imm64((u64)&loop_counter)); SUB(64, R(RBX), Imm8(1));
SUB(32, MatR(RAX), Imm8(1));
J_CC(CC_NZ, loop_start); J_CC(CC_NZ, loop_start);
ABI_PopRegistersAndAdjustStack(0, 8); ABI_PopRegistersAndAdjustStack(1 << RBX, 8);
RET(); RET();
#endif #endif
} }
@ -912,8 +911,7 @@ void VertexLoader::ConvertVertices ( int count )
#ifdef USE_VERTEX_LOADER_JIT #ifdef USE_VERTEX_LOADER_JIT
if (count > 0) if (count > 0)
{ {
loop_counter = count; ((void (*)(int))(void*)m_compiledCode)(count);
((void (*)())(void*)m_compiledCode)();
} }
#else #else
for (int s = 0; s < count; s++) for (int s = 0; s < count; s++)
@ -1035,3 +1033,22 @@ void VertexLoader::AppendToString(std::string *dest) const
} }
dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices)); dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices));
} }
// Returns the NativeVertexFormat matching this loader's vertex declaration.
//
// The result is memoized per loader in m_native_vertex_format. On the first
// call the shared s_native_vertex_map is consulted, keyed by
// m_native_vtx_decl; if no format exists for that declaration yet, one is
// created through g_vertex_manager, initialized with the declaration, tagged
// with this loader's component mask, and stored in the map (which owns it).
NativeVertexFormat* VertexLoader::GetNativeVertexFormat()
{
	if (!m_native_vertex_format)
	{
		// operator[] default-inserts an empty unique_ptr for unseen declarations.
		std::unique_ptr<NativeVertexFormat>& slot = s_native_vertex_map[m_native_vtx_decl];
		if (!slot)
		{
			slot.reset(g_vertex_manager->CreateNativeVertexFormat());
			slot->Initialize(m_native_vtx_decl);
			slot->m_components = m_native_components;
		}
		m_native_vertex_format = slot.get();
	}

	return m_native_vertex_format;
}
// Definition of the static cache shared by all VertexLoader instances: maps a
// PortableVertexDeclaration to the NativeVertexFormat owned for it.
std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> VertexLoader::s_native_vertex_map;

View File

@ -8,7 +8,9 @@
// Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt // Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt
#include <algorithm> #include <algorithm>
#include <memory>
#include <string> #include <string>
#include <unordered_map>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/x64Emitter.h" #include "Common/x64Emitter.h"
@ -114,6 +116,9 @@ public:
void AppendToString(std::string *dest) const; void AppendToString(std::string *dest) const;
int GetNumLoadedVerts() const { return m_numLoadedVertices; } int GetNumLoadedVerts() const { return m_numLoadedVertices; }
NativeVertexFormat* GetNativeVertexFormat();
static void ClearNativeVertexFormatCache() { s_native_vertex_map.clear(); }
private: private:
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator. int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
@ -135,6 +140,9 @@ private:
int m_numLoadedVertices; int m_numLoadedVertices;
NativeVertexFormat* m_native_vertex_format;
static std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> s_native_vertex_map;
void SetVAT(const VAT& vat); void SetVAT(const VAT& vat);
void CompileVertexTranslator(); void CompileVertexTranslator();

View File

@ -4,6 +4,7 @@
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <mutex>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <vector> #include <vector>
@ -20,13 +21,8 @@
#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
static int s_attr_dirty; // bitfield
static NativeVertexFormat* s_current_vtx_fmt; static NativeVertexFormat* s_current_vtx_fmt;
typedef std::pair<VertexLoader*, NativeVertexFormat*> VertexLoaderCacheItem;
static VertexLoaderCacheItem s_VertexLoaders[8];
namespace std namespace std
{ {
@ -41,35 +37,30 @@ struct hash<VertexLoaderUID>
} }
typedef std::unordered_map<VertexLoaderUID, VertexLoaderCacheItem> VertexLoaderMap; typedef std::unordered_map<VertexLoaderUID, std::unique_ptr<VertexLoader>> VertexLoaderMap;
typedef std::map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> NativeVertexLoaderMap;
namespace VertexLoaderManager namespace VertexLoaderManager
{ {
static VertexLoaderMap s_VertexLoaderMap; static std::mutex s_vertex_loader_map_lock;
static NativeVertexLoaderMap s_native_vertex_map; static VertexLoaderMap s_vertex_loader_map;
// TODO - change into array of pointers. Keep a map of all seen so far. // TODO - change into array of pointers. Keep a map of all seen so far.
void Init() void Init()
{ {
MarkAllDirty(); MarkAllDirty();
for (auto& map_entry : s_VertexLoaders) for (auto& map_entry : g_main_cp_state.vertex_loaders)
{ map_entry = nullptr;
map_entry.first = nullptr; for (auto& map_entry : g_preprocess_cp_state.vertex_loaders)
map_entry.second = nullptr; map_entry = nullptr;
}
RecomputeCachedArraybases(); RecomputeCachedArraybases();
} }
void Shutdown() void Shutdown()
{ {
for (auto& map_entry : s_VertexLoaderMap) std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
{ s_vertex_loader_map.clear();
delete map_entry.second.first; VertexLoader::ClearNativeVertexFormatCache();
}
s_VertexLoaderMap.clear();
s_native_vertex_map.clear();
} }
namespace namespace
@ -87,14 +78,15 @@ struct entry
void AppendListToString(std::string *dest) void AppendListToString(std::string *dest)
{ {
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
std::vector<entry> entries; std::vector<entry> entries;
size_t total_size = 0; size_t total_size = 0;
for (const auto& map_entry : s_VertexLoaderMap) for (const auto& map_entry : s_vertex_loader_map)
{ {
entry e; entry e;
map_entry.second.first->AppendToString(&e.text); map_entry.second->AppendToString(&e.text);
e.num_verts = map_entry.second.first->GetNumLoadedVerts(); e.num_verts = map_entry.second->GetNumLoadedVerts();
entries.push_back(e); entries.push_back(e);
total_size += e.text.size() + 1; total_size += e.text.size() + 1;
} }
@ -108,57 +100,46 @@ void AppendListToString(std::string *dest)
void MarkAllDirty() void MarkAllDirty()
{ {
s_attr_dirty = 0xff; g_main_cp_state.attr_dirty = 0xff;
g_preprocess_cp_state.attr_dirty = 0xff;
} }
static NativeVertexFormat* GetNativeVertexFormat(const PortableVertexDeclaration& format, static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state)
u32 components)
{ {
auto& native = s_native_vertex_map[format]; VertexLoader* loader;
if (!native) if ((state->attr_dirty >> vtx_attr_group) & 1)
{ {
auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat(); VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
native = std::unique_ptr<NativeVertexFormat>(raw_pointer); std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
native->Initialize(format); VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
native->m_components = components; if (iter != s_vertex_loader_map.end())
}
return native.get();
}
static VertexLoaderCacheItem RefreshLoader(int vtx_attr_group)
{
if ((s_attr_dirty >> vtx_attr_group) & 1)
{
VertexLoaderUID uid(g_VtxDesc, g_VtxAttr[vtx_attr_group]);
VertexLoaderMap::iterator iter = s_VertexLoaderMap.find(uid);
if (iter != s_VertexLoaderMap.end())
{ {
s_VertexLoaders[vtx_attr_group] = iter->second; loader = iter->second.get();
} }
else else
{ {
VertexLoader* loader = new VertexLoader(g_VtxDesc, g_VtxAttr[vtx_attr_group]); loader = new VertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
s_vertex_loader_map[uid] = std::unique_ptr<VertexLoader>(loader);
NativeVertexFormat* vtx_fmt = GetNativeVertexFormat(
loader->GetNativeVertexDeclaration(),
loader->GetNativeComponents());
s_VertexLoaderMap[uid] = std::make_pair(loader, vtx_fmt);
s_VertexLoaders[vtx_attr_group] = std::make_pair(loader, vtx_fmt);
INCSTAT(stats.numVertexLoaders); INCSTAT(stats.numVertexLoaders);
} }
state->vertex_loaders[vtx_attr_group] = loader;
state->attr_dirty &= ~(1 << vtx_attr_group);
} else {
loader = state->vertex_loaders[vtx_attr_group];
} }
s_attr_dirty &= ~(1 << vtx_attr_group); return loader;
return s_VertexLoaders[vtx_attr_group];
} }
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing) bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
{ {
if (!count) if (!count)
return true; return true;
auto loader = RefreshLoader(vtx_attr_group);
size_t size = count * loader.first->GetVertexSize(); CPState* state = &g_main_cp_state;
VertexLoader* loader = RefreshLoader(vtx_attr_group, state);
size_t size = count * loader->GetVertexSize();
if (buf_size < size) if (buf_size < size)
return false; return false;
@ -169,15 +150,17 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
return true; return true;
} }
NativeVertexFormat* native = loader->GetNativeVertexFormat();
// If the native vertex format changed, force a flush. // If the native vertex format changed, force a flush.
if (loader.second != s_current_vtx_fmt) if (native != s_current_vtx_fmt)
VertexManager::Flush(); VertexManager::Flush();
s_current_vtx_fmt = loader.second; s_current_vtx_fmt = native;
VertexManager::PrepareForAdditionalData(primitive, count, VertexManager::PrepareForAdditionalData(primitive, count,
loader.first->GetNativeVertexDeclaration().stride); loader->GetNativeVertexDeclaration().stride);
loader.first->RunVertices(g_VtxAttr[vtx_attr_group], primitive, count); loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);
IndexGenerator::AddIndices(primitive, count); IndexGenerator::AddIndices(primitive, count);
@ -186,9 +169,9 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
return true; return true;
} }
int GetVertexSize(int vtx_attr_group) int GetVertexSize(int vtx_attr_group, bool preprocess)
{ {
return RefreshLoader(vtx_attr_group).first->GetVertexSize(); return RefreshLoader(vtx_attr_group, preprocess ? &g_preprocess_cp_state : &g_main_cp_state)->GetVertexSize();
} }
NativeVertexFormat* GetCurrentVertexFormat() NativeVertexFormat* GetCurrentVertexFormat()
@ -198,78 +181,83 @@ NativeVertexFormat* GetCurrentVertexFormat()
} // namespace } // namespace
void LoadCPReg(u32 sub_cmd, u32 value) void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
{ {
bool update_global_state = !is_preprocess;
CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
switch (sub_cmd & 0xF0) switch (sub_cmd & 0xF0)
{ {
case 0x30: case 0x30:
VertexShaderManager::SetTexMatrixChangedA(value); if (update_global_state)
VertexShaderManager::SetTexMatrixChangedA(value);
break; break;
case 0x40: case 0x40:
VertexShaderManager::SetTexMatrixChangedB(value); if (update_global_state)
VertexShaderManager::SetTexMatrixChangedB(value);
break; break;
case 0x50: case 0x50:
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits state->vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
g_VtxDesc.Hex |= value; state->vtx_desc.Hex |= value;
s_attr_dirty = 0xFF; state->attr_dirty = 0xFF;
break; break;
case 0x60: case 0x60:
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits state->vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
g_VtxDesc.Hex |= (u64)value << 17; state->vtx_desc.Hex |= (u64)value << 17;
s_attr_dirty = 0xFF; state->attr_dirty = 0xFF;
break; break;
case 0x70: case 0x70:
_assert_((sub_cmd & 0x0F) < 8); _assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g0.Hex = value; state->vtx_attr[sub_cmd & 7].g0.Hex = value;
s_attr_dirty |= 1 << (sub_cmd & 7); state->attr_dirty |= 1 << (sub_cmd & 7);
break; break;
case 0x80: case 0x80:
_assert_((sub_cmd & 0x0F) < 8); _assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g1.Hex = value; state->vtx_attr[sub_cmd & 7].g1.Hex = value;
s_attr_dirty |= 1 << (sub_cmd & 7); state->attr_dirty |= 1 << (sub_cmd & 7);
break; break;
case 0x90: case 0x90:
_assert_((sub_cmd & 0x0F) < 8); _assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g2.Hex = value; state->vtx_attr[sub_cmd & 7].g2.Hex = value;
s_attr_dirty |= 1 << (sub_cmd & 7); state->attr_dirty |= 1 << (sub_cmd & 7);
break; break;
// Pointers to vertex arrays in GC RAM // Pointers to vertex arrays in GC RAM
case 0xA0: case 0xA0:
arraybases[sub_cmd & 0xF] = value; state->array_bases[sub_cmd & 0xF] = value;
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value); if (update_global_state)
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
break; break;
case 0xB0: case 0xB0:
arraystrides[sub_cmd & 0xF] = value & 0xFF; state->array_strides[sub_cmd & 0xF] = value & 0xFF;
break; break;
} }
} }
void FillCPMemoryArray(u32 *memory) void FillCPMemoryArray(u32 *memory)
{ {
memory[0x30] = MatrixIndexA.Hex; memory[0x30] = g_main_cp_state.matrix_index_a.Hex;
memory[0x40] = MatrixIndexB.Hex; memory[0x40] = g_main_cp_state.matrix_index_b.Hex;
memory[0x50] = (u32)g_VtxDesc.Hex; memory[0x50] = (u32)g_main_cp_state.vtx_desc.Hex;
memory[0x60] = (u32)(g_VtxDesc.Hex >> 17); memory[0x60] = (u32)(g_main_cp_state.vtx_desc.Hex >> 17);
for (int i = 0; i < 8; ++i) for (int i = 0; i < 8; ++i)
{ {
memory[0x70 + i] = g_VtxAttr[i].g0.Hex; memory[0x70 + i] = g_main_cp_state.vtx_attr[i].g0.Hex;
memory[0x80 + i] = g_VtxAttr[i].g1.Hex; memory[0x80 + i] = g_main_cp_state.vtx_attr[i].g1.Hex;
memory[0x90 + i] = g_VtxAttr[i].g2.Hex; memory[0x90 + i] = g_main_cp_state.vtx_attr[i].g2.Hex;
} }
for (int i = 0; i < 16; ++i) for (int i = 0; i < 16; ++i)
{ {
memory[0xA0 + i] = arraybases[i]; memory[0xA0 + i] = g_main_cp_state.array_bases[i];
memory[0xB0 + i] = arraystrides[i]; memory[0xB0 + i] = g_main_cp_state.array_strides[i];
} }
} }
@ -277,6 +265,6 @@ void RecomputeCachedArraybases()
{ {
for (int i = 0; i < 16; i++) for (int i = 0; i < 16; i++)
{ {
cached_arraybases[i] = Memory::GetPointer(arraybases[i]); cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
} }
} }

View File

@ -16,7 +16,7 @@ namespace VertexLoaderManager
void MarkAllDirty(); void MarkAllDirty();
int GetVertexSize(int vtx_attr_group); int GetVertexSize(int vtx_attr_group, bool preprocess);
// Returns false if buf_size is insufficient. // Returns false if buf_size is insufficient.
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false); bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false);

View File

@ -117,7 +117,7 @@ template <typename I>
void Color_ReadIndex_16b_565() void Color_ReadIndex_16b_565()
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]))); u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])));
_SetCol565(val); _SetCol565(val);
} }
@ -125,7 +125,7 @@ template <typename I>
void Color_ReadIndex_24b_888() void Color_ReadIndex_24b_888()
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress)); _SetCol(_Read24(iAddress));
} }
@ -133,7 +133,7 @@ template <typename I>
void Color_ReadIndex_32b_888x() void Color_ReadIndex_32b_888x()
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress)); _SetCol(_Read24(iAddress));
} }
@ -141,7 +141,7 @@ template <typename I>
void Color_ReadIndex_16b_4444() void Color_ReadIndex_16b_4444()
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])); u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]));
_SetCol4444(val); _SetCol4444(val);
} }
@ -149,7 +149,7 @@ template <typename I>
void Color_ReadIndex_24b_6666() void Color_ReadIndex_24b_6666()
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1; const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData); u32 val = Common::swap32(pData);
_SetCol6666(val); _SetCol6666(val);
} }
@ -158,7 +158,7 @@ template <typename I>
void Color_ReadIndex_32b_8888() void Color_ReadIndex_32b_8888()
{ {
auto const Index = DataRead<I>(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress)); _SetCol(_Read32(iAddress));
} }

View File

@ -80,7 +80,7 @@ __forceinline void Normal_Index_Offset()
auto const index = DataRead<I>(); auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL] auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL]
+ (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); + (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data); ReadIndirect<T, N * 3>(data);
} }

View File

@ -91,7 +91,7 @@ void LOADERDECL Pos_ReadIndex()
static_assert(N <= 3, "N > 3 is not sane!"); static_assert(N <= 3, "N > 3 is not sane!");
auto const index = DataRead<I>(); auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION])); auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
auto const scale = posScale; auto const scale = posScale;
DataWriter dst; DataWriter dst;
@ -109,7 +109,7 @@ template <typename I, bool three>
void LOADERDECL Pos_ReadIndex_Float_SSSE3() void LOADERDECL Pos_ReadIndex_Float_SSSE3()
{ {
auto const index = DataRead<I>(); auto const index = DataRead<I>();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData)); GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2)); GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);

View File

@ -73,7 +73,7 @@ void LOADERDECL TexCoord_ReadIndex()
auto const index = DataRead<I>(); auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
+ (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex])); + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex]));
auto const scale = tcScale[tcIndex]; auto const scale = tcScale[tcIndex];
DataWriter dst; DataWriter dst;
@ -94,7 +94,7 @@ void LOADERDECL TexCoord_ReadIndex_Short2_SSE4()
// Heavy in ZWW // Heavy in ZWW
auto const index = DataRead<I>(); auto const index = DataRead<I>();
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
const __m128i a = _mm_cvtsi32_si128(*pData); const __m128i a = _mm_cvtsi32_si128(*pData);
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2); const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
const __m128i c = _mm_cvtepi16_epi32(b); const __m128i c = _mm_cvtepi16_epi32(b);
@ -117,7 +117,7 @@ void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3()
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!"); static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
auto const index = DataRead<I>(); auto const index = DataRead<I>();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);

View File

@ -245,8 +245,8 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
// donko - this has caused problems in some games. removed for now. // donko - this has caused problems in some games. removed for now.
bool texGenSpecialCase = false; bool texGenSpecialCase = false;
/*bool texGenSpecialCase = /*bool texGenSpecialCase =
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0 ((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
(g_VtxDesc.Tex0Coord != NOT_PRESENT) && (g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
(xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11); (xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
*/ */

View File

@ -329,8 +329,8 @@ void VertexShaderManager::SetConstants()
{ {
bPosNormalMatrixChanged = false; bPosNormalMatrixChanged = false;
const float *pos = (const float *)xfmem.posMatrices + MatrixIndexA.PosNormalMtxIdx * 4; const float *pos = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
const float *norm = (const float *)xfmem.normalMatrices + 3 * (MatrixIndexA.PosNormalMtxIdx & 31); const float *norm = (const float *)xfmem.normalMatrices + 3 * (g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31);
memcpy(constants.posnormalmatrix, pos, 3*16); memcpy(constants.posnormalmatrix, pos, 3*16);
memcpy(constants.posnormalmatrix[3], norm, 12); memcpy(constants.posnormalmatrix[3], norm, 12);
@ -344,10 +344,10 @@ void VertexShaderManager::SetConstants()
bTexMatricesChanged[0] = false; bTexMatricesChanged[0] = false;
const float *fptrs[] = const float *fptrs[] =
{ {
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex0MtxIdx * 4], (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex0MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex1MtxIdx * 4], (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex1MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex2MtxIdx * 4], (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex2MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex3MtxIdx * 4] (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex3MtxIdx * 4]
}; };
for (int i = 0; i < 4; ++i) for (int i = 0; i < 4; ++i)
@ -361,10 +361,10 @@ void VertexShaderManager::SetConstants()
{ {
bTexMatricesChanged[1] = false; bTexMatricesChanged[1] = false;
const float *fptrs[] = { const float *fptrs[] = {
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex4MtxIdx * 4], (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex4MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex5MtxIdx * 4], (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex5MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex6MtxIdx * 4], (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex6MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex7MtxIdx * 4] (const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex7MtxIdx * 4]
}; };
for (int i = 0; i < 4; ++i) for (int i = 0; i < 4; ++i)
@ -536,26 +536,26 @@ void VertexShaderManager::SetConstants()
void VertexShaderManager::InvalidateXFRange(int start, int end) void VertexShaderManager::InvalidateXFRange(int start, int end)
{ {
if (((u32)start >= (u32)MatrixIndexA.PosNormalMtxIdx * 4 && if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 &&
(u32)start < (u32)MatrixIndexA.PosNormalMtxIdx * 4 + 12) || (u32)start < (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 + 12) ||
((u32)start >= XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 && ((u32)start >= XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 &&
(u32)start < XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 + 9)) (u32)start < XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 + 9))
{ {
bPosNormalMatrixChanged = true; bPosNormalMatrixChanged = true;
} }
if (((u32)start >= (u32)MatrixIndexA.Tex0MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex0MtxIdx*4+12) || if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexA.Tex1MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex1MtxIdx*4+12) || ((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexA.Tex2MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex2MtxIdx*4+12) || ((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexA.Tex3MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex3MtxIdx*4+12)) ((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4+12))
{ {
bTexMatricesChanged[0] = true; bTexMatricesChanged[0] = true;
} }
if (((u32)start >= (u32)MatrixIndexB.Tex4MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex4MtxIdx*4+12) || if (((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexB.Tex5MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex5MtxIdx*4+12) || ((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexB.Tex6MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex6MtxIdx*4+12) || ((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexB.Tex7MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex7MtxIdx*4+12)) ((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4+12))
{ {
bTexMatricesChanged[1] = true; bTexMatricesChanged[1] = true;
} }
@ -628,23 +628,23 @@ void VertexShaderManager::InvalidateXFRange(int start, int end)
void VertexShaderManager::SetTexMatrixChangedA(u32 Value) void VertexShaderManager::SetTexMatrixChangedA(u32 Value)
{ {
if (MatrixIndexA.Hex != Value) if (g_main_cp_state.matrix_index_a.Hex != Value)
{ {
VertexManager::Flush(); VertexManager::Flush();
if (MatrixIndexA.PosNormalMtxIdx != (Value&0x3f)) if (g_main_cp_state.matrix_index_a.PosNormalMtxIdx != (Value&0x3f))
bPosNormalMatrixChanged = true; bPosNormalMatrixChanged = true;
bTexMatricesChanged[0] = true; bTexMatricesChanged[0] = true;
MatrixIndexA.Hex = Value; g_main_cp_state.matrix_index_a.Hex = Value;
} }
} }
void VertexShaderManager::SetTexMatrixChangedB(u32 Value) void VertexShaderManager::SetTexMatrixChangedB(u32 Value)
{ {
if (MatrixIndexB.Hex != Value) if (g_main_cp_state.matrix_index_b.Hex != Value)
{ {
VertexManager::Flush(); VertexManager::Flush();
bTexMatricesChanged[1] = true; bTexMatricesChanged[1] = true;
MatrixIndexB.Hex = Value; g_main_cp_state.matrix_index_b.Hex = Value;
} }
} }

View File

@ -116,6 +116,8 @@ public:
virtual void DoState(PointerWrap &p) = 0; virtual void DoState(PointerWrap &p) = 0;
virtual void CheckInvalidState() = 0; virtual void CheckInvalidState() = 0;
virtual void UpdateWantDeterminism(bool want) {}
}; };
extern std::vector<VideoBackend*> g_available_video_backends; extern std::vector<VideoBackend*> g_available_video_backends;
@ -151,6 +153,8 @@ class VideoBackendHardware : public VideoBackend
void PauseAndLock(bool doLock, bool unpauseOnUnlock=true) override; void PauseAndLock(bool doLock, bool unpauseOnUnlock=true) override;
void DoState(PointerWrap &p) override; void DoState(PointerWrap &p) override;
void UpdateWantDeterminism(bool want) override;
bool m_invalid; bool m_invalid;
public: public:

View File

@ -22,13 +22,7 @@ static void DoState(PointerWrap &p)
p.DoMarker("BP Memory"); p.DoMarker("BP Memory");
// CP Memory // CP Memory
p.DoArray(arraybases, 16); DoCPState(p);
p.DoArray(arraystrides, 16);
p.Do(MatrixIndexA);
p.Do(MatrixIndexB);
p.Do(g_VtxDesc.Hex);
p.DoArray(g_VtxAttr, 8);
p.DoMarker("CP Memory");
// XF Memory // XF Memory
p.Do(xfmem); p.Do(xfmem);
@ -73,11 +67,7 @@ void VideoCommon_RunLoop(bool enable)
void VideoCommon_Init() void VideoCommon_Init()
{ {
memset(arraybases, 0, sizeof(arraybases)); memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));
memset(arraystrides, 0, sizeof(arraystrides)); memset(&g_preprocess_cp_state, 0, sizeof(g_preprocess_cp_state));
memset(&MatrixIndexA, 0, sizeof(MatrixIndexA));
memset(&MatrixIndexB, 0, sizeof(MatrixIndexB));
memset(&g_VtxDesc, 0, sizeof(g_VtxDesc));
memset(g_VtxAttr, 0, sizeof(g_VtxAttr));
memset(texMem, 0, TMEM_SIZE); memset(texMem, 0, TMEM_SIZE);
} }

View File

@ -275,3 +275,4 @@ extern XFMemory xfmem;
void LoadXFReg(u32 transferSize, u32 address); void LoadXFReg(u32 transferSize, u32 address);
void LoadIndexedXF(u32 val, int array); void LoadIndexedXF(u32 val, int array);
void PreprocessIndexedXF(u32 val, int refarray);

View File

@ -6,6 +6,7 @@
#include "Core/HW/Memmap.h" #include "Core/HW/Memmap.h"
#include "VideoCommon/CPMemory.h" #include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h" #include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/PixelShaderManager.h" #include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VertexShaderManager.h"
@ -252,7 +253,15 @@ void LoadIndexedXF(u32 val, int refarray)
//load stuff from array to address in xf mem //load stuff from array to address in xf mem
u32* currData = (u32*)(&xfmem) + address; u32* currData = (u32*)(&xfmem) + address;
u32* newData = (u32*)Memory::GetPointer(arraybases[refarray] + arraystrides[refarray] * index); u32* newData;
if (g_use_deterministic_gpu_thread)
{
newData = (u32*)PopFifoAuxBuffer(size * sizeof(u32));
}
else
{
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] + g_main_cp_state.array_strides[refarray] * index);
}
bool changed = false; bool changed = false;
for (int i = 0; i < size; ++i) for (int i = 0; i < size; ++i)
{ {
@ -269,3 +278,14 @@ void LoadIndexedXF(u32 val, int refarray)
currData[i] = Common::swap32(newData[i]); currData[i] = Common::swap32(newData[i]);
} }
} }
void PreprocessIndexedXF(u32 val, int refarray)
{
int index = val >> 16;
int size = ((val >> 12) & 0xF) + 1;
u32* new_data = (u32*)Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] + g_preprocess_cp_state.array_strides[refarray] * index);
size_t buf_size = size * sizeof(u32);
PushFifoAuxBuffer(new_data, buf_size);
}

View File

@ -74,7 +74,7 @@ protected:
void ResetPointers() void ResetPointers()
{ {
g_pVideoData = &input_memory[0]; g_video_buffer_read_ptr = &input_memory[0];
VertexManager::s_pCurBufferPointer = &output_memory[0]; VertexManager::s_pCurBufferPointer = &output_memory[0];
m_input_pos = m_output_pos = 0; m_input_pos = m_output_pos = 0;
} }