Merge pull request #885 from comex/gpu-determinism

GPU determinism (apparently it is ready for merge)
This commit is contained in:
comex 2014-09-28 21:59:27 -04:00
commit fbabc03b3f
44 changed files with 911 additions and 419 deletions

View File

@ -55,10 +55,24 @@ struct ConfigCache
unsigned int framelimit, frameSkip;
TEXIDevices m_EXIDevice[MAX_EXI_CHANNELS];
std::string strBackend, sBackend;
std::string m_strGPUDeterminismMode;
bool bSetFramelimit, bSetEXIDevice[MAX_EXI_CHANNELS], bSetVolume, bSetPads[MAX_SI_CHANNELS], bSetWiimoteSource[MAX_BBMOTES], bSetFrameSkip;
};
static ConfigCache config_cache;
static GPUDeterminismMode ParseGPUDeterminismMode(const std::string& mode)
{
if (mode == "auto")
return GPU_DETERMINISM_AUTO;
if (mode == "none")
return GPU_DETERMINISM_NONE;
if (mode == "fake-completion")
return GPU_DETERMINISM_FAKE_COMPLETION;
NOTICE_LOG(BOOT, "Unknown GPU determinism mode %s", mode.c_str());
return GPU_DETERMINISM_AUTO;
}
// Boot the ISO or file
bool BootCore(const std::string& _rFilename)
{
@ -109,6 +123,7 @@ bool BootCore(const std::string& _rFilename)
config_cache.bMergeBlocks = StartUp.bMergeBlocks;
config_cache.bDSPHLE = StartUp.bDSPHLE;
config_cache.strBackend = StartUp.m_strVideoBackend;
config_cache.m_strGPUDeterminismMode = StartUp.m_strGPUDeterminismMode;
config_cache.m_EnableJIT = SConfig::GetInstance().m_DSPEnableJIT;
config_cache.bDSPThread = StartUp.bDSPThread;
config_cache.Volume = SConfig::GetInstance().m_Volume;
@ -168,6 +183,8 @@ bool BootCore(const std::string& _rFilename)
dsp_section->Get("EnableJIT", &SConfig::GetInstance().m_DSPEnableJIT, SConfig::GetInstance().m_DSPEnableJIT);
dsp_section->Get("Backend", &SConfig::GetInstance().sBackend, SConfig::GetInstance().sBackend);
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
core_section->Get("GPUDeterminismMode", &StartUp.m_strGPUDeterminismMode, StartUp.m_strGPUDeterminismMode);
StartUp.m_GPUDeterminismMode = ParseGPUDeterminismMode(StartUp.m_strGPUDeterminismMode);
for (unsigned int i = 0; i < MAX_SI_CHANNELS; ++i)
{
@ -277,6 +294,7 @@ void Stop()
StartUp.bDSPHLE = config_cache.bDSPHLE;
StartUp.bDSPThread = config_cache.bDSPThread;
StartUp.m_strVideoBackend = config_cache.strBackend;
StartUp.m_strGPUDeterminismMode = config_cache.m_strGPUDeterminismMode;
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
StartUp.bHLE_BS2 = config_cache.bHLE_BS2;
SConfig::GetInstance().sBackend = config_cache.sBackend;

View File

@ -317,6 +317,7 @@ void SConfig::SaveCoreSettings(IniFile& ini)
core->Set("FrameLimit", m_Framelimit);
core->Set("FrameSkip", m_FrameSkip);
core->Set("GFXBackend", m_LocalCoreStartupParameter.m_strVideoBackend);
core->Set("GPUDeterminismMode", m_LocalCoreStartupParameter.m_strGPUDeterminismMode);
}
void SConfig::SaveMovieSettings(IniFile& ini)
@ -542,6 +543,7 @@ void SConfig::LoadCoreSettings(IniFile& ini)
core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default
core->Get("FrameSkip", &m_FrameSkip, 0);
core->Get("GFXBackend", &m_LocalCoreStartupParameter.m_strVideoBackend, "");
core->Get("GPUDeterminismMode", &m_LocalCoreStartupParameter.m_strGPUDeterminismMode, "auto");
}
void SConfig::LoadMovieSettings(IniFile& ini)

View File

@ -48,6 +48,7 @@
#include "Core/HW/VideoInterface.h"
#include "Core/HW/Wiimote.h"
#include "Core/IPC_HLE/WII_IPC_HLE_Device_usb.h"
#include "Core/IPC_HLE/WII_Socket.h"
#include "Core/PowerPC/PowerPC.h"
#ifdef USE_GDBSTUB
@ -65,6 +66,8 @@ bool g_aspect_wide;
namespace Core
{
bool g_want_determinism;
// Declarations and definitions
static Common::Timer s_timer;
static volatile u32 s_drawn_frame = 0;
@ -177,6 +180,8 @@ bool Init()
s_emu_thread.join();
}
Core::UpdateWantDeterminism(/*initial*/ true);
INFO_LOG(OSREPORT, "Starting core = %s mode",
_CoreParameter.bWii ? "Wii" : "GameCube");
INFO_LOG(OSREPORT, "CPU Thread separate = %s",
@ -564,6 +569,9 @@ void RequestRefreshInfo()
bool PauseAndLock(bool doLock, bool unpauseOnUnlock)
{
if (!IsRunning())
return true;
// let's support recursive locking to simplify things on the caller's side,
// and let's do it at this outer level in case the individual systems don't support it.
if (doLock ? s_pause_and_lock_depth++ : --s_pause_and_lock_depth)
@ -702,4 +710,27 @@ void SetOnStoppedCallback(StoppedCallbackFunc callback)
s_on_stopped_callback = callback;
}
void UpdateWantDeterminism(bool initial)
{
// For now, this value is not itself configurable. Instead, individual
// settings that depend on it, such as GPU determinism mode. should have
// override options for testing,
bool new_want_determinism =
Movie::IsPlayingInput() ||
Movie::IsRecordingInput() ||
NetPlay::IsNetPlayRunning();
if (new_want_determinism != g_want_determinism || initial)
{
WARN_LOG(COMMON, "Want determinism <- %s", new_want_determinism ? "true" : "false");
bool was_unpaused = Core::PauseAndLock(true);
g_want_determinism = new_want_determinism;
WiiSockMan::GetInstance().UpdateWantDeterminism(new_want_determinism);
g_video_backend->UpdateWantDeterminism(new_want_determinism);
Core::PauseAndLock(false, was_unpaused);
}
}
} // Core

View File

@ -23,6 +23,8 @@ extern bool g_aspect_wide;
namespace Core
{
extern bool g_want_determinism;
bool GetIsFramelimiterTempDisabled();
void SetIsFramelimiterTempDisabled(bool disable);
@ -79,4 +81,7 @@ bool PauseAndLock(bool doLock, bool unpauseOnUnlock=true);
typedef void(*StoppedCallbackFunc)(void);
void SetOnStoppedCallback(StoppedCallbackFunc callback);
// Run on the GUI thread when the factors change.
void UpdateWantDeterminism(bool initial = false);
} // namespace

View File

@ -97,6 +97,15 @@ enum Hotkey
NUM_HOTKEYS,
};
enum GPUDeterminismMode
{
GPU_DETERMINISM_AUTO,
GPU_DETERMINISM_NONE,
// This is currently the only mode. There will probably be at least
// one more at some point.
GPU_DETERMINISM_FAKE_COMPLETION,
};
struct SCoreStartupParameter
{
// Settings
@ -200,6 +209,10 @@ struct SCoreStartupParameter
EBootType m_BootType;
std::string m_strVideoBackend;
std::string m_strGPUDeterminismMode;
// set based on the string version
GPUDeterminismMode m_GPUDeterminismMode;
// files
std::string m_strFilename;

View File

@ -331,7 +331,7 @@ bool Wiimote::Step()
m_rumble->controls[0]->control_ref->State(m_rumble_on);
// when a movie is active, this button status update is disabled (moved), because movies only record data reports.
if (!(Movie::IsMovieActive()) || NetPlay::IsNetPlayRunning())
if (!Core::g_want_determinism)
{
UpdateButtonsStatus();
}
@ -385,7 +385,7 @@ void Wiimote::UpdateButtonsStatus()
void Wiimote::GetCoreData(u8* const data)
{
// when a movie is active, the button update happens here instead of Wiimote::Step, to avoid potential desync issues.
if (Movie::IsMovieActive() || NetPlay::IsNetPlayRunning())
if (Core::g_want_determinism)
{
UpdateButtonsStatus();
}

View File

@ -4,8 +4,7 @@
#include <algorithm>
#include "Core/Movie.h"
#include "Core/NetPlayProto.h"
#include "Core/Core.h"
#include "Core/IPC_HLE/WII_IPC_HLE.h"
#include "Core/IPC_HLE/WII_IPC_HLE_Device.h"
#include "Core/IPC_HLE/WII_Socket.h" // No Wii socket support while using NetPlay or TAS
@ -559,9 +558,7 @@ void WiiSockMan::AddSocket(s32 fd)
s32 WiiSockMan::NewSocket(s32 af, s32 type, s32 protocol)
{
if (NetPlay::IsNetPlayRunning() ||
Movie::IsRecordingInput() ||
Movie::IsPlayingInput())
if (Core::g_want_determinism)
{
return SO_ENOMEM;
}
@ -664,5 +661,12 @@ void WiiSockMan::Convert(sockaddr_in const & from, WiiSockAddrIn& to, s32 addrle
to.len = addrlen;
}
void WiiSockMan::UpdateWantDeterminism(bool want)
{
// If we switched into movie recording, kill existing sockets.
if (want)
Clean();
}
#undef ERRORCODE
#undef EITHER

View File

@ -242,6 +242,8 @@ public:
}
}
void UpdateWantDeterminism(bool want);
private:
WiiSockMan() = default;

View File

@ -437,6 +437,8 @@ bool BeginRecordingInput(int controllers)
if (s_playMode != MODE_NONE || controllers == 0)
return false;
bool was_unpaused = Core::PauseAndLock(true);
s_numPads = controllers;
g_currentFrame = g_totalFrames = 0;
g_currentLagCount = s_totalLagCount = 0;
@ -487,6 +489,10 @@ bool BeginRecordingInput(int controllers)
s_currentByte = s_totalBytes = 0;
Core::UpdateWantDeterminism();
Core::PauseAndLock(false, was_unpaused);
Core::DisplayMessage("Starting movie recording", 2000);
return true;
}
@ -764,6 +770,8 @@ bool PlayInput(const std::string& filename)
s_playMode = MODE_PLAYING;
Core::UpdateWantDeterminism();
s_totalBytes = g_recordfd.GetSize() - 256;
EnsureTmpInputSize((size_t)s_totalBytes);
g_recordfd.ReadArray(tmpInput, (size_t)s_totalBytes);
@ -1097,6 +1105,7 @@ void EndPlayInput(bool cont)
s_rerecords = 0;
s_currentByte = 0;
s_playMode = MODE_NONE;
Core::UpdateWantDeterminism();
Core::DisplayMessage("Movie End.", 2000);
s_bRecordingFromSaveState = false;
// we don't clear these things because otherwise we can't resume playback if we load a movie state later

View File

@ -13,46 +13,46 @@ void SWLoadCPReg(u32 sub_cmd, u32 value)
switch (sub_cmd & 0xF0)
{
case 0x30:
MatrixIndexA.Hex = value;
g_main_cp_state.matrix_index_a.Hex = value;
break;
case 0x40:
MatrixIndexB.Hex = value;
g_main_cp_state.matrix_index_b.Hex = value;
break;
case 0x50:
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits
g_VtxDesc.Hex |= value;
g_main_cp_state.vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
g_main_cp_state.vtx_desc.Hex |= value;
break;
case 0x60:
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits
g_VtxDesc.Hex |= (u64)value << 17;
g_main_cp_state.vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
g_main_cp_state.vtx_desc.Hex |= (u64)value << 17;
break;
case 0x70:
_assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g0.Hex = value;
g_main_cp_state.vtx_attr[sub_cmd & 7].g0.Hex = value;
break;
case 0x80:
_assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g1.Hex = value;
g_main_cp_state.vtx_attr[sub_cmd & 7].g1.Hex = value;
break;
case 0x90:
_assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g2.Hex = value;
g_main_cp_state.vtx_attr[sub_cmd & 7].g2.Hex = value;
break;
// Pointers to vertex arrays in GC RAM
case 0xA0:
arraybases[sub_cmd & 0xF] = value;
g_main_cp_state.array_bases[sub_cmd & 0xF] = value;
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
break;
case 0xB0:
arraystrides[sub_cmd & 0xF] = value & 0xFF;
g_main_cp_state.array_strides[sub_cmd & 0xF] = value & 0xFF;
break;
}
}

View File

@ -57,7 +57,7 @@ static void DecodePrimitiveStream(u32 iBufferSize)
{
while (streamSize > 0 && iBufferSize >= vertexSize)
{
g_pVideoData += vertexSize;
g_video_buffer_read_ptr += vertexSize;
iBufferSize -= vertexSize;
streamSize--;
}
@ -94,26 +94,26 @@ static void ReadXFData(u32 iBufferSize)
static void ExecuteDisplayList(u32 addr, u32 count)
{
u8 *videoDataSave = g_pVideoData;
u8 *videoDataSave = g_video_buffer_read_ptr;
u8 *dlStart = Memory::GetPointer(addr);
g_pVideoData = dlStart;
g_video_buffer_read_ptr = dlStart;
while (OpcodeDecoder::CommandRunnable(count))
{
OpcodeDecoder::Run(count);
// if data was read by the opcode decoder then the video data pointer changed
u32 readCount = (u32)(g_pVideoData - dlStart);
dlStart = g_pVideoData;
u32 readCount = (u32)(g_video_buffer_read_ptr - dlStart);
dlStart = g_video_buffer_read_ptr;
_assert_msg_(VIDEO, count >= readCount, "Display list underrun");
count -= readCount;
}
g_pVideoData = videoDataSave;
g_video_buffer_read_ptr = videoDataSave;
}
static void DecodeStandard(u32 bufferSize)

View File

@ -57,7 +57,7 @@ void DoState(PointerWrap &p)
p.Do(interruptWaiting);
// Is this right?
p.DoArray(g_pVideoData,writePos);
p.DoArray(g_video_buffer_read_ptr,writePos);
}
static void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
@ -95,7 +95,7 @@ void Init()
interruptSet = false;
interruptWaiting = false;
g_pVideoData = nullptr;
g_video_buffer_read_ptr = nullptr;
g_bSkipCurrentFrame = false;
}
@ -311,7 +311,7 @@ bool RunBuffer()
_dbg_assert_(COMMANDPROCESSOR, writePos >= readPos);
g_pVideoData = &commandBuffer[readPos];
g_video_buffer_read_ptr = &commandBuffer[readPos];
u32 availableBytes = writePos - readPos;
@ -322,7 +322,7 @@ bool RunBuffer()
OpcodeDecoder::Run(availableBytes);
// if data was read by the opcode decoder then the video data pointer changed
readPos = (u32)(g_pVideoData - &commandBuffer[0]);
readPos = (u32)(g_video_buffer_read_ptr - &commandBuffer[0]);
_dbg_assert_(VIDEO, writePos >= readPos);
availableBytes = writePos - readPos;
}

View File

@ -39,7 +39,7 @@ SWVertexLoader::~SWVertexLoader()
void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
{
m_CurrentVat = &g_VtxAttr[attributeIndex];
m_CurrentVat = &g_main_cp_state.vtx_attr[attributeIndex];
posScale = 1.0f / float(1 << m_CurrentVat->g0.PosFrac);
tcScale[0] = 1.0f / float(1 << m_CurrentVat->g0.Tex0Frac);
@ -53,20 +53,20 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
//TexMtx
const u64 tmDesc[8] = {
g_VtxDesc.Tex0MatIdx, g_VtxDesc.Tex1MatIdx, g_VtxDesc.Tex2MatIdx, g_VtxDesc.Tex3MatIdx,
g_VtxDesc.Tex4MatIdx, g_VtxDesc.Tex5MatIdx, g_VtxDesc.Tex6MatIdx, g_VtxDesc.Tex7MatIdx
g_main_cp_state.vtx_desc.Tex0MatIdx, g_main_cp_state.vtx_desc.Tex1MatIdx, g_main_cp_state.vtx_desc.Tex2MatIdx, g_main_cp_state.vtx_desc.Tex3MatIdx,
g_main_cp_state.vtx_desc.Tex4MatIdx, g_main_cp_state.vtx_desc.Tex5MatIdx, g_main_cp_state.vtx_desc.Tex6MatIdx, g_main_cp_state.vtx_desc.Tex7MatIdx
};
// Colors
const u64 colDesc[2] = {g_VtxDesc.Color0, g_VtxDesc.Color1};
const u64 colDesc[2] = {g_main_cp_state.vtx_desc.Color0, g_main_cp_state.vtx_desc.Color1};
colElements[0] = m_CurrentVat->g0.Color0Elements;
colElements[1] = m_CurrentVat->g0.Color1Elements;
const u32 colComp[2] = {m_CurrentVat->g0.Color0Comp, m_CurrentVat->g0.Color1Comp};
// TextureCoord
const u64 tcDesc[8] = {
g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord, g_VtxDesc.Tex2Coord, g_VtxDesc.Tex3Coord,
g_VtxDesc.Tex4Coord, g_VtxDesc.Tex5Coord, g_VtxDesc.Tex6Coord, g_VtxDesc.Tex7Coord
g_main_cp_state.vtx_desc.Tex0Coord, g_main_cp_state.vtx_desc.Tex1Coord, g_main_cp_state.vtx_desc.Tex2Coord, g_main_cp_state.vtx_desc.Tex3Coord,
g_main_cp_state.vtx_desc.Tex4Coord, g_main_cp_state.vtx_desc.Tex5Coord, g_main_cp_state.vtx_desc.Tex6Coord, g_main_cp_state.vtx_desc.Tex7Coord
};
const u32 tcElements[8] = {
m_CurrentVat->g0.Tex0CoordElements, m_CurrentVat->g1.Tex1CoordElements, m_CurrentVat->g1.Tex2CoordElements,
@ -89,15 +89,15 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
// Reset vertex
// matrix index from xf regs or cp memory?
if (xfmem.MatrixIndexA.PosNormalMtxIdx != MatrixIndexA.PosNormalMtxIdx ||
xfmem.MatrixIndexA.Tex0MtxIdx != MatrixIndexA.Tex0MtxIdx ||
xfmem.MatrixIndexA.Tex1MtxIdx != MatrixIndexA.Tex1MtxIdx ||
xfmem.MatrixIndexA.Tex2MtxIdx != MatrixIndexA.Tex2MtxIdx ||
xfmem.MatrixIndexA.Tex3MtxIdx != MatrixIndexA.Tex3MtxIdx ||
xfmem.MatrixIndexB.Tex4MtxIdx != MatrixIndexB.Tex4MtxIdx ||
xfmem.MatrixIndexB.Tex5MtxIdx != MatrixIndexB.Tex5MtxIdx ||
xfmem.MatrixIndexB.Tex6MtxIdx != MatrixIndexB.Tex6MtxIdx ||
xfmem.MatrixIndexB.Tex7MtxIdx != MatrixIndexB.Tex7MtxIdx)
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
xfmem.MatrixIndexA.Tex0MtxIdx != g_main_cp_state.matrix_index_a.Tex0MtxIdx ||
xfmem.MatrixIndexA.Tex1MtxIdx != g_main_cp_state.matrix_index_a.Tex1MtxIdx ||
xfmem.MatrixIndexA.Tex2MtxIdx != g_main_cp_state.matrix_index_a.Tex2MtxIdx ||
xfmem.MatrixIndexA.Tex3MtxIdx != g_main_cp_state.matrix_index_a.Tex3MtxIdx ||
xfmem.MatrixIndexB.Tex4MtxIdx != g_main_cp_state.matrix_index_b.Tex4MtxIdx ||
xfmem.MatrixIndexB.Tex5MtxIdx != g_main_cp_state.matrix_index_b.Tex5MtxIdx ||
xfmem.MatrixIndexB.Tex6MtxIdx != g_main_cp_state.matrix_index_b.Tex6MtxIdx ||
xfmem.MatrixIndexB.Tex7MtxIdx != g_main_cp_state.matrix_index_b.Tex7MtxIdx)
{
WARN_LOG(VIDEO, "Matrix indices don't match");
@ -118,18 +118,18 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
m_Vertex.texMtx[6] = xfmem.MatrixIndexB.Tex6MtxIdx;
m_Vertex.texMtx[7] = xfmem.MatrixIndexB.Tex7MtxIdx;
#else
m_Vertex.posMtx = MatrixIndexA.PosNormalMtxIdx;
m_Vertex.texMtx[0] = MatrixIndexA.Tex0MtxIdx;
m_Vertex.texMtx[1] = MatrixIndexA.Tex1MtxIdx;
m_Vertex.texMtx[2] = MatrixIndexA.Tex2MtxIdx;
m_Vertex.texMtx[3] = MatrixIndexA.Tex3MtxIdx;
m_Vertex.texMtx[4] = MatrixIndexB.Tex4MtxIdx;
m_Vertex.texMtx[5] = MatrixIndexB.Tex5MtxIdx;
m_Vertex.texMtx[6] = MatrixIndexB.Tex6MtxIdx;
m_Vertex.texMtx[7] = MatrixIndexB.Tex7MtxIdx;
m_Vertex.posMtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
m_Vertex.texMtx[0] = g_main_cp_state.matrix_index_a.Tex0MtxIdx;
m_Vertex.texMtx[1] = g_main_cp_state.matrix_index_a.Tex1MtxIdx;
m_Vertex.texMtx[2] = g_main_cp_state.matrix_index_a.Tex2MtxIdx;
m_Vertex.texMtx[3] = g_main_cp_state.matrix_index_a.Tex3MtxIdx;
m_Vertex.texMtx[4] = g_main_cp_state.matrix_index_b.Tex4MtxIdx;
m_Vertex.texMtx[5] = g_main_cp_state.matrix_index_b.Tex5MtxIdx;
m_Vertex.texMtx[6] = g_main_cp_state.matrix_index_b.Tex6MtxIdx;
m_Vertex.texMtx[7] = g_main_cp_state.matrix_index_b.Tex7MtxIdx;
#endif
if (g_VtxDesc.PosMatIdx != NOT_PRESENT)
if (g_main_cp_state.vtx_desc.PosMatIdx != NOT_PRESENT)
{
AddAttributeLoader(LoadPosMtx);
m_VertexSize++;
@ -145,17 +145,17 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
}
// Write vertex position loader
m_positionLoader = VertexLoader_Position::GetFunction(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
m_VertexSize += VertexLoader_Position::GetSize(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
m_positionLoader = VertexLoader_Position::GetFunction(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
m_VertexSize += VertexLoader_Position::GetSize(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
AddAttributeLoader(LoadPosition);
// Normals
if (g_VtxDesc.Normal != NOT_PRESENT)
if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
{
m_VertexSize += VertexLoader_Normal::GetSize(g_VtxDesc.Normal,
m_VertexSize += VertexLoader_Normal::GetSize(g_main_cp_state.vtx_desc.Normal,
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
m_normalLoader = VertexLoader_Normal::GetFunction(g_VtxDesc.Normal,
m_normalLoader = VertexLoader_Normal::GetFunction(g_main_cp_state.vtx_desc.Normal,
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
if (m_normalLoader == nullptr)
@ -234,8 +234,8 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
// special case if only pos and tex coord 0 and tex coord input is AB11
m_TexGenSpecialCase =
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0
(g_VtxDesc.Tex0Coord != NOT_PRESENT) &&
((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
(xfmem.texMtxInfo[0].projection == XF_TEXPROJ_ST);
m_SetupUnit->Init(primitiveType);
@ -252,7 +252,7 @@ void SWVertexLoader::LoadVertex()
// transform input data
TransformUnit::TransformPosition(&m_Vertex, outVertex);
if (g_VtxDesc.Normal != NOT_PRESENT)
if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
{
TransformUnit::TransformNormal(&m_Vertex, m_CurrentVat->g0.NormalElements, outVertex);
}

View File

@ -116,14 +116,7 @@ void VideoSoftware::DoState(PointerWrap& p)
p.DoPOD(swstats);
// CP Memory
p.DoArray(arraybases, 16);
p.DoArray(arraystrides, 16);
p.Do(MatrixIndexA);
p.Do(MatrixIndexB);
p.Do(g_VtxDesc.Hex);
p.DoArray(g_VtxAttr, 8);
p.DoMarker("CP Memory");
DoCPState(p);
}
void VideoSoftware::CheckInvalidState()

View File

@ -74,7 +74,7 @@ void SWLoadIndexedXF(u32 val, int array)
int size = ((val >> 12) & 0xF) + 1;
//load stuff from array to address in xf mem
u32 *pData = (u32*)Memory::GetPointer(arraybases[array] + arraystrides[array]*index);
u32 *pData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] + g_main_cp_state.array_strides[array]*index);
// byteswap data
u32 buffer[16];

View File

@ -1085,5 +1085,6 @@ struct BPMemory
extern BPMemory bpmem;
void LoadBPReg(u32 value0);
void LoadBPRegPreprocess(u32 value0);
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc);

View File

@ -173,7 +173,8 @@ static void BPWritten(const BPCmd& bp)
switch (bp.newvalue & 0xFF)
{
case 0x02:
PixelEngine::SetFinish(); // may generate interrupt
if (!g_use_deterministic_gpu_thread)
PixelEngine::SetFinish(); // may generate interrupt
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
return;
@ -183,11 +184,13 @@ static void BPWritten(const BPCmd& bp)
}
return;
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
if (!g_use_deterministic_gpu_thread)
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
return;
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
if (!g_use_deterministic_gpu_thread)
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
return;
@ -685,6 +688,26 @@ void LoadBPReg(u32 value0)
BPWritten(bp);
}
void LoadBPRegPreprocess(u32 value0)
{
int regNum = value0 >> 24;
// masking could hypothetically be a problem
u32 newval = value0 & 0xffffff;
switch (regNum)
{
case BPMEM_SETDRAWDONE:
if ((newval & 0xff) == 0x02)
PixelEngine::SetFinish();
break;
case BPMEM_PE_TOKEN_ID:
PixelEngine::SetToken(newval & 0xffff, false);
break;
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
PixelEngine::SetToken(newval & 0xffff, true);
break;
}
}
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
{
const char* no_yes[2] = { "No", "Yes" };

View File

@ -7,5 +7,4 @@
#include "VideoCommon/BPMemory.h"
void BPInit();
void LoadBPReg(u32 value0);
void BPReload();

View File

@ -2,17 +2,32 @@
// Licensed under GPLv2
// Refer to the license.txt file included.
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
// CP state
u8 *cached_arraybases[16];
// STATE_TO_SAVE
u32 arraybases[16];
u32 arraystrides[16];
TMatrixIndexA MatrixIndexA;
TMatrixIndexB MatrixIndexB;
TVtxDesc g_VtxDesc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT g_VtxAttr[8];
CPState g_main_cp_state;
CPState g_preprocess_cp_state;
void DoCPState(PointerWrap& p)
{
// We don't save g_preprocess_cp_state separately because the GPU should be
// synced around state save/load.
p.DoArray(g_main_cp_state.array_bases, 16);
p.DoArray(g_main_cp_state.array_strides, 16);
p.Do(g_main_cp_state.matrix_index_a);
p.Do(g_main_cp_state.matrix_index_b);
p.Do(g_main_cp_state.vtx_desc.Hex);
p.DoArray(g_main_cp_state.vtx_attr, 8);
p.DoMarker("CP Memory");
if (p.mode == PointerWrap::MODE_READ)
CopyPreprocessCPStateFromMain();
}
void CopyPreprocessCPStateFromMain()
{
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
}

View File

@ -231,12 +231,6 @@ union TMatrixIndexB
#pragma pack()
extern u32 arraybases[16];
extern u8 *cached_arraybases[16];
extern u32 arraystrides[16];
extern TMatrixIndexA MatrixIndexA;
extern TMatrixIndexB MatrixIndexB;
struct VAT
{
UVAT_group0 g0;
@ -244,11 +238,37 @@ struct VAT
UVAT_group2 g2;
};
extern TVtxDesc g_VtxDesc;
extern VAT g_VtxAttr[8];
class VertexLoader;
// STATE_TO_SAVE
struct CPState final
{
u32 array_bases[16];
u32 array_strides[16];
TMatrixIndexA matrix_index_a;
TMatrixIndexB matrix_index_b;
TVtxDesc vtx_desc;
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
VAT vtx_attr[8];
// Attributes that actually belong to VertexLoaderManager:
int attr_dirty; // bitfield
VertexLoader* vertex_loaders[8];
};
class PointerWrap;
extern void DoCPState(PointerWrap& p);
extern void CopyPreprocessCPStateFromMain();
extern CPState g_main_cp_state;
extern CPState g_preprocess_cp_state;
extern u8 *cached_arraybases[16];
// Might move this into its own file later.
void LoadCPReg(u32 SubCmd, u32 Value);
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);
// Fills memory with data from CP regs
void FillCPMemoryArray(u32 *memory);

View File

@ -77,7 +77,7 @@ void DoState(PointerWrap &p)
p.Do(interruptFinishWaiting);
}
UNUSED static inline void WriteLow(volatile u32& _reg, u16 lowbits)
static inline void WriteLow(volatile u32& _reg, u16 lowbits)
{
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
}
@ -159,9 +159,8 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{ FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true },
{ FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer) },
// FIFO_READ_POINTER has different code for single/dual core.
{ FIFO_BP_LO, MMIO::Utils::LowPart(&fifo.CPBreakpoint), false, true },
{ FIFO_BP_HI, MMIO::Utils::HighPart(&fifo.CPBreakpoint) },
};
for (auto& mapped_var : directly_mapped_vars)
{
u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF;
@ -173,6 +172,19 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
);
}
mmio->Register(base | FIFO_BP_LO,
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteLow(fifo.CPBreakpoint, val & 0xffe0);
})
);
mmio->Register(base | FIFO_BP_HI,
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPBreakpoint)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPBreakpoint, val);
})
);
// Timing and metrics MMIOs are stubbed with fixed values.
struct {
u32 addr;
@ -216,8 +228,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
UCPCtrlReg tmp(val);
m_CPCtrlReg.Hex = tmp.Hex;
SetCpControlRegister();
if (!IsOnThread())
RunGpu();
RunGpu();
})
);
@ -227,8 +238,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
UCPClearReg tmp(val);
m_CPClearReg.Hex = tmp.Hex;
SetCpClearRegister();
if (!IsOnThread())
RunGpu();
RunGpu();
})
);
@ -260,6 +270,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
: MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)),
MMIO::ComplexWrite<u16>([](u32, u16 val) {
WriteHigh(fifo.CPReadWriteDistance, val);
SyncGPU(SYNC_GPU_OTHER);
if (fifo.CPReadWriteDistance == 0)
{
GPFifo::ResetGatherPipe();
@ -269,8 +280,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
{
ResetVideoBuffer();
}
if (!IsOnThread())
RunGpu();
RunGpu();
})
);
mmio->Register(base | FIFO_READ_POINTER_LO,
@ -298,11 +308,7 @@ void STACKALIGN GatherPipeBursted()
// if we aren't linked, we don't care about gather pipe data
if (!m_CPCtrlReg.GPLinkEnable)
{
if (!IsOnThread())
{
RunGpu();
}
else
if (IsOnThread() && !g_use_deterministic_gpu_thread)
{
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
// Fix Pokemon XD in DC mode.
@ -313,6 +319,10 @@ void STACKALIGN GatherPipeBursted()
ProcessFifoAllDistance();
}
}
else
{
RunGpu();
}
return;
}
@ -327,8 +337,7 @@ void STACKALIGN GatherPipeBursted()
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
if (!IsOnThread())
RunGpu();
RunGpu();
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
@ -358,7 +367,8 @@ void UpdateInterrupts(u64 userdata)
void UpdateInterruptsFromVideoBackend(u64 userdata)
{
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
if (!g_use_deterministic_gpu_thread)
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
}
void SetCPStatusFromGPU()

View File

@ -16,6 +16,7 @@ namespace CommandProcessor
{
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
extern volatile bool interruptSet;
extern volatile bool interruptWaiting;

View File

@ -6,7 +6,7 @@
#include "VideoCommon/VertexManagerBase.h"
extern u8* g_pVideoData;
extern u8* g_video_buffer_read_ptr;
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
#include <tmmintrin.h>
@ -14,20 +14,20 @@ extern u8* g_pVideoData;
__forceinline void DataSkip(u32 skip)
{
g_pVideoData += skip;
g_video_buffer_read_ptr += skip;
}
// probably unnecessary
template <int count>
__forceinline void DataSkip()
{
g_pVideoData += count;
g_video_buffer_read_ptr += count;
}
template <typename T>
__forceinline T DataPeek(int _uOffset)
__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
{
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(g_pVideoData + _uOffset));
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(*bufp + _uOffset));
return result;
}
@ -48,18 +48,18 @@ __forceinline u32 DataPeek32(int _uOffset)
}
template <typename T>
__forceinline T DataRead()
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
{
auto const result = DataPeek<T>(0);
DataSkip<sizeof(T)>();
auto const result = DataPeek<T>(0, bufp);
*bufp += sizeof(T);
return result;
}
class DataReader
{
public:
inline DataReader() : buffer(g_pVideoData), offset(0) {}
inline ~DataReader() { g_pVideoData += offset; }
inline DataReader() : buffer(g_video_buffer_read_ptr), offset(0) {}
inline ~DataReader() { g_video_buffer_read_ptr += offset; }
template <typename T> inline T Read()
{
const T result = Common::FromBigEndian(*(T*)(buffer + offset));
@ -94,14 +94,14 @@ __forceinline u32 DataReadU32()
__forceinline u32 DataReadU32Unswapped()
{
u32 tmp = *(u32*)g_pVideoData;
g_pVideoData += 4;
u32 tmp = *(u32*)g_video_buffer_read_ptr;
g_video_buffer_read_ptr += 4;
return tmp;
}
__forceinline u8* DataGetPosition()
{
return g_pVideoData;
return g_video_buffer_read_ptr;
}
template <typename T>

View File

@ -11,32 +11,63 @@
#include "Core/ConfigManager.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/NetPlayProto.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoConfig.h"
bool g_bSkipCurrentFrame = false;
namespace
{
static volatile bool GpuRunningState = false;
static volatile bool EmuRunningState = false;
static std::mutex m_csHWVidOccupied;
// Most of this array is unlikely to be faulted in...
static u8 s_fifo_aux_data[FIFO_SIZE];
static u8* s_fifo_aux_write_ptr;
static u8* s_fifo_aux_read_ptr;
bool g_use_deterministic_gpu_thread;
// STATE_TO_SAVE
static u8 *videoBuffer;
static int size = 0;
} // namespace
static std::mutex s_video_buffer_lock;
static std::condition_variable s_video_buffer_cond;
static u8* s_video_buffer;
u8* g_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr;
static std::atomic<u8*> s_video_buffer_seen_ptr;
u8* g_video_buffer_pp_read_ptr;
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
// write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode,
// things get a bit more complicated:
// - The seen_ptr is written by the GPU thread, and points to what it's already
// processed as much of as possible - in the case of a partial command which
// caused it to stop, not the same as the read ptr. It's written by the GPU,
// under the lock, and updating the cond.
// - The write_ptr is written by the CPU thread after it copies data from the
// FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
// polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
void Fifo_DoState(PointerWrap &p)
{
p.DoArray(videoBuffer, FIFO_SIZE);
p.Do(size);
p.DoPointer(g_pVideoData, videoBuffer);
p.DoArray(s_video_buffer, FIFO_SIZE);
u8* write_ptr = s_video_buffer_write_ptr;
p.DoPointer(write_ptr, s_video_buffer);
s_video_buffer_write_ptr = write_ptr;
p.DoPointer(g_video_buffer_read_ptr, s_video_buffer);
if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread)
{
// We're good and paused, right?
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
}
p.Do(g_bSkipCurrentFrame);
}
@ -44,6 +75,7 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
{
if (doLock)
{
SyncGPU(SYNC_GPU_OTHER);
EmulatorState(false);
if (!Core::IsGPUThread())
m_csHWVidOccupied.lock();
@ -61,8 +93,8 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
void Fifo_Init()
{
videoBuffer = (u8*)AllocateMemoryPages(FIFO_SIZE);
size = 0;
s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE);
ResetVideoBuffer();
GpuRunningState = false;
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
}
@ -70,18 +102,24 @@ void Fifo_Init()
void Fifo_Shutdown()
{
if (GpuRunningState) PanicAlert("Fifo shutting down while active");
FreeMemoryPages(videoBuffer, FIFO_SIZE);
videoBuffer = nullptr;
FreeMemoryPages(s_video_buffer, FIFO_SIZE);
s_video_buffer = nullptr;
s_video_buffer_write_ptr = nullptr;
g_video_buffer_pp_read_ptr = nullptr;
g_video_buffer_read_ptr = nullptr;
s_video_buffer_seen_ptr = nullptr;
s_fifo_aux_write_ptr = nullptr;
s_fifo_aux_read_ptr = nullptr;
}
u8* GetVideoBufferStartPtr()
{
return videoBuffer;
return s_video_buffer;
}
u8* GetVideoBufferEndPtr()
{
return &videoBuffer[size];
return s_video_buffer_write_ptr;
}
void Fifo_SetRendering(bool enabled)
@ -107,30 +145,123 @@ void EmulatorState(bool running)
EmuRunningState = running;
}
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
if (g_use_deterministic_gpu_thread && GpuRunningState)
{
std::unique_lock<std::mutex> lk(s_video_buffer_lock);
u8* write_ptr = s_video_buffer_write_ptr;
s_video_buffer_cond.wait(lk, [&]() {
return !GpuRunningState || s_video_buffer_seen_ptr == write_ptr;
});
if (!GpuRunningState)
return;
// Opportunistically reset FIFOs so we don't wrap around.
if (may_move_read_ptr && s_fifo_aux_write_ptr != s_fifo_aux_read_ptr)
PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);
memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, s_fifo_aux_write_ptr - s_fifo_aux_read_ptr);
s_fifo_aux_write_ptr -= (s_fifo_aux_read_ptr - s_fifo_aux_data);
s_fifo_aux_read_ptr = s_fifo_aux_data;
if (may_move_read_ptr)
{
// what's left over in the buffer
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
memmove(s_video_buffer, g_video_buffer_pp_read_ptr, size);
// This change always decreases the pointers. We write seen_ptr
// after write_ptr here, and read it before in RunGpuLoop, so
// 'write_ptr > seen_ptr' there cannot become spuriously true.
s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
g_video_buffer_pp_read_ptr = s_video_buffer;
g_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_seen_ptr = write_ptr;
}
}
}
void PushFifoAuxBuffer(void* ptr, size_t size)
{
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
{
SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
{
// That will sync us up to the last 32 bytes, so this short region
// of FIFO would have to point to a 2MB display list or something.
PanicAlert("absurdly large aux buffer");
return;
}
}
memcpy(s_fifo_aux_write_ptr, ptr, size);
s_fifo_aux_write_ptr += size;
}
void* PopFifoAuxBuffer(size_t size)
{
void* ret = s_fifo_aux_read_ptr;
s_fifo_aux_read_ptr += size;
return ret;
}
// Description: RunGpuLoop() sends data through this function.
void ReadDataFromFifo(u8* _uData, u32 len)
static void ReadDataFromFifo(u8* _uData, u32 len)
{
if (size + len >= FIFO_SIZE)
if (len > (s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
{
int pos = (int)(g_pVideoData - videoBuffer);
size -= pos;
if (size + len > FIFO_SIZE)
size_t size = s_video_buffer_write_ptr - g_video_buffer_read_ptr;
if (len > FIFO_SIZE - size)
{
PanicAlert("FIFO out of bounds (size = %i, len = %i at %08x)", size, len, pos);
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
return;
}
memmove(&videoBuffer[0], &videoBuffer[pos], size);
g_pVideoData = videoBuffer;
memmove(s_video_buffer, g_video_buffer_read_ptr, size);
s_video_buffer_write_ptr = s_video_buffer + size;
g_video_buffer_read_ptr = s_video_buffer;
}
// Copy new video instructions to videoBuffer for future use in rendering the new picture
memcpy(videoBuffer + size, _uData, len);
size += len;
// Copy new video instructions to s_video_buffer for future use in rendering the new picture
memcpy(s_video_buffer_write_ptr, _uData, len);
s_video_buffer_write_ptr += len;
}
// The deterministic_gpu_thread version.
static void ReadDataFromFifoOnCPU(u8* _uData, u32 len)
{
u8 *write_ptr = s_video_buffer_write_ptr;
if (len > (s_video_buffer + FIFO_SIZE - write_ptr))
{
// We can't wrap around while the GPU is working on the data.
// This should be very rare due to the reset in SyncGPU.
SyncGPU(SYNC_GPU_WRAPAROUND);
if (g_video_buffer_pp_read_ptr != g_video_buffer_read_ptr)
{
PanicAlert("desynced read pointers");
return;
}
write_ptr = s_video_buffer_write_ptr;
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
if (len > FIFO_SIZE - size)
{
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
return;
}
}
memcpy(write_ptr, _uData, len);
OpcodeDecoder_Preprocess(write_ptr + len);
// This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len;
}
void ResetVideoBuffer()
{
g_pVideoData = videoBuffer;
size = 0;
g_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_write_ptr = s_video_buffer;
s_video_buffer_seen_ptr = s_video_buffer;
g_video_buffer_pp_read_ptr = s_video_buffer;
s_fifo_aux_write_ptr = s_fifo_aux_data;
s_fifo_aux_read_ptr = s_fifo_aux_data;
}
@ -148,53 +279,75 @@ void RunGpuLoop()
g_video_backend->PeekMessages();
VideoFifo_CheckAsyncRequest();
CommandProcessor::SetCPStatusFromGPU();
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
// check if we are able to run this buffer
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
if (g_use_deterministic_gpu_thread)
{
fifo.isGpuReadingData = true;
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
u8* seen_ptr = s_video_buffer_seen_ptr;
u8* write_ptr = s_video_buffer_write_ptr;
// See comment in SyncGPU
if (write_ptr > seen_ptr)
{
u32 readPtr = fifo.CPReadPointer;
u8 *uData = Memory::GetPointer(readPtr);
OpcodeDecoder_Run(write_ptr);
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
ReadDataFromFifo(uData, 32);
cyclesExecuted = OpcodeDecoder_Run(GetVideoBufferEndPtr());
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((GetVideoBufferEndPtr() - g_pVideoData) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
{
std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
s_video_buffer_seen_ptr = write_ptr;
s_video_buffer_cond.notify_all();
}
}
}
else
{
CommandProcessor::SetCPStatusFromGPU();
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
VideoFifo_CheckAsyncRequest();
CommandProcessor::isPossibleWaitingSetDrawDone = false;
}
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
fifo.isGpuReadingData = false;
// check if we are able to run this buffer
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
{
fifo.isGpuReadingData = true;
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
{
u32 readPtr = fifo.CPReadPointer;
u8 *uData = Memory::GetPointer(readPtr);
if (readPtr == fifo.CPEnd)
readPtr = fifo.CPBase;
else
readPtr += 32;
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
ReadDataFromFifo(uData, 32);
u8* write_ptr = s_video_buffer_write_ptr;
cyclesExecuted = OpcodeDecoder_Run(write_ptr);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - g_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
}
CommandProcessor::SetCPStatusFromGPU();
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
VideoFifo_CheckAsyncRequest();
CommandProcessor::isPossibleWaitingSetDrawDone = false;
}
fifo.isGpuReadingData = false;
}
if (EmuRunningState)
{
@ -217,6 +370,8 @@ void RunGpuLoop()
}
}
}
// wake up SyncGPU if we were interrupted
s_video_buffer_cond.notify_all();
}
@ -228,16 +383,27 @@ bool AtBreakpoint()
void RunGpu()
{
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread &&
!g_use_deterministic_gpu_thread)
return;
SCPFifoStruct &fifo = CommandProcessor::fifo;
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
{
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(uData, 32);
OpcodeDecoder_Run(GetVideoBufferEndPtr());
FPURoundMode::LoadSIMDState();
if (g_use_deterministic_gpu_thread)
{
ReadDataFromFifoOnCPU(uData, 32);
}
else
{
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(uData, 32);
OpcodeDecoder_Run(s_video_buffer_write_ptr);
FPURoundMode::LoadSIMDState();
}
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
@ -250,3 +416,45 @@ void RunGpu()
}
CommandProcessor::SetCPStatusFromGPU();
}
void Fifo_UpdateWantDeterminism(bool want)
{
// We are paused (or not running at all yet) and have m_csHWVidOccupied, so
// it should be safe to change this.
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
bool gpu_thread;
switch (param.m_GPUDeterminismMode)
{
case GPU_DETERMINISM_AUTO:
gpu_thread = want;
// Hack: For now movies are an exception to this being on (but not
// to wanting determinism in general). Once vertex arrays are
// fixed, there should be no reason to want this off for movies by
// default, so this can be removed.
if (!NetPlay::IsNetPlayRunning())
gpu_thread = false;
break;
case GPU_DETERMINISM_NONE:
gpu_thread = false;
break;
case GPU_DETERMINISM_FAKE_COMPLETION:
gpu_thread = true;
break;
}
gpu_thread = gpu_thread && SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
if (g_use_deterministic_gpu_thread != gpu_thread)
{
g_use_deterministic_gpu_thread = gpu_thread;
if (gpu_thread)
{
// These haven't been updated in non-deterministic mode.
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
CopyPreprocessCPStateFromMain();
VertexLoaderManager::MarkAllDirty();
}
}
}

View File

@ -13,6 +13,11 @@ class PointerWrap;
extern bool g_bSkipCurrentFrame;
// This could be in SCoreStartupParameter, but it depends on multiple settings
// and can change at runtime.
extern bool g_use_deterministic_gpu_thread;
extern std::atomic<u8*> g_video_buffer_write_ptr_xthread;
extern u8* g_video_buffer_pp_read_ptr;
void Fifo_Init();
void Fifo_Shutdown();
@ -22,8 +27,23 @@ u8* GetVideoBufferEndPtr();
void Fifo_DoState(PointerWrap &f);
void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock);
void Fifo_UpdateWantDeterminism(bool want);
void ReadDataFromFifo(u8* _uData, u32 len);
// Used for diagnostics.
enum SyncGPUReason {
SYNC_GPU_NONE,
SYNC_GPU_OTHER,
SYNC_GPU_WRAPAROUND,
SYNC_GPU_EFB_POKE,
SYNC_GPU_PERFQUERY,
SYNC_GPU_SWAP,
SYNC_GPU_AUX_SPACE,
};
// In g_use_deterministic_gpu_thread mode, waits for the GPU to be done with pending work.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
void PushFifoAuxBuffer(void* ptr, size_t size);
void* PopFifoAuxBuffer(size_t size);
void RunGpu();
void RunGpuLoop();

View File

@ -118,6 +118,7 @@ void VideoBackendHardware::Video_EndField()
{
if (s_BackendInitialized)
{
SyncGPU(SYNC_GPU_SWAP);
s_swapRequested.Set();
}
}
@ -153,6 +154,8 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32
{
if (s_BackendInitialized && g_ActiveConfig.bEFBAccessEnable)
{
SyncGPU(SYNC_GPU_EFB_POKE);
s_accessEFBArgs.type = type;
s_accessEFBArgs.x = x;
s_accessEFBArgs.y = y;
@ -194,6 +197,8 @@ u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type)
return 0;
}
SyncGPU(SYNC_GPU_PERFQUERY);
// TODO: Is this check sane?
if (!g_perf_query->IsFlushed())
{
@ -304,3 +309,8 @@ void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)
CommandProcessor::RegisterMMIO(mmio, base);
}
void VideoBackendHardware::UpdateWantDeterminism(bool want)
{
Fifo_UpdateWantDeterminism(want);
}

View File

@ -4,7 +4,7 @@
#pragma once
#include "Common/CommonTypes.h"
#include "Common/Hash.h"
// m_components
enum
@ -87,6 +87,20 @@ struct PortableVertexDeclaration
}
};
namespace std
{
template <>
struct hash<PortableVertexDeclaration>
{
size_t operator()(const PortableVertexDeclaration& decl) const
{
return HashFletcher((u8 *) &decl, sizeof(decl));
}
};
}
// The implementation of this class is specific for GL/DX, so NativeVertexFormat.cpp
// is in the respective backend, not here in VideoCommon.

View File

@ -24,6 +24,7 @@
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoCommon.h"
@ -31,25 +32,29 @@
#include "VideoCommon/XFMemory.h"
u8* g_pVideoData = nullptr;
bool g_bRecordFifoData = false;
static u32 InterpretDisplayList(u32 address, u32 size)
{
u8* old_pVideoData = g_pVideoData;
u8* startAddress = Memory::GetPointer(address);
u8* old_pVideoData = g_video_buffer_read_ptr;
u8* startAddress;
if (g_use_deterministic_gpu_thread)
startAddress = (u8*) PopFifoAuxBuffer(size);
else
startAddress = Memory::GetPointer(address);
u32 cycles = 0;
// Avoid the crash if Memory::GetPointer failed ..
if (startAddress != nullptr)
{
g_pVideoData = startAddress;
g_video_buffer_read_ptr = startAddress;
// temporarily swap dl and non-dl (small "hack" for the stats)
Statistics::SwapDL();
u8 *end = g_pVideoData + size;
u8 *end = g_video_buffer_read_ptr + size;
cycles = OpcodeDecoder_Run(end);
INCSTAT(stats.thisFrame.numDListsCalled);
@ -58,16 +63,34 @@ static u32 InterpretDisplayList(u32 address, u32 size)
}
// reset to the old pointer
g_pVideoData = old_pVideoData;
g_video_buffer_read_ptr = old_pVideoData;
return cycles;
}
static void InterpretDisplayListPreprocess(u32 address, u32 size)
{
u8* old_read_ptr = g_video_buffer_pp_read_ptr;
u8* startAddress = Memory::GetPointer(address);
PushFifoAuxBuffer(startAddress, size);
if (startAddress != nullptr)
{
g_video_buffer_pp_read_ptr = startAddress;
u8 *end = startAddress + size;
OpcodeDecoder_Preprocess(end);
}
g_video_buffer_pp_read_ptr = old_read_ptr;
}
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
{
// TODO(Omega): Maybe dump FIFO to file on this error
std::string temp = StringFromFormat(
"GFX FIFO: Unknown Opcode (0x%x @ %p).\n"
"GFX FIFO: Unknown Opcode (0x%x @ %p, preprocessing=%s).\n"
"This means one of the following:\n"
"* The emulated GPU got desynced, disabling dual core can help\n"
"* Command stream corrupted by some spurious memory bug\n"
@ -75,7 +98,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
"* Some other sort of bug\n\n"
"Dolphin will now likely crash or hang. Enjoy." ,
cmd_byte,
buffer);
buffer,
preprocess ? "yes" : "no");
Host_SysMessage(temp.c_str());
INFO_LOG(VIDEO, "%s", temp.c_str());
{
@ -105,14 +129,16 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
}
}
template <bool is_preprocess, u8** bufp>
static u32 Decode(u8* end)
{
u8 *opcodeStart = g_pVideoData;
if (g_pVideoData == end)
u8 *opcodeStart = *bufp;
if (*bufp == end)
return 0;
u8 cmd_byte = DataReadU8();
u8 cmd_byte = DataRead<u8>(bufp);
u32 cycles;
int refarray;
switch (cmd_byte)
{
case GX_NOP:
@ -121,64 +147,72 @@ static u32 Decode(u8* end)
case GX_LOAD_CP_REG: //0x08
{
if (end - g_pVideoData < 1 + 4)
if (end - *bufp < 1 + 4)
return 0;
cycles = 12;
u8 sub_cmd = DataReadU8();
u32 value = DataReadU32();
LoadCPReg(sub_cmd, value);
INCSTAT(stats.thisFrame.numCPLoads);
u8 sub_cmd = DataRead<u8>(bufp);
u32 value = DataRead<u32>(bufp);
LoadCPReg(sub_cmd, value, is_preprocess);
if (!is_preprocess)
INCSTAT(stats.thisFrame.numCPLoads);
}
break;
case GX_LOAD_XF_REG:
{
if (end - g_pVideoData < 4)
if (end - *bufp < 4)
return 0;
u32 Cmd2 = DataReadU32();
u32 Cmd2 = DataRead<u32>(bufp);
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
if ((size_t) (end - g_pVideoData) < transfer_size * sizeof(u32))
if ((size_t) (end - *bufp) < transfer_size * sizeof(u32))
return 0;
cycles = 18 + 6 * transfer_size;
u32 xf_address = Cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address);
if (!is_preprocess)
{
u32 xf_address = Cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address);
INCSTAT(stats.thisFrame.numXFLoads);
INCSTAT(stats.thisFrame.numXFLoads);
}
else
{
*bufp += transfer_size * sizeof(u32);
}
}
break;
case GX_LOAD_INDX_A: //used for position matrices
if (end - g_pVideoData < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xC);
break;
refarray = 0xC;
goto load_indx;
case GX_LOAD_INDX_B: //used for normal matrices
if (end - g_pVideoData < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xD);
break;
refarray = 0xD;
goto load_indx;
case GX_LOAD_INDX_C: //used for postmatrices
if (end - g_pVideoData < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xE);
break;
refarray = 0xE;
goto load_indx;
case GX_LOAD_INDX_D: //used for lights
if (end - g_pVideoData < 4)
refarray = 0xF;
goto load_indx;
load_indx:
if (end - *bufp < 4)
return 0;
cycles = 6;
LoadIndexedXF(DataReadU32(), 0xF);
if (is_preprocess)
PreprocessIndexedXF(DataRead<u32>(bufp), refarray);
else
LoadIndexedXF(DataRead<u32>(bufp), refarray);
break;
case GX_CMD_CALL_DL:
{
if (end - g_pVideoData < 8)
if (end - *bufp < 8)
return 0;
u32 address = DataReadU32();
u32 count = DataReadU32();
cycles = 6 + InterpretDisplayList(address, count);
u32 address = DataRead<u32>(bufp);
u32 count = DataRead<u32>(bufp);
if (is_preprocess)
InterpretDisplayListPreprocess(address, count);
else
cycles = 6 + InterpretDisplayList(address, count);
}
break;
@ -196,12 +230,19 @@ static u32 Decode(u8* end)
// In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{
if (end - g_pVideoData < 4)
if (end - *bufp < 4)
return 0;
cycles = 12;
u32 bp_cmd = DataReadU32();
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
u32 bp_cmd = DataRead<u32>(bufp);
if (is_preprocess)
{
LoadBPRegPreprocess(bp_cmd);
}
else
{
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
}
}
break;
@ -211,38 +252,48 @@ static u32 Decode(u8* end)
{
cycles = 1600;
// load vertices
if (end - g_pVideoData < 2)
if (end - *bufp < 2)
return 0;
u16 numVertices = DataReadU16();
u16 num_vertices = DataRead<u16>(bufp);
if (!VertexLoaderManager::RunVertices(
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices,
end - g_pVideoData,
g_bSkipCurrentFrame))
if (is_preprocess)
{
return 0;
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
if ((size_t) (end - *bufp) < size)
return 0;
*bufp += size;
}
else
{
if (!VertexLoaderManager::RunVertices(
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
num_vertices,
end - *bufp,
g_bSkipCurrentFrame))
return 0;
}
}
else
{
UnknownOpcode(cmd_byte, opcodeStart, false);
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
cycles = 1;
}
break;
}
// Display lists get added directly into the FIFO stream
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart));
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart));
return cycles;
// In is_preprocess mode, we don't actually care about cycles, at least for
// now... make sure the compiler realizes that.
return is_preprocess ? 1 : cycles;
}
void OpcodeDecoder_Init()
{
g_pVideoData = GetVideoBufferStartPtr();
g_video_buffer_read_ptr = GetVideoBufferStartPtr();
}
@ -255,14 +306,28 @@ u32 OpcodeDecoder_Run(u8* end)
u32 totalCycles = 0;
while (true)
{
u8* old = g_pVideoData;
u32 cycles = Decode(end);
u8* old = g_video_buffer_read_ptr;
u32 cycles = Decode</*is_preprocess*/ false, &g_video_buffer_read_ptr>(end);
if (cycles == 0)
{
g_pVideoData = old;
g_video_buffer_read_ptr = old;
break;
}
totalCycles += cycles;
}
return totalCycles;
}
void OpcodeDecoder_Preprocess(u8 *end)
{
while (true)
{
u8* old = g_video_buffer_pp_read_ptr;
u32 cycles = Decode</*is_preprocess*/ true, &g_video_buffer_pp_read_ptr>(end);
if (cycles == 0)
{
g_video_buffer_pp_read_ptr = old;
break;
}
}
}

View File

@ -39,3 +39,4 @@ extern bool g_bRecordFifoData;
void OpcodeDecoder_Init();
void OpcodeDecoder_Shutdown();
u32 OpcodeDecoder_Run(u8* end);
void OpcodeDecoder_Preprocess(u8* write_ptr);

View File

@ -33,14 +33,11 @@
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
// when decoding each vertex.
static u8 s_curposmtx = MatrixIndexA.PosNormalMtxIdx;
static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
static u8 s_curtexmtx[8];
static int s_texmtxwrite = 0;
static int s_texmtxread = 0;
static int loop_counter;
// Vertex loaders read these. Although the scale ones should be baked into the shader.
int tcIndex;
int colIndex;
@ -90,7 +87,7 @@ static void LOADERDECL PosMtx_Write()
DataWrite<u8>(0);
// Resetting current position matrix to default is needed for bbox to behave
s_curposmtx = (u8) MatrixIndexA.PosNormalMtxIdx;
s_curposmtx = (u8) g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
}
static void LOADERDECL UpdateBoundingBoxPrepare()
@ -548,7 +545,7 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
m_compiledCode = nullptr;
m_numLoadedVertices = 0;
m_VertexSize = 0;
loop_counter = 0;
m_native_vertex_format = nullptr;
VertexLoader_Normal::Init();
VertexLoader_Position::Init();
VertexLoader_TextCoord::Init();
@ -584,8 +581,11 @@ void VertexLoader::CompileVertexTranslator()
PanicAlert("Trying to recompile a vertex translator");
m_compiledCode = GetCodePtr();
// We don't use any callee saved registers or anything but RAX.
ABI_PushRegistersAndAdjustStack(0, 8);
// We only use RAX (caller saved) and RBX (callee saved).
ABI_PushRegistersAndAdjustStack(1 << RBX, 8);
// save count
MOV(64, R(RBX), R(ABI_PARAM1));
// Start loop here
const u8 *loop_start = GetCodePtr();
@ -842,11 +842,10 @@ void VertexLoader::CompileVertexTranslator()
#ifdef USE_VERTEX_LOADER_JIT
// End loop here
MOV(64, R(RAX), Imm64((u64)&loop_counter));
SUB(32, MatR(RAX), Imm8(1));
SUB(64, R(RBX), Imm8(1));
J_CC(CC_NZ, loop_start);
ABI_PopRegistersAndAdjustStack(0, 8);
ABI_PopRegistersAndAdjustStack(1 << RBX, 8);
RET();
#endif
}
@ -912,8 +911,7 @@ void VertexLoader::ConvertVertices ( int count )
#ifdef USE_VERTEX_LOADER_JIT
if (count > 0)
{
loop_counter = count;
((void (*)())(void*)m_compiledCode)();
((void (*)(int))(void*)m_compiledCode)(count);
}
#else
for (int s = 0; s < count; s++)
@ -1035,3 +1033,22 @@ void VertexLoader::AppendToString(std::string *dest) const
}
dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices));
}
NativeVertexFormat* VertexLoader::GetNativeVertexFormat()
{
if (m_native_vertex_format)
return m_native_vertex_format;
auto& native = s_native_vertex_map[m_native_vtx_decl];
if (!native)
{
auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat();
native = std::unique_ptr<NativeVertexFormat>(raw_pointer);
native->Initialize(m_native_vtx_decl);
native->m_components = m_native_components;
}
m_native_vertex_format = native.get();
return native.get();
}
std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> VertexLoader::s_native_vertex_map;

View File

@ -8,7 +8,9 @@
// Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt
#include <algorithm>
#include <memory>
#include <string>
#include <unordered_map>
#include "Common/CommonTypes.h"
#include "Common/x64Emitter.h"
@ -114,6 +116,9 @@ public:
void AppendToString(std::string *dest) const;
int GetNumLoadedVerts() const { return m_numLoadedVertices; }
NativeVertexFormat* GetNativeVertexFormat();
static void ClearNativeVertexFormatCache() { s_native_vertex_map.clear(); }
private:
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
@ -135,6 +140,9 @@ private:
int m_numLoadedVertices;
NativeVertexFormat* m_native_vertex_format;
static std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> s_native_vertex_map;
void SetVAT(const VAT& vat);
void CompileVertexTranslator();

View File

@ -4,6 +4,7 @@
#include <algorithm>
#include <memory>
#include <mutex>
#include <unordered_map>
#include <utility>
#include <vector>
@ -20,13 +21,8 @@
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoCommon.h"
static int s_attr_dirty; // bitfield
static NativeVertexFormat* s_current_vtx_fmt;
typedef std::pair<VertexLoader*, NativeVertexFormat*> VertexLoaderCacheItem;
static VertexLoaderCacheItem s_VertexLoaders[8];
namespace std
{
@ -41,35 +37,30 @@ struct hash<VertexLoaderUID>
}
typedef std::unordered_map<VertexLoaderUID, VertexLoaderCacheItem> VertexLoaderMap;
typedef std::map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> NativeVertexLoaderMap;
typedef std::unordered_map<VertexLoaderUID, std::unique_ptr<VertexLoader>> VertexLoaderMap;
namespace VertexLoaderManager
{
static VertexLoaderMap s_VertexLoaderMap;
static NativeVertexLoaderMap s_native_vertex_map;
static std::mutex s_vertex_loader_map_lock;
static VertexLoaderMap s_vertex_loader_map;
// TODO - change into array of pointers. Keep a map of all seen so far.
void Init()
{
MarkAllDirty();
for (auto& map_entry : s_VertexLoaders)
{
map_entry.first = nullptr;
map_entry.second = nullptr;
}
for (auto& map_entry : g_main_cp_state.vertex_loaders)
map_entry = nullptr;
for (auto& map_entry : g_preprocess_cp_state.vertex_loaders)
map_entry = nullptr;
RecomputeCachedArraybases();
}
void Shutdown()
{
for (auto& map_entry : s_VertexLoaderMap)
{
delete map_entry.second.first;
}
s_VertexLoaderMap.clear();
s_native_vertex_map.clear();
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
s_vertex_loader_map.clear();
VertexLoader::ClearNativeVertexFormatCache();
}
namespace
@ -87,14 +78,15 @@ struct entry
void AppendListToString(std::string *dest)
{
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
std::vector<entry> entries;
size_t total_size = 0;
for (const auto& map_entry : s_VertexLoaderMap)
for (const auto& map_entry : s_vertex_loader_map)
{
entry e;
map_entry.second.first->AppendToString(&e.text);
e.num_verts = map_entry.second.first->GetNumLoadedVerts();
map_entry.second->AppendToString(&e.text);
e.num_verts = map_entry.second->GetNumLoadedVerts();
entries.push_back(e);
total_size += e.text.size() + 1;
}
@ -108,57 +100,46 @@ void AppendListToString(std::string *dest)
void MarkAllDirty()
{
s_attr_dirty = 0xff;
g_main_cp_state.attr_dirty = 0xff;
g_preprocess_cp_state.attr_dirty = 0xff;
}
static NativeVertexFormat* GetNativeVertexFormat(const PortableVertexDeclaration& format,
u32 components)
static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state)
{
auto& native = s_native_vertex_map[format];
if (!native)
VertexLoader* loader;
if ((state->attr_dirty >> vtx_attr_group) & 1)
{
auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat();
native = std::unique_ptr<NativeVertexFormat>(raw_pointer);
native->Initialize(format);
native->m_components = components;
}
return native.get();
}
static VertexLoaderCacheItem RefreshLoader(int vtx_attr_group)
{
if ((s_attr_dirty >> vtx_attr_group) & 1)
{
VertexLoaderUID uid(g_VtxDesc, g_VtxAttr[vtx_attr_group]);
VertexLoaderMap::iterator iter = s_VertexLoaderMap.find(uid);
if (iter != s_VertexLoaderMap.end())
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
if (iter != s_vertex_loader_map.end())
{
s_VertexLoaders[vtx_attr_group] = iter->second;
loader = iter->second.get();
}
else
{
VertexLoader* loader = new VertexLoader(g_VtxDesc, g_VtxAttr[vtx_attr_group]);
NativeVertexFormat* vtx_fmt = GetNativeVertexFormat(
loader->GetNativeVertexDeclaration(),
loader->GetNativeComponents());
s_VertexLoaderMap[uid] = std::make_pair(loader, vtx_fmt);
s_VertexLoaders[vtx_attr_group] = std::make_pair(loader, vtx_fmt);
loader = new VertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
s_vertex_loader_map[uid] = std::unique_ptr<VertexLoader>(loader);
INCSTAT(stats.numVertexLoaders);
}
state->vertex_loaders[vtx_attr_group] = loader;
state->attr_dirty &= ~(1 << vtx_attr_group);
} else {
loader = state->vertex_loaders[vtx_attr_group];
}
s_attr_dirty &= ~(1 << vtx_attr_group);
return s_VertexLoaders[vtx_attr_group];
return loader;
}
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
{
if (!count)
return true;
auto loader = RefreshLoader(vtx_attr_group);
size_t size = count * loader.first->GetVertexSize();
CPState* state = &g_main_cp_state;
VertexLoader* loader = RefreshLoader(vtx_attr_group, state);
size_t size = count * loader->GetVertexSize();
if (buf_size < size)
return false;
@ -169,15 +150,17 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
return true;
}
NativeVertexFormat* native = loader->GetNativeVertexFormat();
// If the native vertex format changed, force a flush.
if (loader.second != s_current_vtx_fmt)
if (native != s_current_vtx_fmt)
VertexManager::Flush();
s_current_vtx_fmt = loader.second;
s_current_vtx_fmt = native;
VertexManager::PrepareForAdditionalData(primitive, count,
loader.first->GetNativeVertexDeclaration().stride);
loader->GetNativeVertexDeclaration().stride);
loader.first->RunVertices(g_VtxAttr[vtx_attr_group], primitive, count);
loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);
IndexGenerator::AddIndices(primitive, count);
@ -186,9 +169,9 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
return true;
}
int GetVertexSize(int vtx_attr_group)
int GetVertexSize(int vtx_attr_group, bool preprocess)
{
return RefreshLoader(vtx_attr_group).first->GetVertexSize();
return RefreshLoader(vtx_attr_group, preprocess ? &g_preprocess_cp_state : &g_main_cp_state)->GetVertexSize();
}
NativeVertexFormat* GetCurrentVertexFormat()
@ -198,78 +181,83 @@ NativeVertexFormat* GetCurrentVertexFormat()
} // namespace
void LoadCPReg(u32 sub_cmd, u32 value)
void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
{
bool update_global_state = !is_preprocess;
CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
switch (sub_cmd & 0xF0)
{
case 0x30:
VertexShaderManager::SetTexMatrixChangedA(value);
if (update_global_state)
VertexShaderManager::SetTexMatrixChangedA(value);
break;
case 0x40:
VertexShaderManager::SetTexMatrixChangedB(value);
if (update_global_state)
VertexShaderManager::SetTexMatrixChangedB(value);
break;
case 0x50:
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits
g_VtxDesc.Hex |= value;
s_attr_dirty = 0xFF;
state->vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
state->vtx_desc.Hex |= value;
state->attr_dirty = 0xFF;
break;
case 0x60:
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits
g_VtxDesc.Hex |= (u64)value << 17;
s_attr_dirty = 0xFF;
state->vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
state->vtx_desc.Hex |= (u64)value << 17;
state->attr_dirty = 0xFF;
break;
case 0x70:
_assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g0.Hex = value;
s_attr_dirty |= 1 << (sub_cmd & 7);
state->vtx_attr[sub_cmd & 7].g0.Hex = value;
state->attr_dirty |= 1 << (sub_cmd & 7);
break;
case 0x80:
_assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g1.Hex = value;
s_attr_dirty |= 1 << (sub_cmd & 7);
state->vtx_attr[sub_cmd & 7].g1.Hex = value;
state->attr_dirty |= 1 << (sub_cmd & 7);
break;
case 0x90:
_assert_((sub_cmd & 0x0F) < 8);
g_VtxAttr[sub_cmd & 7].g2.Hex = value;
s_attr_dirty |= 1 << (sub_cmd & 7);
state->vtx_attr[sub_cmd & 7].g2.Hex = value;
state->attr_dirty |= 1 << (sub_cmd & 7);
break;
// Pointers to vertex arrays in GC RAM
case 0xA0:
arraybases[sub_cmd & 0xF] = value;
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
state->array_bases[sub_cmd & 0xF] = value;
if (update_global_state)
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
break;
case 0xB0:
arraystrides[sub_cmd & 0xF] = value & 0xFF;
state->array_strides[sub_cmd & 0xF] = value & 0xFF;
break;
}
}
void FillCPMemoryArray(u32 *memory)
{
memory[0x30] = MatrixIndexA.Hex;
memory[0x40] = MatrixIndexB.Hex;
memory[0x50] = (u32)g_VtxDesc.Hex;
memory[0x60] = (u32)(g_VtxDesc.Hex >> 17);
memory[0x30] = g_main_cp_state.matrix_index_a.Hex;
memory[0x40] = g_main_cp_state.matrix_index_b.Hex;
memory[0x50] = (u32)g_main_cp_state.vtx_desc.Hex;
memory[0x60] = (u32)(g_main_cp_state.vtx_desc.Hex >> 17);
for (int i = 0; i < 8; ++i)
{
memory[0x70 + i] = g_VtxAttr[i].g0.Hex;
memory[0x80 + i] = g_VtxAttr[i].g1.Hex;
memory[0x90 + i] = g_VtxAttr[i].g2.Hex;
memory[0x70 + i] = g_main_cp_state.vtx_attr[i].g0.Hex;
memory[0x80 + i] = g_main_cp_state.vtx_attr[i].g1.Hex;
memory[0x90 + i] = g_main_cp_state.vtx_attr[i].g2.Hex;
}
for (int i = 0; i < 16; ++i)
{
memory[0xA0 + i] = arraybases[i];
memory[0xB0 + i] = arraystrides[i];
memory[0xA0 + i] = g_main_cp_state.array_bases[i];
memory[0xB0 + i] = g_main_cp_state.array_strides[i];
}
}
@ -277,6 +265,6 @@ void RecomputeCachedArraybases()
{
for (int i = 0; i < 16; i++)
{
cached_arraybases[i] = Memory::GetPointer(arraybases[i]);
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
}
}

View File

@ -16,7 +16,7 @@ namespace VertexLoaderManager
void MarkAllDirty();
int GetVertexSize(int vtx_attr_group);
int GetVertexSize(int vtx_attr_group, bool preprocess);
// Returns false if buf_size is insufficient.
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false);

View File

@ -117,7 +117,7 @@ template <typename I>
void Color_ReadIndex_16b_565()
{
auto const Index = DataRead<I>();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])));
_SetCol565(val);
}
@ -125,7 +125,7 @@ template <typename I>
void Color_ReadIndex_24b_888()
{
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
@ -133,7 +133,7 @@ template <typename I>
void Color_ReadIndex_32b_888x()
{
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
@ -141,7 +141,7 @@ template <typename I>
void Color_ReadIndex_16b_4444()
{
auto const Index = DataRead<I>();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]));
_SetCol4444(val);
}
@ -149,7 +149,7 @@ template <typename I>
void Color_ReadIndex_24b_6666()
{
auto const Index = DataRead<I>();
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData);
_SetCol6666(val);
}
@ -158,7 +158,7 @@ template <typename I>
void Color_ReadIndex_32b_8888()
{
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}

View File

@ -80,7 +80,7 @@ __forceinline void Normal_Index_Offset()
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL]
+ (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
+ (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
}

View File

@ -91,7 +91,7 @@ void LOADERDECL Pos_ReadIndex()
static_assert(N <= 3, "N > 3 is not sane!");
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
auto const scale = posScale;
DataWriter dst;
@ -109,7 +109,7 @@ template <typename I, bool three>
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
{
auto const index = DataRead<I>();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);

View File

@ -73,7 +73,7 @@ void LOADERDECL TexCoord_ReadIndex()
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
+ (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex]));
auto const scale = tcScale[tcIndex];
DataWriter dst;
@ -94,7 +94,7 @@ void LOADERDECL TexCoord_ReadIndex_Short2_SSE4()
// Heavy in ZWW
auto const index = DataRead<I>();
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
const __m128i a = _mm_cvtsi32_si128(*pData);
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
const __m128i c = _mm_cvtepi16_epi32(b);
@ -117,7 +117,7 @@ void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3()
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
auto const index = DataRead<I>();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);

View File

@ -245,8 +245,8 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
// donko - this has caused problems in some games. removed for now.
bool texGenSpecialCase = false;
/*bool texGenSpecialCase =
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0
(g_VtxDesc.Tex0Coord != NOT_PRESENT) &&
((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
(xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
*/

View File

@ -329,8 +329,8 @@ void VertexShaderManager::SetConstants()
{
bPosNormalMatrixChanged = false;
const float *pos = (const float *)xfmem.posMatrices + MatrixIndexA.PosNormalMtxIdx * 4;
const float *norm = (const float *)xfmem.normalMatrices + 3 * (MatrixIndexA.PosNormalMtxIdx & 31);
const float *pos = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
const float *norm = (const float *)xfmem.normalMatrices + 3 * (g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31);
memcpy(constants.posnormalmatrix, pos, 3*16);
memcpy(constants.posnormalmatrix[3], norm, 12);
@ -344,10 +344,10 @@ void VertexShaderManager::SetConstants()
bTexMatricesChanged[0] = false;
const float *fptrs[] =
{
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex0MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex1MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex2MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex3MtxIdx * 4]
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex0MtxIdx * 4],
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex1MtxIdx * 4],
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex2MtxIdx * 4],
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex3MtxIdx * 4]
};
for (int i = 0; i < 4; ++i)
@ -361,10 +361,10 @@ void VertexShaderManager::SetConstants()
{
bTexMatricesChanged[1] = false;
const float *fptrs[] = {
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex4MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex5MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex6MtxIdx * 4],
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex7MtxIdx * 4]
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex4MtxIdx * 4],
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex5MtxIdx * 4],
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex6MtxIdx * 4],
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex7MtxIdx * 4]
};
for (int i = 0; i < 4; ++i)
@ -536,26 +536,26 @@ void VertexShaderManager::SetConstants()
void VertexShaderManager::InvalidateXFRange(int start, int end)
{
if (((u32)start >= (u32)MatrixIndexA.PosNormalMtxIdx * 4 &&
(u32)start < (u32)MatrixIndexA.PosNormalMtxIdx * 4 + 12) ||
((u32)start >= XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 &&
(u32)start < XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 + 9))
if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 &&
(u32)start < (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 + 12) ||
((u32)start >= XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 &&
(u32)start < XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 + 9))
{
bPosNormalMatrixChanged = true;
}
if (((u32)start >= (u32)MatrixIndexA.Tex0MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex0MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexA.Tex1MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex1MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexA.Tex2MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex2MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexA.Tex3MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex3MtxIdx*4+12))
if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4+12) ||
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4+12) ||
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4+12) ||
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4+12))
{
bTexMatricesChanged[0] = true;
}
if (((u32)start >= (u32)MatrixIndexB.Tex4MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex4MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexB.Tex5MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex5MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexB.Tex6MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex6MtxIdx*4+12) ||
((u32)start >= (u32)MatrixIndexB.Tex7MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex7MtxIdx*4+12))
if (((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4+12) ||
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4+12) ||
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4+12) ||
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4+12))
{
bTexMatricesChanged[1] = true;
}
@ -628,23 +628,23 @@ void VertexShaderManager::InvalidateXFRange(int start, int end)
void VertexShaderManager::SetTexMatrixChangedA(u32 Value)
{
if (MatrixIndexA.Hex != Value)
if (g_main_cp_state.matrix_index_a.Hex != Value)
{
VertexManager::Flush();
if (MatrixIndexA.PosNormalMtxIdx != (Value&0x3f))
if (g_main_cp_state.matrix_index_a.PosNormalMtxIdx != (Value&0x3f))
bPosNormalMatrixChanged = true;
bTexMatricesChanged[0] = true;
MatrixIndexA.Hex = Value;
g_main_cp_state.matrix_index_a.Hex = Value;
}
}
void VertexShaderManager::SetTexMatrixChangedB(u32 Value)
{
if (MatrixIndexB.Hex != Value)
if (g_main_cp_state.matrix_index_b.Hex != Value)
{
VertexManager::Flush();
bTexMatricesChanged[1] = true;
MatrixIndexB.Hex = Value;
g_main_cp_state.matrix_index_b.Hex = Value;
}
}

View File

@ -116,6 +116,8 @@ public:
virtual void DoState(PointerWrap &p) = 0;
virtual void CheckInvalidState() = 0;
virtual void UpdateWantDeterminism(bool want) {}
};
extern std::vector<VideoBackend*> g_available_video_backends;
@ -151,6 +153,8 @@ class VideoBackendHardware : public VideoBackend
void PauseAndLock(bool doLock, bool unpauseOnUnlock=true) override;
void DoState(PointerWrap &p) override;
void UpdateWantDeterminism(bool want) override;
bool m_invalid;
public:

View File

@ -22,13 +22,7 @@ static void DoState(PointerWrap &p)
p.DoMarker("BP Memory");
// CP Memory
p.DoArray(arraybases, 16);
p.DoArray(arraystrides, 16);
p.Do(MatrixIndexA);
p.Do(MatrixIndexB);
p.Do(g_VtxDesc.Hex);
p.DoArray(g_VtxAttr, 8);
p.DoMarker("CP Memory");
DoCPState(p);
// XF Memory
p.Do(xfmem);
@ -73,11 +67,7 @@ void VideoCommon_RunLoop(bool enable)
void VideoCommon_Init()
{
memset(arraybases, 0, sizeof(arraybases));
memset(arraystrides, 0, sizeof(arraystrides));
memset(&MatrixIndexA, 0, sizeof(MatrixIndexA));
memset(&MatrixIndexB, 0, sizeof(MatrixIndexB));
memset(&g_VtxDesc, 0, sizeof(g_VtxDesc));
memset(g_VtxAttr, 0, sizeof(g_VtxAttr));
memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));
memset(&g_preprocess_cp_state, 0, sizeof(g_preprocess_cp_state));
memset(texMem, 0, TMEM_SIZE);
}

View File

@ -275,3 +275,4 @@ extern XFMemory xfmem;
void LoadXFReg(u32 transferSize, u32 address);
void LoadIndexedXF(u32 val, int array);
void PreprocessIndexedXF(u32 val, int refarray);

View File

@ -6,6 +6,7 @@
#include "Core/HW/Memmap.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
@ -252,7 +253,15 @@ void LoadIndexedXF(u32 val, int refarray)
//load stuff from array to address in xf mem
u32* currData = (u32*)(&xfmem) + address;
u32* newData = (u32*)Memory::GetPointer(arraybases[refarray] + arraystrides[refarray] * index);
u32* newData;
if (g_use_deterministic_gpu_thread)
{
newData = (u32*)PopFifoAuxBuffer(size * sizeof(u32));
}
else
{
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] + g_main_cp_state.array_strides[refarray] * index);
}
bool changed = false;
for (int i = 0; i < size; ++i)
{
@ -269,3 +278,14 @@ void LoadIndexedXF(u32 val, int refarray)
currData[i] = Common::swap32(newData[i]);
}
}
void PreprocessIndexedXF(u32 val, int refarray)
{
int index = val >> 16;
int size = ((val >> 12) & 0xF) + 1;
u32* new_data = (u32*)Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] + g_preprocess_cp_state.array_strides[refarray] * index);
size_t buf_size = size * sizeof(u32);
PushFifoAuxBuffer(new_data, buf_size);
}

View File

@ -74,7 +74,7 @@ protected:
void ResetPointers()
{
g_pVideoData = &input_memory[0];
g_video_buffer_read_ptr = &input_memory[0];
VertexManager::s_pCurBufferPointer = &output_memory[0];
m_input_pos = m_output_pos = 0;
}