Merge pull request #885 from comex/gpu-determinism
GPU determinism (apparently it is ready for merge)
This commit is contained in:
commit
fbabc03b3f
|
@ -55,10 +55,24 @@ struct ConfigCache
|
|||
unsigned int framelimit, frameSkip;
|
||||
TEXIDevices m_EXIDevice[MAX_EXI_CHANNELS];
|
||||
std::string strBackend, sBackend;
|
||||
std::string m_strGPUDeterminismMode;
|
||||
bool bSetFramelimit, bSetEXIDevice[MAX_EXI_CHANNELS], bSetVolume, bSetPads[MAX_SI_CHANNELS], bSetWiimoteSource[MAX_BBMOTES], bSetFrameSkip;
|
||||
};
|
||||
static ConfigCache config_cache;
|
||||
|
||||
// Maps the textual GPU determinism setting onto its GPUDeterminismMode value.
// Recognised spellings are "auto", "none" and "fake-completion"; any other
// string is reported through NOTICE_LOG and handled as GPU_DETERMINISM_AUTO.
static GPUDeterminismMode ParseGPUDeterminismMode(const std::string& mode)
{
	static const struct
	{
		const char* name;
		GPUDeterminismMode value;
	} known_modes[] = {
		{ "auto",            GPU_DETERMINISM_AUTO },
		{ "none",            GPU_DETERMINISM_NONE },
		{ "fake-completion", GPU_DETERMINISM_FAKE_COMPLETION },
	};

	for (const auto& entry : known_modes)
	{
		if (mode == entry.name)
			return entry.value;
	}

	// Unknown spelling: tell the user and fall back to the automatic mode.
	NOTICE_LOG(BOOT, "Unknown GPU determinism mode %s", mode.c_str());
	return GPU_DETERMINISM_AUTO;
}
|
||||
|
||||
// Boot the ISO or file
|
||||
bool BootCore(const std::string& _rFilename)
|
||||
{
|
||||
|
@ -109,6 +123,7 @@ bool BootCore(const std::string& _rFilename)
|
|||
config_cache.bMergeBlocks = StartUp.bMergeBlocks;
|
||||
config_cache.bDSPHLE = StartUp.bDSPHLE;
|
||||
config_cache.strBackend = StartUp.m_strVideoBackend;
|
||||
config_cache.m_strGPUDeterminismMode = StartUp.m_strGPUDeterminismMode;
|
||||
config_cache.m_EnableJIT = SConfig::GetInstance().m_DSPEnableJIT;
|
||||
config_cache.bDSPThread = StartUp.bDSPThread;
|
||||
config_cache.Volume = SConfig::GetInstance().m_Volume;
|
||||
|
@ -168,6 +183,8 @@ bool BootCore(const std::string& _rFilename)
|
|||
dsp_section->Get("EnableJIT", &SConfig::GetInstance().m_DSPEnableJIT, SConfig::GetInstance().m_DSPEnableJIT);
|
||||
dsp_section->Get("Backend", &SConfig::GetInstance().sBackend, SConfig::GetInstance().sBackend);
|
||||
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
|
||||
core_section->Get("GPUDeterminismMode", &StartUp.m_strGPUDeterminismMode, StartUp.m_strGPUDeterminismMode);
|
||||
StartUp.m_GPUDeterminismMode = ParseGPUDeterminismMode(StartUp.m_strGPUDeterminismMode);
|
||||
|
||||
for (unsigned int i = 0; i < MAX_SI_CHANNELS; ++i)
|
||||
{
|
||||
|
@ -277,6 +294,7 @@ void Stop()
|
|||
StartUp.bDSPHLE = config_cache.bDSPHLE;
|
||||
StartUp.bDSPThread = config_cache.bDSPThread;
|
||||
StartUp.m_strVideoBackend = config_cache.strBackend;
|
||||
StartUp.m_strGPUDeterminismMode = config_cache.m_strGPUDeterminismMode;
|
||||
VideoBackend::ActivateBackend(StartUp.m_strVideoBackend);
|
||||
StartUp.bHLE_BS2 = config_cache.bHLE_BS2;
|
||||
SConfig::GetInstance().sBackend = config_cache.sBackend;
|
||||
|
|
|
@ -317,6 +317,7 @@ void SConfig::SaveCoreSettings(IniFile& ini)
|
|||
core->Set("FrameLimit", m_Framelimit);
|
||||
core->Set("FrameSkip", m_FrameSkip);
|
||||
core->Set("GFXBackend", m_LocalCoreStartupParameter.m_strVideoBackend);
|
||||
core->Set("GPUDeterminismMode", m_LocalCoreStartupParameter.m_strGPUDeterminismMode);
|
||||
}
|
||||
|
||||
void SConfig::SaveMovieSettings(IniFile& ini)
|
||||
|
@ -542,6 +543,7 @@ void SConfig::LoadCoreSettings(IniFile& ini)
|
|||
core->Get("FrameLimit", &m_Framelimit, 1); // auto frame limit by default
|
||||
core->Get("FrameSkip", &m_FrameSkip, 0);
|
||||
core->Get("GFXBackend", &m_LocalCoreStartupParameter.m_strVideoBackend, "");
|
||||
core->Get("GPUDeterminismMode", &m_LocalCoreStartupParameter.m_strGPUDeterminismMode, "auto");
|
||||
}
|
||||
|
||||
void SConfig::LoadMovieSettings(IniFile& ini)
|
||||
|
|
|
@ -48,6 +48,7 @@
|
|||
#include "Core/HW/VideoInterface.h"
|
||||
#include "Core/HW/Wiimote.h"
|
||||
#include "Core/IPC_HLE/WII_IPC_HLE_Device_usb.h"
|
||||
#include "Core/IPC_HLE/WII_Socket.h"
|
||||
#include "Core/PowerPC/PowerPC.h"
|
||||
|
||||
#ifdef USE_GDBSTUB
|
||||
|
@ -65,6 +66,8 @@ bool g_aspect_wide;
|
|||
namespace Core
|
||||
{
|
||||
|
||||
bool g_want_determinism;
|
||||
|
||||
// Declarations and definitions
|
||||
static Common::Timer s_timer;
|
||||
static volatile u32 s_drawn_frame = 0;
|
||||
|
@ -177,6 +180,8 @@ bool Init()
|
|||
s_emu_thread.join();
|
||||
}
|
||||
|
||||
Core::UpdateWantDeterminism(/*initial*/ true);
|
||||
|
||||
INFO_LOG(OSREPORT, "Starting core = %s mode",
|
||||
_CoreParameter.bWii ? "Wii" : "GameCube");
|
||||
INFO_LOG(OSREPORT, "CPU Thread separate = %s",
|
||||
|
@ -564,6 +569,9 @@ void RequestRefreshInfo()
|
|||
|
||||
bool PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
||||
{
|
||||
if (!IsRunning())
|
||||
return true;
|
||||
|
||||
// let's support recursive locking to simplify things on the caller's side,
|
||||
// and let's do it at this outer level in case the individual systems don't support it.
|
||||
if (doLock ? s_pause_and_lock_depth++ : --s_pause_and_lock_depth)
|
||||
|
@ -702,4 +710,27 @@ void SetOnStoppedCallback(StoppedCallbackFunc callback)
|
|||
s_on_stopped_callback = callback;
|
||||
}
|
||||
|
||||
void UpdateWantDeterminism(bool initial)
|
||||
{
|
||||
// For now, this value is not itself configurable. Instead, individual
|
||||
// settings that depend on it, such as GPU determinism mode. should have
|
||||
// override options for testing,
|
||||
bool new_want_determinism =
|
||||
Movie::IsPlayingInput() ||
|
||||
Movie::IsRecordingInput() ||
|
||||
NetPlay::IsNetPlayRunning();
|
||||
if (new_want_determinism != g_want_determinism || initial)
|
||||
{
|
||||
WARN_LOG(COMMON, "Want determinism <- %s", new_want_determinism ? "true" : "false");
|
||||
|
||||
bool was_unpaused = Core::PauseAndLock(true);
|
||||
|
||||
g_want_determinism = new_want_determinism;
|
||||
WiiSockMan::GetInstance().UpdateWantDeterminism(new_want_determinism);
|
||||
g_video_backend->UpdateWantDeterminism(new_want_determinism);
|
||||
|
||||
Core::PauseAndLock(false, was_unpaused);
|
||||
}
|
||||
}
|
||||
|
||||
} // Core
|
||||
|
|
|
@ -23,6 +23,8 @@ extern bool g_aspect_wide;
|
|||
namespace Core
|
||||
{
|
||||
|
||||
extern bool g_want_determinism;
|
||||
|
||||
bool GetIsFramelimiterTempDisabled();
|
||||
void SetIsFramelimiterTempDisabled(bool disable);
|
||||
|
||||
|
@ -79,4 +81,7 @@ bool PauseAndLock(bool doLock, bool unpauseOnUnlock=true);
|
|||
typedef void(*StoppedCallbackFunc)(void);
|
||||
void SetOnStoppedCallback(StoppedCallbackFunc callback);
|
||||
|
||||
// Run on the GUI thread when the factors change.
|
||||
void UpdateWantDeterminism(bool initial = false);
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -97,6 +97,15 @@ enum Hotkey
|
|||
NUM_HOTKEYS,
|
||||
};
|
||||
|
||||
// Selectable GPU determinism modes.  The numeric values are spelled out
// explicitly; they match the implicit numbering of the declaration order.
enum GPUDeterminismMode
{
	GPU_DETERMINISM_AUTO = 0,
	GPU_DETERMINISM_NONE = 1,
	// Currently the only mode other than AUTO/NONE.  There will probably be
	// at least one more at some point.
	GPU_DETERMINISM_FAKE_COMPLETION = 2,
};
|
||||
|
||||
struct SCoreStartupParameter
|
||||
{
|
||||
// Settings
|
||||
|
@ -200,6 +209,10 @@ struct SCoreStartupParameter
|
|||
EBootType m_BootType;
|
||||
|
||||
std::string m_strVideoBackend;
|
||||
std::string m_strGPUDeterminismMode;
|
||||
|
||||
// set based on the string version
|
||||
GPUDeterminismMode m_GPUDeterminismMode;
|
||||
|
||||
// files
|
||||
std::string m_strFilename;
|
||||
|
|
|
@ -331,7 +331,7 @@ bool Wiimote::Step()
|
|||
m_rumble->controls[0]->control_ref->State(m_rumble_on);
|
||||
|
||||
// when a movie is active, this button status update is disabled (moved), because movies only record data reports.
|
||||
if (!(Movie::IsMovieActive()) || NetPlay::IsNetPlayRunning())
|
||||
if (!Core::g_want_determinism)
|
||||
{
|
||||
UpdateButtonsStatus();
|
||||
}
|
||||
|
@ -385,7 +385,7 @@ void Wiimote::UpdateButtonsStatus()
|
|||
void Wiimote::GetCoreData(u8* const data)
|
||||
{
|
||||
// when a movie is active, the button update happens here instead of Wiimote::Step, to avoid potential desync issues.
|
||||
if (Movie::IsMovieActive() || NetPlay::IsNetPlayRunning())
|
||||
if (Core::g_want_determinism)
|
||||
{
|
||||
UpdateButtonsStatus();
|
||||
}
|
||||
|
|
|
@ -4,8 +4,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
|
||||
#include "Core/Movie.h"
|
||||
#include "Core/NetPlayProto.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/IPC_HLE/WII_IPC_HLE.h"
|
||||
#include "Core/IPC_HLE/WII_IPC_HLE_Device.h"
|
||||
#include "Core/IPC_HLE/WII_Socket.h" // No Wii socket support while using NetPlay or TAS
|
||||
|
@ -559,9 +558,7 @@ void WiiSockMan::AddSocket(s32 fd)
|
|||
|
||||
s32 WiiSockMan::NewSocket(s32 af, s32 type, s32 protocol)
|
||||
{
|
||||
if (NetPlay::IsNetPlayRunning() ||
|
||||
Movie::IsRecordingInput() ||
|
||||
Movie::IsPlayingInput())
|
||||
if (Core::g_want_determinism)
|
||||
{
|
||||
return SO_ENOMEM;
|
||||
}
|
||||
|
@ -664,5 +661,12 @@ void WiiSockMan::Convert(sockaddr_in const & from, WiiSockAddrIn& to, s32 addrle
|
|||
to.len = addrlen;
|
||||
}
|
||||
|
||||
void WiiSockMan::UpdateWantDeterminism(bool want)
|
||||
{
|
||||
// If we switched into movie recording, kill existing sockets.
|
||||
if (want)
|
||||
Clean();
|
||||
}
|
||||
|
||||
#undef ERRORCODE
|
||||
#undef EITHER
|
||||
|
|
|
@ -242,6 +242,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
void UpdateWantDeterminism(bool want);
|
||||
|
||||
private:
|
||||
WiiSockMan() = default;
|
||||
|
||||
|
|
|
@ -437,6 +437,8 @@ bool BeginRecordingInput(int controllers)
|
|||
if (s_playMode != MODE_NONE || controllers == 0)
|
||||
return false;
|
||||
|
||||
bool was_unpaused = Core::PauseAndLock(true);
|
||||
|
||||
s_numPads = controllers;
|
||||
g_currentFrame = g_totalFrames = 0;
|
||||
g_currentLagCount = s_totalLagCount = 0;
|
||||
|
@ -487,6 +489,10 @@ bool BeginRecordingInput(int controllers)
|
|||
|
||||
s_currentByte = s_totalBytes = 0;
|
||||
|
||||
Core::UpdateWantDeterminism();
|
||||
|
||||
Core::PauseAndLock(false, was_unpaused);
|
||||
|
||||
Core::DisplayMessage("Starting movie recording", 2000);
|
||||
return true;
|
||||
}
|
||||
|
@ -764,6 +770,8 @@ bool PlayInput(const std::string& filename)
|
|||
|
||||
s_playMode = MODE_PLAYING;
|
||||
|
||||
Core::UpdateWantDeterminism();
|
||||
|
||||
s_totalBytes = g_recordfd.GetSize() - 256;
|
||||
EnsureTmpInputSize((size_t)s_totalBytes);
|
||||
g_recordfd.ReadArray(tmpInput, (size_t)s_totalBytes);
|
||||
|
@ -1097,6 +1105,7 @@ void EndPlayInput(bool cont)
|
|||
s_rerecords = 0;
|
||||
s_currentByte = 0;
|
||||
s_playMode = MODE_NONE;
|
||||
Core::UpdateWantDeterminism();
|
||||
Core::DisplayMessage("Movie End.", 2000);
|
||||
s_bRecordingFromSaveState = false;
|
||||
// we don't clear these things because otherwise we can't resume playback if we load a movie state later
|
||||
|
|
|
@ -13,46 +13,46 @@ void SWLoadCPReg(u32 sub_cmd, u32 value)
|
|||
switch (sub_cmd & 0xF0)
|
||||
{
|
||||
case 0x30:
|
||||
MatrixIndexA.Hex = value;
|
||||
g_main_cp_state.matrix_index_a.Hex = value;
|
||||
break;
|
||||
|
||||
case 0x40:
|
||||
MatrixIndexB.Hex = value;
|
||||
g_main_cp_state.matrix_index_b.Hex = value;
|
||||
break;
|
||||
|
||||
case 0x50:
|
||||
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits
|
||||
g_VtxDesc.Hex |= value;
|
||||
g_main_cp_state.vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
|
||||
g_main_cp_state.vtx_desc.Hex |= value;
|
||||
break;
|
||||
|
||||
case 0x60:
|
||||
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
||||
g_VtxDesc.Hex |= (u64)value << 17;
|
||||
g_main_cp_state.vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
||||
g_main_cp_state.vtx_desc.Hex |= (u64)value << 17;
|
||||
break;
|
||||
|
||||
case 0x70:
|
||||
_assert_((sub_cmd & 0x0F) < 8);
|
||||
g_VtxAttr[sub_cmd & 7].g0.Hex = value;
|
||||
g_main_cp_state.vtx_attr[sub_cmd & 7].g0.Hex = value;
|
||||
break;
|
||||
|
||||
case 0x80:
|
||||
_assert_((sub_cmd & 0x0F) < 8);
|
||||
g_VtxAttr[sub_cmd & 7].g1.Hex = value;
|
||||
g_main_cp_state.vtx_attr[sub_cmd & 7].g1.Hex = value;
|
||||
break;
|
||||
|
||||
case 0x90:
|
||||
_assert_((sub_cmd & 0x0F) < 8);
|
||||
g_VtxAttr[sub_cmd & 7].g2.Hex = value;
|
||||
g_main_cp_state.vtx_attr[sub_cmd & 7].g2.Hex = value;
|
||||
break;
|
||||
|
||||
// Pointers to vertex arrays in GC RAM
|
||||
case 0xA0:
|
||||
arraybases[sub_cmd & 0xF] = value;
|
||||
g_main_cp_state.array_bases[sub_cmd & 0xF] = value;
|
||||
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
|
||||
break;
|
||||
|
||||
case 0xB0:
|
||||
arraystrides[sub_cmd & 0xF] = value & 0xFF;
|
||||
g_main_cp_state.array_strides[sub_cmd & 0xF] = value & 0xFF;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ static void DecodePrimitiveStream(u32 iBufferSize)
|
|||
{
|
||||
while (streamSize > 0 && iBufferSize >= vertexSize)
|
||||
{
|
||||
g_pVideoData += vertexSize;
|
||||
g_video_buffer_read_ptr += vertexSize;
|
||||
iBufferSize -= vertexSize;
|
||||
streamSize--;
|
||||
}
|
||||
|
@ -94,26 +94,26 @@ static void ReadXFData(u32 iBufferSize)
|
|||
|
||||
static void ExecuteDisplayList(u32 addr, u32 count)
|
||||
{
|
||||
u8 *videoDataSave = g_pVideoData;
|
||||
u8 *videoDataSave = g_video_buffer_read_ptr;
|
||||
|
||||
u8 *dlStart = Memory::GetPointer(addr);
|
||||
|
||||
g_pVideoData = dlStart;
|
||||
g_video_buffer_read_ptr = dlStart;
|
||||
|
||||
while (OpcodeDecoder::CommandRunnable(count))
|
||||
{
|
||||
OpcodeDecoder::Run(count);
|
||||
|
||||
// if data was read by the opcode decoder then the video data pointer changed
|
||||
u32 readCount = (u32)(g_pVideoData - dlStart);
|
||||
dlStart = g_pVideoData;
|
||||
u32 readCount = (u32)(g_video_buffer_read_ptr - dlStart);
|
||||
dlStart = g_video_buffer_read_ptr;
|
||||
|
||||
_assert_msg_(VIDEO, count >= readCount, "Display list underrun");
|
||||
|
||||
count -= readCount;
|
||||
}
|
||||
|
||||
g_pVideoData = videoDataSave;
|
||||
g_video_buffer_read_ptr = videoDataSave;
|
||||
}
|
||||
|
||||
static void DecodeStandard(u32 bufferSize)
|
||||
|
|
|
@ -57,7 +57,7 @@ void DoState(PointerWrap &p)
|
|||
p.Do(interruptWaiting);
|
||||
|
||||
// Is this right?
|
||||
p.DoArray(g_pVideoData,writePos);
|
||||
p.DoArray(g_video_buffer_read_ptr,writePos);
|
||||
}
|
||||
|
||||
static void UpdateInterrupts_Wrapper(u64 userdata, int cyclesLate)
|
||||
|
@ -95,7 +95,7 @@ void Init()
|
|||
interruptSet = false;
|
||||
interruptWaiting = false;
|
||||
|
||||
g_pVideoData = nullptr;
|
||||
g_video_buffer_read_ptr = nullptr;
|
||||
g_bSkipCurrentFrame = false;
|
||||
}
|
||||
|
||||
|
@ -311,7 +311,7 @@ bool RunBuffer()
|
|||
|
||||
_dbg_assert_(COMMANDPROCESSOR, writePos >= readPos);
|
||||
|
||||
g_pVideoData = &commandBuffer[readPos];
|
||||
g_video_buffer_read_ptr = &commandBuffer[readPos];
|
||||
|
||||
u32 availableBytes = writePos - readPos;
|
||||
|
||||
|
@ -322,7 +322,7 @@ bool RunBuffer()
|
|||
OpcodeDecoder::Run(availableBytes);
|
||||
|
||||
// if data was read by the opcode decoder then the video data pointer changed
|
||||
readPos = (u32)(g_pVideoData - &commandBuffer[0]);
|
||||
readPos = (u32)(g_video_buffer_read_ptr - &commandBuffer[0]);
|
||||
_dbg_assert_(VIDEO, writePos >= readPos);
|
||||
availableBytes = writePos - readPos;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ SWVertexLoader::~SWVertexLoader()
|
|||
|
||||
void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
||||
{
|
||||
m_CurrentVat = &g_VtxAttr[attributeIndex];
|
||||
m_CurrentVat = &g_main_cp_state.vtx_attr[attributeIndex];
|
||||
|
||||
posScale = 1.0f / float(1 << m_CurrentVat->g0.PosFrac);
|
||||
tcScale[0] = 1.0f / float(1 << m_CurrentVat->g0.Tex0Frac);
|
||||
|
@ -53,20 +53,20 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
|||
|
||||
//TexMtx
|
||||
const u64 tmDesc[8] = {
|
||||
g_VtxDesc.Tex0MatIdx, g_VtxDesc.Tex1MatIdx, g_VtxDesc.Tex2MatIdx, g_VtxDesc.Tex3MatIdx,
|
||||
g_VtxDesc.Tex4MatIdx, g_VtxDesc.Tex5MatIdx, g_VtxDesc.Tex6MatIdx, g_VtxDesc.Tex7MatIdx
|
||||
g_main_cp_state.vtx_desc.Tex0MatIdx, g_main_cp_state.vtx_desc.Tex1MatIdx, g_main_cp_state.vtx_desc.Tex2MatIdx, g_main_cp_state.vtx_desc.Tex3MatIdx,
|
||||
g_main_cp_state.vtx_desc.Tex4MatIdx, g_main_cp_state.vtx_desc.Tex5MatIdx, g_main_cp_state.vtx_desc.Tex6MatIdx, g_main_cp_state.vtx_desc.Tex7MatIdx
|
||||
};
|
||||
|
||||
// Colors
|
||||
const u64 colDesc[2] = {g_VtxDesc.Color0, g_VtxDesc.Color1};
|
||||
const u64 colDesc[2] = {g_main_cp_state.vtx_desc.Color0, g_main_cp_state.vtx_desc.Color1};
|
||||
colElements[0] = m_CurrentVat->g0.Color0Elements;
|
||||
colElements[1] = m_CurrentVat->g0.Color1Elements;
|
||||
const u32 colComp[2] = {m_CurrentVat->g0.Color0Comp, m_CurrentVat->g0.Color1Comp};
|
||||
|
||||
// TextureCoord
|
||||
const u64 tcDesc[8] = {
|
||||
g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord, g_VtxDesc.Tex2Coord, g_VtxDesc.Tex3Coord,
|
||||
g_VtxDesc.Tex4Coord, g_VtxDesc.Tex5Coord, g_VtxDesc.Tex6Coord, g_VtxDesc.Tex7Coord
|
||||
g_main_cp_state.vtx_desc.Tex0Coord, g_main_cp_state.vtx_desc.Tex1Coord, g_main_cp_state.vtx_desc.Tex2Coord, g_main_cp_state.vtx_desc.Tex3Coord,
|
||||
g_main_cp_state.vtx_desc.Tex4Coord, g_main_cp_state.vtx_desc.Tex5Coord, g_main_cp_state.vtx_desc.Tex6Coord, g_main_cp_state.vtx_desc.Tex7Coord
|
||||
};
|
||||
const u32 tcElements[8] = {
|
||||
m_CurrentVat->g0.Tex0CoordElements, m_CurrentVat->g1.Tex1CoordElements, m_CurrentVat->g1.Tex2CoordElements,
|
||||
|
@ -89,15 +89,15 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
|||
|
||||
// Reset vertex
|
||||
// matrix index from xf regs or cp memory?
|
||||
if (xfmem.MatrixIndexA.PosNormalMtxIdx != MatrixIndexA.PosNormalMtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex0MtxIdx != MatrixIndexA.Tex0MtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex1MtxIdx != MatrixIndexA.Tex1MtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex2MtxIdx != MatrixIndexA.Tex2MtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex3MtxIdx != MatrixIndexA.Tex3MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex4MtxIdx != MatrixIndexB.Tex4MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex5MtxIdx != MatrixIndexB.Tex5MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex6MtxIdx != MatrixIndexB.Tex6MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex7MtxIdx != MatrixIndexB.Tex7MtxIdx)
|
||||
if (xfmem.MatrixIndexA.PosNormalMtxIdx != g_main_cp_state.matrix_index_a.PosNormalMtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex0MtxIdx != g_main_cp_state.matrix_index_a.Tex0MtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex1MtxIdx != g_main_cp_state.matrix_index_a.Tex1MtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex2MtxIdx != g_main_cp_state.matrix_index_a.Tex2MtxIdx ||
|
||||
xfmem.MatrixIndexA.Tex3MtxIdx != g_main_cp_state.matrix_index_a.Tex3MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex4MtxIdx != g_main_cp_state.matrix_index_b.Tex4MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex5MtxIdx != g_main_cp_state.matrix_index_b.Tex5MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex6MtxIdx != g_main_cp_state.matrix_index_b.Tex6MtxIdx ||
|
||||
xfmem.MatrixIndexB.Tex7MtxIdx != g_main_cp_state.matrix_index_b.Tex7MtxIdx)
|
||||
{
|
||||
WARN_LOG(VIDEO, "Matrix indices don't match");
|
||||
|
||||
|
@ -118,18 +118,18 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
|||
m_Vertex.texMtx[6] = xfmem.MatrixIndexB.Tex6MtxIdx;
|
||||
m_Vertex.texMtx[7] = xfmem.MatrixIndexB.Tex7MtxIdx;
|
||||
#else
|
||||
m_Vertex.posMtx = MatrixIndexA.PosNormalMtxIdx;
|
||||
m_Vertex.texMtx[0] = MatrixIndexA.Tex0MtxIdx;
|
||||
m_Vertex.texMtx[1] = MatrixIndexA.Tex1MtxIdx;
|
||||
m_Vertex.texMtx[2] = MatrixIndexA.Tex2MtxIdx;
|
||||
m_Vertex.texMtx[3] = MatrixIndexA.Tex3MtxIdx;
|
||||
m_Vertex.texMtx[4] = MatrixIndexB.Tex4MtxIdx;
|
||||
m_Vertex.texMtx[5] = MatrixIndexB.Tex5MtxIdx;
|
||||
m_Vertex.texMtx[6] = MatrixIndexB.Tex6MtxIdx;
|
||||
m_Vertex.texMtx[7] = MatrixIndexB.Tex7MtxIdx;
|
||||
m_Vertex.posMtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
|
||||
m_Vertex.texMtx[0] = g_main_cp_state.matrix_index_a.Tex0MtxIdx;
|
||||
m_Vertex.texMtx[1] = g_main_cp_state.matrix_index_a.Tex1MtxIdx;
|
||||
m_Vertex.texMtx[2] = g_main_cp_state.matrix_index_a.Tex2MtxIdx;
|
||||
m_Vertex.texMtx[3] = g_main_cp_state.matrix_index_a.Tex3MtxIdx;
|
||||
m_Vertex.texMtx[4] = g_main_cp_state.matrix_index_b.Tex4MtxIdx;
|
||||
m_Vertex.texMtx[5] = g_main_cp_state.matrix_index_b.Tex5MtxIdx;
|
||||
m_Vertex.texMtx[6] = g_main_cp_state.matrix_index_b.Tex6MtxIdx;
|
||||
m_Vertex.texMtx[7] = g_main_cp_state.matrix_index_b.Tex7MtxIdx;
|
||||
#endif
|
||||
|
||||
if (g_VtxDesc.PosMatIdx != NOT_PRESENT)
|
||||
if (g_main_cp_state.vtx_desc.PosMatIdx != NOT_PRESENT)
|
||||
{
|
||||
AddAttributeLoader(LoadPosMtx);
|
||||
m_VertexSize++;
|
||||
|
@ -145,17 +145,17 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
|||
}
|
||||
|
||||
// Write vertex position loader
|
||||
m_positionLoader = VertexLoader_Position::GetFunction(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
||||
m_VertexSize += VertexLoader_Position::GetSize(g_VtxDesc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
||||
m_positionLoader = VertexLoader_Position::GetFunction(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
||||
m_VertexSize += VertexLoader_Position::GetSize(g_main_cp_state.vtx_desc.Position, m_CurrentVat->g0.PosFormat, m_CurrentVat->g0.PosElements);
|
||||
AddAttributeLoader(LoadPosition);
|
||||
|
||||
// Normals
|
||||
if (g_VtxDesc.Normal != NOT_PRESENT)
|
||||
if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
|
||||
{
|
||||
m_VertexSize += VertexLoader_Normal::GetSize(g_VtxDesc.Normal,
|
||||
m_VertexSize += VertexLoader_Normal::GetSize(g_main_cp_state.vtx_desc.Normal,
|
||||
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
|
||||
|
||||
m_normalLoader = VertexLoader_Normal::GetFunction(g_VtxDesc.Normal,
|
||||
m_normalLoader = VertexLoader_Normal::GetFunction(g_main_cp_state.vtx_desc.Normal,
|
||||
m_CurrentVat->g0.NormalFormat, m_CurrentVat->g0.NormalElements, m_CurrentVat->g0.NormalIndex3);
|
||||
|
||||
if (m_normalLoader == nullptr)
|
||||
|
@ -234,8 +234,8 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType)
|
|||
|
||||
// special case if only pos and tex coord 0 and tex coord input is AB11
|
||||
m_TexGenSpecialCase =
|
||||
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0
|
||||
(g_VtxDesc.Tex0Coord != NOT_PRESENT) &&
|
||||
((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
|
||||
(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
|
||||
(xfmem.texMtxInfo[0].projection == XF_TEXPROJ_ST);
|
||||
|
||||
m_SetupUnit->Init(primitiveType);
|
||||
|
@ -252,7 +252,7 @@ void SWVertexLoader::LoadVertex()
|
|||
// transform input data
|
||||
TransformUnit::TransformPosition(&m_Vertex, outVertex);
|
||||
|
||||
if (g_VtxDesc.Normal != NOT_PRESENT)
|
||||
if (g_main_cp_state.vtx_desc.Normal != NOT_PRESENT)
|
||||
{
|
||||
TransformUnit::TransformNormal(&m_Vertex, m_CurrentVat->g0.NormalElements, outVertex);
|
||||
}
|
||||
|
|
|
@ -116,14 +116,7 @@ void VideoSoftware::DoState(PointerWrap& p)
|
|||
p.DoPOD(swstats);
|
||||
|
||||
// CP Memory
|
||||
p.DoArray(arraybases, 16);
|
||||
p.DoArray(arraystrides, 16);
|
||||
p.Do(MatrixIndexA);
|
||||
p.Do(MatrixIndexB);
|
||||
p.Do(g_VtxDesc.Hex);
|
||||
p.DoArray(g_VtxAttr, 8);
|
||||
p.DoMarker("CP Memory");
|
||||
|
||||
DoCPState(p);
|
||||
}
|
||||
|
||||
void VideoSoftware::CheckInvalidState()
|
||||
|
|
|
@ -74,7 +74,7 @@ void SWLoadIndexedXF(u32 val, int array)
|
|||
int size = ((val >> 12) & 0xF) + 1;
|
||||
//load stuff from array to address in xf mem
|
||||
|
||||
u32 *pData = (u32*)Memory::GetPointer(arraybases[array] + arraystrides[array]*index);
|
||||
u32 *pData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[array] + g_main_cp_state.array_strides[array]*index);
|
||||
|
||||
// byteswap data
|
||||
u32 buffer[16];
|
||||
|
|
|
@ -1085,5 +1085,6 @@ struct BPMemory
|
|||
extern BPMemory bpmem;
|
||||
|
||||
void LoadBPReg(u32 value0);
|
||||
void LoadBPRegPreprocess(u32 value0);
|
||||
|
||||
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc);
|
||||
|
|
|
@ -173,7 +173,8 @@ static void BPWritten(const BPCmd& bp)
|
|||
switch (bp.newvalue & 0xFF)
|
||||
{
|
||||
case 0x02:
|
||||
PixelEngine::SetFinish(); // may generate interrupt
|
||||
if (!g_use_deterministic_gpu_thread)
|
||||
PixelEngine::SetFinish(); // may generate interrupt
|
||||
DEBUG_LOG(VIDEO, "GXSetDrawDone SetPEFinish (value: 0x%02X)", (bp.newvalue & 0xFFFF));
|
||||
return;
|
||||
|
||||
|
@ -183,11 +184,13 @@ static void BPWritten(const BPCmd& bp)
|
|||
}
|
||||
return;
|
||||
case BPMEM_PE_TOKEN_ID: // Pixel Engine Token ID
|
||||
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
|
||||
if (!g_use_deterministic_gpu_thread)
|
||||
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), false);
|
||||
DEBUG_LOG(VIDEO, "SetPEToken 0x%04x", (bp.newvalue & 0xFFFF));
|
||||
return;
|
||||
case BPMEM_PE_TOKEN_INT_ID: // Pixel Engine Interrupt Token ID
|
||||
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
|
||||
if (!g_use_deterministic_gpu_thread)
|
||||
PixelEngine::SetToken(static_cast<u16>(bp.newvalue & 0xFFFF), true);
|
||||
DEBUG_LOG(VIDEO, "SetPEToken + INT 0x%04x", (bp.newvalue & 0xFFFF));
|
||||
return;
|
||||
|
||||
|
@ -685,6 +688,26 @@ void LoadBPReg(u32 value0)
|
|||
BPWritten(bp);
|
||||
}
|
||||
|
||||
// Handles the few BP register writes that signal the pixel engine:
//  - BPMEM_SETDRAWDONE with a low byte of 0x02 -> PixelEngine::SetFinish()
//  - BPMEM_PE_TOKEN_ID                         -> PixelEngine::SetToken(token, false)
//  - BPMEM_PE_TOKEN_INT_ID                     -> PixelEngine::SetToken(token, true)
// Every other register number is ignored.
//
// value0 carries the register number in its top 8 bits and the register
// payload in its low 24 bits.
void LoadBPRegPreprocess(u32 value0)
{
	const int reg = value0 >> 24;
	// masking could hypothetically be a problem
	const u32 payload = value0 & 0xffffff;

	if (reg == BPMEM_SETDRAWDONE)
	{
		if ((payload & 0xff) == 0x02)
			PixelEngine::SetFinish();
	}
	else if (reg == BPMEM_PE_TOKEN_ID)
	{
		PixelEngine::SetToken(payload & 0xffff, false);
	}
	else if (reg == BPMEM_PE_TOKEN_INT_ID)
	{
		// Pixel Engine Interrupt Token ID
		PixelEngine::SetToken(payload & 0xffff, true);
	}
}
|
||||
|
||||
void GetBPRegInfo(const u8* data, std::string* name, std::string* desc)
|
||||
{
|
||||
const char* no_yes[2] = { "No", "Yes" };
|
||||
|
|
|
@ -7,5 +7,4 @@
|
|||
#include "VideoCommon/BPMemory.h"
|
||||
|
||||
void BPInit();
|
||||
void LoadBPReg(u32 value0);
|
||||
void BPReload();
|
||||
|
|
|
@ -2,17 +2,32 @@
|
|||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "Common/ChunkFile.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/CPMemory.h"
|
||||
|
||||
// CP state
|
||||
u8 *cached_arraybases[16];
|
||||
|
||||
// STATE_TO_SAVE
|
||||
u32 arraybases[16];
|
||||
u32 arraystrides[16];
|
||||
TMatrixIndexA MatrixIndexA;
|
||||
TMatrixIndexB MatrixIndexB;
|
||||
TVtxDesc g_VtxDesc;
|
||||
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
|
||||
VAT g_VtxAttr[8];
|
||||
CPState g_main_cp_state;
|
||||
CPState g_preprocess_cp_state;
|
||||
|
||||
// Runs the main CP state through the given PointerWrap, field by field, and
// terminates the section with the "CP Memory" marker.  The order of the
// calls defines the serialized layout and must not be changed.
void DoCPState(PointerWrap& p)
{
	// We don't save g_preprocess_cp_state separately because the GPU should be
	// synced around state save/load.
	CPState& main_state = g_main_cp_state;

	p.DoArray(main_state.array_bases, 16);
	p.DoArray(main_state.array_strides, 16);
	p.Do(main_state.matrix_index_a);
	p.Do(main_state.matrix_index_b);
	p.Do(main_state.vtx_desc.Hex);
	p.DoArray(main_state.vtx_attr, 8);
	p.DoMarker("CP Memory");

	// In read mode the main state has just been overwritten, so bring the
	// preprocess copy back in line with it.
	const bool is_read_mode = (p.mode == PointerWrap::MODE_READ);
	if (is_read_mode)
		CopyPreprocessCPStateFromMain();
}
|
||||
|
||||
void CopyPreprocessCPStateFromMain()
|
||||
{
|
||||
memcpy(&g_preprocess_cp_state, &g_main_cp_state, sizeof(CPState));
|
||||
}
|
||||
|
|
|
@ -231,12 +231,6 @@ union TMatrixIndexB
|
|||
|
||||
#pragma pack()
|
||||
|
||||
extern u32 arraybases[16];
|
||||
extern u8 *cached_arraybases[16];
|
||||
extern u32 arraystrides[16];
|
||||
extern TMatrixIndexA MatrixIndexA;
|
||||
extern TMatrixIndexB MatrixIndexB;
|
||||
|
||||
struct VAT
|
||||
{
|
||||
UVAT_group0 g0;
|
||||
|
@ -244,11 +238,37 @@ struct VAT
|
|||
UVAT_group2 g2;
|
||||
};
|
||||
|
||||
extern TVtxDesc g_VtxDesc;
|
||||
extern VAT g_VtxAttr[8];
|
||||
class VertexLoader;
|
||||
|
||||
// STATE_TO_SAVE
|
||||
struct CPState final
|
||||
{
|
||||
u32 array_bases[16];
|
||||
u32 array_strides[16];
|
||||
TMatrixIndexA matrix_index_a;
|
||||
TMatrixIndexB matrix_index_b;
|
||||
TVtxDesc vtx_desc;
|
||||
// Most games only use the first VtxAttr and simply reconfigure it all the time as needed.
|
||||
VAT vtx_attr[8];
|
||||
|
||||
// Attributes that actually belong to VertexLoaderManager:
|
||||
int attr_dirty; // bitfield
|
||||
VertexLoader* vertex_loaders[8];
|
||||
};
|
||||
|
||||
class PointerWrap;
|
||||
|
||||
extern void DoCPState(PointerWrap& p);
|
||||
|
||||
extern void CopyPreprocessCPStateFromMain();
|
||||
|
||||
extern CPState g_main_cp_state;
|
||||
extern CPState g_preprocess_cp_state;
|
||||
|
||||
extern u8 *cached_arraybases[16];
|
||||
|
||||
// Might move this into its own file later.
|
||||
void LoadCPReg(u32 SubCmd, u32 Value);
|
||||
void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false);
|
||||
|
||||
// Fills memory with data from CP regs
|
||||
void FillCPMemoryArray(u32 *memory);
|
||||
|
|
|
@ -77,7 +77,7 @@ void DoState(PointerWrap &p)
|
|||
p.Do(interruptFinishWaiting);
|
||||
}
|
||||
|
||||
UNUSED static inline void WriteLow(volatile u32& _reg, u16 lowbits)
|
||||
static inline void WriteLow(volatile u32& _reg, u16 lowbits)
|
||||
{
|
||||
Common::AtomicStore(_reg, (_reg & 0xFFFF0000) | lowbits);
|
||||
}
|
||||
|
@ -159,9 +159,8 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
{ FIFO_WRITE_POINTER_LO, MMIO::Utils::LowPart(&fifo.CPWritePointer), false, true },
|
||||
{ FIFO_WRITE_POINTER_HI, MMIO::Utils::HighPart(&fifo.CPWritePointer) },
|
||||
// FIFO_READ_POINTER has different code for single/dual core.
|
||||
{ FIFO_BP_LO, MMIO::Utils::LowPart(&fifo.CPBreakpoint), false, true },
|
||||
{ FIFO_BP_HI, MMIO::Utils::HighPart(&fifo.CPBreakpoint) },
|
||||
};
|
||||
|
||||
for (auto& mapped_var : directly_mapped_vars)
|
||||
{
|
||||
u16 wmask = mapped_var.writes_align_to_32_bytes ? 0xFFE0 : 0xFFFF;
|
||||
|
@ -173,6 +172,19 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
);
|
||||
}
|
||||
|
||||
mmio->Register(base | FIFO_BP_LO,
|
||||
MMIO::DirectRead<u16>(MMIO::Utils::LowPart(&fifo.CPBreakpoint)),
|
||||
MMIO::ComplexWrite<u16>([](u32, u16 val) {
|
||||
WriteLow(fifo.CPBreakpoint, val & 0xffe0);
|
||||
})
|
||||
);
|
||||
mmio->Register(base | FIFO_BP_HI,
|
||||
MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPBreakpoint)),
|
||||
MMIO::ComplexWrite<u16>([](u32, u16 val) {
|
||||
WriteHigh(fifo.CPBreakpoint, val);
|
||||
})
|
||||
);
|
||||
|
||||
// Timing and metrics MMIOs are stubbed with fixed values.
|
||||
struct {
|
||||
u32 addr;
|
||||
|
@ -216,8 +228,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
UCPCtrlReg tmp(val);
|
||||
m_CPCtrlReg.Hex = tmp.Hex;
|
||||
SetCpControlRegister();
|
||||
if (!IsOnThread())
|
||||
RunGpu();
|
||||
RunGpu();
|
||||
})
|
||||
);
|
||||
|
||||
|
@ -227,8 +238,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
UCPClearReg tmp(val);
|
||||
m_CPClearReg.Hex = tmp.Hex;
|
||||
SetCpClearRegister();
|
||||
if (!IsOnThread())
|
||||
RunGpu();
|
||||
RunGpu();
|
||||
})
|
||||
);
|
||||
|
||||
|
@ -260,6 +270,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
: MMIO::DirectRead<u16>(MMIO::Utils::HighPart(&fifo.CPReadWriteDistance)),
|
||||
MMIO::ComplexWrite<u16>([](u32, u16 val) {
|
||||
WriteHigh(fifo.CPReadWriteDistance, val);
|
||||
SyncGPU(SYNC_GPU_OTHER);
|
||||
if (fifo.CPReadWriteDistance == 0)
|
||||
{
|
||||
GPFifo::ResetGatherPipe();
|
||||
|
@ -269,8 +280,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
{
|
||||
ResetVideoBuffer();
|
||||
}
|
||||
if (!IsOnThread())
|
||||
RunGpu();
|
||||
RunGpu();
|
||||
})
|
||||
);
|
||||
mmio->Register(base | FIFO_READ_POINTER_LO,
|
||||
|
@ -298,11 +308,7 @@ void STACKALIGN GatherPipeBursted()
|
|||
// if we aren't linked, we don't care about gather pipe data
|
||||
if (!m_CPCtrlReg.GPLinkEnable)
|
||||
{
|
||||
if (!IsOnThread())
|
||||
{
|
||||
RunGpu();
|
||||
}
|
||||
else
|
||||
if (IsOnThread() && !g_use_deterministic_gpu_thread)
|
||||
{
|
||||
// In multibuffer mode is not allowed write in the same FIFO attached to the GPU.
|
||||
// Fix Pokemon XD in DC mode.
|
||||
|
@ -313,6 +319,10 @@ void STACKALIGN GatherPipeBursted()
|
|||
ProcessFifoAllDistance();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
RunGpu();
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -327,8 +337,7 @@ void STACKALIGN GatherPipeBursted()
|
|||
|
||||
Common::AtomicAdd(fifo.CPReadWriteDistance, GATHER_PIPE_SIZE);
|
||||
|
||||
if (!IsOnThread())
|
||||
RunGpu();
|
||||
RunGpu();
|
||||
|
||||
_assert_msg_(COMMANDPROCESSOR, fifo.CPReadWriteDistance <= fifo.CPEnd - fifo.CPBase,
|
||||
"FIFO is overflowed by GatherPipe !\nCPU thread is too fast!");
|
||||
|
@ -358,7 +367,8 @@ void UpdateInterrupts(u64 userdata)
|
|||
|
||||
void UpdateInterruptsFromVideoBackend(u64 userdata)
|
||||
{
|
||||
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
|
||||
if (!g_use_deterministic_gpu_thread)
|
||||
CoreTiming::ScheduleEvent_Threadsafe(0, et_UpdateInterrupts, userdata);
|
||||
}
|
||||
|
||||
void SetCPStatusFromGPU()
|
||||
|
|
|
@ -16,6 +16,7 @@ namespace CommandProcessor
|
|||
{
|
||||
|
||||
extern SCPFifoStruct fifo; //This one is shared between gfx thread and emulator thread.
|
||||
|
||||
extern volatile bool isPossibleWaitingSetDrawDone; //This one is used for sync gfx thread and emulator thread.
|
||||
extern volatile bool interruptSet;
|
||||
extern volatile bool interruptWaiting;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
|
||||
extern u8* g_pVideoData;
|
||||
extern u8* g_video_buffer_read_ptr;
|
||||
|
||||
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
|
||||
#include <tmmintrin.h>
|
||||
|
@ -14,20 +14,20 @@ extern u8* g_pVideoData;
|
|||
|
||||
__forceinline void DataSkip(u32 skip)
|
||||
{
|
||||
g_pVideoData += skip;
|
||||
g_video_buffer_read_ptr += skip;
|
||||
}
|
||||
|
||||
// probably unnecessary
|
||||
template <int count>
|
||||
__forceinline void DataSkip()
|
||||
{
|
||||
g_pVideoData += count;
|
||||
g_video_buffer_read_ptr += count;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline T DataPeek(int _uOffset)
|
||||
__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
|
||||
{
|
||||
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(g_pVideoData + _uOffset));
|
||||
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(*bufp + _uOffset));
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -48,18 +48,18 @@ __forceinline u32 DataPeek32(int _uOffset)
|
|||
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline T DataRead()
|
||||
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
|
||||
{
|
||||
auto const result = DataPeek<T>(0);
|
||||
DataSkip<sizeof(T)>();
|
||||
auto const result = DataPeek<T>(0, bufp);
|
||||
*bufp += sizeof(T);
|
||||
return result;
|
||||
}
|
||||
|
||||
class DataReader
|
||||
{
|
||||
public:
|
||||
inline DataReader() : buffer(g_pVideoData), offset(0) {}
|
||||
inline ~DataReader() { g_pVideoData += offset; }
|
||||
inline DataReader() : buffer(g_video_buffer_read_ptr), offset(0) {}
|
||||
inline ~DataReader() { g_video_buffer_read_ptr += offset; }
|
||||
template <typename T> inline T Read()
|
||||
{
|
||||
const T result = Common::FromBigEndian(*(T*)(buffer + offset));
|
||||
|
@ -94,14 +94,14 @@ __forceinline u32 DataReadU32()
|
|||
|
||||
__forceinline u32 DataReadU32Unswapped()
|
||||
{
|
||||
u32 tmp = *(u32*)g_pVideoData;
|
||||
g_pVideoData += 4;
|
||||
u32 tmp = *(u32*)g_video_buffer_read_ptr;
|
||||
g_video_buffer_read_ptr += 4;
|
||||
return tmp;
|
||||
}
|
||||
|
||||
__forceinline u8* DataGetPosition()
|
||||
{
|
||||
return g_pVideoData;
|
||||
return g_video_buffer_read_ptr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -11,32 +11,63 @@
|
|||
#include "Core/ConfigManager.h"
|
||||
#include "Core/Core.h"
|
||||
#include "Core/CoreTiming.h"
|
||||
#include "Core/NetPlayProto.h"
|
||||
#include "Core/HW/Memmap.h"
|
||||
|
||||
#include "VideoCommon/CommandProcessor.h"
|
||||
#include "VideoCommon/CPMemory.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/Fifo.h"
|
||||
#include "VideoCommon/OpcodeDecoding.h"
|
||||
#include "VideoCommon/PixelEngine.h"
|
||||
#include "VideoCommon/VertexLoaderManager.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
|
||||
bool g_bSkipCurrentFrame = false;
|
||||
|
||||
namespace
|
||||
{
|
||||
static volatile bool GpuRunningState = false;
|
||||
static volatile bool EmuRunningState = false;
|
||||
static std::mutex m_csHWVidOccupied;
|
||||
|
||||
// Most of this array is unlikely to be faulted in...
|
||||
static u8 s_fifo_aux_data[FIFO_SIZE];
|
||||
static u8* s_fifo_aux_write_ptr;
|
||||
static u8* s_fifo_aux_read_ptr;
|
||||
|
||||
bool g_use_deterministic_gpu_thread;
|
||||
|
||||
// STATE_TO_SAVE
|
||||
static u8 *videoBuffer;
|
||||
static int size = 0;
|
||||
} // namespace
|
||||
static std::mutex s_video_buffer_lock;
|
||||
static std::condition_variable s_video_buffer_cond;
|
||||
static u8* s_video_buffer;
|
||||
u8* g_video_buffer_read_ptr;
|
||||
static std::atomic<u8*> s_video_buffer_write_ptr;
|
||||
static std::atomic<u8*> s_video_buffer_seen_ptr;
|
||||
u8* g_video_buffer_pp_read_ptr;
|
||||
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
// write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode,
// things get a bit more complicated:
// - The seen_ptr is written by the GPU thread and marks how far it has
//   processed the buffer, as far as it could get. If a partial command made
//   it stop, the seen_ptr is not the same as the read_ptr. The GPU thread
//   writes it under the lock and signals the condition variable.
// - The write_ptr is written by the CPU thread after it copies data from the
//   FIFO. Maybe someday it will be under the lock. For now, because RunGpuLoop
//   polls, it's just atomic.
// - The pp_read_ptr is the CPU preprocessing version of the read_ptr.
|
||||
|
||||
void Fifo_DoState(PointerWrap &p)
|
||||
{
|
||||
p.DoArray(videoBuffer, FIFO_SIZE);
|
||||
p.Do(size);
|
||||
p.DoPointer(g_pVideoData, videoBuffer);
|
||||
p.DoArray(s_video_buffer, FIFO_SIZE);
|
||||
u8* write_ptr = s_video_buffer_write_ptr;
|
||||
p.DoPointer(write_ptr, s_video_buffer);
|
||||
s_video_buffer_write_ptr = write_ptr;
|
||||
p.DoPointer(g_video_buffer_read_ptr, s_video_buffer);
|
||||
if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread)
|
||||
{
|
||||
// We're good and paused, right?
|
||||
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
|
||||
}
|
||||
p.Do(g_bSkipCurrentFrame);
|
||||
}
|
||||
|
||||
|
@ -44,6 +75,7 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
|||
{
|
||||
if (doLock)
|
||||
{
|
||||
SyncGPU(SYNC_GPU_OTHER);
|
||||
EmulatorState(false);
|
||||
if (!Core::IsGPUThread())
|
||||
m_csHWVidOccupied.lock();
|
||||
|
@ -61,8 +93,8 @@ void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock)
|
|||
|
||||
void Fifo_Init()
|
||||
{
|
||||
videoBuffer = (u8*)AllocateMemoryPages(FIFO_SIZE);
|
||||
size = 0;
|
||||
s_video_buffer = (u8*)AllocateMemoryPages(FIFO_SIZE);
|
||||
ResetVideoBuffer();
|
||||
GpuRunningState = false;
|
||||
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
||||
}
|
||||
|
@ -70,18 +102,24 @@ void Fifo_Init()
|
|||
void Fifo_Shutdown()
|
||||
{
|
||||
if (GpuRunningState) PanicAlert("Fifo shutting down while active");
|
||||
FreeMemoryPages(videoBuffer, FIFO_SIZE);
|
||||
videoBuffer = nullptr;
|
||||
FreeMemoryPages(s_video_buffer, FIFO_SIZE);
|
||||
s_video_buffer = nullptr;
|
||||
s_video_buffer_write_ptr = nullptr;
|
||||
g_video_buffer_pp_read_ptr = nullptr;
|
||||
g_video_buffer_read_ptr = nullptr;
|
||||
s_video_buffer_seen_ptr = nullptr;
|
||||
s_fifo_aux_write_ptr = nullptr;
|
||||
s_fifo_aux_read_ptr = nullptr;
|
||||
}
|
||||
|
||||
u8* GetVideoBufferStartPtr()
|
||||
{
|
||||
return videoBuffer;
|
||||
return s_video_buffer;
|
||||
}
|
||||
|
||||
u8* GetVideoBufferEndPtr()
|
||||
{
|
||||
return &videoBuffer[size];
|
||||
return s_video_buffer_write_ptr;
|
||||
}
|
||||
|
||||
void Fifo_SetRendering(bool enabled)
|
||||
|
@ -107,30 +145,123 @@ void EmulatorState(bool running)
|
|||
EmuRunningState = running;
|
||||
}
|
||||
|
||||
// In g_use_deterministic_gpu_thread mode, blocks the caller until the GPU
// thread has caught up with everything published in the video buffer (or the
// GPU loop has stopped), then compacts the auxiliary FIFO and, optionally, the
// video buffer itself. Returns immediately when deterministic mode is off or
// the GPU loop is not running.
//
// reason:            diagnostic tag; not read inside this function.
// may_move_read_ptr: when true, the unconsumed tail of s_video_buffer is moved
//                    to the start of the buffer and the read, pp_read, write
//                    and seen pointers are rebased accordingly.
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
{
	if (!g_use_deterministic_gpu_thread || !GpuRunningState)
		return;

	std::unique_lock<std::mutex> guard(s_video_buffer_lock);
	u8* published_end = s_video_buffer_write_ptr;

	// Sleep until the GPU thread reports having seen everything up to
	// published_end, or until the GPU loop is no longer running.
	s_video_buffer_cond.wait(guard, [&]() {
		if (!GpuRunningState)
			return true;
		return s_video_buffer_seen_ptr == published_end;
	});
	if (!GpuRunningState)
		return;

	// Opportunistically reset FIFOs so we don't wrap around.
	const bool aux_has_pending_data = s_fifo_aux_write_ptr != s_fifo_aux_read_ptr;
	if (may_move_read_ptr && aux_has_pending_data)
		PanicAlert("aux fifo not synced (%p, %p)", s_fifo_aux_write_ptr, s_fifo_aux_read_ptr);

	// Slide whatever is still pending in the aux FIFO down to its start.
	const size_t aux_pending_bytes = s_fifo_aux_write_ptr - s_fifo_aux_read_ptr;
	memmove(s_fifo_aux_data, s_fifo_aux_read_ptr, aux_pending_bytes);
	s_fifo_aux_write_ptr = s_fifo_aux_data + aux_pending_bytes;
	s_fifo_aux_read_ptr = s_fifo_aux_data;

	if (!may_move_read_ptr)
		return;

	// what's left over in the buffer
	const size_t leftover_bytes = published_end - g_video_buffer_pp_read_ptr;
	memmove(s_video_buffer, g_video_buffer_pp_read_ptr, leftover_bytes);

	// This change always decreases the pointers. We write seen_ptr
	// after write_ptr here, and read it before in RunGpuLoop, so
	// 'write_ptr > seen_ptr' there cannot become spuriously true.
	published_end = s_video_buffer + leftover_bytes;
	s_video_buffer_write_ptr = published_end;
	g_video_buffer_pp_read_ptr = s_video_buffer;
	g_video_buffer_read_ptr = s_video_buffer;
	s_video_buffer_seen_ptr = published_end;
}
|
||||
|
||||
void PushFifoAuxBuffer(void* ptr, size_t size)
|
||||
{
|
||||
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
|
||||
{
|
||||
SyncGPU(SYNC_GPU_AUX_SPACE, /* may_move_read_ptr */ false);
|
||||
if (size > (size_t) (s_fifo_aux_data + FIFO_SIZE - s_fifo_aux_write_ptr))
|
||||
{
|
||||
// That will sync us up to the last 32 bytes, so this short region
|
||||
// of FIFO would have to point to a 2MB display list or something.
|
||||
PanicAlert("absurdly large aux buffer");
|
||||
return;
|
||||
}
|
||||
}
|
||||
memcpy(s_fifo_aux_write_ptr, ptr, size);
|
||||
s_fifo_aux_write_ptr += size;
|
||||
}
|
||||
|
||||
void* PopFifoAuxBuffer(size_t size)
|
||||
{
|
||||
void* ret = s_fifo_aux_read_ptr;
|
||||
s_fifo_aux_read_ptr += size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Description: RunGpuLoop() sends data through this function.
|
||||
void ReadDataFromFifo(u8* _uData, u32 len)
|
||||
static void ReadDataFromFifo(u8* _uData, u32 len)
|
||||
{
|
||||
if (size + len >= FIFO_SIZE)
|
||||
if (len > (s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
|
||||
{
|
||||
int pos = (int)(g_pVideoData - videoBuffer);
|
||||
size -= pos;
|
||||
if (size + len > FIFO_SIZE)
|
||||
size_t size = s_video_buffer_write_ptr - g_video_buffer_read_ptr;
|
||||
if (len > FIFO_SIZE - size)
|
||||
{
|
||||
PanicAlert("FIFO out of bounds (size = %i, len = %i at %08x)", size, len, pos);
|
||||
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
|
||||
return;
|
||||
}
|
||||
memmove(&videoBuffer[0], &videoBuffer[pos], size);
|
||||
g_pVideoData = videoBuffer;
|
||||
memmove(s_video_buffer, g_video_buffer_read_ptr, size);
|
||||
s_video_buffer_write_ptr = s_video_buffer + size;
|
||||
g_video_buffer_read_ptr = s_video_buffer;
|
||||
}
|
||||
// Copy new video instructions to videoBuffer for future use in rendering the new picture
|
||||
memcpy(videoBuffer + size, _uData, len);
|
||||
size += len;
|
||||
// Copy new video instructions to s_video_buffer for future use in rendering the new picture
|
||||
memcpy(s_video_buffer_write_ptr, _uData, len);
|
||||
s_video_buffer_write_ptr += len;
|
||||
}
|
||||
|
||||
// The deterministic_gpu_thread version.
|
||||
static void ReadDataFromFifoOnCPU(u8* _uData, u32 len)
|
||||
{
|
||||
u8 *write_ptr = s_video_buffer_write_ptr;
|
||||
if (len > (s_video_buffer + FIFO_SIZE - write_ptr))
|
||||
{
|
||||
// We can't wrap around while the GPU is working on the data.
|
||||
// This should be very rare due to the reset in SyncGPU.
|
||||
SyncGPU(SYNC_GPU_WRAPAROUND);
|
||||
if (g_video_buffer_pp_read_ptr != g_video_buffer_read_ptr)
|
||||
{
|
||||
PanicAlert("desynced read pointers");
|
||||
return;
|
||||
}
|
||||
write_ptr = s_video_buffer_write_ptr;
|
||||
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
|
||||
if (len > FIFO_SIZE - size)
|
||||
{
|
||||
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) size, (unsigned long) len, (unsigned long) FIFO_SIZE);
|
||||
return;
|
||||
}
|
||||
}
|
||||
memcpy(write_ptr, _uData, len);
|
||||
OpcodeDecoder_Preprocess(write_ptr + len);
|
||||
// This would have to be locked if the GPU thread didn't spin.
|
||||
s_video_buffer_write_ptr = write_ptr + len;
|
||||
}
|
||||
|
||||
void ResetVideoBuffer()
|
||||
{
|
||||
g_pVideoData = videoBuffer;
|
||||
size = 0;
|
||||
g_video_buffer_read_ptr = s_video_buffer;
|
||||
s_video_buffer_write_ptr = s_video_buffer;
|
||||
s_video_buffer_seen_ptr = s_video_buffer;
|
||||
g_video_buffer_pp_read_ptr = s_video_buffer;
|
||||
s_fifo_aux_write_ptr = s_fifo_aux_data;
|
||||
s_fifo_aux_read_ptr = s_fifo_aux_data;
|
||||
}
|
||||
|
||||
|
||||
|
@ -148,53 +279,75 @@ void RunGpuLoop()
|
|||
g_video_backend->PeekMessages();
|
||||
|
||||
VideoFifo_CheckAsyncRequest();
|
||||
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
||||
|
||||
// check if we are able to run this buffer
|
||||
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
{
|
||||
fifo.isGpuReadingData = true;
|
||||
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
|
||||
|
||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
|
||||
// All the fifo/CP stuff is on the CPU. We just need to run the opcode decoder.
|
||||
u8* seen_ptr = s_video_buffer_seen_ptr;
|
||||
u8* write_ptr = s_video_buffer_write_ptr;
|
||||
// See comment in SyncGPU
|
||||
if (write_ptr > seen_ptr)
|
||||
{
|
||||
u32 readPtr = fifo.CPReadPointer;
|
||||
u8 *uData = Memory::GetPointer(readPtr);
|
||||
OpcodeDecoder_Run(write_ptr);
|
||||
|
||||
if (readPtr == fifo.CPEnd)
|
||||
readPtr = fifo.CPBase;
|
||||
else
|
||||
readPtr += 32;
|
||||
|
||||
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
|
||||
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
|
||||
|
||||
ReadDataFromFifo(uData, 32);
|
||||
|
||||
cyclesExecuted = OpcodeDecoder_Run(GetVideoBufferEndPtr());
|
||||
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
|
||||
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
|
||||
|
||||
Common::AtomicStore(fifo.CPReadPointer, readPtr);
|
||||
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
|
||||
if ((GetVideoBufferEndPtr() - g_pVideoData) == 0)
|
||||
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
|
||||
{
|
||||
std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
|
||||
s_video_buffer_seen_ptr = write_ptr;
|
||||
s_video_buffer_cond.notify_all();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
|
||||
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
|
||||
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
|
||||
VideoFifo_CheckAsyncRequest();
|
||||
CommandProcessor::isPossibleWaitingSetDrawDone = false;
|
||||
}
|
||||
Common::AtomicStore(CommandProcessor::VITicks, CommandProcessor::m_cpClockOrigin);
|
||||
|
||||
fifo.isGpuReadingData = false;
|
||||
// check if we are able to run this buffer
|
||||
while (GpuRunningState && EmuRunningState && !CommandProcessor::interruptWaiting && fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint())
|
||||
{
|
||||
fifo.isGpuReadingData = true;
|
||||
CommandProcessor::isPossibleWaitingSetDrawDone = fifo.bFF_GPLinkEnable ? true : false;
|
||||
|
||||
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU || Common::AtomicLoad(CommandProcessor::VITicks) > CommandProcessor::m_cpClockOrigin)
|
||||
{
|
||||
u32 readPtr = fifo.CPReadPointer;
|
||||
u8 *uData = Memory::GetPointer(readPtr);
|
||||
|
||||
if (readPtr == fifo.CPEnd)
|
||||
readPtr = fifo.CPBase;
|
||||
else
|
||||
readPtr += 32;
|
||||
|
||||
_assert_msg_(COMMANDPROCESSOR, (s32)fifo.CPReadWriteDistance - 32 >= 0 ,
|
||||
"Negative fifo.CPReadWriteDistance = %i in FIFO Loop !\nThat can produce instability in the game. Please report it.", fifo.CPReadWriteDistance - 32);
|
||||
|
||||
ReadDataFromFifo(uData, 32);
|
||||
|
||||
u8* write_ptr = s_video_buffer_write_ptr;
|
||||
|
||||
cyclesExecuted = OpcodeDecoder_Run(write_ptr);
|
||||
|
||||
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
|
||||
Common::AtomicAdd(CommandProcessor::VITicks, -(s32)cyclesExecuted);
|
||||
|
||||
Common::AtomicStore(fifo.CPReadPointer, readPtr);
|
||||
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
|
||||
if ((write_ptr - g_video_buffer_read_ptr) == 0)
|
||||
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
|
||||
}
|
||||
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
|
||||
// This call is pretty important in DualCore mode and must be called in the FIFO Loop.
|
||||
// If we don't, s_swapRequested or s_efbAccessRequested won't be set to false
|
||||
// leading the CPU thread to wait in Video_BeginField or Video_AccessEFB thus slowing things down.
|
||||
VideoFifo_CheckAsyncRequest();
|
||||
CommandProcessor::isPossibleWaitingSetDrawDone = false;
|
||||
}
|
||||
|
||||
fifo.isGpuReadingData = false;
|
||||
}
|
||||
|
||||
if (EmuRunningState)
|
||||
{
|
||||
|
@ -217,6 +370,8 @@ void RunGpuLoop()
|
|||
}
|
||||
}
|
||||
}
|
||||
// wake up SyncGPU if we were interrupted
|
||||
s_video_buffer_cond.notify_all();
|
||||
}
|
||||
|
||||
|
||||
|
@ -228,16 +383,27 @@ bool AtBreakpoint()
|
|||
|
||||
void RunGpu()
|
||||
{
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread &&
|
||||
!g_use_deterministic_gpu_thread)
|
||||
return;
|
||||
|
||||
SCPFifoStruct &fifo = CommandProcessor::fifo;
|
||||
while (fifo.bFF_GPReadEnable && fifo.CPReadWriteDistance && !AtBreakpoint() )
|
||||
{
|
||||
u8 *uData = Memory::GetPointer(fifo.CPReadPointer);
|
||||
|
||||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
ReadDataFromFifo(uData, 32);
|
||||
OpcodeDecoder_Run(GetVideoBufferEndPtr());
|
||||
FPURoundMode::LoadSIMDState();
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
{
|
||||
ReadDataFromFifoOnCPU(uData, 32);
|
||||
}
|
||||
else
|
||||
{
|
||||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
ReadDataFromFifo(uData, 32);
|
||||
OpcodeDecoder_Run(s_video_buffer_write_ptr);
|
||||
FPURoundMode::LoadSIMDState();
|
||||
}
|
||||
|
||||
//DEBUG_LOG(COMMANDPROCESSOR, "Fifo wraps to base");
|
||||
|
||||
|
@ -250,3 +416,45 @@ void RunGpu()
|
|||
}
|
||||
CommandProcessor::SetCPStatusFromGPU();
|
||||
}
|
||||
|
||||
void Fifo_UpdateWantDeterminism(bool want)
|
||||
{
|
||||
// We are paused (or not running at all yet) and have m_csHWVidOccupied, so
|
||||
// it should be safe to change this.
|
||||
const SCoreStartupParameter& param = SConfig::GetInstance().m_LocalCoreStartupParameter;
|
||||
bool gpu_thread;
|
||||
switch (param.m_GPUDeterminismMode)
|
||||
{
|
||||
case GPU_DETERMINISM_AUTO:
|
||||
gpu_thread = want;
|
||||
|
||||
// Hack: For now movies are an exception to this being on (but not
|
||||
// to wanting determinism in general). Once vertex arrays are
|
||||
// fixed, there should be no reason to want this off for movies by
|
||||
// default, so this can be removed.
|
||||
if (!NetPlay::IsNetPlayRunning())
|
||||
gpu_thread = false;
|
||||
|
||||
break;
|
||||
case GPU_DETERMINISM_NONE:
|
||||
gpu_thread = false;
|
||||
break;
|
||||
case GPU_DETERMINISM_FAKE_COMPLETION:
|
||||
gpu_thread = true;
|
||||
break;
|
||||
}
|
||||
|
||||
gpu_thread = gpu_thread && SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread;
|
||||
|
||||
if (g_use_deterministic_gpu_thread != gpu_thread)
|
||||
{
|
||||
g_use_deterministic_gpu_thread = gpu_thread;
|
||||
if (gpu_thread)
|
||||
{
|
||||
// These haven't been updated in non-deterministic mode.
|
||||
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
|
||||
CopyPreprocessCPStateFromMain();
|
||||
VertexLoaderManager::MarkAllDirty();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,6 +13,11 @@ class PointerWrap;
|
|||
|
||||
extern bool g_bSkipCurrentFrame;
|
||||
|
||||
// This could be in SCoreStartupParameter, but it depends on multiple settings
|
||||
// and can change at runtime.
|
||||
extern bool g_use_deterministic_gpu_thread;
|
||||
extern std::atomic<u8*> g_video_buffer_write_ptr_xthread;
|
||||
extern u8* g_video_buffer_pp_read_ptr;
|
||||
|
||||
void Fifo_Init();
|
||||
void Fifo_Shutdown();
|
||||
|
@ -22,8 +27,23 @@ u8* GetVideoBufferEndPtr();
|
|||
|
||||
void Fifo_DoState(PointerWrap &f);
|
||||
void Fifo_PauseAndLock(bool doLock, bool unpauseOnUnlock);
|
||||
void Fifo_UpdateWantDeterminism(bool want);
|
||||
|
||||
void ReadDataFromFifo(u8* _uData, u32 len);
|
||||
// Tags passed to SyncGPU() to say why the sync was requested.
// Used for diagnostics.
enum SyncGPUReason
{
	SYNC_GPU_NONE,
	SYNC_GPU_OTHER,
	SYNC_GPU_WRAPAROUND,
	SYNC_GPU_EFB_POKE,
	SYNC_GPU_PERFQUERY,
	SYNC_GPU_SWAP,
	SYNC_GPU_AUX_SPACE,
};
|
||||
// In g_use_deterministic_gpu_thread mode, waits for the GPU to be done with pending work.
|
||||
void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr = true);
|
||||
|
||||
void PushFifoAuxBuffer(void* ptr, size_t size);
|
||||
void* PopFifoAuxBuffer(size_t size);
|
||||
|
||||
void RunGpu();
|
||||
void RunGpuLoop();
|
||||
|
|
|
@ -118,6 +118,7 @@ void VideoBackendHardware::Video_EndField()
|
|||
{
|
||||
if (s_BackendInitialized)
|
||||
{
|
||||
SyncGPU(SYNC_GPU_SWAP);
|
||||
s_swapRequested.Set();
|
||||
}
|
||||
}
|
||||
|
@ -153,6 +154,8 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32
|
|||
{
|
||||
if (s_BackendInitialized && g_ActiveConfig.bEFBAccessEnable)
|
||||
{
|
||||
SyncGPU(SYNC_GPU_EFB_POKE);
|
||||
|
||||
s_accessEFBArgs.type = type;
|
||||
s_accessEFBArgs.x = x;
|
||||
s_accessEFBArgs.y = y;
|
||||
|
@ -194,6 +197,8 @@ u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type)
|
|||
return 0;
|
||||
}
|
||||
|
||||
SyncGPU(SYNC_GPU_PERFQUERY);
|
||||
|
||||
// TODO: Is this check sane?
|
||||
if (!g_perf_query->IsFlushed())
|
||||
{
|
||||
|
@ -304,3 +309,8 @@ void VideoBackendHardware::RegisterCPMMIO(MMIO::Mapping* mmio, u32 base)
|
|||
CommandProcessor::RegisterMMIO(mmio, base);
|
||||
}
|
||||
|
||||
void VideoBackendHardware::UpdateWantDeterminism(bool want)
|
||||
{
|
||||
Fifo_UpdateWantDeterminism(want);
|
||||
}
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/Hash.h"
|
||||
|
||||
// m_components
|
||||
enum
|
||||
|
@ -87,6 +87,20 @@ struct PortableVertexDeclaration
|
|||
}
|
||||
};
|
||||
|
||||
namespace std
|
||||
{
|
||||
|
||||
template <>
|
||||
struct hash<PortableVertexDeclaration>
|
||||
{
|
||||
size_t operator()(const PortableVertexDeclaration& decl) const
|
||||
{
|
||||
return HashFletcher((u8 *) &decl, sizeof(decl));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// The implementation of this class is specific for GL/DX, so NativeVertexFormat.cpp
|
||||
// is in the respective backend, not here in VideoCommon.
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/Fifo.h"
|
||||
#include "VideoCommon/OpcodeDecoding.h"
|
||||
#include "VideoCommon/PixelEngine.h"
|
||||
#include "VideoCommon/Statistics.h"
|
||||
#include "VideoCommon/VertexLoaderManager.h"
|
||||
#include "VideoCommon/VideoCommon.h"
|
||||
|
@ -31,25 +32,29 @@
|
|||
#include "VideoCommon/XFMemory.h"
|
||||
|
||||
|
||||
u8* g_pVideoData = nullptr;
|
||||
bool g_bRecordFifoData = false;
|
||||
|
||||
static u32 InterpretDisplayList(u32 address, u32 size)
|
||||
{
|
||||
u8* old_pVideoData = g_pVideoData;
|
||||
u8* startAddress = Memory::GetPointer(address);
|
||||
u8* old_pVideoData = g_video_buffer_read_ptr;
|
||||
u8* startAddress;
|
||||
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
startAddress = (u8*) PopFifoAuxBuffer(size);
|
||||
else
|
||||
startAddress = Memory::GetPointer(address);
|
||||
|
||||
u32 cycles = 0;
|
||||
|
||||
// Avoid the crash if Memory::GetPointer failed ..
|
||||
if (startAddress != nullptr)
|
||||
{
|
||||
g_pVideoData = startAddress;
|
||||
g_video_buffer_read_ptr = startAddress;
|
||||
|
||||
// temporarily swap dl and non-dl (small "hack" for the stats)
|
||||
Statistics::SwapDL();
|
||||
|
||||
u8 *end = g_pVideoData + size;
|
||||
u8 *end = g_video_buffer_read_ptr + size;
|
||||
cycles = OpcodeDecoder_Run(end);
|
||||
INCSTAT(stats.thisFrame.numDListsCalled);
|
||||
|
||||
|
@ -58,16 +63,34 @@ static u32 InterpretDisplayList(u32 address, u32 size)
|
|||
}
|
||||
|
||||
// reset to the old pointer
|
||||
g_pVideoData = old_pVideoData;
|
||||
g_video_buffer_read_ptr = old_pVideoData;
|
||||
|
||||
return cycles;
|
||||
}
|
||||
|
||||
// Preprocess-side handling of a display-list call: pushes the `size` bytes at
// emulated `address` onto the aux FIFO, then runs the opcode preprocessor over
// that memory with g_video_buffer_pp_read_ptr temporarily redirected to it.
// The previous pp read pointer is restored before returning.
// NOTE(review): the aux push happens before the null check on the resolved
// pointer - confirm a failed Memory::GetPointer cannot reach the copy.
static void InterpretDisplayListPreprocess(u32 address, u32 size)
{
	u8* const saved_pp_read_ptr = g_video_buffer_pp_read_ptr;
	u8* const list_begin = Memory::GetPointer(address);

	PushFifoAuxBuffer(list_begin, size);

	if (list_begin != nullptr)
	{
		g_video_buffer_pp_read_ptr = list_begin;
		OpcodeDecoder_Preprocess(list_begin + size);
	}

	g_video_buffer_pp_read_ptr = saved_pp_read_ptr;
}
|
||||
|
||||
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
||||
{
|
||||
// TODO(Omega): Maybe dump FIFO to file on this error
|
||||
std::string temp = StringFromFormat(
|
||||
"GFX FIFO: Unknown Opcode (0x%x @ %p).\n"
|
||||
"GFX FIFO: Unknown Opcode (0x%x @ %p, preprocessing=%s).\n"
|
||||
"This means one of the following:\n"
|
||||
"* The emulated GPU got desynced, disabling dual core can help\n"
|
||||
"* Command stream corrupted by some spurious memory bug\n"
|
||||
|
@ -75,7 +98,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
|||
"* Some other sort of bug\n\n"
|
||||
"Dolphin will now likely crash or hang. Enjoy." ,
|
||||
cmd_byte,
|
||||
buffer);
|
||||
buffer,
|
||||
preprocess ? "yes" : "no");
|
||||
Host_SysMessage(temp.c_str());
|
||||
INFO_LOG(VIDEO, "%s", temp.c_str());
|
||||
{
|
||||
|
@ -105,14 +129,16 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
|||
}
|
||||
}
|
||||
|
||||
template <bool is_preprocess, u8** bufp>
|
||||
static u32 Decode(u8* end)
|
||||
{
|
||||
u8 *opcodeStart = g_pVideoData;
|
||||
if (g_pVideoData == end)
|
||||
u8 *opcodeStart = *bufp;
|
||||
if (*bufp == end)
|
||||
return 0;
|
||||
|
||||
u8 cmd_byte = DataReadU8();
|
||||
u8 cmd_byte = DataRead<u8>(bufp);
|
||||
u32 cycles;
|
||||
int refarray;
|
||||
switch (cmd_byte)
|
||||
{
|
||||
case GX_NOP:
|
||||
|
@ -121,64 +147,72 @@ static u32 Decode(u8* end)
|
|||
|
||||
case GX_LOAD_CP_REG: //0x08
|
||||
{
|
||||
if (end - g_pVideoData < 1 + 4)
|
||||
if (end - *bufp < 1 + 4)
|
||||
return 0;
|
||||
cycles = 12;
|
||||
u8 sub_cmd = DataReadU8();
|
||||
u32 value = DataReadU32();
|
||||
LoadCPReg(sub_cmd, value);
|
||||
INCSTAT(stats.thisFrame.numCPLoads);
|
||||
u8 sub_cmd = DataRead<u8>(bufp);
|
||||
u32 value = DataRead<u32>(bufp);
|
||||
LoadCPReg(sub_cmd, value, is_preprocess);
|
||||
if (!is_preprocess)
|
||||
INCSTAT(stats.thisFrame.numCPLoads);
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_LOAD_XF_REG:
|
||||
{
|
||||
if (end - g_pVideoData < 4)
|
||||
if (end - *bufp < 4)
|
||||
return 0;
|
||||
u32 Cmd2 = DataReadU32();
|
||||
u32 Cmd2 = DataRead<u32>(bufp);
|
||||
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||
if ((size_t) (end - g_pVideoData) < transfer_size * sizeof(u32))
|
||||
if ((size_t) (end - *bufp) < transfer_size * sizeof(u32))
|
||||
return 0;
|
||||
cycles = 18 + 6 * transfer_size;
|
||||
u32 xf_address = Cmd2 & 0xFFFF;
|
||||
LoadXFReg(transfer_size, xf_address);
|
||||
if (!is_preprocess)
|
||||
{
|
||||
u32 xf_address = Cmd2 & 0xFFFF;
|
||||
LoadXFReg(transfer_size, xf_address);
|
||||
|
||||
INCSTAT(stats.thisFrame.numXFLoads);
|
||||
INCSTAT(stats.thisFrame.numXFLoads);
|
||||
}
|
||||
else
|
||||
{
|
||||
*bufp += transfer_size * sizeof(u32);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_LOAD_INDX_A: //used for position matrices
|
||||
if (end - g_pVideoData < 4)
|
||||
return 0;
|
||||
cycles = 6;
|
||||
LoadIndexedXF(DataReadU32(), 0xC);
|
||||
break;
|
||||
refarray = 0xC;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_B: //used for normal matrices
|
||||
if (end - g_pVideoData < 4)
|
||||
return 0;
|
||||
cycles = 6;
|
||||
LoadIndexedXF(DataReadU32(), 0xD);
|
||||
break;
|
||||
refarray = 0xD;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_C: //used for postmatrices
|
||||
if (end - g_pVideoData < 4)
|
||||
return 0;
|
||||
cycles = 6;
|
||||
LoadIndexedXF(DataReadU32(), 0xE);
|
||||
break;
|
||||
refarray = 0xE;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_D: //used for lights
|
||||
if (end - g_pVideoData < 4)
|
||||
refarray = 0xF;
|
||||
goto load_indx;
|
||||
load_indx:
|
||||
if (end - *bufp < 4)
|
||||
return 0;
|
||||
cycles = 6;
|
||||
LoadIndexedXF(DataReadU32(), 0xF);
|
||||
if (is_preprocess)
|
||||
PreprocessIndexedXF(DataRead<u32>(bufp), refarray);
|
||||
else
|
||||
LoadIndexedXF(DataRead<u32>(bufp), refarray);
|
||||
break;
|
||||
|
||||
case GX_CMD_CALL_DL:
|
||||
{
|
||||
if (end - g_pVideoData < 8)
|
||||
if (end - *bufp < 8)
|
||||
return 0;
|
||||
u32 address = DataReadU32();
|
||||
u32 count = DataReadU32();
|
||||
cycles = 6 + InterpretDisplayList(address, count);
|
||||
u32 address = DataRead<u32>(bufp);
|
||||
u32 count = DataRead<u32>(bufp);
|
||||
if (is_preprocess)
|
||||
InterpretDisplayListPreprocess(address, count);
|
||||
else
|
||||
cycles = 6 + InterpretDisplayList(address, count);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -196,12 +230,19 @@ static u32 Decode(u8* end)
|
|||
// In skipped_frame case: We have to let BP writes through because they set
|
||||
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
|
||||
{
|
||||
if (end - g_pVideoData < 4)
|
||||
if (end - *bufp < 4)
|
||||
return 0;
|
||||
cycles = 12;
|
||||
u32 bp_cmd = DataReadU32();
|
||||
LoadBPReg(bp_cmd);
|
||||
INCSTAT(stats.thisFrame.numBPLoads);
|
||||
u32 bp_cmd = DataRead<u32>(bufp);
|
||||
if (is_preprocess)
|
||||
{
|
||||
LoadBPRegPreprocess(bp_cmd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadBPReg(bp_cmd);
|
||||
INCSTAT(stats.thisFrame.numBPLoads);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -211,38 +252,48 @@ static u32 Decode(u8* end)
|
|||
{
|
||||
cycles = 1600;
|
||||
// load vertices
|
||||
if (end - g_pVideoData < 2)
|
||||
if (end - *bufp < 2)
|
||||
return 0;
|
||||
u16 numVertices = DataReadU16();
|
||||
u16 num_vertices = DataRead<u16>(bufp);
|
||||
|
||||
if (!VertexLoaderManager::RunVertices(
|
||||
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||
numVertices,
|
||||
end - g_pVideoData,
|
||||
g_bSkipCurrentFrame))
|
||||
if (is_preprocess)
|
||||
{
|
||||
return 0;
|
||||
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
|
||||
if ((size_t) (end - *bufp) < size)
|
||||
return 0;
|
||||
*bufp += size;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!VertexLoaderManager::RunVertices(
|
||||
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||
num_vertices,
|
||||
end - *bufp,
|
||||
g_bSkipCurrentFrame))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UnknownOpcode(cmd_byte, opcodeStart, false);
|
||||
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
|
||||
cycles = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Display lists get added directly into the FIFO stream
|
||||
if (g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
||||
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(g_pVideoData - opcodeStart));
|
||||
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
||||
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart));
|
||||
|
||||
return cycles;
|
||||
// In is_preprocess mode, we don't actually care about cycles, at least for
|
||||
// now... make sure the compiler realizes that.
|
||||
return is_preprocess ? 1 : cycles;
|
||||
}
|
||||
|
||||
// Resets the opcode decoder's read pointer to the start of the video buffer
// (the value returned by GetVideoBufferStartPtr()).
// NOTE(review): this text is a rendered diff without +/- markers; the
// g_pVideoData row and the g_video_buffer_read_ptr row are presumably the
// pre- and post-change versions of the same statement -- confirm against the
// commit before treating both as live code.
void OpcodeDecoder_Init()
|
||||
{
|
||||
g_pVideoData = GetVideoBufferStartPtr();
|
||||
g_video_buffer_read_ptr = GetVideoBufferStartPtr();
|
||||
}
|
||||
|
||||
|
||||
|
@ -255,14 +306,28 @@ u32 OpcodeDecoder_Run(u8* end)
|
|||
u32 totalCycles = 0;
|
||||
while (true)
|
||||
{
|
||||
u8* old = g_pVideoData;
|
||||
u32 cycles = Decode(end);
|
||||
u8* old = g_video_buffer_read_ptr;
|
||||
u32 cycles = Decode</*is_preprocess*/ false, &g_video_buffer_read_ptr>(end);
|
||||
if (cycles == 0)
|
||||
{
|
||||
g_pVideoData = old;
|
||||
g_video_buffer_read_ptr = old;
|
||||
break;
|
||||
}
|
||||
totalCycles += cycles;
|
||||
}
|
||||
return totalCycles;
|
||||
}
|
||||
|
||||
// Runs Decode in preprocess mode over the data between
// g_video_buffer_pp_read_ptr and `end`, one command per iteration, until
// Decode reports 0.
//
// end: one-past-the-last byte available to the preprocess pass; it is passed
//      straight through to Decode.
//
// Decode advances g_video_buffer_pp_read_ptr as it consumes a command. When it
// returns 0 the pointer is put back to where that command started and the
// loop stops. The returned cycle count is used only for the zero test.
void OpcodeDecoder_Preprocess(u8 *end)
|
||||
{
|
||||
while (true)
|
||||
{
|
||||
// Remember where this command begins so it can be rewound below.
u8* old = g_video_buffer_pp_read_ptr;
|
||||
u32 cycles = Decode</*is_preprocess*/ true, &g_video_buffer_pp_read_ptr>(end);
|
||||
if (cycles == 0)
|
||||
{
|
||||
// Decode may already have moved the pointer; undo that before leaving.
g_video_buffer_pp_read_ptr = old;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,3 +39,4 @@ extern bool g_bRecordFifoData;
|
|||
void OpcodeDecoder_Init();
|
||||
void OpcodeDecoder_Shutdown();
|
||||
u32 OpcodeDecoder_Run(u8* end);
|
||||
void OpcodeDecoder_Preprocess(u8* write_ptr);
|
||||
|
|
|
@ -33,14 +33,11 @@
|
|||
|
||||
// Matrix components are first in GC format but later in PC format - we need to store it temporarily
|
||||
// when decoding each vertex.
|
||||
static u8 s_curposmtx = MatrixIndexA.PosNormalMtxIdx;
|
||||
static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
|
||||
static u8 s_curtexmtx[8];
|
||||
static int s_texmtxwrite = 0;
|
||||
static int s_texmtxread = 0;
|
||||
|
||||
static int loop_counter;
|
||||
|
||||
|
||||
// Vertex loaders read these. Although the scale ones should be baked into the shader.
|
||||
int tcIndex;
|
||||
int colIndex;
|
||||
|
@ -90,7 +87,7 @@ static void LOADERDECL PosMtx_Write()
|
|||
DataWrite<u8>(0);
|
||||
|
||||
// Resetting current position matrix to default is needed for bbox to behave
|
||||
s_curposmtx = (u8) MatrixIndexA.PosNormalMtxIdx;
|
||||
s_curposmtx = (u8) g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
|
||||
}
|
||||
|
||||
static void LOADERDECL UpdateBoundingBoxPrepare()
|
||||
|
@ -548,7 +545,7 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
|||
m_compiledCode = nullptr;
|
||||
m_numLoadedVertices = 0;
|
||||
m_VertexSize = 0;
|
||||
loop_counter = 0;
|
||||
m_native_vertex_format = nullptr;
|
||||
VertexLoader_Normal::Init();
|
||||
VertexLoader_Position::Init();
|
||||
VertexLoader_TextCoord::Init();
|
||||
|
@ -584,8 +581,11 @@ void VertexLoader::CompileVertexTranslator()
|
|||
PanicAlert("Trying to recompile a vertex translator");
|
||||
|
||||
m_compiledCode = GetCodePtr();
|
||||
// We don't use any callee saved registers or anything but RAX.
|
||||
ABI_PushRegistersAndAdjustStack(0, 8);
|
||||
// We only use RAX (caller saved) and RBX (callee saved).
|
||||
ABI_PushRegistersAndAdjustStack(1 << RBX, 8);
|
||||
|
||||
// save count
|
||||
MOV(64, R(RBX), R(ABI_PARAM1));
|
||||
|
||||
// Start loop here
|
||||
const u8 *loop_start = GetCodePtr();
|
||||
|
@ -842,11 +842,10 @@ void VertexLoader::CompileVertexTranslator()
|
|||
|
||||
#ifdef USE_VERTEX_LOADER_JIT
|
||||
// End loop here
|
||||
MOV(64, R(RAX), Imm64((u64)&loop_counter));
|
||||
SUB(32, MatR(RAX), Imm8(1));
|
||||
SUB(64, R(RBX), Imm8(1));
|
||||
|
||||
J_CC(CC_NZ, loop_start);
|
||||
ABI_PopRegistersAndAdjustStack(0, 8);
|
||||
ABI_PopRegistersAndAdjustStack(1 << RBX, 8);
|
||||
RET();
|
||||
#endif
|
||||
}
|
||||
|
@ -912,8 +911,7 @@ void VertexLoader::ConvertVertices ( int count )
|
|||
#ifdef USE_VERTEX_LOADER_JIT
|
||||
if (count > 0)
|
||||
{
|
||||
loop_counter = count;
|
||||
((void (*)())(void*)m_compiledCode)();
|
||||
((void (*)(int))(void*)m_compiledCode)(count);
|
||||
}
|
||||
#else
|
||||
for (int s = 0; s < count; s++)
|
||||
|
@ -1035,3 +1033,22 @@ void VertexLoader::AppendToString(std::string *dest) const
|
|||
}
|
||||
dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices));
|
||||
}
|
||||
|
||||
// Returns the NativeVertexFormat for this loader's vertex declaration,
// creating it on first use.
//
// Two levels of caching are visible here:
//  - m_native_vertex_format: per-loader pointer, returned directly once set.
//  - s_native_vertex_map: static map keyed by m_native_vtx_decl that owns the
//    formats through std::unique_ptr.
//
// The returned pointer is non-owning; the map entry keeps ownership.
NativeVertexFormat* VertexLoader::GetNativeVertexFormat()
|
||||
{
|
||||
// Fast path: this loader has already resolved its format.
if (m_native_vertex_format)
|
||||
return m_native_vertex_format;
|
||||
// operator[] default-inserts an empty unique_ptr when the declaration is new.
auto& native = s_native_vertex_map[m_native_vtx_decl];
|
||||
if (!native)
|
||||
{
|
||||
// The raw pointer from the vertex manager is handed to the map's unique_ptr.
auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat();
|
||||
native = std::unique_ptr<NativeVertexFormat>(raw_pointer);
|
||||
native->Initialize(m_native_vtx_decl);
|
||||
native->m_components = m_native_components;
|
||||
}
|
||||
m_native_vertex_format = native.get();
|
||||
return native.get();
|
||||
|
||||
}
|
||||
|
||||
std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> VertexLoader::s_native_vertex_map;
|
||||
|
|
|
@ -8,7 +8,9 @@
|
|||
// Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
@ -114,6 +116,9 @@ public:
|
|||
void AppendToString(std::string *dest) const;
|
||||
int GetNumLoadedVerts() const { return m_numLoadedVertices; }
|
||||
|
||||
NativeVertexFormat* GetNativeVertexFormat();
|
||||
static void ClearNativeVertexFormatCache() { s_native_vertex_map.clear(); }
|
||||
|
||||
private:
|
||||
int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator.
|
||||
|
||||
|
@ -135,6 +140,9 @@ private:
|
|||
|
||||
int m_numLoadedVertices;
|
||||
|
||||
NativeVertexFormat* m_native_vertex_format;
|
||||
static std::unordered_map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> s_native_vertex_map;
|
||||
|
||||
void SetVAT(const VAT& vat);
|
||||
|
||||
void CompileVertexTranslator();
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
@ -20,13 +21,8 @@
|
|||
#include "VideoCommon/VertexShaderManager.h"
|
||||
#include "VideoCommon/VideoCommon.h"
|
||||
|
||||
static int s_attr_dirty; // bitfield
|
||||
|
||||
static NativeVertexFormat* s_current_vtx_fmt;
|
||||
|
||||
typedef std::pair<VertexLoader*, NativeVertexFormat*> VertexLoaderCacheItem;
|
||||
static VertexLoaderCacheItem s_VertexLoaders[8];
|
||||
|
||||
namespace std
|
||||
{
|
||||
|
||||
|
@ -41,35 +37,30 @@ struct hash<VertexLoaderUID>
|
|||
|
||||
}
|
||||
|
||||
typedef std::unordered_map<VertexLoaderUID, VertexLoaderCacheItem> VertexLoaderMap;
|
||||
typedef std::map<PortableVertexDeclaration, std::unique_ptr<NativeVertexFormat>> NativeVertexLoaderMap;
|
||||
typedef std::unordered_map<VertexLoaderUID, std::unique_ptr<VertexLoader>> VertexLoaderMap;
|
||||
|
||||
namespace VertexLoaderManager
|
||||
{
|
||||
|
||||
static VertexLoaderMap s_VertexLoaderMap;
|
||||
static NativeVertexLoaderMap s_native_vertex_map;
|
||||
static std::mutex s_vertex_loader_map_lock;
|
||||
static VertexLoaderMap s_vertex_loader_map;
|
||||
// TODO - change into array of pointers. Keep a map of all seen so far.
|
||||
|
||||
// Initialises the vertex loader manager: marks every attribute group dirty,
// nulls the cached per-group loader entries, and recomputes the cached array
// base pointers.
// NOTE(review): rendered diff without +/- markers. The loop over
// s_VertexLoaders and the two loops over g_main_cp_state.vertex_loaders /
// g_preprocess_cp_state.vertex_loaders look like the old and new forms of the
// same reset -- confirm which rows are current.
void Init()
|
||||
{
|
||||
MarkAllDirty();
|
||||
for (auto& map_entry : s_VertexLoaders)
|
||||
{
|
||||
map_entry.first = nullptr;
|
||||
map_entry.second = nullptr;
|
||||
}
|
||||
for (auto& map_entry : g_main_cp_state.vertex_loaders)
|
||||
map_entry = nullptr;
|
||||
for (auto& map_entry : g_preprocess_cp_state.vertex_loaders)
|
||||
map_entry = nullptr;
|
||||
RecomputeCachedArraybases();
|
||||
}
|
||||
|
||||
// Releases every cached vertex loader and the native vertex format cache.
// NOTE(review): rendered diff without +/- markers. The rows touching
// s_VertexLoaderMap / s_native_vertex_map (manual delete + clear) and the rows
// that lock s_vertex_loader_map_lock, clear s_vertex_loader_map and call
// VertexLoader::ClearNativeVertexFormatCache() appear to be the old and new
// implementations respectively -- confirm against the commit.
void Shutdown()
|
||||
{
|
||||
for (auto& map_entry : s_VertexLoaderMap)
|
||||
{
|
||||
delete map_entry.second.first;
|
||||
}
|
||||
s_VertexLoaderMap.clear();
|
||||
s_native_vertex_map.clear();
|
||||
// Held until the end of the function, so the clear below runs under the lock.
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||
s_vertex_loader_map.clear();
|
||||
VertexLoader::ClearNativeVertexFormatCache();
|
||||
}
|
||||
|
||||
namespace
|
||||
|
@ -87,14 +78,15 @@ struct entry
|
|||
|
||||
void AppendListToString(std::string *dest)
|
||||
{
|
||||
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||
std::vector<entry> entries;
|
||||
|
||||
size_t total_size = 0;
|
||||
for (const auto& map_entry : s_VertexLoaderMap)
|
||||
for (const auto& map_entry : s_vertex_loader_map)
|
||||
{
|
||||
entry e;
|
||||
map_entry.second.first->AppendToString(&e.text);
|
||||
e.num_verts = map_entry.second.first->GetNumLoadedVerts();
|
||||
map_entry.second->AppendToString(&e.text);
|
||||
e.num_verts = map_entry.second->GetNumLoadedVerts();
|
||||
entries.push_back(e);
|
||||
total_size += e.text.size() + 1;
|
||||
}
|
||||
|
@ -108,57 +100,46 @@ void AppendListToString(std::string *dest)
|
|||
|
||||
// Sets the low eight bits of the attr_dirty bitfield(s). RefreshLoader tests
// one bit per vtx_attr_group, so every group's loader is looked up again on
// its next use.
// NOTE(review): rendered diff without +/- markers; the s_attr_dirty row is
// presumably the pre-change form of the two CPState rows -- confirm.
void MarkAllDirty()
|
||||
{
|
||||
s_attr_dirty = 0xff;
|
||||
g_main_cp_state.attr_dirty = 0xff;
|
||||
g_preprocess_cp_state.attr_dirty = 0xff;
|
||||
}
|
||||
|
||||
static NativeVertexFormat* GetNativeVertexFormat(const PortableVertexDeclaration& format,
|
||||
u32 components)
|
||||
static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state)
|
||||
{
|
||||
auto& native = s_native_vertex_map[format];
|
||||
if (!native)
|
||||
VertexLoader* loader;
|
||||
if ((state->attr_dirty >> vtx_attr_group) & 1)
|
||||
{
|
||||
auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat();
|
||||
native = std::unique_ptr<NativeVertexFormat>(raw_pointer);
|
||||
native->Initialize(format);
|
||||
native->m_components = components;
|
||||
}
|
||||
return native.get();
|
||||
}
|
||||
|
||||
static VertexLoaderCacheItem RefreshLoader(int vtx_attr_group)
|
||||
{
|
||||
if ((s_attr_dirty >> vtx_attr_group) & 1)
|
||||
{
|
||||
VertexLoaderUID uid(g_VtxDesc, g_VtxAttr[vtx_attr_group]);
|
||||
VertexLoaderMap::iterator iter = s_VertexLoaderMap.find(uid);
|
||||
if (iter != s_VertexLoaderMap.end())
|
||||
VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
|
||||
std::lock_guard<std::mutex> lk(s_vertex_loader_map_lock);
|
||||
VertexLoaderMap::iterator iter = s_vertex_loader_map.find(uid);
|
||||
if (iter != s_vertex_loader_map.end())
|
||||
{
|
||||
s_VertexLoaders[vtx_attr_group] = iter->second;
|
||||
loader = iter->second.get();
|
||||
}
|
||||
else
|
||||
{
|
||||
VertexLoader* loader = new VertexLoader(g_VtxDesc, g_VtxAttr[vtx_attr_group]);
|
||||
|
||||
NativeVertexFormat* vtx_fmt = GetNativeVertexFormat(
|
||||
loader->GetNativeVertexDeclaration(),
|
||||
loader->GetNativeComponents());
|
||||
|
||||
s_VertexLoaderMap[uid] = std::make_pair(loader, vtx_fmt);
|
||||
s_VertexLoaders[vtx_attr_group] = std::make_pair(loader, vtx_fmt);
|
||||
loader = new VertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]);
|
||||
s_vertex_loader_map[uid] = std::unique_ptr<VertexLoader>(loader);
|
||||
INCSTAT(stats.numVertexLoaders);
|
||||
}
|
||||
state->vertex_loaders[vtx_attr_group] = loader;
|
||||
state->attr_dirty &= ~(1 << vtx_attr_group);
|
||||
} else {
|
||||
loader = state->vertex_loaders[vtx_attr_group];
|
||||
}
|
||||
s_attr_dirty &= ~(1 << vtx_attr_group);
|
||||
return s_VertexLoaders[vtx_attr_group];
|
||||
return loader;
|
||||
}
|
||||
|
||||
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
|
||||
{
|
||||
if (!count)
|
||||
return true;
|
||||
auto loader = RefreshLoader(vtx_attr_group);
|
||||
|
||||
size_t size = count * loader.first->GetVertexSize();
|
||||
CPState* state = &g_main_cp_state;
|
||||
|
||||
VertexLoader* loader = RefreshLoader(vtx_attr_group, state);
|
||||
|
||||
size_t size = count * loader->GetVertexSize();
|
||||
if (buf_size < size)
|
||||
return false;
|
||||
|
||||
|
@ -169,15 +150,17 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
|
|||
return true;
|
||||
}
|
||||
|
||||
NativeVertexFormat* native = loader->GetNativeVertexFormat();
|
||||
|
||||
// If the native vertex format changed, force a flush.
|
||||
if (loader.second != s_current_vtx_fmt)
|
||||
if (native != s_current_vtx_fmt)
|
||||
VertexManager::Flush();
|
||||
s_current_vtx_fmt = loader.second;
|
||||
s_current_vtx_fmt = native;
|
||||
|
||||
VertexManager::PrepareForAdditionalData(primitive, count,
|
||||
loader.first->GetNativeVertexDeclaration().stride);
|
||||
loader->GetNativeVertexDeclaration().stride);
|
||||
|
||||
loader.first->RunVertices(g_VtxAttr[vtx_attr_group], primitive, count);
|
||||
loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);
|
||||
|
||||
IndexGenerator::AddIndices(primitive, count);
|
||||
|
||||
|
@ -186,9 +169,9 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
|
|||
return true;
|
||||
}
|
||||
|
||||
// Returns GetVertexSize() of the loader that RefreshLoader yields for
// vtx_attr_group. In the two-argument form, `preprocess` selects which CP
// state is consulted: g_preprocess_cp_state when true, g_main_cp_state
// otherwise.
// NOTE(review): rendered diff without +/- markers; the two signature rows and
// the two return rows are presumably old/new pairs -- confirm.
int GetVertexSize(int vtx_attr_group)
|
||||
int GetVertexSize(int vtx_attr_group, bool preprocess)
|
||||
{
|
||||
return RefreshLoader(vtx_attr_group).first->GetVertexSize();
|
||||
return RefreshLoader(vtx_attr_group, preprocess ? &g_preprocess_cp_state : &g_main_cp_state)->GetVertexSize();
|
||||
}
|
||||
|
||||
NativeVertexFormat* GetCurrentVertexFormat()
|
||||
|
@ -198,78 +181,83 @@ NativeVertexFormat* GetCurrentVertexFormat()
|
|||
|
||||
} // namespace
|
||||
|
||||
// Applies one CP register write. The register family is chosen by the high
// nibble of sub_cmd (sub_cmd & 0xF0):
//   0x30 / 0x40  forwarded to VertexShaderManager::SetTexMatrixChangedA / B
//   0x50 / 0x60  low 17 bits / remaining upper bits of the vertex descriptor
//   0x70-0x90    g0 / g1 / g2 of vertex attribute group (sub_cmd & 7)
//   0xA0         array base (sub_cmd & 0xF)
//   0xB0         array stride (sub_cmd & 0xF), low 8 bits of value
// Other values of the high nibble fall through the switch with no effect.
//
// In the three-argument form, is_preprocess selects the CPState written
// (g_preprocess_cp_state vs g_main_cp_state). Writes that reach outside that
// state (VertexShaderManager calls, cached_arraybases) are made only when
// is_preprocess is false.
//
// NOTE(review): rendered diff without +/- markers. Rows using g_VtxDesc,
// g_VtxAttr, s_attr_dirty, arraybases, arraystrides and the unconditional
// SetTexMatrixChanged*/cached_arraybases rows are presumably the pre-change
// versions of the `state->` rows that follow them -- confirm.
void LoadCPReg(u32 sub_cmd, u32 value)
|
||||
void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess)
|
||||
{
|
||||
bool update_global_state = !is_preprocess;
|
||||
CPState* state = is_preprocess ? &g_preprocess_cp_state : &g_main_cp_state;
|
||||
switch (sub_cmd & 0xF0)
|
||||
{
|
||||
case 0x30:
|
||||
VertexShaderManager::SetTexMatrixChangedA(value);
|
||||
if (update_global_state)
|
||||
VertexShaderManager::SetTexMatrixChangedA(value);
|
||||
break;
|
||||
|
||||
case 0x40:
|
||||
VertexShaderManager::SetTexMatrixChangedB(value);
|
||||
if (update_global_state)
|
||||
VertexShaderManager::SetTexMatrixChangedB(value);
|
||||
break;
|
||||
|
||||
case 0x50:
|
||||
g_VtxDesc.Hex &= ~0x1FFFF; // keep the Upper bits
|
||||
g_VtxDesc.Hex |= value;
|
||||
s_attr_dirty = 0xFF;
|
||||
state->vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits
|
||||
state->vtx_desc.Hex |= value;
|
||||
// The descriptor changed, so every attribute group is marked dirty.
state->attr_dirty = 0xFF;
|
||||
break;
|
||||
|
||||
case 0x60:
|
||||
g_VtxDesc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
||||
g_VtxDesc.Hex |= (u64)value << 17;
|
||||
s_attr_dirty = 0xFF;
|
||||
state->vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits
|
||||
state->vtx_desc.Hex |= (u64)value << 17;
|
||||
state->attr_dirty = 0xFF;
|
||||
break;
|
||||
|
||||
case 0x70:
|
||||
_assert_((sub_cmd & 0x0F) < 8);
|
||||
g_VtxAttr[sub_cmd & 7].g0.Hex = value;
|
||||
s_attr_dirty |= 1 << (sub_cmd & 7);
|
||||
state->vtx_attr[sub_cmd & 7].g0.Hex = value;
|
||||
// Only the written group's dirty bit is set.
state->attr_dirty |= 1 << (sub_cmd & 7);
|
||||
break;
|
||||
|
||||
case 0x80:
|
||||
_assert_((sub_cmd & 0x0F) < 8);
|
||||
g_VtxAttr[sub_cmd & 7].g1.Hex = value;
|
||||
s_attr_dirty |= 1 << (sub_cmd & 7);
|
||||
state->vtx_attr[sub_cmd & 7].g1.Hex = value;
|
||||
state->attr_dirty |= 1 << (sub_cmd & 7);
|
||||
break;
|
||||
|
||||
case 0x90:
|
||||
_assert_((sub_cmd & 0x0F) < 8);
|
||||
g_VtxAttr[sub_cmd & 7].g2.Hex = value;
|
||||
s_attr_dirty |= 1 << (sub_cmd & 7);
|
||||
state->vtx_attr[sub_cmd & 7].g2.Hex = value;
|
||||
state->attr_dirty |= 1 << (sub_cmd & 7);
|
||||
break;
|
||||
|
||||
// Pointers to vertex arrays in GC RAM
|
||||
case 0xA0:
|
||||
arraybases[sub_cmd & 0xF] = value;
|
||||
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
|
||||
state->array_bases[sub_cmd & 0xF] = value;
|
||||
if (update_global_state)
|
||||
cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value);
|
||||
break;
|
||||
|
||||
case 0xB0:
|
||||
arraystrides[sub_cmd & 0xF] = value & 0xFF;
|
||||
state->array_strides[sub_cmd & 0xF] = value & 0xFF;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Writes the current CP register values into `memory`, indexed by register
// number:
//   0x30, 0x40        matrix index A / B
//   0x50              vertex descriptor Hex truncated to u32
//   0x60              vertex descriptor Hex shifted right by 17
//   0x70+i,0x80+i,0x90+i  g0/g1/g2 of attribute group i, i in [0, 8)
//   0xA0+i, 0xB0+i    array base / stride i, i in [0, 16)
// The highest index written is 0xBF, so `memory` must hold at least 0xC0
// entries.
// NOTE(review): rendered diff without +/- markers; rows reading MatrixIndexA/B,
// g_VtxDesc, g_VtxAttr, arraybases and arraystrides are presumably the
// pre-change versions of the g_main_cp_state rows -- confirm.
void FillCPMemoryArray(u32 *memory)
|
||||
{
|
||||
memory[0x30] = MatrixIndexA.Hex;
|
||||
memory[0x40] = MatrixIndexB.Hex;
|
||||
memory[0x50] = (u32)g_VtxDesc.Hex;
|
||||
memory[0x60] = (u32)(g_VtxDesc.Hex >> 17);
|
||||
memory[0x30] = g_main_cp_state.matrix_index_a.Hex;
|
||||
memory[0x40] = g_main_cp_state.matrix_index_b.Hex;
|
||||
memory[0x50] = (u32)g_main_cp_state.vtx_desc.Hex;
|
||||
memory[0x60] = (u32)(g_main_cp_state.vtx_desc.Hex >> 17);
|
||||
|
||||
for (int i = 0; i < 8; ++i)
|
||||
{
|
||||
memory[0x70 + i] = g_VtxAttr[i].g0.Hex;
|
||||
memory[0x80 + i] = g_VtxAttr[i].g1.Hex;
|
||||
memory[0x90 + i] = g_VtxAttr[i].g2.Hex;
|
||||
memory[0x70 + i] = g_main_cp_state.vtx_attr[i].g0.Hex;
|
||||
memory[0x80 + i] = g_main_cp_state.vtx_attr[i].g1.Hex;
|
||||
memory[0x90 + i] = g_main_cp_state.vtx_attr[i].g2.Hex;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 16; ++i)
|
||||
{
|
||||
memory[0xA0 + i] = arraybases[i];
|
||||
memory[0xB0 + i] = arraystrides[i];
|
||||
memory[0xA0 + i] = g_main_cp_state.array_bases[i];
|
||||
memory[0xB0 + i] = g_main_cp_state.array_strides[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -277,6 +265,6 @@ void RecomputeCachedArraybases()
|
|||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
cached_arraybases[i] = Memory::GetPointer(arraybases[i]);
|
||||
cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ namespace VertexLoaderManager
|
|||
|
||||
void MarkAllDirty();
|
||||
|
||||
int GetVertexSize(int vtx_attr_group);
|
||||
int GetVertexSize(int vtx_attr_group, bool preprocess);
|
||||
// Returns false if buf_size is insufficient.
|
||||
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false);
|
||||
|
||||
|
|
|
@ -117,7 +117,7 @@ template <typename I>
|
|||
void Color_ReadIndex_16b_565()
|
||||
{
|
||||
auto const Index = DataRead<I>();
|
||||
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
|
||||
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])));
|
||||
_SetCol565(val);
|
||||
}
|
||||
|
||||
|
@ -125,7 +125,7 @@ template <typename I>
|
|||
void Color_ReadIndex_24b_888()
|
||||
{
|
||||
auto const Index = DataRead<I>();
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
|
||||
|
@ -133,7 +133,7 @@ template <typename I>
|
|||
void Color_ReadIndex_32b_888x()
|
||||
{
|
||||
auto const Index = DataRead<I>();
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read24(iAddress));
|
||||
}
|
||||
|
||||
|
@ -141,7 +141,7 @@ template <typename I>
|
|||
void Color_ReadIndex_16b_4444()
|
||||
{
|
||||
auto const Index = DataRead<I>();
|
||||
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
|
||||
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]));
|
||||
_SetCol4444(val);
|
||||
}
|
||||
|
||||
|
@ -149,7 +149,7 @@ template <typename I>
|
|||
void Color_ReadIndex_24b_6666()
|
||||
{
|
||||
auto const Index = DataRead<I>();
|
||||
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
|
||||
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1;
|
||||
u32 val = Common::swap32(pData);
|
||||
_SetCol6666(val);
|
||||
}
|
||||
|
@ -158,7 +158,7 @@ template <typename I>
|
|||
void Color_ReadIndex_32b_8888()
|
||||
{
|
||||
auto const Index = DataRead<I>();
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
|
||||
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]);
|
||||
_SetCol(_Read32(iAddress));
|
||||
}
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ __forceinline void Normal_Index_Offset()
|
|||
|
||||
auto const index = DataRead<I>();
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL]
|
||||
+ (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
|
||||
+ (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
|
||||
ReadIndirect<T, N * 3>(data);
|
||||
}
|
||||
|
||||
|
|
|
@ -91,7 +91,7 @@ void LOADERDECL Pos_ReadIndex()
|
|||
static_assert(N <= 3, "N > 3 is not sane!");
|
||||
|
||||
auto const index = DataRead<I>();
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
||||
auto const scale = posScale;
|
||||
DataWriter dst;
|
||||
|
||||
|
@ -109,7 +109,7 @@ template <typename I, bool three>
|
|||
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
|
||||
{
|
||||
auto const index = DataRead<I>();
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
|
||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
||||
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
|
||||
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
|
||||
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||
|
|
|
@ -73,7 +73,7 @@ void LOADERDECL TexCoord_ReadIndex()
|
|||
|
||||
auto const index = DataRead<I>();
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
|
||||
+ (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
|
||||
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex]));
|
||||
auto const scale = tcScale[tcIndex];
|
||||
DataWriter dst;
|
||||
|
||||
|
@ -94,7 +94,7 @@ void LOADERDECL TexCoord_ReadIndex_Short2_SSE4()
|
|||
|
||||
// Heavy in ZWW
|
||||
auto const index = DataRead<I>();
|
||||
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
const __m128i a = _mm_cvtsi32_si128(*pData);
|
||||
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
|
||||
const __m128i c = _mm_cvtepi16_epi32(b);
|
||||
|
@ -117,7 +117,7 @@ void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3()
|
|||
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
|
||||
|
||||
auto const index = DataRead<I>();
|
||||
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0+tcIndex]));
|
||||
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
|
||||
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
|
||||
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||
|
|
|
@ -245,8 +245,8 @@ static inline void GenerateVertexShader(T& out, u32 components, API_TYPE api_typ
|
|||
// donko - this has caused problems in some games. removed for now.
|
||||
bool texGenSpecialCase = false;
|
||||
/*bool texGenSpecialCase =
|
||||
((g_VtxDesc.Hex & 0x60600L) == g_VtxDesc.Hex) && // only pos and tex coord 0
|
||||
(g_VtxDesc.Tex0Coord != NOT_PRESENT) &&
|
||||
((g_main_cp_state.vtx_desc.Hex & 0x60600L) == g_main_cp_state.vtx_desc.Hex) && // only pos and tex coord 0
|
||||
(g_main_cp_state.vtx_desc.Tex0Coord != NOT_PRESENT) &&
|
||||
(xfmem.texcoords[0].texmtxinfo.inputform == XF_TEXINPUT_AB11);
|
||||
*/
|
||||
|
||||
|
|
|
@ -329,8 +329,8 @@ void VertexShaderManager::SetConstants()
|
|||
{
|
||||
bPosNormalMatrixChanged = false;
|
||||
|
||||
const float *pos = (const float *)xfmem.posMatrices + MatrixIndexA.PosNormalMtxIdx * 4;
|
||||
const float *norm = (const float *)xfmem.normalMatrices + 3 * (MatrixIndexA.PosNormalMtxIdx & 31);
|
||||
const float *pos = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
|
||||
const float *norm = (const float *)xfmem.normalMatrices + 3 * (g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31);
|
||||
|
||||
memcpy(constants.posnormalmatrix, pos, 3*16);
|
||||
memcpy(constants.posnormalmatrix[3], norm, 12);
|
||||
|
@ -344,10 +344,10 @@ void VertexShaderManager::SetConstants()
|
|||
bTexMatricesChanged[0] = false;
|
||||
const float *fptrs[] =
|
||||
{
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex0MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex1MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex2MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexA.Tex3MtxIdx * 4]
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex0MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex1MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex2MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_a.Tex3MtxIdx * 4]
|
||||
};
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
|
@ -361,10 +361,10 @@ void VertexShaderManager::SetConstants()
|
|||
{
|
||||
bTexMatricesChanged[1] = false;
|
||||
const float *fptrs[] = {
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex4MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex5MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex6MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[MatrixIndexB.Tex7MtxIdx * 4]
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex4MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex5MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex6MtxIdx * 4],
|
||||
(const float *)&xfmem.posMatrices[g_main_cp_state.matrix_index_b.Tex7MtxIdx * 4]
|
||||
};
|
||||
|
||||
for (int i = 0; i < 4; ++i)
|
||||
|
@ -536,26 +536,26 @@ void VertexShaderManager::SetConstants()
|
|||
|
||||
void VertexShaderManager::InvalidateXFRange(int start, int end)
|
||||
{
|
||||
if (((u32)start >= (u32)MatrixIndexA.PosNormalMtxIdx * 4 &&
|
||||
(u32)start < (u32)MatrixIndexA.PosNormalMtxIdx * 4 + 12) ||
|
||||
((u32)start >= XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 &&
|
||||
(u32)start < XFMEM_NORMALMATRICES + ((u32)MatrixIndexA.PosNormalMtxIdx & 31) * 3 + 9))
|
||||
if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 &&
|
||||
(u32)start < (u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4 + 12) ||
|
||||
((u32)start >= XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 &&
|
||||
(u32)start < XFMEM_NORMALMATRICES + ((u32)g_main_cp_state.matrix_index_a.PosNormalMtxIdx & 31) * 3 + 9))
|
||||
{
|
||||
bPosNormalMatrixChanged = true;
|
||||
}
|
||||
|
||||
if (((u32)start >= (u32)MatrixIndexA.Tex0MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex0MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)MatrixIndexA.Tex1MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex1MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)MatrixIndexA.Tex2MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex2MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)MatrixIndexA.Tex3MtxIdx*4 && (u32)start < (u32)MatrixIndexA.Tex3MtxIdx*4+12))
|
||||
if (((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex0MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex1MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex2MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_a.Tex3MtxIdx*4+12))
|
||||
{
|
||||
bTexMatricesChanged[0] = true;
|
||||
}
|
||||
|
||||
if (((u32)start >= (u32)MatrixIndexB.Tex4MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex4MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)MatrixIndexB.Tex5MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex5MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)MatrixIndexB.Tex6MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex6MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)MatrixIndexB.Tex7MtxIdx*4 && (u32)start < (u32)MatrixIndexB.Tex7MtxIdx*4+12))
|
||||
if (((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex4MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex5MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex6MtxIdx*4+12) ||
|
||||
((u32)start >= (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4 && (u32)start < (u32)g_main_cp_state.matrix_index_b.Tex7MtxIdx*4+12))
|
||||
{
|
||||
bTexMatricesChanged[1] = true;
|
||||
}
|
||||
|
@ -628,23 +628,23 @@ void VertexShaderManager::InvalidateXFRange(int start, int end)
|
|||
|
||||
void VertexShaderManager::SetTexMatrixChangedA(u32 Value)
|
||||
{
|
||||
if (MatrixIndexA.Hex != Value)
|
||||
if (g_main_cp_state.matrix_index_a.Hex != Value)
|
||||
{
|
||||
VertexManager::Flush();
|
||||
if (MatrixIndexA.PosNormalMtxIdx != (Value&0x3f))
|
||||
if (g_main_cp_state.matrix_index_a.PosNormalMtxIdx != (Value&0x3f))
|
||||
bPosNormalMatrixChanged = true;
|
||||
bTexMatricesChanged[0] = true;
|
||||
MatrixIndexA.Hex = Value;
|
||||
g_main_cp_state.matrix_index_a.Hex = Value;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexShaderManager::SetTexMatrixChangedB(u32 Value)
|
||||
{
|
||||
if (MatrixIndexB.Hex != Value)
|
||||
if (g_main_cp_state.matrix_index_b.Hex != Value)
|
||||
{
|
||||
VertexManager::Flush();
|
||||
bTexMatricesChanged[1] = true;
|
||||
MatrixIndexB.Hex = Value;
|
||||
g_main_cp_state.matrix_index_b.Hex = Value;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -116,6 +116,8 @@ public:
|
|||
virtual void DoState(PointerWrap &p) = 0;
|
||||
|
||||
virtual void CheckInvalidState() = 0;
|
||||
|
||||
virtual void UpdateWantDeterminism(bool want) {}
|
||||
};
|
||||
|
||||
extern std::vector<VideoBackend*> g_available_video_backends;
|
||||
|
@ -151,6 +153,8 @@ class VideoBackendHardware : public VideoBackend
|
|||
void PauseAndLock(bool doLock, bool unpauseOnUnlock=true) override;
|
||||
void DoState(PointerWrap &p) override;
|
||||
|
||||
void UpdateWantDeterminism(bool want) override;
|
||||
|
||||
bool m_invalid;
|
||||
|
||||
public:
|
||||
|
|
|
@ -22,13 +22,7 @@ static void DoState(PointerWrap &p)
|
|||
p.DoMarker("BP Memory");
|
||||
|
||||
// CP Memory
|
||||
p.DoArray(arraybases, 16);
|
||||
p.DoArray(arraystrides, 16);
|
||||
p.Do(MatrixIndexA);
|
||||
p.Do(MatrixIndexB);
|
||||
p.Do(g_VtxDesc.Hex);
|
||||
p.DoArray(g_VtxAttr, 8);
|
||||
p.DoMarker("CP Memory");
|
||||
DoCPState(p);
|
||||
|
||||
// XF Memory
|
||||
p.Do(xfmem);
|
||||
|
@ -73,11 +67,7 @@ void VideoCommon_RunLoop(bool enable)
|
|||
|
||||
void VideoCommon_Init()
|
||||
{
|
||||
memset(arraybases, 0, sizeof(arraybases));
|
||||
memset(arraystrides, 0, sizeof(arraystrides));
|
||||
memset(&MatrixIndexA, 0, sizeof(MatrixIndexA));
|
||||
memset(&MatrixIndexB, 0, sizeof(MatrixIndexB));
|
||||
memset(&g_VtxDesc, 0, sizeof(g_VtxDesc));
|
||||
memset(g_VtxAttr, 0, sizeof(g_VtxAttr));
|
||||
memset(&g_main_cp_state, 0, sizeof(g_main_cp_state));
|
||||
memset(&g_preprocess_cp_state, 0, sizeof(g_preprocess_cp_state));
|
||||
memset(texMem, 0, TMEM_SIZE);
|
||||
}
|
||||
|
|
|
@ -275,3 +275,4 @@ extern XFMemory xfmem;
|
|||
|
||||
void LoadXFReg(u32 transferSize, u32 address);
|
||||
void LoadIndexedXF(u32 val, int array);
|
||||
void PreprocessIndexedXF(u32 val, int refarray);
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "Core/HW/Memmap.h"
|
||||
#include "VideoCommon/CPMemory.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/Fifo.h"
|
||||
#include "VideoCommon/PixelShaderManager.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
|
@ -252,7 +253,15 @@ void LoadIndexedXF(u32 val, int refarray)
|
|||
//load stuff from array to address in xf mem
|
||||
|
||||
u32* currData = (u32*)(&xfmem) + address;
|
||||
u32* newData = (u32*)Memory::GetPointer(arraybases[refarray] + arraystrides[refarray] * index);
|
||||
u32* newData;
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
{
|
||||
newData = (u32*)PopFifoAuxBuffer(size * sizeof(u32));
|
||||
}
|
||||
else
|
||||
{
|
||||
newData = (u32*)Memory::GetPointer(g_main_cp_state.array_bases[refarray] + g_main_cp_state.array_strides[refarray] * index);
|
||||
}
|
||||
bool changed = false;
|
||||
for (int i = 0; i < size; ++i)
|
||||
{
|
||||
|
@ -269,3 +278,14 @@ void LoadIndexedXF(u32 val, int refarray)
|
|||
currData[i] = Common::swap32(newData[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void PreprocessIndexedXF(u32 val, int refarray)
|
||||
{
|
||||
int index = val >> 16;
|
||||
int size = ((val >> 12) & 0xF) + 1;
|
||||
|
||||
u32* new_data = (u32*)Memory::GetPointer(g_preprocess_cp_state.array_bases[refarray] + g_preprocess_cp_state.array_strides[refarray] * index);
|
||||
|
||||
size_t buf_size = size * sizeof(u32);
|
||||
PushFifoAuxBuffer(new_data, buf_size);
|
||||
}
|
||||
|
|
|
@ -74,7 +74,7 @@ protected:
|
|||
|
||||
void ResetPointers()
|
||||
{
|
||||
g_pVideoData = &input_memory[0];
|
||||
g_video_buffer_read_ptr = &input_memory[0];
|
||||
VertexManager::s_pCurBufferPointer = &output_memory[0];
|
||||
m_input_pos = m_output_pos = 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue