Merge pull request #1609 from degasus/datareader
VideoCommon: Datareader
This commit is contained in:
commit
4c139ca7cc
|
@ -14,8 +14,8 @@
|
|||
#include "VideoBackends/Software/SWVertexLoader.h"
|
||||
#include "VideoBackends/Software/SWVideoConfig.h"
|
||||
#include "VideoBackends/Software/XFMemLoader.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/Fifo.h"
|
||||
#include "VideoCommon/VertexLoaderUtils.h"
|
||||
|
||||
typedef void (*DecodingFunction)(u32);
|
||||
|
||||
|
|
|
@ -20,8 +20,8 @@
|
|||
#include "VideoBackends/Software/SWCommandProcessor.h"
|
||||
#include "VideoBackends/Software/VideoBackend.h"
|
||||
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/Fifo.h"
|
||||
#include "VideoCommon/VertexLoaderUtils.h"
|
||||
|
||||
namespace SWCommandProcessor
|
||||
{
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
#include "VideoBackends/Software/TransformUnit.h"
|
||||
#include "VideoBackends/Software/XFMemLoader.h"
|
||||
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/VertexLoader.h"
|
||||
#include "VideoCommon/VertexLoader_Color.h"
|
||||
#include "VideoCommon/VertexLoader_Normal.h"
|
||||
|
@ -286,20 +285,20 @@ void SWVertexLoader::LoadTexMtx(SWVertexLoader *vertexLoader, InputVertexData *v
|
|||
|
||||
void SWVertexLoader::LoadPosition(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 unused)
|
||||
{
|
||||
VertexManager::s_pCurBufferPointer = (u8*)&vertex->position;
|
||||
g_vertex_manager_write_ptr = (u8*)&vertex->position;
|
||||
vertexLoader->m_positionLoader();
|
||||
}
|
||||
|
||||
void SWVertexLoader::LoadNormal(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 unused)
|
||||
{
|
||||
VertexManager::s_pCurBufferPointer = (u8*)&vertex->normal;
|
||||
g_vertex_manager_write_ptr = (u8*)&vertex->normal;
|
||||
vertexLoader->m_normalLoader();
|
||||
}
|
||||
|
||||
void SWVertexLoader::LoadColor(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 index)
|
||||
{
|
||||
u32 color;
|
||||
VertexManager::s_pCurBufferPointer = (u8*)&color;
|
||||
g_vertex_manager_write_ptr = (u8*)&color;
|
||||
colIndex = index;
|
||||
vertexLoader->m_colorLoader[index]();
|
||||
|
||||
|
@ -309,7 +308,7 @@ void SWVertexLoader::LoadColor(SWVertexLoader *vertexLoader, InputVertexData *ve
|
|||
|
||||
void SWVertexLoader::LoadTexCoord(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 index)
|
||||
{
|
||||
VertexManager::s_pCurBufferPointer = (u8*)&vertex->texCoords[index];
|
||||
g_vertex_manager_write_ptr = (u8*)&vertex->texCoords[index];
|
||||
tcIndex = index;
|
||||
vertexLoader->m_texCoordLoader[index]();
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ static PortableVertexDeclaration vertexDecl;
|
|||
// Gets the pointer to the current buffer position
|
||||
void LOADERDECL SetVertexBufferPosition()
|
||||
{
|
||||
bufferPos = VertexManager::s_pCurBufferPointer;
|
||||
bufferPos = g_vertex_manager_write_ptr;
|
||||
}
|
||||
|
||||
// Prepares the bounding box for new primitive data
|
||||
|
|
|
@ -5,124 +5,61 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common/Common.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
|
||||
extern u8* g_video_buffer_read_ptr;
|
||||
|
||||
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
// Moves the global video-buffer read pointer forward by `skip` bytes
// without reading anything.
__forceinline void DataSkip(u32 skip)
{
	g_video_buffer_read_ptr = g_video_buffer_read_ptr + skip;
}
|
||||
|
||||
// probably unnecessary
|
||||
template <int count>
|
||||
__forceinline void DataSkip()
|
||||
{
|
||||
g_video_buffer_read_ptr += count;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
|
||||
{
|
||||
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(*bufp + _uOffset));
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO: kill these
|
||||
// Convenience wrapper: peeks a u8 at `_uOffset` from the global read pointer.
__forceinline u8 DataPeek8(int _uOffset)
{
	const u8 value = DataPeek<u8>(_uOffset);
	return value;
}
|
||||
|
||||
// Convenience wrapper: peeks a u16 at `_uOffset` from the global read pointer.
__forceinline u16 DataPeek16(int _uOffset)
{
	const u16 value = DataPeek<u16>(_uOffset);
	return value;
}
|
||||
|
||||
// Convenience wrapper: peeks a u32 at `_uOffset` from the global read pointer.
__forceinline u32 DataPeek32(int _uOffset)
{
	const u32 value = DataPeek<u32>(_uOffset);
	return value;
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
|
||||
{
|
||||
auto const result = DataPeek<T>(0, bufp);
|
||||
*bufp += sizeof(T);
|
||||
return result;
|
||||
}
|
||||
|
||||
class DataReader
|
||||
{
|
||||
public:
|
||||
inline DataReader() : buffer(g_video_buffer_read_ptr), offset(0) {}
|
||||
inline ~DataReader() { g_video_buffer_read_ptr += offset; }
|
||||
template <typename T> inline T Read()
|
||||
__forceinline DataReader()
|
||||
: buffer(nullptr), end(nullptr) {}
|
||||
|
||||
__forceinline DataReader(u8* src, u8* _end)
|
||||
: buffer(src), end(_end) {}
|
||||
|
||||
__forceinline void WritePointer(u8** src)
|
||||
{
|
||||
const T result = Common::FromBigEndian(*(T*)(buffer + offset));
|
||||
offset += sizeof(T);
|
||||
*src = buffer;
|
||||
}
|
||||
|
||||
__forceinline u8* operator=(u8* src)
|
||||
{
|
||||
buffer = src;
|
||||
return src;
|
||||
}
|
||||
|
||||
__forceinline size_t size()
|
||||
{
|
||||
return end - buffer;
|
||||
}
|
||||
|
||||
template <typename T, bool swapped = true> __forceinline T Peek(int offset = 0)
|
||||
{
|
||||
T data = *(T*)(buffer + offset);
|
||||
if (swapped)
|
||||
data = Common::FromBigEndian(data);
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename T, bool swapped = true> __forceinline T Read()
|
||||
{
|
||||
const T result = Peek<T, swapped>();
|
||||
buffer += sizeof(T);
|
||||
return result;
|
||||
}
|
||||
private:
|
||||
u8 *buffer;
|
||||
int offset;
|
||||
};
|
||||
|
||||
// TODO: kill these
|
||||
// Convenience wrapper: reads a u8 from the global read pointer and advances it.
__forceinline u8 DataReadU8()
{
	const u8 value = DataRead<u8>();
	return value;
}
|
||||
|
||||
// Convenience wrapper: reads an s8 from the global read pointer and advances it.
__forceinline s8 DataReadS8()
{
	const s8 value = DataRead<s8>();
	return value;
}
|
||||
|
||||
// Convenience wrapper: reads a u16 from the global read pointer and advances it.
__forceinline u16 DataReadU16()
{
	const u16 value = DataRead<u16>();
	return value;
}
|
||||
|
||||
// Convenience wrapper: reads a u32 from the global read pointer and advances it.
__forceinline u32 DataReadU32()
{
	const u32 value = DataRead<u32>();
	return value;
}
|
||||
|
||||
// Reads a u32 directly from the global read pointer, with no
// Common::FromBigEndian conversion, and advances the pointer by 4 bytes.
__forceinline u32 DataReadU32Unswapped()
{
	const u32 raw = *reinterpret_cast<u32*>(g_video_buffer_read_ptr);
	g_video_buffer_read_ptr = g_video_buffer_read_ptr + 4;
	return raw;
}
|
||||
|
||||
// Returns the current value of the global video-buffer read pointer.
__forceinline u8* DataGetPosition()
{
	u8* const position = g_video_buffer_read_ptr;
	return position;
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline void DataWrite(T data)
|
||||
{
|
||||
*(T*)VertexManager::s_pCurBufferPointer = data;
|
||||
VertexManager::s_pCurBufferPointer += sizeof(T);
|
||||
}
|
||||
|
||||
class DataWriter
|
||||
{
|
||||
public:
|
||||
inline DataWriter() : buffer(VertexManager::s_pCurBufferPointer), offset(0) {}
|
||||
inline ~DataWriter() { VertexManager::s_pCurBufferPointer += offset; }
|
||||
template <typename T> inline void Write(T data)
|
||||
template <typename T, bool swapped = false> __forceinline void Write(T data)
|
||||
{
|
||||
*(T*)(buffer+offset) = data;
|
||||
offset += sizeof(T);
|
||||
if (swapped)
|
||||
data = Common::FromBigEndian(data);
|
||||
*(T*)(buffer) = data;
|
||||
buffer += sizeof(T);
|
||||
}
|
||||
|
||||
template <typename T = u8> __forceinline void Skip(size_t data = 1)
|
||||
{
|
||||
buffer += sizeof(T) * data;
|
||||
}
|
||||
|
||||
private:
|
||||
u8 *buffer;
|
||||
int offset;
|
||||
u8* __restrict buffer;
|
||||
u8* end;
|
||||
};
|
||||
|
|
|
@ -41,10 +41,10 @@ bool g_use_deterministic_gpu_thread;
|
|||
static std::mutex s_video_buffer_lock;
|
||||
static std::condition_variable s_video_buffer_cond;
|
||||
static u8* s_video_buffer;
|
||||
u8* g_video_buffer_read_ptr;
|
||||
static u8* s_video_buffer_read_ptr;
|
||||
static std::atomic<u8*> s_video_buffer_write_ptr;
|
||||
static std::atomic<u8*> s_video_buffer_seen_ptr;
|
||||
u8* g_video_buffer_pp_read_ptr;
|
||||
static u8* s_video_buffer_pp_read_ptr;
|
||||
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
|
||||
// write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode,
|
||||
// things get a bit more complicated:
|
||||
|
@ -63,11 +63,11 @@ void Fifo_DoState(PointerWrap &p)
|
|||
u8* write_ptr = s_video_buffer_write_ptr;
|
||||
p.DoPointer(write_ptr, s_video_buffer);
|
||||
s_video_buffer_write_ptr = write_ptr;
|
||||
p.DoPointer(g_video_buffer_read_ptr, s_video_buffer);
|
||||
p.DoPointer(s_video_buffer_read_ptr, s_video_buffer);
|
||||
if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread)
|
||||
{
|
||||
// We're good and paused, right?
|
||||
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
|
||||
s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
|
||||
}
|
||||
p.Do(g_bSkipCurrentFrame);
|
||||
}
|
||||
|
@ -106,8 +106,8 @@ void Fifo_Shutdown()
|
|||
FreeMemoryPages(s_video_buffer, FIFO_SIZE);
|
||||
s_video_buffer = nullptr;
|
||||
s_video_buffer_write_ptr = nullptr;
|
||||
g_video_buffer_pp_read_ptr = nullptr;
|
||||
g_video_buffer_read_ptr = nullptr;
|
||||
s_video_buffer_pp_read_ptr = nullptr;
|
||||
s_video_buffer_read_ptr = nullptr;
|
||||
s_video_buffer_seen_ptr = nullptr;
|
||||
s_fifo_aux_write_ptr = nullptr;
|
||||
s_fifo_aux_read_ptr = nullptr;
|
||||
|
@ -169,15 +169,15 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
|
|||
if (may_move_read_ptr)
|
||||
{
|
||||
// what's left over in the buffer
|
||||
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
|
||||
size_t size = write_ptr - s_video_buffer_pp_read_ptr;
|
||||
|
||||
memmove(s_video_buffer, g_video_buffer_pp_read_ptr, size);
|
||||
memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size);
|
||||
// This change always decreases the pointers. We write seen_ptr
|
||||
// after write_ptr here, and read it before in RunGpuLoop, so
|
||||
// 'write_ptr > seen_ptr' there cannot become spuriously true.
|
||||
s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
|
||||
g_video_buffer_pp_read_ptr = s_video_buffer;
|
||||
g_video_buffer_read_ptr = s_video_buffer;
|
||||
s_video_buffer_pp_read_ptr = s_video_buffer;
|
||||
s_video_buffer_read_ptr = s_video_buffer;
|
||||
s_video_buffer_seen_ptr = write_ptr;
|
||||
}
|
||||
}
|
||||
|
@ -213,15 +213,15 @@ static void ReadDataFromFifo(u32 readPtr)
|
|||
size_t len = 32;
|
||||
if (len > (size_t)(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
|
||||
{
|
||||
size_t existing_len = s_video_buffer_write_ptr - g_video_buffer_read_ptr;
|
||||
size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr;
|
||||
if (len > (size_t)(FIFO_SIZE - existing_len))
|
||||
{
|
||||
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE);
|
||||
return;
|
||||
}
|
||||
memmove(s_video_buffer, g_video_buffer_read_ptr, existing_len);
|
||||
memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len);
|
||||
s_video_buffer_write_ptr = s_video_buffer + existing_len;
|
||||
g_video_buffer_read_ptr = s_video_buffer;
|
||||
s_video_buffer_read_ptr = s_video_buffer;
|
||||
}
|
||||
// Copy new video instructions to s_video_buffer for future use in rendering the new picture
|
||||
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
|
||||
|
@ -238,13 +238,13 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
|
|||
// We can't wrap around while the GPU is working on the data.
|
||||
// This should be very rare due to the reset in SyncGPU.
|
||||
SyncGPU(SYNC_GPU_WRAPAROUND);
|
||||
if (g_video_buffer_pp_read_ptr != g_video_buffer_read_ptr)
|
||||
if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
|
||||
{
|
||||
PanicAlert("desynced read pointers");
|
||||
return;
|
||||
}
|
||||
write_ptr = s_video_buffer_write_ptr;
|
||||
size_t existing_len = write_ptr - g_video_buffer_pp_read_ptr;
|
||||
size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
|
||||
if (len > (size_t)(FIFO_SIZE - existing_len))
|
||||
{
|
||||
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE);
|
||||
|
@ -252,17 +252,17 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
|
|||
}
|
||||
}
|
||||
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
|
||||
OpcodeDecoder_Preprocess(write_ptr + len, false);
|
||||
s_video_buffer_pp_read_ptr = OpcodeDecoder_Run<true>(DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
|
||||
// This would have to be locked if the GPU thread didn't spin.
|
||||
s_video_buffer_write_ptr = write_ptr + len;
|
||||
}
|
||||
|
||||
void ResetVideoBuffer()
|
||||
{
|
||||
g_video_buffer_read_ptr = s_video_buffer;
|
||||
s_video_buffer_read_ptr = s_video_buffer;
|
||||
s_video_buffer_write_ptr = s_video_buffer;
|
||||
s_video_buffer_seen_ptr = s_video_buffer;
|
||||
g_video_buffer_pp_read_ptr = s_video_buffer;
|
||||
s_video_buffer_pp_read_ptr = s_video_buffer;
|
||||
s_fifo_aux_write_ptr = s_fifo_aux_data;
|
||||
s_fifo_aux_read_ptr = s_fifo_aux_data;
|
||||
}
|
||||
|
@ -294,7 +294,7 @@ void RunGpuLoop()
|
|||
// See comment in SyncGPU
|
||||
if (write_ptr > seen_ptr)
|
||||
{
|
||||
OpcodeDecoder_Run(write_ptr, false);
|
||||
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
|
||||
|
||||
{
|
||||
std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
|
||||
|
@ -330,7 +330,7 @@ void RunGpuLoop()
|
|||
|
||||
|
||||
u8* write_ptr = s_video_buffer_write_ptr;
|
||||
cyclesExecuted = OpcodeDecoder_Run(write_ptr, false);
|
||||
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
|
||||
|
||||
|
||||
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
|
||||
|
@ -338,7 +338,7 @@ void RunGpuLoop()
|
|||
|
||||
Common::AtomicStore(fifo.CPReadPointer, readPtr);
|
||||
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
|
||||
if ((write_ptr - g_video_buffer_read_ptr) == 0)
|
||||
if ((write_ptr - s_video_buffer_read_ptr) == 0)
|
||||
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
|
||||
}
|
||||
|
||||
|
@ -403,7 +403,7 @@ void RunGpu()
|
|||
FPURoundMode::SaveSIMDState();
|
||||
FPURoundMode::LoadDefaultSIMDState();
|
||||
ReadDataFromFifo(fifo.CPReadPointer);
|
||||
OpcodeDecoder_Run(s_video_buffer_write_ptr, false);
|
||||
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
|
||||
FPURoundMode::LoadSIMDState();
|
||||
}
|
||||
|
||||
|
@ -454,7 +454,7 @@ void Fifo_UpdateWantDeterminism(bool want)
|
|||
if (gpu_thread)
|
||||
{
|
||||
// These haven't been updated in non-deterministic mode.
|
||||
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
|
||||
s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
|
||||
CopyPreprocessCPStateFromMain();
|
||||
VertexLoaderManager::MarkAllDirty();
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ extern bool g_bSkipCurrentFrame;
|
|||
// and can change at runtime.
|
||||
extern bool g_use_deterministic_gpu_thread;
|
||||
extern std::atomic<u8*> g_video_buffer_write_ptr_xthread;
|
||||
extern u8* g_video_buffer_pp_read_ptr;
|
||||
|
||||
void Fifo_Init();
|
||||
void Fifo_Shutdown();
|
||||
|
|
|
@ -36,7 +36,6 @@ bool g_bRecordFifoData = false;
|
|||
|
||||
static u32 InterpretDisplayList(u32 address, u32 size)
|
||||
{
|
||||
u8* old_pVideoData = g_video_buffer_read_ptr;
|
||||
u8* startAddress;
|
||||
|
||||
if (g_use_deterministic_gpu_thread)
|
||||
|
@ -49,41 +48,29 @@ static u32 InterpretDisplayList(u32 address, u32 size)
|
|||
// Avoid the crash if Memory::GetPointer failed ..
|
||||
if (startAddress != nullptr)
|
||||
{
|
||||
g_video_buffer_read_ptr = startAddress;
|
||||
|
||||
// temporarily swap dl and non-dl (small "hack" for the stats)
|
||||
Statistics::SwapDL();
|
||||
|
||||
u8 *end = g_video_buffer_read_ptr + size;
|
||||
cycles = OpcodeDecoder_Run(end, true);
|
||||
OpcodeDecoder_Run(DataReader(startAddress, startAddress + size), &cycles, true);
|
||||
INCSTAT(stats.thisFrame.numDListsCalled);
|
||||
|
||||
// un-swap
|
||||
Statistics::SwapDL();
|
||||
}
|
||||
|
||||
// reset to the old pointer
|
||||
g_video_buffer_read_ptr = old_pVideoData;
|
||||
|
||||
return cycles;
|
||||
}
|
||||
|
||||
static void InterpretDisplayListPreprocess(u32 address, u32 size)
|
||||
{
|
||||
u8* old_read_ptr = g_video_buffer_pp_read_ptr;
|
||||
u8* startAddress = Memory::GetPointer(address);
|
||||
|
||||
PushFifoAuxBuffer(startAddress, size);
|
||||
|
||||
if (startAddress != nullptr)
|
||||
{
|
||||
g_video_buffer_pp_read_ptr = startAddress;
|
||||
|
||||
u8 *end = startAddress + size;
|
||||
OpcodeDecoder_Preprocess(end, true);
|
||||
OpcodeDecoder_Run<true>(DataReader(startAddress, startAddress + size), nullptr, true);
|
||||
}
|
||||
|
||||
g_video_buffer_pp_read_ptr = old_read_ptr;
|
||||
}
|
||||
|
||||
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
||||
|
@ -134,180 +121,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
|
|||
}
|
||||
}
|
||||
|
||||
template <bool is_preprocess, u8** bufp>
|
||||
static u32 Decode(u8* end, bool in_display_list)
|
||||
{
|
||||
u8 *opcodeStart = *bufp;
|
||||
if (*bufp == end)
|
||||
return 0;
|
||||
|
||||
u8 cmd_byte = DataRead<u8>(bufp);
|
||||
u32 cycles;
|
||||
int refarray;
|
||||
switch (cmd_byte)
|
||||
{
|
||||
case GX_NOP:
|
||||
cycles = 6; // Hm, this means that we scan over nop streams pretty slowly...
|
||||
break;
|
||||
|
||||
case GX_LOAD_CP_REG: //0x08
|
||||
{
|
||||
if (end - *bufp < 1 + 4)
|
||||
return 0;
|
||||
cycles = 12;
|
||||
u8 sub_cmd = DataRead<u8>(bufp);
|
||||
u32 value = DataRead<u32>(bufp);
|
||||
LoadCPReg(sub_cmd, value, is_preprocess);
|
||||
if (!is_preprocess)
|
||||
INCSTAT(stats.thisFrame.numCPLoads);
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_LOAD_XF_REG:
|
||||
{
|
||||
if (end - *bufp < 4)
|
||||
return 0;
|
||||
u32 Cmd2 = DataRead<u32>(bufp);
|
||||
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||
if ((size_t) (end - *bufp) < transfer_size * sizeof(u32))
|
||||
return 0;
|
||||
cycles = 18 + 6 * transfer_size;
|
||||
if (!is_preprocess)
|
||||
{
|
||||
u32 xf_address = Cmd2 & 0xFFFF;
|
||||
LoadXFReg(transfer_size, xf_address);
|
||||
|
||||
INCSTAT(stats.thisFrame.numXFLoads);
|
||||
}
|
||||
else
|
||||
{
|
||||
*bufp += transfer_size * sizeof(u32);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_LOAD_INDX_A: //used for position matrices
|
||||
refarray = 0xC;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_B: //used for normal matrices
|
||||
refarray = 0xD;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_C: //used for postmatrices
|
||||
refarray = 0xE;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_D: //used for lights
|
||||
refarray = 0xF;
|
||||
goto load_indx;
|
||||
load_indx:
|
||||
if (end - *bufp < 4)
|
||||
return 0;
|
||||
cycles = 6;
|
||||
if (is_preprocess)
|
||||
PreprocessIndexedXF(DataRead<u32>(bufp), refarray);
|
||||
else
|
||||
LoadIndexedXF(DataRead<u32>(bufp), refarray);
|
||||
break;
|
||||
|
||||
case GX_CMD_CALL_DL:
|
||||
{
|
||||
if (end - *bufp < 8)
|
||||
return 0;
|
||||
u32 address = DataRead<u32>(bufp);
|
||||
u32 count = DataRead<u32>(bufp);
|
||||
|
||||
if (in_display_list)
|
||||
{
|
||||
cycles = 6;
|
||||
WARN_LOG(VIDEO,"recursive display list detected");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_preprocess)
|
||||
InterpretDisplayListPreprocess(address, count);
|
||||
else
|
||||
cycles = 6 + InterpretDisplayList(address, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
||||
cycles = 6;
|
||||
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
|
||||
break;
|
||||
|
||||
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
|
||||
cycles = 6;
|
||||
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
|
||||
break;
|
||||
|
||||
case GX_LOAD_BP_REG: //0x61
|
||||
// In skipped_frame case: We have to let BP writes through because they set
|
||||
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
|
||||
{
|
||||
if (end - *bufp < 4)
|
||||
return 0;
|
||||
cycles = 12;
|
||||
u32 bp_cmd = DataRead<u32>(bufp);
|
||||
if (is_preprocess)
|
||||
{
|
||||
LoadBPRegPreprocess(bp_cmd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadBPReg(bp_cmd);
|
||||
INCSTAT(stats.thisFrame.numBPLoads);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
// draw primitives
|
||||
default:
|
||||
if ((cmd_byte & 0xC0) == 0x80)
|
||||
{
|
||||
cycles = 1600;
|
||||
// load vertices
|
||||
if (end - *bufp < 2)
|
||||
return 0;
|
||||
u16 num_vertices = DataRead<u16>(bufp);
|
||||
|
||||
if (is_preprocess)
|
||||
{
|
||||
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
|
||||
if ((size_t) (end - *bufp) < size)
|
||||
return 0;
|
||||
*bufp += size;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!VertexLoaderManager::RunVertices(
|
||||
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||
num_vertices,
|
||||
end - *bufp,
|
||||
g_bSkipCurrentFrame))
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
|
||||
cycles = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Display lists get added directly into the FIFO stream
|
||||
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
||||
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart));
|
||||
|
||||
// In is_preprocess mode, we don't actually care about cycles, at least for
|
||||
// now... make sure the compiler realizes that.
|
||||
return is_preprocess ? 1 : cycles;
|
||||
}
|
||||
|
||||
void OpcodeDecoder_Init()
|
||||
{
|
||||
g_video_buffer_read_ptr = GetVideoBufferStartPtr();
|
||||
}
|
||||
|
||||
|
||||
|
@ -315,33 +130,189 @@ void OpcodeDecoder_Shutdown()
|
|||
{
|
||||
}
|
||||
|
||||
u32 OpcodeDecoder_Run(u8* end, bool in_display_list)
|
||||
template <bool is_preprocess>
|
||||
u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list)
|
||||
{
|
||||
u32 totalCycles = 0;
|
||||
u8* opcodeStart;
|
||||
while (true)
|
||||
{
|
||||
u8* old = g_video_buffer_read_ptr;
|
||||
u32 cycles = Decode</*is_preprocess*/ false, &g_video_buffer_read_ptr>(end, in_display_list);
|
||||
if (cycles == 0)
|
||||
src.WritePointer(&opcodeStart);
|
||||
|
||||
if (!src.size())
|
||||
goto end;
|
||||
|
||||
u8 cmd_byte = src.Read<u8>();
|
||||
int refarray;
|
||||
switch (cmd_byte)
|
||||
{
|
||||
g_video_buffer_read_ptr = old;
|
||||
case GX_NOP:
|
||||
totalCycles += 6; // Hm, this means that we scan over nop streams pretty slowly...
|
||||
break;
|
||||
|
||||
case GX_LOAD_CP_REG: //0x08
|
||||
{
|
||||
if (src.size() < 1 + 4)
|
||||
goto end;
|
||||
totalCycles += 12;
|
||||
u8 sub_cmd = src.Read<u8>();
|
||||
u32 value = src.Read<u32>();
|
||||
LoadCPReg(sub_cmd, value, is_preprocess);
|
||||
if (!is_preprocess)
|
||||
INCSTAT(stats.thisFrame.numCPLoads);
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_LOAD_XF_REG:
|
||||
{
|
||||
if (src.size() < 4)
|
||||
goto end;
|
||||
u32 Cmd2 = src.Read<u32>();
|
||||
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
|
||||
if (src.size() < transfer_size * sizeof(u32))
|
||||
goto end;
|
||||
totalCycles += 18 + 6 * transfer_size;
|
||||
if (!is_preprocess)
|
||||
{
|
||||
u32 xf_address = Cmd2 & 0xFFFF;
|
||||
LoadXFReg(transfer_size, xf_address, src);
|
||||
|
||||
INCSTAT(stats.thisFrame.numXFLoads);
|
||||
}
|
||||
src.Skip<u32>(transfer_size);
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_LOAD_INDX_A: //used for position matrices
|
||||
refarray = 0xC;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_B: //used for normal matrices
|
||||
refarray = 0xD;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_C: //used for postmatrices
|
||||
refarray = 0xE;
|
||||
goto load_indx;
|
||||
case GX_LOAD_INDX_D: //used for lights
|
||||
refarray = 0xF;
|
||||
goto load_indx;
|
||||
load_indx:
|
||||
if (src.size() < 4)
|
||||
goto end;
|
||||
totalCycles += 6;
|
||||
if (is_preprocess)
|
||||
PreprocessIndexedXF(src.Read<u32>(), refarray);
|
||||
else
|
||||
LoadIndexedXF(src.Read<u32>(), refarray);
|
||||
break;
|
||||
|
||||
case GX_CMD_CALL_DL:
|
||||
{
|
||||
if (src.size() < 8)
|
||||
goto end;
|
||||
u32 address = src.Read<u32>();
|
||||
u32 count = src.Read<u32>();
|
||||
|
||||
if (in_display_list)
|
||||
{
|
||||
totalCycles += 6;
|
||||
WARN_LOG(VIDEO,"recursive display list detected");
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_preprocess)
|
||||
InterpretDisplayListPreprocess(address, count);
|
||||
else
|
||||
totalCycles += 6 + InterpretDisplayList(address, count);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
|
||||
totalCycles += 6;
|
||||
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
|
||||
break;
|
||||
|
||||
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
|
||||
totalCycles += 6;
|
||||
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
|
||||
break;
|
||||
|
||||
case GX_LOAD_BP_REG: //0x61
|
||||
// In skipped_frame case: We have to let BP writes through because they set
|
||||
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
|
||||
{
|
||||
if (src.size() < 4)
|
||||
goto end;
|
||||
totalCycles += 12;
|
||||
u32 bp_cmd = src.Read<u32>();
|
||||
if (is_preprocess)
|
||||
{
|
||||
LoadBPRegPreprocess(bp_cmd);
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadBPReg(bp_cmd);
|
||||
INCSTAT(stats.thisFrame.numBPLoads);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
// draw primitives
|
||||
default:
|
||||
if ((cmd_byte & 0xC0) == 0x80)
|
||||
{
|
||||
// load vertices
|
||||
if (src.size() < 2)
|
||||
goto end;
|
||||
u16 num_vertices = src.Read<u16>();
|
||||
|
||||
if (is_preprocess)
|
||||
{
|
||||
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
|
||||
if (src.size() < size)
|
||||
goto end;
|
||||
src.Skip(size);
|
||||
}
|
||||
else
|
||||
{
|
||||
int bytes = VertexLoaderManager::RunVertices(
|
||||
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||
num_vertices,
|
||||
src,
|
||||
g_bSkipCurrentFrame);
|
||||
|
||||
if (bytes < 0)
|
||||
goto end;
|
||||
else
|
||||
src.Skip(bytes);
|
||||
}
|
||||
totalCycles += 1600;
|
||||
}
|
||||
else
|
||||
{
|
||||
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
|
||||
totalCycles += 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
totalCycles += cycles;
|
||||
|
||||
// Display lists get added directly into the FIFO stream
|
||||
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
|
||||
{
|
||||
u8* opcodeEnd;
|
||||
src.WritePointer(&opcodeEnd);
|
||||
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(opcodeEnd - opcodeStart));
|
||||
}
|
||||
}
|
||||
return totalCycles;
|
||||
|
||||
end:
|
||||
if (cycles)
|
||||
{
|
||||
*cycles = totalCycles;
|
||||
}
|
||||
return opcodeStart;
|
||||
}
|
||||
|
||||
// Runs the preprocess decoder over the buffer tracked by
// g_video_buffer_pp_read_ptr until Decode reports (by returning 0) that no
// further complete command is available. The read pointer is then put back to
// where that last, unsuccessful decode attempt started.
void OpcodeDecoder_Preprocess(u8 *end, bool in_display_list)
{
	for (;;)
	{
		u8* const checkpoint = g_video_buffer_pp_read_ptr;
		const u32 decoded = Decode</*is_preprocess*/ true, &g_video_buffer_pp_read_ptr>(end, in_display_list);
		if (decoded != 0)
			continue;

		// Undo whatever the failed attempt consumed and stop.
		g_video_buffer_pp_read_ptr = checkpoint;
		return;
	}
}
|
||||
template u8* OpcodeDecoder_Run<true>(DataReader src, u32* cycles, bool in_display_list);
|
||||
template u8* OpcodeDecoder_Run<false>(DataReader src, u32* cycles, bool in_display_list);
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
|
||||
#define GX_NOP 0x00
|
||||
|
||||
|
@ -40,5 +41,6 @@ extern bool g_bRecordFifoData;
|
|||
|
||||
void OpcodeDecoder_Init();
|
||||
void OpcodeDecoder_Shutdown();
|
||||
u32 OpcodeDecoder_Run(u8* end, bool in_display_list);
|
||||
void OpcodeDecoder_Preprocess(u8* end, bool in_display_list);
|
||||
|
||||
template <bool is_preprocess = false>
|
||||
u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list);
|
||||
|
|
|
@ -45,6 +45,10 @@ int colElements[2];
|
|||
GC_ALIGNED128(float posScale[4]);
|
||||
GC_ALIGNED64(float tcScale[8][2]);
|
||||
|
||||
// This pointer is used as the source/dst for all fixed function loader calls
|
||||
u8* g_video_buffer_read_ptr;
|
||||
u8* g_vertex_manager_write_ptr;
|
||||
|
||||
static const float fractionTable[32] = {
|
||||
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
|
||||
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
|
||||
|
@ -93,8 +97,8 @@ static void LOADERDECL TexMtx_Write_Float4()
|
|||
{
|
||||
#if _M_SSE >= 0x200
|
||||
__m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]);
|
||||
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */));
|
||||
VertexManager::s_pCurBufferPointer += sizeof(float) * 4;
|
||||
_mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */));
|
||||
g_vertex_manager_write_ptr += sizeof(float) * 4;
|
||||
#else
|
||||
DataWrite(0.f);
|
||||
DataWrite(0.f);
|
||||
|
@ -487,10 +491,13 @@ void VertexLoader::ConvertVertices ( int count )
|
|||
#endif
|
||||
}
|
||||
|
||||
void VertexLoader::RunVertices(const VAT& vat, int primitive, int const count)
|
||||
int VertexLoader::RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst)
|
||||
{
|
||||
dst.WritePointer(&g_vertex_manager_write_ptr);
|
||||
src.WritePointer(&g_video_buffer_read_ptr);
|
||||
SetupRunVertices(vat, primitive, count);
|
||||
ConvertVertices(count);
|
||||
return count;
|
||||
}
|
||||
|
||||
void VertexLoader::SetVAT(const VAT& vat)
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "VideoCommon/CPMemory.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
#include "VideoCommon/VertexLoaderUtils.h"
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
#include <smmintrin.h>
|
||||
|
@ -117,7 +118,7 @@ public:
|
|||
{ return m_native_vtx_decl; }
|
||||
|
||||
void SetupRunVertices(const VAT& vat, int primitive, int const count);
|
||||
void RunVertices(const VAT& vat, int primitive, int count);
|
||||
int RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst);
|
||||
|
||||
// For debugging / profiling
|
||||
void AppendToString(std::string *dest) const;
|
||||
|
@ -193,9 +194,9 @@ __forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale)
|
|||
{
|
||||
coords = _mm_shuffle_epi8(coords, threeIn ? kMaskSwap32_3 : kMaskSwap32_2);
|
||||
if (threeOut)
|
||||
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, coords);
|
||||
_mm_storeu_si128((__m128i*)g_vertex_manager_write_ptr, coords);
|
||||
else
|
||||
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, coords);
|
||||
_mm_storel_epi64((__m128i*)g_vertex_manager_write_ptr, coords);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -212,11 +213,11 @@ __forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale)
|
|||
|
||||
__m128 out = _mm_mul_ps(_mm_cvtepi32_ps(coords), scale);
|
||||
if (threeOut)
|
||||
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, out);
|
||||
_mm_storeu_ps((float*)g_vertex_manager_write_ptr, out);
|
||||
else
|
||||
_mm_storel_pi((__m64*)VertexManager::s_pCurBufferPointer, out);
|
||||
_mm_storel_pi((__m64*)g_vertex_manager_write_ptr, out);
|
||||
}
|
||||
|
||||
VertexManager::s_pCurBufferPointer += sizeof(float) * (2 + threeOut);
|
||||
g_vertex_manager_write_ptr += sizeof(float) * (2 + threeOut);
|
||||
}
|
||||
#endif
|
|
@ -130,24 +130,23 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state)
|
|||
return loader;
|
||||
}
|
||||
|
||||
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
|
||||
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing)
|
||||
{
|
||||
if (!count)
|
||||
return true;
|
||||
return 0;
|
||||
|
||||
CPState* state = &g_main_cp_state;
|
||||
|
||||
VertexLoader* loader = RefreshLoader(vtx_attr_group, state);
|
||||
|
||||
size_t size = count * loader->GetVertexSize();
|
||||
if (buf_size < size)
|
||||
return false;
|
||||
int size = count * loader->GetVertexSize();
|
||||
if ((int)src.size() < size)
|
||||
return -1;
|
||||
|
||||
if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5))
|
||||
{
|
||||
// if cull mode is CULL_ALL, ignore triangles and quads
|
||||
DataSkip((u32)size);
|
||||
return true;
|
||||
return size;
|
||||
}
|
||||
|
||||
NativeVertexFormat* native = loader->GetNativeVertexFormat();
|
||||
|
@ -157,16 +156,18 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
|
|||
VertexManager::Flush();
|
||||
s_current_vtx_fmt = native;
|
||||
|
||||
VertexManager::PrepareForAdditionalData(primitive, count,
|
||||
DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count,
|
||||
loader->GetNativeVertexDeclaration().stride);
|
||||
|
||||
loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);
|
||||
count = loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count, src, dst);
|
||||
|
||||
IndexGenerator::AddIndices(primitive, count);
|
||||
|
||||
VertexManager::FlushData(count, loader->GetNativeVertexDeclaration().stride);
|
||||
|
||||
ADDSTAT(stats.thisFrame.numPrims, count);
|
||||
INCSTAT(stats.thisFrame.numPrimitiveJoins);
|
||||
return true;
|
||||
return size;
|
||||
}
|
||||
|
||||
int GetVertexSize(int vtx_attr_group, bool preprocess)
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <string>
|
||||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/NativeVertexFormat.h"
|
||||
|
||||
namespace VertexLoaderManager
|
||||
|
@ -17,8 +18,9 @@ namespace VertexLoaderManager
|
|||
void MarkAllDirty();
|
||||
|
||||
int GetVertexSize(int vtx_attr_group, bool preprocess);
|
||||
// Returns false if buf_size is insufficient.
|
||||
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false);
|
||||
|
||||
// Returns -1 if src does not hold enough data for count vertices, else the number of bytes consumed
|
||||
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing = false);
|
||||
|
||||
// For debugging
|
||||
void AppendListToString(std::string *dest);
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright 2014 Dolphin Emulator Project
|
||||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
|
||||
#include "Common/Common.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
|
||||
extern u8* g_video_buffer_read_ptr;
|
||||
extern u8* g_vertex_manager_write_ptr;
|
||||
|
||||
|
||||
// Moves the global video buffer read pointer forward by `skip` bytes
// without reading anything.
__forceinline void DataSkip(u32 skip)
{
	g_video_buffer_read_ptr = g_video_buffer_read_ptr + skip;
}
|
||||
|
||||
// probably unnecessary
|
||||
template <int count>
|
||||
__forceinline void DataSkip()
|
||||
{
|
||||
g_video_buffer_read_ptr += count;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
|
||||
{
|
||||
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(*bufp + _uOffset));
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO: kill these
|
||||
// Convenience wrapper: peeks a u8 at `_uOffset` bytes past the global video
// buffer read pointer without advancing it.
__forceinline u8 DataPeek8(int _uOffset)
{
	return DataPeek<u8>(_uOffset, &g_video_buffer_read_ptr);
}
|
||||
|
||||
// Convenience wrapper: peeks a u16 at `_uOffset` bytes past the global video
// buffer read pointer without advancing it.
__forceinline u16 DataPeek16(int _uOffset)
{
	return DataPeek<u16>(_uOffset, &g_video_buffer_read_ptr);
}
|
||||
|
||||
// Convenience wrapper: peeks a u32 at `_uOffset` bytes past the global video
// buffer read pointer without advancing it.
__forceinline u32 DataPeek32(int _uOffset)
{
	return DataPeek<u32>(_uOffset, &g_video_buffer_read_ptr);
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
|
||||
{
|
||||
auto const result = DataPeek<T>(0, bufp);
|
||||
*bufp += sizeof(T);
|
||||
return result;
|
||||
}
|
||||
|
||||
// TODO: kill these
|
||||
// Convenience wrapper: reads a u8 from the global video buffer read pointer
// and advances it.
__forceinline u8 DataReadU8()
{
	return DataRead<u8>(&g_video_buffer_read_ptr);
}
|
||||
|
||||
// Convenience wrapper: reads an s8 from the global video buffer read pointer
// and advances it.
__forceinline s8 DataReadS8()
{
	return DataRead<s8>(&g_video_buffer_read_ptr);
}
|
||||
|
||||
// Convenience wrapper: reads a u16 from the global video buffer read pointer
// and advances it.
__forceinline u16 DataReadU16()
{
	return DataRead<u16>(&g_video_buffer_read_ptr);
}
|
||||
|
||||
// Convenience wrapper: reads a u32 from the global video buffer read pointer
// and advances it.
__forceinline u32 DataReadU32()
{
	return DataRead<u32>(&g_video_buffer_read_ptr);
}
|
||||
|
||||
// Reads a u32 from the global video buffer read pointer exactly as stored
// (no Common::FromBigEndian conversion) and advances the pointer by 4 bytes.
__forceinline u32 DataReadU32Unswapped()
{
	const u32 raw = *reinterpret_cast<u32*>(g_video_buffer_read_ptr);
	g_video_buffer_read_ptr = g_video_buffer_read_ptr + 4;
	return raw;
}
|
||||
|
||||
// Returns the current value of the global video buffer read pointer.
__forceinline u8* DataGetPosition()
{
	u8* const position = g_video_buffer_read_ptr;
	return position;
}
|
||||
|
||||
template <typename T>
|
||||
__forceinline void DataWrite(T data)
|
||||
{
|
||||
*(T*)g_vertex_manager_write_ptr = data;
|
||||
g_vertex_manager_write_ptr += sizeof(T);
|
||||
}
|
|
@ -14,7 +14,7 @@
|
|||
#include "VideoCommon/VideoCommon.h"
|
||||
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
|
||||
|
||||
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
|
||||
|
||||
|
@ -43,13 +43,14 @@ template <typename T, int N>
|
|||
__forceinline void ReadIndirect(const T* data)
|
||||
{
|
||||
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
|
||||
DataWriter dst;
|
||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||
|
||||
for (int i = 0; i != N; ++i)
|
||||
{
|
||||
dst.Write(FracAdjust(Common::FromBigEndian(data[i])));
|
||||
}
|
||||
|
||||
dst.WritePointer(&g_vertex_manager_write_ptr);
|
||||
LOG_NORM();
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
#include "VideoCommon/VideoCommon.h"
|
||||
|
||||
// Thoughts on the implementation of a vertex loader compiler.
|
||||
// s_pCurBufferPointer should definitely be in a register.
|
||||
// g_vertex_manager_write_ptr should definitely be in a register.
|
||||
// Could load the position scale factor in XMM7, for example.
|
||||
|
||||
// The pointer inside DataReadU8 in another.
|
||||
|
@ -75,12 +75,14 @@ void LOADERDECL Pos_ReadDirect()
|
|||
{
|
||||
static_assert(N <= 3, "N > 3 is not sane!");
|
||||
auto const scale = posScale[0];
|
||||
DataWriter dst;
|
||||
DataReader src;
|
||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||
DataReader src(g_video_buffer_read_ptr, nullptr);
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
dst.Write(i<N ? PosScale(src.Read<T>(), scale) : 0.f);
|
||||
|
||||
dst.WritePointer(&g_vertex_manager_write_ptr);
|
||||
src.WritePointer(&g_video_buffer_read_ptr);
|
||||
LOG_VTX();
|
||||
}
|
||||
|
||||
|
@ -93,11 +95,12 @@ void LOADERDECL Pos_ReadIndex()
|
|||
auto const index = DataRead<I>();
|
||||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
|
||||
auto const scale = posScale[0];
|
||||
DataWriter dst;
|
||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||
|
||||
for (int i = 0; i < 3; ++i)
|
||||
dst.Write(i<N ? PosScale(Common::FromBigEndian(data[i]), scale) : 0.f);
|
||||
|
||||
dst.WritePointer(&g_vertex_manager_write_ptr);
|
||||
LOG_VTX();
|
||||
}
|
||||
|
||||
|
|
|
@ -19,14 +19,14 @@ template <>
|
|||
__forceinline void LOG_TEX<1>()
|
||||
{
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
// PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
// PRIM_LOG("tex: %f, ", ((float*)g_vertex_manager_write_ptr)[-1]);
|
||||
}
|
||||
|
||||
template <>
|
||||
__forceinline void LOG_TEX<2>()
|
||||
{
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
// PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
// PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
|
||||
}
|
||||
|
||||
static void LOADERDECL TexCoord_Read_Dummy()
|
||||
|
@ -50,12 +50,14 @@ template <typename T, int N>
|
|||
void LOADERDECL TexCoord_ReadDirect()
|
||||
{
|
||||
auto const scale = tcScale[tcIndex][0];
|
||||
DataWriter dst;
|
||||
DataReader src;
|
||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||
DataReader src(g_video_buffer_read_ptr, nullptr);
|
||||
|
||||
for (int i = 0; i != N; ++i)
|
||||
dst.Write(TCScale(src.Read<T>(), scale));
|
||||
|
||||
dst.WritePointer(&g_vertex_manager_write_ptr);
|
||||
src.WritePointer(&g_video_buffer_read_ptr);
|
||||
LOG_TEX<N>();
|
||||
|
||||
++tcIndex;
|
||||
|
@ -70,11 +72,12 @@ void LOADERDECL TexCoord_ReadIndex()
|
|||
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
|
||||
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex]));
|
||||
auto const scale = tcScale[tcIndex][0];
|
||||
DataWriter dst;
|
||||
DataReader dst(g_vertex_manager_write_ptr, nullptr);
|
||||
|
||||
for (int i = 0; i != N; ++i)
|
||||
dst.Write(TCScale(Common::FromBigEndian(data[i]), scale));
|
||||
|
||||
dst.WritePointer(&g_vertex_manager_write_ptr);
|
||||
LOG_TEX<N>();
|
||||
++tcIndex;
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ u32 VertexManager::GetRemainingSize()
|
|||
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
|
||||
}
|
||||
|
||||
void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
|
||||
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
|
||||
{
|
||||
// The SSE vertex loader can write up to 4 bytes past the end
|
||||
u32 const needed_vertex_bytes = count * stride + 4;
|
||||
|
@ -83,6 +83,13 @@ void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 strid
|
|||
g_vertex_manager->ResetBuffer(stride);
|
||||
IsFlushed = false;
|
||||
}
|
||||
|
||||
return DataReader(s_pCurBufferPointer, s_pEndBufferPointer);
|
||||
}
|
||||
|
||||
// Advances the current vertex buffer pointer past `count` entries of
// `stride` bytes each.
void VertexManager::FlushData(u32 count, u32 stride)
{
	const u32 written_bytes = count * stride;
	s_pCurBufferPointer += written_bytes;
}
|
||||
|
||||
u32 VertexManager::GetRemainingIndices(int primitive)
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include <vector>
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
|
||||
class NativeVertexFormat;
|
||||
class PointerWrap;
|
||||
|
@ -31,21 +32,14 @@ public:
|
|||
// needs to be virtual for DX11's dtor
|
||||
virtual ~VertexManager();
|
||||
|
||||
static u8 *s_pCurBufferPointer;
|
||||
static u8 *s_pBaseBufferPointer;
|
||||
static u8 *s_pEndBufferPointer;
|
||||
|
||||
static u32 GetRemainingSize();
|
||||
static void PrepareForAdditionalData(int primitive, u32 count, u32 stride);
|
||||
static u32 GetRemainingIndices(int primitive);
|
||||
static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride);
|
||||
static void FlushData(u32 count, u32 stride);
|
||||
|
||||
static void Flush();
|
||||
|
||||
virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0;
|
||||
|
||||
static void DoState(PointerWrap& p);
|
||||
virtual void CreateDeviceObjects(){}
|
||||
virtual void DestroyDeviceObjects(){}
|
||||
|
||||
protected:
|
||||
virtual void vDoState(PointerWrap& p) { }
|
||||
|
@ -54,12 +48,20 @@ protected:
|
|||
|
||||
virtual void ResetBuffer(u32 stride) = 0;
|
||||
|
||||
static u8* s_pCurBufferPointer;
|
||||
static u8* s_pBaseBufferPointer;
|
||||
static u8* s_pEndBufferPointer;
|
||||
|
||||
static u32 GetRemainingSize();
|
||||
static u32 GetRemainingIndices(int primitive);
|
||||
|
||||
private:
|
||||
static bool IsFlushed;
|
||||
|
||||
// virtual void Draw(u32 stride, bool alphapass) = 0;
|
||||
// temp
|
||||
virtual void vFlush(bool useDstAlpha) = 0;
|
||||
|
||||
virtual void CreateDeviceObjects() {}
|
||||
virtual void DestroyDeviceObjects() {}
|
||||
};
|
||||
|
||||
extern VertexManager *g_vertex_manager;
|
||||
|
|
|
@ -61,7 +61,7 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
|
|||
#endif
|
||||
|
||||
// warning: mapping buffer should be disabled to use this
|
||||
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
|
||||
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
|
||||
|
||||
#define LOG_VTX()
|
||||
|
||||
|
|
|
@ -117,6 +117,7 @@
|
|||
<ClInclude Include="TextureDecoder.h" />
|
||||
<ClInclude Include="VertexLoader.h" />
|
||||
<ClInclude Include="VertexLoaderManager.h" />
|
||||
<ClInclude Include="VertexLoaderUtils.h" />
|
||||
<ClInclude Include="VertexLoader_Color.h" />
|
||||
<ClInclude Include="VertexLoader_Normal.h" />
|
||||
<ClInclude Include="VertexLoader_Position.h" />
|
||||
|
|
|
@ -275,6 +275,9 @@
|
|||
<ClInclude Include="VertexLoaderManager.h">
|
||||
<Filter>Vertex Loading</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="VertexLoaderUtils.h">
|
||||
<Filter>Vertex Loading</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="BoundingBox.h">
|
||||
<Filter>Util</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "VideoCommon/CPMemory.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
|
||||
|
||||
// Lighting
|
||||
|
@ -273,6 +274,6 @@ struct XFMemory
|
|||
|
||||
extern XFMemory xfmem;
|
||||
|
||||
void LoadXFReg(u32 transferSize, u32 address);
|
||||
void LoadXFReg(u32 transferSize, u32 address, DataReader src);
|
||||
void LoadIndexedXF(u32 val, int array);
|
||||
void PreprocessIndexedXF(u32 val, int refarray);
|
||||
|
|
|
@ -19,14 +19,14 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress)
|
|||
VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize);
|
||||
}
|
||||
|
||||
static void XFRegWritten(int transferSize, u32 baseAddress)
|
||||
static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
|
||||
{
|
||||
u32 address = baseAddress;
|
||||
u32 dataIndex = 0;
|
||||
|
||||
while (transferSize > 0 && address < 0x1058)
|
||||
{
|
||||
u32 newValue = DataPeek<u32>(dataIndex * sizeof(u32));
|
||||
u32 newValue = src.Peek<u32>(dataIndex * sizeof(u32));
|
||||
u32 nextAddress = address + 1;
|
||||
|
||||
switch (address)
|
||||
|
@ -193,7 +193,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress)
|
|||
}
|
||||
}
|
||||
|
||||
void LoadXFReg(u32 transferSize, u32 baseAddress)
|
||||
void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src)
|
||||
{
|
||||
// do not allow writes past registers
|
||||
if (baseAddress + transferSize > 0x1058)
|
||||
|
@ -229,17 +229,17 @@ void LoadXFReg(u32 transferSize, u32 baseAddress)
|
|||
XFMemWritten(xfMemTransferSize, xfMemBase);
|
||||
for (u32 i = 0; i < xfMemTransferSize; i++)
|
||||
{
|
||||
((u32*)&xfmem)[xfMemBase + i] = DataRead<u32>();
|
||||
((u32*)&xfmem)[xfMemBase + i] = src.Read<u32>();
|
||||
}
|
||||
}
|
||||
|
||||
// write to XF regs
|
||||
if (transferSize > 0)
|
||||
{
|
||||
XFRegWritten(transferSize, baseAddress);
|
||||
XFRegWritten(transferSize, baseAddress, src);
|
||||
for (u32 i = 0; i < transferSize; i++)
|
||||
{
|
||||
((u32*)&xfmem)[baseAddress + i] = DataRead<u32>();
|
||||
((u32*)&xfmem)[baseAddress + i] = src.Read<u32>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -74,12 +74,14 @@ protected:
|
|||
|
||||
void ResetPointers()
|
||||
{
|
||||
g_video_buffer_read_ptr = &input_memory[0];
|
||||
VertexManager::s_pCurBufferPointer = &output_memory[0];
|
||||
m_input_pos = m_output_pos = 0;
|
||||
src = DataReader(input_memory, input_memory+sizeof(input_memory));
|
||||
dst = DataReader(output_memory, output_memory+sizeof(output_memory));
|
||||
}
|
||||
|
||||
u32 m_input_pos, m_output_pos;
|
||||
DataReader src;
|
||||
DataReader dst;
|
||||
|
||||
TVtxDesc m_vtx_desc;
|
||||
VAT m_vtx_attr;
|
||||
|
@ -103,7 +105,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ)
|
|||
Input(0.0f); Input(0.0f); Input(1.0f);
|
||||
|
||||
// Convert 4 points. "7" -> primitive are points.
|
||||
loader.RunVertices(m_vtx_attr, 7, 4);
|
||||
int count = loader.RunVertices(m_vtx_attr, 7, 4, src, dst);
|
||||
src.Skip(4 * loader.GetVertexSize());
|
||||
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
|
||||
|
||||
ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f);
|
||||
ExpectOut(1.0f); ExpectOut(0.0f); ExpectOut(0.0f);
|
||||
|
@ -113,7 +117,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ)
|
|||
// Test that scale does nothing for floating point inputs.
|
||||
Input(1.0f); Input(2.0f); Input(4.0f);
|
||||
m_vtx_attr.g0.PosFrac = 1;
|
||||
loader.RunVertices(m_vtx_attr, 7, 1);
|
||||
count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst);
|
||||
src.Skip(1 * loader.GetVertexSize());
|
||||
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
|
||||
ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(4.0f);
|
||||
}
|
||||
|
||||
|
@ -136,7 +142,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY)
|
|||
Input<u16>(12345); Input<u16>(54321);
|
||||
|
||||
// Convert 5 points. "7" -> primitive are points.
|
||||
loader.RunVertices(m_vtx_attr, 7, 5);
|
||||
int count = loader.RunVertices(m_vtx_attr, 7, 5, src, dst);
|
||||
src.Skip(5 * loader.GetVertexSize());
|
||||
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
|
||||
|
||||
ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f);
|
||||
ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(0.0f);
|
||||
|
@ -147,7 +155,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY)
|
|||
// Test that scale works on U16 inputs.
|
||||
Input<u16>(42); Input<u16>(24);
|
||||
m_vtx_attr.g0.PosFrac = 1;
|
||||
loader.RunVertices(m_vtx_attr, 7, 1);
|
||||
count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst);
|
||||
src.Skip(1 * loader.GetVertexSize());
|
||||
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
|
||||
ExpectOut(21.0f); ExpectOut(12.0f); ExpectOut(0.0f);
|
||||
}
|
||||
|
||||
|
@ -165,7 +175,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed)
|
|||
for (int i = 0; i < 1000; ++i)
|
||||
{
|
||||
ResetPointers();
|
||||
loader.RunVertices(m_vtx_attr, 7, 100000);
|
||||
int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst);
|
||||
src.Skip(100000 * loader.GetVertexSize());
|
||||
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -183,7 +195,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed)
|
|||
for (int i = 0; i < 1000; ++i)
|
||||
{
|
||||
ResetPointers();
|
||||
loader.RunVertices(m_vtx_attr, 7, 100000);
|
||||
int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst);
|
||||
src.Skip(100000 * loader.GetVertexSize());
|
||||
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -244,6 +258,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
|
|||
for (int i = 0; i < 100; ++i)
|
||||
{
|
||||
ResetPointers();
|
||||
loader.RunVertices(m_vtx_attr, 7, 100000);
|
||||
int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst);
|
||||
src.Skip(100000 * loader.GetVertexSize());
|
||||
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue