Merge pull request #1609 from degasus/datareader

VideoCommon: Datareader
This commit is contained in:
Markus Wick 2014-12-10 06:53:11 +01:00
commit 4c139ca7cc
25 changed files with 464 additions and 414 deletions

View File

@ -14,8 +14,8 @@
#include "VideoBackends/Software/SWVertexLoader.h"
#include "VideoBackends/Software/SWVideoConfig.h"
#include "VideoBackends/Software/XFMemLoader.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/VertexLoaderUtils.h"
typedef void (*DecodingFunction)(u32);

View File

@ -20,8 +20,8 @@
#include "VideoBackends/Software/SWCommandProcessor.h"
#include "VideoBackends/Software/VideoBackend.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/VertexLoaderUtils.h"
namespace SWCommandProcessor
{

View File

@ -13,7 +13,6 @@
#include "VideoBackends/Software/TransformUnit.h"
#include "VideoBackends/Software/XFMemLoader.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/VertexLoader.h"
#include "VideoCommon/VertexLoader_Color.h"
#include "VideoCommon/VertexLoader_Normal.h"
@ -286,20 +285,20 @@ void SWVertexLoader::LoadTexMtx(SWVertexLoader *vertexLoader, InputVertexData *v
// Aims the shared vertex write pointer at vertex->position and then invokes
// the loader's position callback (which presumably writes through that pointer).
// NOTE(review): the s_pCurBufferPointer and g_vertex_manager_write_ptr
// assignments are the old and new form of the same statement; this listing
// shows both sides of the commit.
void SWVertexLoader::LoadPosition(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 unused)
{
VertexManager::s_pCurBufferPointer = (u8*)&vertex->position;
g_vertex_manager_write_ptr = (u8*)&vertex->position;
vertexLoader->m_positionLoader();
}
// Aims the shared vertex write pointer at vertex->normal and then invokes the
// loader's normal callback (which presumably writes through that pointer).
// NOTE(review): both pointer assignments target the same field; they are the
// old and new spelling of one statement in this commit listing.
void SWVertexLoader::LoadNormal(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 unused)
{
VertexManager::s_pCurBufferPointer = (u8*)&vertex->normal;
g_vertex_manager_write_ptr = (u8*)&vertex->normal;
vertexLoader->m_normalLoader();
}
void SWVertexLoader::LoadColor(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 index)
{
u32 color;
VertexManager::s_pCurBufferPointer = (u8*)&color;
g_vertex_manager_write_ptr = (u8*)&color;
colIndex = index;
vertexLoader->m_colorLoader[index]();
@ -309,7 +308,7 @@ void SWVertexLoader::LoadColor(SWVertexLoader *vertexLoader, InputVertexData *ve
// Aims the shared vertex write pointer at vertex->texCoords[index], records
// the channel in tcIndex, and invokes the texcoord callback for that channel.
// NOTE(review): both pointer assignments target the same element; they are
// the old and new spelling of one statement in this commit listing.
void SWVertexLoader::LoadTexCoord(SWVertexLoader *vertexLoader, InputVertexData *vertex, u8 index)
{
VertexManager::s_pCurBufferPointer = (u8*)&vertex->texCoords[index];
g_vertex_manager_write_ptr = (u8*)&vertex->texCoords[index];
// tcIndex is set before the callback runs; presumably the callback reads it.
tcIndex = index;
vertexLoader->m_texCoordLoader[index]();
}

View File

@ -32,7 +32,7 @@ static PortableVertexDeclaration vertexDecl;
// Snapshots the current vertex write pointer into bufferPos.
// NOTE(review): the two assignments are the old (VertexManager member) and new
// (g_vertex_manager_write_ptr) source of the same value in this commit listing.
void LOADERDECL SetVertexBufferPosition()
{
bufferPos = VertexManager::s_pCurBufferPointer;
bufferPos = g_vertex_manager_write_ptr;
}
// Prepares the bounding box for new primitive data

View File

@ -5,124 +5,61 @@
#pragma once
#include "Common/Common.h"
#include "VideoCommon/VertexManagerBase.h"
extern u8* g_video_buffer_read_ptr;
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
#include <tmmintrin.h>
#endif
// Advances the global FIFO read pointer by `skip` bytes without reading them.
__forceinline void DataSkip(u32 skip)
{
g_video_buffer_read_ptr += skip;
}
// Compile-time-count variant of DataSkip: advances the global FIFO read
// pointer by `count` bytes. (Original note: probably unnecessary.)
template <int count>
__forceinline void DataSkip()
{
g_video_buffer_read_ptr += count;
}
// Reads a T located `_uOffset` bytes past *bufp, passes it through
// Common::FromBigEndian, and returns it. *bufp is not advanced.
// bufp defaults to the global FIFO read pointer.
template <typename T>
__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
{
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(*bufp + _uOffset));
return result;
}
// TODO: kill these
// Fixed-width convenience wrapper: DataPeek<u8> on the global FIFO read pointer.
__forceinline u8 DataPeek8(int _uOffset)
{
return DataPeek<u8>(_uOffset);
}
// Fixed-width convenience wrapper: DataPeek<u16> on the global FIFO read pointer.
__forceinline u16 DataPeek16(int _uOffset)
{
return DataPeek<u16>(_uOffset);
}
// Fixed-width convenience wrapper: DataPeek<u32> on the global FIFO read pointer.
__forceinline u32 DataPeek32(int _uOffset)
{
return DataPeek<u32>(_uOffset);
}
// Reads a byte-swapped T at *bufp (via DataPeek at offset 0), then advances
// *bufp by sizeof(T). bufp defaults to the global FIFO read pointer.
template <typename T>
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
{
auto const result = DataPeek<T>(0, bufp);
*bufp += sizeof(T);
return result;
}
// Cursor over a raw byte buffer used to pull typed values out of it.
// NOTE(review): this listing interleaves the previous and the rewritten
// versions of the class (a commit diff without +/- markers), so it is not
// compilable as shown. The `offset`-based lines belong to the previous
// version, the `end`-based lines to the rewritten one.
class DataReader
{
public:
// Previous version: binds to the global FIFO read pointer on construction and
// adds the consumed offset back to that global on destruction.
inline DataReader() : buffer(g_video_buffer_read_ptr), offset(0) {}
inline ~DataReader() { g_video_buffer_read_ptr += offset; }
template <typename T> inline T Read()
// Rewritten version: an empty reader, or a reader over [src, _end).
__forceinline DataReader()
: buffer(nullptr), end(nullptr) {}
__forceinline DataReader(u8* src, u8* _end)
: buffer(src), end(_end) {}
// Rewritten version: stores the current cursor position into *src.
__forceinline void WritePointer(u8** src)
{
const T result = Common::FromBigEndian(*(T*)(buffer + offset));
offset += sizeof(T);
*src = buffer;
}
// Repositions the cursor to `src`; `end` is left untouched. Returns `src`.
__forceinline u8* operator=(u8* src)
{
buffer = src;
return src;
}
// Number of bytes between the cursor and `end`.
__forceinline size_t size()
{
return end - buffer;
}
// Returns the T located `offset` bytes past the cursor without advancing.
// When `swapped` is true the value is passed through Common::FromBigEndian.
template <typename T, bool swapped = true> __forceinline T Peek(int offset = 0)
{
T data = *(T*)(buffer + offset);
if (swapped)
data = Common::FromBigEndian(data);
return data;
}
// Peek at the cursor, then advance the cursor by sizeof(T).
template <typename T, bool swapped = true> __forceinline T Read()
{
const T result = Peek<T, swapped>();
buffer += sizeof(T);
return result;
}
private:
u8 *buffer;
int offset;
};
// TODO: kill these
// Fixed-width convenience wrapper: DataRead<u8> on the global FIFO read pointer.
__forceinline u8 DataReadU8()
{
return DataRead<u8>();
}
// Fixed-width convenience wrapper: DataRead<s8> on the global FIFO read pointer.
__forceinline s8 DataReadS8()
{
return DataRead<s8>();
}
// Fixed-width convenience wrapper: DataRead<u16> on the global FIFO read pointer.
__forceinline u16 DataReadU16()
{
return DataRead<u16>();
}
// Fixed-width convenience wrapper: DataRead<u32> on the global FIFO read pointer.
__forceinline u32 DataReadU32()
{
return DataRead<u32>();
}
// Reads 4 bytes at the global FIFO read pointer as a u32 without any byte
// swap, then advances the pointer by 4.
__forceinline u32 DataReadU32Unswapped()
{
u32 tmp = *(u32*)g_video_buffer_read_ptr;
g_video_buffer_read_ptr += 4;
return tmp;
}
// Returns the current value of the global FIFO read pointer.
__forceinline u8* DataGetPosition()
{
return g_video_buffer_read_ptr;
}
// Stores `data` unchanged (no byte swap) at VertexManager::s_pCurBufferPointer
// and advances that pointer by sizeof(T).
template <typename T>
__forceinline void DataWrite(T data)
{
*(T*)VertexManager::s_pCurBufferPointer = data;
VertexManager::s_pCurBufferPointer += sizeof(T);
}
// Sequential typed writer.
// NOTE(review): this listing interleaves the previous DataWriter class with
// the write-side members of the rewritten DataReader (a commit diff without
// +/- markers), so it is not compilable as shown. The `offset`-based lines
// belong to the previous version; Write<T, swapped>, Skip and the
// `buffer`/`end` members belong to the rewritten one.
class DataWriter
{
public:
// Previous version: binds to VertexManager::s_pCurBufferPointer on
// construction and adds the written offset back to it on destruction.
inline DataWriter() : buffer(VertexManager::s_pCurBufferPointer), offset(0) {}
inline ~DataWriter() { VertexManager::s_pCurBufferPointer += offset; }
template <typename T> inline void Write(T data)
// Rewritten version: stores `data` at the cursor and advances by sizeof(T).
// When `swapped` is true the value is first passed through Common::FromBigEndian.
template <typename T, bool swapped = false> __forceinline void Write(T data)
{
*(T*)(buffer+offset) = data;
offset += sizeof(T);
if (swapped)
data = Common::FromBigEndian(data);
*(T*)(buffer) = data;
buffer += sizeof(T);
}
// Advances the cursor by `data` elements of type T (default: one byte).
template <typename T = u8> __forceinline void Skip(size_t data = 1)
{
buffer += sizeof(T) * data;
}
private:
u8 *buffer;
int offset;
u8* __restrict buffer;
u8* end;
};

View File

@ -41,10 +41,10 @@ bool g_use_deterministic_gpu_thread;
static std::mutex s_video_buffer_lock;
static std::condition_variable s_video_buffer_cond;
static u8* s_video_buffer;
u8* g_video_buffer_read_ptr;
static u8* s_video_buffer_read_ptr;
static std::atomic<u8*> s_video_buffer_write_ptr;
static std::atomic<u8*> s_video_buffer_seen_ptr;
u8* g_video_buffer_pp_read_ptr;
static u8* s_video_buffer_pp_read_ptr;
// The read_ptr is always owned by the GPU thread. In normal mode, so is the
// write_ptr, despite it being atomic. In g_use_deterministic_gpu_thread mode,
// things get a bit more complicated:
@ -63,11 +63,11 @@ void Fifo_DoState(PointerWrap &p)
u8* write_ptr = s_video_buffer_write_ptr;
p.DoPointer(write_ptr, s_video_buffer);
s_video_buffer_write_ptr = write_ptr;
p.DoPointer(g_video_buffer_read_ptr, s_video_buffer);
p.DoPointer(s_video_buffer_read_ptr, s_video_buffer);
if (p.mode == PointerWrap::MODE_READ && g_use_deterministic_gpu_thread)
{
// We're good and paused, right?
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
}
p.Do(g_bSkipCurrentFrame);
}
@ -106,8 +106,8 @@ void Fifo_Shutdown()
FreeMemoryPages(s_video_buffer, FIFO_SIZE);
s_video_buffer = nullptr;
s_video_buffer_write_ptr = nullptr;
g_video_buffer_pp_read_ptr = nullptr;
g_video_buffer_read_ptr = nullptr;
s_video_buffer_pp_read_ptr = nullptr;
s_video_buffer_read_ptr = nullptr;
s_video_buffer_seen_ptr = nullptr;
s_fifo_aux_write_ptr = nullptr;
s_fifo_aux_read_ptr = nullptr;
@ -169,15 +169,15 @@ void SyncGPU(SyncGPUReason reason, bool may_move_read_ptr)
if (may_move_read_ptr)
{
// what's left over in the buffer
size_t size = write_ptr - g_video_buffer_pp_read_ptr;
size_t size = write_ptr - s_video_buffer_pp_read_ptr;
memmove(s_video_buffer, g_video_buffer_pp_read_ptr, size);
memmove(s_video_buffer, s_video_buffer_pp_read_ptr, size);
// This change always decreases the pointers. We write seen_ptr
// after write_ptr here, and read it before in RunGpuLoop, so
// 'write_ptr > seen_ptr' there cannot become spuriously true.
s_video_buffer_write_ptr = write_ptr = s_video_buffer + size;
g_video_buffer_pp_read_ptr = s_video_buffer;
g_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_pp_read_ptr = s_video_buffer;
s_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_seen_ptr = write_ptr;
}
}
@ -213,15 +213,15 @@ static void ReadDataFromFifo(u32 readPtr)
size_t len = 32;
if (len > (size_t)(s_video_buffer + FIFO_SIZE - s_video_buffer_write_ptr))
{
size_t existing_len = s_video_buffer_write_ptr - g_video_buffer_read_ptr;
size_t existing_len = s_video_buffer_write_ptr - s_video_buffer_read_ptr;
if (len > (size_t)(FIFO_SIZE - existing_len))
{
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE);
return;
}
memmove(s_video_buffer, g_video_buffer_read_ptr, existing_len);
memmove(s_video_buffer, s_video_buffer_read_ptr, existing_len);
s_video_buffer_write_ptr = s_video_buffer + existing_len;
g_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_read_ptr = s_video_buffer;
}
// Copy new video instructions to s_video_buffer for future use in rendering the new picture
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
@ -238,13 +238,13 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
// We can't wrap around while the GPU is working on the data.
// This should be very rare due to the reset in SyncGPU.
SyncGPU(SYNC_GPU_WRAPAROUND);
if (g_video_buffer_pp_read_ptr != g_video_buffer_read_ptr)
if (s_video_buffer_pp_read_ptr != s_video_buffer_read_ptr)
{
PanicAlert("desynced read pointers");
return;
}
write_ptr = s_video_buffer_write_ptr;
size_t existing_len = write_ptr - g_video_buffer_pp_read_ptr;
size_t existing_len = write_ptr - s_video_buffer_pp_read_ptr;
if (len > (size_t)(FIFO_SIZE - existing_len))
{
PanicAlert("FIFO out of bounds (existing %lu + new %lu > %lu)", (unsigned long) existing_len, (unsigned long) len, (unsigned long) FIFO_SIZE);
@ -252,17 +252,17 @@ static void ReadDataFromFifoOnCPU(u32 readPtr)
}
}
Memory::CopyFromEmu(s_video_buffer_write_ptr, readPtr, len);
OpcodeDecoder_Preprocess(write_ptr + len, false);
s_video_buffer_pp_read_ptr = OpcodeDecoder_Run<true>(DataReader(s_video_buffer_pp_read_ptr, write_ptr + len), nullptr, false);
// This would have to be locked if the GPU thread didn't spin.
s_video_buffer_write_ptr = write_ptr + len;
}
// Rewinds every video-buffer cursor (read, write, seen, preprocess-read) to
// the start of s_video_buffer and both aux-FIFO cursors to the start of
// s_fifo_aux_data.
// NOTE(review): the g_video_buffer_* and s_video_buffer_* assignments are the
// old and new names of the same variables; this listing shows both sides of
// the commit.
void ResetVideoBuffer()
{
g_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_read_ptr = s_video_buffer;
s_video_buffer_write_ptr = s_video_buffer;
s_video_buffer_seen_ptr = s_video_buffer;
g_video_buffer_pp_read_ptr = s_video_buffer;
s_video_buffer_pp_read_ptr = s_video_buffer;
s_fifo_aux_write_ptr = s_fifo_aux_data;
s_fifo_aux_read_ptr = s_fifo_aux_data;
}
@ -294,7 +294,7 @@ void RunGpuLoop()
// See comment in SyncGPU
if (write_ptr > seen_ptr)
{
OpcodeDecoder_Run(write_ptr, false);
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), nullptr, false);
{
std::lock_guard<std::mutex> vblk(s_video_buffer_lock);
@ -330,7 +330,7 @@ void RunGpuLoop()
u8* write_ptr = s_video_buffer_write_ptr;
cyclesExecuted = OpcodeDecoder_Run(write_ptr, false);
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, write_ptr), &cyclesExecuted, false);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSyncGPU && Common::AtomicLoad(CommandProcessor::VITicks) >= cyclesExecuted)
@ -338,7 +338,7 @@ void RunGpuLoop()
Common::AtomicStore(fifo.CPReadPointer, readPtr);
Common::AtomicAdd(fifo.CPReadWriteDistance, -32);
if ((write_ptr - g_video_buffer_read_ptr) == 0)
if ((write_ptr - s_video_buffer_read_ptr) == 0)
Common::AtomicStore(fifo.SafeCPReadPointer, fifo.CPReadPointer);
}
@ -403,7 +403,7 @@ void RunGpu()
FPURoundMode::SaveSIMDState();
FPURoundMode::LoadDefaultSIMDState();
ReadDataFromFifo(fifo.CPReadPointer);
OpcodeDecoder_Run(s_video_buffer_write_ptr, false);
s_video_buffer_read_ptr = OpcodeDecoder_Run(DataReader(s_video_buffer_read_ptr, s_video_buffer_write_ptr), nullptr, false);
FPURoundMode::LoadSIMDState();
}
@ -454,7 +454,7 @@ void Fifo_UpdateWantDeterminism(bool want)
if (gpu_thread)
{
// These haven't been updated in non-deterministic mode.
s_video_buffer_seen_ptr = g_video_buffer_pp_read_ptr = g_video_buffer_read_ptr;
s_video_buffer_seen_ptr = s_video_buffer_pp_read_ptr = s_video_buffer_read_ptr;
CopyPreprocessCPStateFromMain();
VertexLoaderManager::MarkAllDirty();
}

View File

@ -17,7 +17,6 @@ extern bool g_bSkipCurrentFrame;
// and can change at runtime.
extern bool g_use_deterministic_gpu_thread;
extern std::atomic<u8*> g_video_buffer_write_ptr_xthread;
extern u8* g_video_buffer_pp_read_ptr;
void Fifo_Init();
void Fifo_Shutdown();

View File

@ -36,7 +36,6 @@ bool g_bRecordFifoData = false;
static u32 InterpretDisplayList(u32 address, u32 size)
{
u8* old_pVideoData = g_video_buffer_read_ptr;
u8* startAddress;
if (g_use_deterministic_gpu_thread)
@ -49,41 +48,29 @@ static u32 InterpretDisplayList(u32 address, u32 size)
// Avoid the crash if Memory::GetPointer failed ..
if (startAddress != nullptr)
{
g_video_buffer_read_ptr = startAddress;
// temporarily swap dl and non-dl (small "hack" for the stats)
Statistics::SwapDL();
u8 *end = g_video_buffer_read_ptr + size;
cycles = OpcodeDecoder_Run(end, true);
OpcodeDecoder_Run(DataReader(startAddress, startAddress + size), &cycles, true);
INCSTAT(stats.thisFrame.numDListsCalled);
// un-swap
Statistics::SwapDL();
}
// reset to the old pointer
g_video_buffer_read_ptr = old_pVideoData;
return cycles;
}
// Preprocess-mode handling of a display-list call: resolves `address` to a
// host pointer, pushes that pointer and `size` to the FIFO aux buffer, and, if
// the pointer is valid, runs the preprocess decoder over [start, start + size)
// with in_display_list = true.
// NOTE(review): the lines that save/restore g_video_buffer_pp_read_ptr and call
// OpcodeDecoder_Preprocess are the previous implementation; the
// OpcodeDecoder_Run<true>(DataReader(...)) call is its replacement. Both sides
// of the commit are listed here.
static void InterpretDisplayListPreprocess(u32 address, u32 size)
{
u8* old_read_ptr = g_video_buffer_pp_read_ptr;
u8* startAddress = Memory::GetPointer(address);
// Pushed even when startAddress is null; the null check only guards decoding.
PushFifoAuxBuffer(startAddress, size);
if (startAddress != nullptr)
{
g_video_buffer_pp_read_ptr = startAddress;
u8 *end = startAddress + size;
OpcodeDecoder_Preprocess(end, true);
OpcodeDecoder_Run<true>(DataReader(startAddress, startAddress + size), nullptr, true);
}
g_video_buffer_pp_read_ptr = old_read_ptr;
}
static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
@ -134,180 +121,8 @@ static void UnknownOpcode(u8 cmd_byte, void *buffer, bool preprocess)
}
}
// Decodes one GX command starting at *bufp and advances *bufp past what it
// consumed.
//
// Template parameters:
//   is_preprocess - selects the preprocess handlers (and skips statistics and
//                   FIFO recording) instead of the real ones.
//   bufp          - address of the read pointer to decode from.
// Parameters:
//   end             - one past the last readable byte.
//   in_display_list - true while decoding a display list; a nested
//                     GX_CMD_CALL_DL is then only logged, not followed.
// Returns 0 when *bufp == end or when the command's operands are not fully
// available before `end` (note *bufp may already have moved past the opcode
// byte in that case); otherwise the cycle estimate for the command, or always
// 1 in preprocess mode.
template <bool is_preprocess, u8** bufp>
static u32 Decode(u8* end, bool in_display_list)
{
u8 *opcodeStart = *bufp;
if (*bufp == end)
return 0;
u8 cmd_byte = DataRead<u8>(bufp);
u32 cycles;
int refarray;
switch (cmd_byte)
{
case GX_NOP:
cycles = 6; // Hm, this means that we scan over nop streams pretty slowly...
break;
case GX_LOAD_CP_REG: //0x08
{
// Needs a 1-byte sub command plus a 4-byte value.
if (end - *bufp < 1 + 4)
return 0;
cycles = 12;
u8 sub_cmd = DataRead<u8>(bufp);
u32 value = DataRead<u32>(bufp);
LoadCPReg(sub_cmd, value, is_preprocess);
if (!is_preprocess)
INCSTAT(stats.thisFrame.numCPLoads);
}
break;
case GX_LOAD_XF_REG:
{
if (end - *bufp < 4)
return 0;
u32 Cmd2 = DataRead<u32>(bufp);
// Bits 16..19 hold (number of 32-bit words - 1).
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
if ((size_t) (end - *bufp) < transfer_size * sizeof(u32))
return 0;
cycles = 18 + 6 * transfer_size;
if (!is_preprocess)
{
u32 xf_address = Cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address);
INCSTAT(stats.thisFrame.numXFLoads);
}
else
{
// Preprocess mode only steps over the payload.
*bufp += transfer_size * sizeof(u32);
}
}
break;
case GX_LOAD_INDX_A: //used for position matrices
refarray = 0xC;
goto load_indx;
case GX_LOAD_INDX_B: //used for normal matrices
refarray = 0xD;
goto load_indx;
case GX_LOAD_INDX_C: //used for postmatrices
refarray = 0xE;
goto load_indx;
case GX_LOAD_INDX_D: //used for lights
refarray = 0xF;
goto load_indx;
// Shared tail of the four indexed-load opcodes above.
load_indx:
if (end - *bufp < 4)
return 0;
cycles = 6;
if (is_preprocess)
PreprocessIndexedXF(DataRead<u32>(bufp), refarray);
else
LoadIndexedXF(DataRead<u32>(bufp), refarray);
break;
case GX_CMD_CALL_DL:
{
if (end - *bufp < 8)
return 0;
u32 address = DataRead<u32>(bufp);
u32 count = DataRead<u32>(bufp);
if (in_display_list)
{
cycles = 6;
WARN_LOG(VIDEO,"recursive display list detected");
}
else
{
if (is_preprocess)
InterpretDisplayListPreprocess(address, count);
else
cycles = 6 + InterpretDisplayList(address, count);
}
}
break;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
cycles = 6;
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
break;
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
cycles = 6;
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
break;
case GX_LOAD_BP_REG: //0x61
// In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{
if (end - *bufp < 4)
return 0;
cycles = 12;
u32 bp_cmd = DataRead<u32>(bufp);
if (is_preprocess)
{
LoadBPRegPreprocess(bp_cmd);
}
else
{
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
}
}
break;
// draw primitives
default:
if ((cmd_byte & 0xC0) == 0x80)
{
cycles = 1600;
// load vertices
if (end - *bufp < 2)
return 0;
u16 num_vertices = DataRead<u16>(bufp);
if (is_preprocess)
{
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
if ((size_t) (end - *bufp) < size)
return 0;
*bufp += size;
}
else
{
if (!VertexLoaderManager::RunVertices(
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
num_vertices,
end - *bufp,
g_bSkipCurrentFrame))
return 0;
}
}
else
{
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
cycles = 1;
}
break;
}
// Display lists get added directly into the FIFO stream
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(*bufp - opcodeStart));
// In is_preprocess mode, we don't actually care about cycles, at least for
// now... make sure the compiler realizes that.
return is_preprocess ? 1 : cycles;
}
// Points the global FIFO read pointer at the start of the video buffer, as
// returned by GetVideoBufferStartPtr().
void OpcodeDecoder_Init()
{
g_video_buffer_read_ptr = GetVideoBufferStartPtr();
}
@ -315,33 +130,189 @@ void OpcodeDecoder_Shutdown()
{
}
// Decodes GX commands from `src` until the reader is empty or the next command
// is only partially present.
//
// Template parameter:
//   is_preprocess - selects the preprocess handlers (and skips statistics and
//                   FIFO recording) instead of the real ones.
// Parameters:
//   src             - reader over the bytes to decode (taken by value).
//   cycles          - if non-null, receives the accumulated cycle estimate.
//   in_display_list - true while decoding a display list; a nested
//                     GX_CMD_CALL_DL is then only logged, not followed.
// Returns the start of the first command that was not decoded, i.e. the
// position the caller should resume from.
//
// NOTE(review): this listing interleaves the previous implementation (the
// u32-returning signature, the Decode<> call with `old`/`cycles == 0`
// handling, `totalCycles += cycles;` and `return totalCycles;`) with the new
// one; both sides of the commit are shown without +/- markers.
u32 OpcodeDecoder_Run(u8* end, bool in_display_list)
template <bool is_preprocess>
u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list)
{
u32 totalCycles = 0;
u8* opcodeStart;
while (true)
{
u8* old = g_video_buffer_read_ptr;
u32 cycles = Decode</*is_preprocess*/ false, &g_video_buffer_read_ptr>(end, in_display_list);
if (cycles == 0)
// Remember where this command begins; it is the resume point if the command
// turns out to be incomplete.
src.WritePointer(&opcodeStart);
if (!src.size())
goto end;
u8 cmd_byte = src.Read<u8>();
int refarray;
switch (cmd_byte)
{
g_video_buffer_read_ptr = old;
case GX_NOP:
totalCycles += 6; // Hm, this means that we scan over nop streams pretty slowly...
break;
case GX_LOAD_CP_REG: //0x08
{
// Needs a 1-byte sub command plus a 4-byte value.
if (src.size() < 1 + 4)
goto end;
totalCycles += 12;
u8 sub_cmd = src.Read<u8>();
u32 value = src.Read<u32>();
LoadCPReg(sub_cmd, value, is_preprocess);
if (!is_preprocess)
INCSTAT(stats.thisFrame.numCPLoads);
}
break;
case GX_LOAD_XF_REG:
{
if (src.size() < 4)
goto end;
u32 Cmd2 = src.Read<u32>();
// Bits 16..19 hold (number of 32-bit words - 1).
int transfer_size = ((Cmd2 >> 16) & 15) + 1;
if (src.size() < transfer_size * sizeof(u32))
goto end;
totalCycles += 18 + 6 * transfer_size;
if (!is_preprocess)
{
u32 xf_address = Cmd2 & 0xFFFF;
LoadXFReg(transfer_size, xf_address, src);
INCSTAT(stats.thisFrame.numXFLoads);
}
// src is passed by value above, so the payload is stepped over here in
// both modes.
src.Skip<u32>(transfer_size);
}
break;
case GX_LOAD_INDX_A: //used for position matrices
refarray = 0xC;
goto load_indx;
case GX_LOAD_INDX_B: //used for normal matrices
refarray = 0xD;
goto load_indx;
case GX_LOAD_INDX_C: //used for postmatrices
refarray = 0xE;
goto load_indx;
case GX_LOAD_INDX_D: //used for lights
refarray = 0xF;
goto load_indx;
// Shared tail of the four indexed-load opcodes above.
load_indx:
if (src.size() < 4)
goto end;
totalCycles += 6;
if (is_preprocess)
PreprocessIndexedXF(src.Read<u32>(), refarray);
else
LoadIndexedXF(src.Read<u32>(), refarray);
break;
case GX_CMD_CALL_DL:
{
if (src.size() < 8)
goto end;
u32 address = src.Read<u32>();
u32 count = src.Read<u32>();
if (in_display_list)
{
totalCycles += 6;
WARN_LOG(VIDEO,"recursive display list detected");
}
else
{
if (is_preprocess)
InterpretDisplayListPreprocess(address, count);
else
totalCycles += 6 + InterpretDisplayList(address, count);
}
}
break;
case GX_CMD_UNKNOWN_METRICS: // zelda 4 swords calls it and checks the metrics registers after that
totalCycles += 6;
DEBUG_LOG(VIDEO, "GX 0x44: %08x", cmd_byte);
break;
case GX_CMD_INVL_VC: // Invalidate Vertex Cache
totalCycles += 6;
DEBUG_LOG(VIDEO, "Invalidate (vertex cache?)");
break;
case GX_LOAD_BP_REG: //0x61
// In skipped_frame case: We have to let BP writes through because they set
// tokens and stuff. TODO: Call a much simplified LoadBPReg instead.
{
if (src.size() < 4)
goto end;
totalCycles += 12;
u32 bp_cmd = src.Read<u32>();
if (is_preprocess)
{
LoadBPRegPreprocess(bp_cmd);
}
else
{
LoadBPReg(bp_cmd);
INCSTAT(stats.thisFrame.numBPLoads);
}
}
break;
// draw primitives
default:
if ((cmd_byte & 0xC0) == 0x80)
{
// load vertices
if (src.size() < 2)
goto end;
u16 num_vertices = src.Read<u16>();
if (is_preprocess)
{
size_t size = num_vertices * VertexLoaderManager::GetVertexSize(cmd_byte & GX_VAT_MASK, is_preprocess);
if (src.size() < size)
goto end;
src.Skip(size);
}
else
{
// RunVertices reports the bytes it consumed, or a negative value when
// src does not hold all the vertex data yet.
int bytes = VertexLoaderManager::RunVertices(
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
num_vertices,
src,
g_bSkipCurrentFrame);
if (bytes < 0)
goto end;
else
src.Skip(bytes);
}
totalCycles += 1600;
}
else
{
UnknownOpcode(cmd_byte, opcodeStart, is_preprocess);
totalCycles += 1;
}
break;
}
totalCycles += cycles;
// Display lists get added directly into the FIFO stream
if (!is_preprocess && g_bRecordFifoData && cmd_byte != GX_CMD_CALL_DL)
{
u8* opcodeEnd;
src.WritePointer(&opcodeEnd);
FifoRecorder::GetInstance().WriteGPCommand(opcodeStart, u32(opcodeEnd - opcodeStart));
}
}
return totalCycles;
// Reached when the reader is empty or holds only part of the next command.
end:
if (cycles)
{
*cycles = totalCycles;
}
return opcodeStart;
}
// Runs the preprocess-mode decoder on the preprocess read pointer until Decode
// reports 0 (buffer exhausted or incomplete command). On that final call the
// read pointer is restored to where the command began, so the command can be
// decoded again later.
void OpcodeDecoder_Preprocess(u8 *end, bool in_display_list)
{
while (true)
{
u8* old = g_video_buffer_pp_read_ptr;
u32 cycles = Decode</*is_preprocess*/ true, &g_video_buffer_pp_read_ptr>(end, in_display_list);
if (cycles == 0)
{
// Decode may already have consumed the opcode byte; undo that.
g_video_buffer_pp_read_ptr = old;
break;
}
}
}
template u8* OpcodeDecoder_Run<true>(DataReader src, u32* cycles, bool in_display_list);
template u8* OpcodeDecoder_Run<false>(DataReader src, u32* cycles, bool in_display_list);

View File

@ -5,6 +5,7 @@
#pragma once
#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#define GX_NOP 0x00
@ -40,5 +41,6 @@ extern bool g_bRecordFifoData;
void OpcodeDecoder_Init();
void OpcodeDecoder_Shutdown();
u32 OpcodeDecoder_Run(u8* end, bool in_display_list);
void OpcodeDecoder_Preprocess(u8* end, bool in_display_list);
template <bool is_preprocess = false>
u8* OpcodeDecoder_Run(DataReader src, u32* cycles, bool in_display_list);

View File

@ -45,6 +45,10 @@ int colElements[2];
GC_ALIGNED128(float posScale[4]);
GC_ALIGNED64(float tcScale[8][2]);
// These pointers are used as the source/destination for all fixed-function loader calls
u8* g_video_buffer_read_ptr;
u8* g_vertex_manager_write_ptr;
static const float fractionTable[32] = {
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
@ -93,8 +97,8 @@ static void LOADERDECL TexMtx_Write_Float4()
{
#if _M_SSE >= 0x200
__m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]);
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */));
VertexManager::s_pCurBufferPointer += sizeof(float) * 4;
_mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */));
g_vertex_manager_write_ptr += sizeof(float) * 4;
#else
DataWrite(0.f);
DataWrite(0.f);
@ -487,10 +491,13 @@ void VertexLoader::ConvertVertices ( int count )
#endif
}
// Publishes the cursors of `dst` and `src` into the global write/read pointers
// used by the fixed-function loaders, then sets up and converts `count`
// vertices. Returns `count` unchanged.
// NOTE(review): the first signature line is the previous (void, three
// parameter) declaration; the second is its replacement. Both sides of the
// commit are listed here.
void VertexLoader::RunVertices(const VAT& vat, int primitive, int const count)
int VertexLoader::RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst)
{
dst.WritePointer(&g_vertex_manager_write_ptr);
src.WritePointer(&g_video_buffer_read_ptr);
SetupRunVertices(vat, primitive, count);
ConvertVertices(count);
return count;
}
void VertexLoader::SetVAT(const VAT& vat)

View File

@ -18,6 +18,7 @@
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VertexLoaderUtils.h"
#if _M_SSE >= 0x401
#include <smmintrin.h>
@ -117,7 +118,7 @@ public:
{ return m_native_vtx_decl; }
void SetupRunVertices(const VAT& vat, int primitive, int const count);
void RunVertices(const VAT& vat, int primitive, int count);
int RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst);
// For debugging / profiling
void AppendToString(std::string *dest) const;
@ -193,9 +194,9 @@ __forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale)
{
coords = _mm_shuffle_epi8(coords, threeIn ? kMaskSwap32_3 : kMaskSwap32_2);
if (threeOut)
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, coords);
_mm_storeu_si128((__m128i*)g_vertex_manager_write_ptr, coords);
else
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, coords);
_mm_storel_epi64((__m128i*)g_vertex_manager_write_ptr, coords);
}
else
{
@ -212,11 +213,11 @@ __forceinline void Vertex_Read_SSSE3(const T* pData, __m128 scale)
__m128 out = _mm_mul_ps(_mm_cvtepi32_ps(coords), scale);
if (threeOut)
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, out);
_mm_storeu_ps((float*)g_vertex_manager_write_ptr, out);
else
_mm_storel_pi((__m64*)VertexManager::s_pCurBufferPointer, out);
_mm_storel_pi((__m64*)g_vertex_manager_write_ptr, out);
}
VertexManager::s_pCurBufferPointer += sizeof(float) * (2 + threeOut);
g_vertex_manager_write_ptr += sizeof(float) * (2 + threeOut);
}
#endif
#endif

View File

@ -130,24 +130,23 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state)
return loader;
}
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing)
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing)
{
if (!count)
return true;
return 0;
CPState* state = &g_main_cp_state;
VertexLoader* loader = RefreshLoader(vtx_attr_group, state);
size_t size = count * loader->GetVertexSize();
if (buf_size < size)
return false;
int size = count * loader->GetVertexSize();
if ((int)src.size() < size)
return -1;
if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5))
{
// if cull mode is CULL_ALL, ignore triangles and quads
DataSkip((u32)size);
return true;
return size;
}
NativeVertexFormat* native = loader->GetNativeVertexFormat();
@ -157,16 +156,18 @@ bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size,
VertexManager::Flush();
s_current_vtx_fmt = native;
VertexManager::PrepareForAdditionalData(primitive, count,
DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count,
loader->GetNativeVertexDeclaration().stride);
loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count);
count = loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count, src, dst);
IndexGenerator::AddIndices(primitive, count);
VertexManager::FlushData(count, loader->GetNativeVertexDeclaration().stride);
ADDSTAT(stats.thisFrame.numPrims, count);
INCSTAT(stats.thisFrame.numPrimitiveJoins);
return true;
return size;
}
int GetVertexSize(int vtx_attr_group, bool preprocess)

View File

@ -7,6 +7,7 @@
#include <string>
#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/NativeVertexFormat.h"
namespace VertexLoaderManager
@ -17,8 +18,9 @@ namespace VertexLoaderManager
void MarkAllDirty();
int GetVertexSize(int vtx_attr_group, bool preprocess);
// Returns false if buf_size is insufficient.
bool RunVertices(int vtx_attr_group, int primitive, int count, size_t buf_size, bool skip_drawing = false);
// Returns -1 if src does not hold enough data for all vertices, else the number of bytes consumed
int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bool skip_drawing = false);
// For debugging
void AppendListToString(std::string *dest);

View File

@ -0,0 +1,94 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2
// Refer to the license.txt file included.
#pragma once

#include <cstring>

#include "Common/Common.h"
#include "VideoCommon/VertexManagerBase.h"
extern u8* g_video_buffer_read_ptr;
extern u8* g_vertex_manager_write_ptr;
// Moves the global FIFO read cursor forward by `skip` bytes without decoding
// anything.
__forceinline void DataSkip(u32 skip)
{
	g_video_buffer_read_ptr = g_video_buffer_read_ptr + skip;
}
// Variant of DataSkip whose byte count is a template argument.
// (Original note: probably unnecessary.)
template <int count>
__forceinline void DataSkip()
{
	g_video_buffer_read_ptr = g_video_buffer_read_ptr + count;
}
// Returns the big-endian T stored `_uOffset` bytes past *bufp, converted with
// Common::FromBigEndian. *bufp itself is not advanced.
//
//   _uOffset - byte offset from the current position of *bufp.
//   bufp     - address of the cursor to read from; defaults to the global
//              FIFO read pointer.
template <typename T>
__forceinline T DataPeek(int _uOffset, u8** bufp = &g_video_buffer_read_ptr)
{
	// The source is a byte cursor with no alignment guarantee for T, so the
	// value is copied out with memcpy rather than read through a casted
	// pointer (a misaligned typed load is undefined behaviour). A fixed-size
	// memcpy is typically compiled down to a single load.
	T raw;
	std::memcpy(&raw, *bufp + _uOffset, sizeof(T));
	return Common::FromBigEndian(raw);
}
// TODO: kill these
// Peeks a u8 at `_uOffset` bytes past the global FIFO read pointer.
__forceinline u8 DataPeek8(int _uOffset)
{
	return DataPeek<u8>(_uOffset, &g_video_buffer_read_ptr);
}
// Peeks a byte-swapped u16 at `_uOffset` bytes past the global FIFO read pointer.
__forceinline u16 DataPeek16(int _uOffset)
{
	return DataPeek<u16>(_uOffset, &g_video_buffer_read_ptr);
}
// Peeks a byte-swapped u32 at `_uOffset` bytes past the global FIFO read pointer.
__forceinline u32 DataPeek32(int _uOffset)
{
	return DataPeek<u32>(_uOffset, &g_video_buffer_read_ptr);
}
// Reads the byte-swapped T at *bufp and then moves *bufp past it.
// bufp defaults to the global FIFO read pointer.
template <typename T>
__forceinline T DataRead(u8** bufp = &g_video_buffer_read_ptr)
{
	const T value = DataPeek<T>(0, bufp);
	*bufp = *bufp + sizeof(T);
	return value;
}
// TODO: kill these
// Reads a u8 from the global FIFO read pointer and advances it by one byte.
__forceinline u8 DataReadU8()
{
	return DataRead<u8>(&g_video_buffer_read_ptr);
}
// Reads an s8 from the global FIFO read pointer and advances it by one byte.
__forceinline s8 DataReadS8()
{
	return DataRead<s8>(&g_video_buffer_read_ptr);
}
// Reads a byte-swapped u16 from the global FIFO read pointer and advances it
// by two bytes.
__forceinline u16 DataReadU16()
{
	return DataRead<u16>(&g_video_buffer_read_ptr);
}
// Reads a byte-swapped u32 from the global FIFO read pointer and advances it
// by four bytes.
__forceinline u32 DataReadU32()
{
	return DataRead<u32>(&g_video_buffer_read_ptr);
}
// Reads four bytes at the global FIFO read pointer as a u32 in host byte order
// (no endian conversion) and advances the pointer by four.
__forceinline u32 DataReadU32Unswapped()
{
	// The read cursor is a byte pointer with no u32 alignment guarantee, so
	// copy the bytes out instead of loading through a casted pointer (a
	// misaligned typed load is undefined behaviour).
	u32 tmp;
	std::memcpy(&tmp, g_video_buffer_read_ptr, sizeof(tmp));
	g_video_buffer_read_ptr += 4;
	return tmp;
}
// Exposes the current position of the global FIFO read cursor.
__forceinline u8* DataGetPosition()
{
	u8* const cursor = g_video_buffer_read_ptr;
	return cursor;
}
// Stores `data` unchanged (no byte swap) at the global vertex write pointer
// and advances that pointer by sizeof(T).
template <typename T>
__forceinline void DataWrite(T data)
{
	// The write cursor is a byte pointer with no alignment guarantee for T, so
	// the value is copied in with memcpy rather than stored through a casted
	// pointer (a misaligned typed store is undefined behaviour). A fixed-size
	// memcpy is typically compiled down to a single store.
	std::memcpy(g_vertex_manager_write_ptr, &data, sizeof(T));
	g_vertex_manager_write_ptr += sizeof(T);
}

View File

@ -14,7 +14,7 @@
#include "VideoCommon/VideoCommon.h"
// warning: mapping buffer should be disabled to use this
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
@ -43,13 +43,14 @@ template <typename T, int N>
// Writes N normal components to the vertex output: each data[i] is converted
// from big endian, passed through FracAdjust, and appended via dst.Write.
// N must be 3 or 9 (enforced by the static_assert).
// NOTE(review): `DataWriter dst;` is the previous declaration and
// `DataReader dst(...)` with the trailing WritePointer call is its
// replacement; both sides of the commit are listed here.
__forceinline void ReadIndirect(const T* data)
{
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
DataWriter dst;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i != N; ++i)
{
dst.Write(FracAdjust(Common::FromBigEndian(data[i])));
}
// Publish the advanced cursor back to the global write pointer.
dst.WritePointer(&g_vertex_manager_write_ptr);
LOG_NORM();
}

View File

@ -13,7 +13,7 @@
#include "VideoCommon/VideoCommon.h"
// Thoughts on the implementation of a vertex loader compiler.
// s_pCurBufferPointer should definitely be in a register.
// g_vertex_manager_write_ptr should definitely be in a register.
// Could load the position scale factor in XMM7, for example.
// The pointer inside DataReadU8 in another.
@ -75,12 +75,14 @@ void LOADERDECL Pos_ReadDirect()
{
static_assert(N <= 3, "N > 3 is not sane!");
auto const scale = posScale[0];
DataWriter dst;
DataReader src;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr);
for (int i = 0; i < 3; ++i)
dst.Write(i<N ? PosScale(src.Read<T>(), scale) : 0.f);
dst.WritePointer(&g_vertex_manager_write_ptr);
src.WritePointer(&g_video_buffer_read_ptr);
LOG_VTX();
}
@ -93,11 +95,12 @@ void LOADERDECL Pos_ReadIndex()
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION]));
auto const scale = posScale[0];
DataWriter dst;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i < 3; ++i)
dst.Write(i<N ? PosScale(Common::FromBigEndian(data[i]), scale) : 0.f);
dst.WritePointer(&g_vertex_manager_write_ptr);
LOG_VTX();
}

View File

@ -19,14 +19,14 @@ template <>
// Logging hook specialised for one value. The body contains only
// commented-out PRIM_LOG calls, so the function currently does nothing.
// If re-enabled, the call would print the float just before the write pointer.
__forceinline void LOG_TEX<1>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]);
// PRIM_LOG("tex: %f, ", ((float*)g_vertex_manager_write_ptr)[-1]);
}
// Logging hook specialised for two values. The body contains only
// commented-out PRIM_LOG calls, so the function currently does nothing.
// If re-enabled, the call would print the two floats just before the write pointer.
template <>
__forceinline void LOG_TEX<2>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
// PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
}
static void LOADERDECL TexCoord_Read_Dummy()
@ -50,12 +50,14 @@ template <typename T, int N>
void LOADERDECL TexCoord_ReadDirect()
{
auto const scale = tcScale[tcIndex][0];
DataWriter dst;
DataReader src;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
DataReader src(g_video_buffer_read_ptr, nullptr);
for (int i = 0; i != N; ++i)
dst.Write(TCScale(src.Read<T>(), scale));
dst.WritePointer(&g_vertex_manager_write_ptr);
src.WritePointer(&g_video_buffer_read_ptr);
LOG_TEX<N>();
++tcIndex;
@ -70,11 +72,12 @@ void LOADERDECL TexCoord_ReadIndex()
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
+ (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex]));
auto const scale = tcScale[tcIndex][0];
DataWriter dst;
DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (int i = 0; i != N; ++i)
dst.Write(TCScale(Common::FromBigEndian(data[i]), scale));
dst.WritePointer(&g_vertex_manager_write_ptr);
LOG_TEX<N>();
++tcIndex;
}

View File

@ -51,7 +51,7 @@ u32 VertexManager::GetRemainingSize()
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}
void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
{
// The SSE vertex loader can write up to 4 bytes past the end
u32 const needed_vertex_bytes = count * stride + 4;
@ -83,6 +83,13 @@ void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 strid
g_vertex_manager->ResetBuffer(stride);
IsFlushed = false;
}
return DataReader(s_pCurBufferPointer, s_pEndBufferPointer);
}
// Moves the static write cursor s_pCurBufferPointer forward by
// count * stride bytes (the product is computed in u32).
void VertexManager::FlushData(u32 count, u32 stride)
{
	const u32 consumed_bytes = count * stride;
	s_pCurBufferPointer += consumed_bytes;
}
u32 VertexManager::GetRemainingIndices(int primitive)

View File

@ -3,6 +3,7 @@
#include <vector>
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
class NativeVertexFormat;
class PointerWrap;
@ -31,21 +32,14 @@ public:
// needs to be virtual for DX11's dtor
virtual ~VertexManager();
static u8 *s_pCurBufferPointer;
static u8 *s_pBaseBufferPointer;
static u8 *s_pEndBufferPointer;
static u32 GetRemainingSize();
static void PrepareForAdditionalData(int primitive, u32 count, u32 stride);
static u32 GetRemainingIndices(int primitive);
static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride);
static void FlushData(u32 count, u32 stride);
static void Flush();
virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0;
static void DoState(PointerWrap& p);
virtual void CreateDeviceObjects(){}
virtual void DestroyDeviceObjects(){}
protected:
virtual void vDoState(PointerWrap& p) { }
@ -54,12 +48,20 @@ protected:
virtual void ResetBuffer(u32 stride) = 0;
static u8* s_pCurBufferPointer;
static u8* s_pBaseBufferPointer;
static u8* s_pEndBufferPointer;
static u32 GetRemainingSize();
static u32 GetRemainingIndices(int primitive);
private:
static bool IsFlushed;
// virtual void Draw(u32 stride, bool alphapass) = 0;
// temp
virtual void vFlush(bool useDstAlpha) = 0;
virtual void CreateDeviceObjects() {}
virtual void DestroyDeviceObjects() {}
};
extern VertexManager *g_vertex_manager;

View File

@ -61,7 +61,7 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
#endif
// warning: mapping buffer should be disabled to use this
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)g_vertex_manager_write_ptr)[-3], ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]);
#define LOG_VTX()

View File

@ -117,6 +117,7 @@
<ClInclude Include="TextureDecoder.h" />
<ClInclude Include="VertexLoader.h" />
<ClInclude Include="VertexLoaderManager.h" />
<ClInclude Include="VertexLoaderUtils.h" />
<ClInclude Include="VertexLoader_Color.h" />
<ClInclude Include="VertexLoader_Normal.h" />
<ClInclude Include="VertexLoader_Position.h" />
@ -151,4 +152,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View File

@ -275,6 +275,9 @@
<ClInclude Include="VertexLoaderManager.h">
<Filter>Vertex Loading</Filter>
</ClInclude>
<ClInclude Include="VertexLoaderUtils.h">
<Filter>Vertex Loading</Filter>
</ClInclude>
<ClInclude Include="BoundingBox.h">
<Filter>Util</Filter>
</ClInclude>
@ -285,4 +288,4 @@
<ItemGroup>
<Text Include="CMakeLists.txt" />
</ItemGroup>
</Project>
</Project>

View File

@ -6,6 +6,7 @@
#include "Common/CommonTypes.h"
#include "VideoCommon/CPMemory.h"
#include "VideoCommon/DataReader.h"
// Lighting
@ -273,6 +274,6 @@ struct XFMemory
extern XFMemory xfmem;
void LoadXFReg(u32 transferSize, u32 address);
void LoadXFReg(u32 transferSize, u32 address, DataReader src);
void LoadIndexedXF(u32 val, int array);
void PreprocessIndexedXF(u32 val, int refarray);

View File

@ -19,14 +19,14 @@ static void XFMemWritten(u32 transferSize, u32 baseAddress)
VertexShaderManager::InvalidateXFRange(baseAddress, baseAddress + transferSize);
}
static void XFRegWritten(int transferSize, u32 baseAddress)
static void XFRegWritten(int transferSize, u32 baseAddress, DataReader src)
{
u32 address = baseAddress;
u32 dataIndex = 0;
while (transferSize > 0 && address < 0x1058)
{
u32 newValue = DataPeek<u32>(dataIndex * sizeof(u32));
u32 newValue = src.Peek<u32>(dataIndex * sizeof(u32));
u32 nextAddress = address + 1;
switch (address)
@ -193,7 +193,7 @@ static void XFRegWritten(int transferSize, u32 baseAddress)
}
}
void LoadXFReg(u32 transferSize, u32 baseAddress)
void LoadXFReg(u32 transferSize, u32 baseAddress, DataReader src)
{
// do not allow writes past registers
if (baseAddress + transferSize > 0x1058)
@ -229,17 +229,17 @@ void LoadXFReg(u32 transferSize, u32 baseAddress)
XFMemWritten(xfMemTransferSize, xfMemBase);
for (u32 i = 0; i < xfMemTransferSize; i++)
{
((u32*)&xfmem)[xfMemBase + i] = DataRead<u32>();
((u32*)&xfmem)[xfMemBase + i] = src.Read<u32>();
}
}
// write to XF regs
if (transferSize > 0)
{
XFRegWritten(transferSize, baseAddress);
XFRegWritten(transferSize, baseAddress, src);
for (u32 i = 0; i < transferSize; i++)
{
((u32*)&xfmem)[baseAddress + i] = DataRead<u32>();
((u32*)&xfmem)[baseAddress + i] = src.Read<u32>();
}
}
}

View File

@ -74,12 +74,14 @@ protected:
// Rewinds the fixture to the start of its buffers: zeroes m_input_pos and
// m_output_pos and re-creates `src` / `dst` over input_memory / output_memory.
void ResetPointers()
{
// NOTE(review): the two global-pointer assignments below look like the
// pre-change lines of a diff, superseded by the DataReader assignments
// further down - confirm which form is current.
g_video_buffer_read_ptr = &input_memory[0];
VertexManager::s_pCurBufferPointer = &output_memory[0];
m_input_pos = m_output_pos = 0;
// End pointer is start + sizeof(buffer); assumes input_memory / output_memory
// are byte arrays so sizeof equals their length - TODO confirm.
src = DataReader(input_memory, input_memory+sizeof(input_memory));
dst = DataReader(output_memory, output_memory+sizeof(output_memory));
}
u32 m_input_pos, m_output_pos;
DataReader src;
DataReader dst;
TVtxDesc m_vtx_desc;
VAT m_vtx_attr;
@ -103,7 +105,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ)
Input(0.0f); Input(0.0f); Input(1.0f);
// Convert 4 points. "7" -> primitive are points.
loader.RunVertices(m_vtx_attr, 7, 4);
int count = loader.RunVertices(m_vtx_attr, 7, 4, src, dst);
src.Skip(4 * loader.GetVertexSize());
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f);
ExpectOut(1.0f); ExpectOut(0.0f); ExpectOut(0.0f);
@ -113,7 +117,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ)
// Test that scale does nothing for floating point inputs.
Input(1.0f); Input(2.0f); Input(4.0f);
m_vtx_attr.g0.PosFrac = 1;
loader.RunVertices(m_vtx_attr, 7, 1);
count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst);
src.Skip(1 * loader.GetVertexSize());
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(4.0f);
}
@ -136,7 +142,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY)
Input<u16>(12345); Input<u16>(54321);
// Convert 5 points. "7" -> primitive are points.
loader.RunVertices(m_vtx_attr, 7, 5);
int count = loader.RunVertices(m_vtx_attr, 7, 5, src, dst);
src.Skip(5 * loader.GetVertexSize());
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f);
ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(0.0f);
@ -147,7 +155,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY)
// Test that scale works on U16 inputs.
Input<u16>(42); Input<u16>(24);
m_vtx_attr.g0.PosFrac = 1;
loader.RunVertices(m_vtx_attr, 7, 1);
count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst);
src.Skip(1 * loader.GetVertexSize());
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
ExpectOut(21.0f); ExpectOut(12.0f); ExpectOut(0.0f);
}
@ -165,7 +175,9 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed)
for (int i = 0; i < 1000; ++i)
{
ResetPointers();
loader.RunVertices(m_vtx_attr, 7, 100000);
int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst);
src.Skip(100000 * loader.GetVertexSize());
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
}
}
@ -183,7 +195,9 @@ TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed)
for (int i = 0; i < 1000; ++i)
{
ResetPointers();
loader.RunVertices(m_vtx_attr, 7, 100000);
int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst);
src.Skip(100000 * loader.GetVertexSize());
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
}
}
@ -244,6 +258,8 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed)
for (int i = 0; i < 100; ++i)
{
ResetPointers();
loader.RunVertices(m_vtx_attr, 7, 100000);
int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst);
src.Skip(100000 * loader.GetVertexSize());
dst.Skip(count * loader.GetNativeVertexDeclaration().stride);
}
}