From 7c486a8c243c3bb223b1d31aab34bf039c6a02a1 Mon Sep 17 00:00:00 2001 From: degasus Date: Sat, 13 Dec 2014 10:57:46 +0100 Subject: [PATCH] VertexLoader: Add a VertexLoader pointer to each function call --- Source/Core/VideoCommon/BoundingBox.cpp | 4 +- Source/Core/VideoCommon/BoundingBox.h | 4 +- Source/Core/VideoCommon/VertexLoader.cpp | 110 ++++++++---------- Source/Core/VideoCommon/VertexLoader.h | 30 +++-- .../Core/VideoCommon/VertexLoader_Color.cpp | 102 ++++++++-------- Source/Core/VideoCommon/VertexLoader_Color.h | 38 +++--- .../Core/VideoCommon/VertexLoader_Normal.cpp | 12 +- .../VideoCommon/VertexLoader_Position.cpp | 16 +-- .../VideoCommon/VertexLoader_TextCoord.cpp | 34 +++--- 9 files changed, 178 insertions(+), 172 deletions(-) diff --git a/Source/Core/VideoCommon/BoundingBox.cpp b/Source/Core/VideoCommon/BoundingBox.cpp index 4b4400f2e7..9ea0fa5689 100644 --- a/Source/Core/VideoCommon/BoundingBox.cpp +++ b/Source/Core/VideoCommon/BoundingBox.cpp @@ -30,7 +30,7 @@ static TVtxDesc vertexDesc; static PortableVertexDeclaration vertexDecl; // Gets the pointer to the current buffer position -void LOADERDECL SetVertexBufferPosition() +void LOADERDECL SetVertexBufferPosition(VertexLoader* loader) { bufferPos = g_vertex_manager_write_ptr; } @@ -76,7 +76,7 @@ void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const Por } // Updates the bounding box -void LOADERDECL Update() +void LOADERDECL Update(VertexLoader* loader) { if (!active) return; diff --git a/Source/Core/VideoCommon/BoundingBox.h b/Source/Core/VideoCommon/BoundingBox.h index d6952b184e..afff0e3e72 100644 --- a/Source/Core/VideoCommon/BoundingBox.h +++ b/Source/Core/VideoCommon/BoundingBox.h @@ -31,8 +31,8 @@ extern u8 posMtxIdx; // Texture matrix indexes extern u8 texMtxIdx[8]; -void LOADERDECL SetVertexBufferPosition(); -void LOADERDECL Update(); +void LOADERDECL SetVertexBufferPosition(VertexLoader* loader); +void LOADERDECL Update(VertexLoader* loader); void Prepare(const VAT 
& vat, int primitive, const TVtxDesc & vtxDesc, const PortableVertexDeclaration & vtxDecl); // Save state diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index 32cb95607b..d7c8fe3cfa 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -29,79 +29,64 @@ #define inline #endif -// Matrix components are first in GC format but later in PC format - we need to store it temporarily -// when decoding each vertex. -static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx; -static u8 s_curtexmtx[8]; -static int s_texmtxwrite = 0; -static int s_texmtxread = 0; - -// Vertex loaders read these. Although the scale ones should be baked into the shader. -int tcIndex; -int colIndex; -int colElements[2]; -// Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT -GC_ALIGNED128(float posScale[4]); -GC_ALIGNED64(float tcScale[8][2]); - // This pointer is used as the source/dst for all fixed function loader calls u8* g_video_buffer_read_ptr; u8* g_vertex_manager_write_ptr; -static const float fractionTable[32] = { - 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), - 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), - 1.0f / (1U << 8), 1.0f / (1U << 9), 1.0f / (1U << 10), 1.0f / (1U << 11), - 1.0f / (1U << 12), 1.0f / (1U << 13), 1.0f / (1U << 14), 1.0f / (1U << 15), - 1.0f / (1U << 16), 1.0f / (1U << 17), 1.0f / (1U << 18), 1.0f / (1U << 19), - 1.0f / (1U << 20), 1.0f / (1U << 21), 1.0f / (1U << 22), 1.0f / (1U << 23), - 1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27), - 1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31), -}; - using namespace Gen; -static void LOADERDECL PosMtx_ReadDirect_UByte() + +void* VertexLoader::operator new (size_t size) { - BoundingBox::posMtxIdx = s_curposmtx = DataReadU8() & 0x3f; - PRIM_LOG("posmtx: %d, ", s_curposmtx); + 
return AllocateAlignedMemory(size, 16); } -static void LOADERDECL PosMtx_Write() +void VertexLoader::operator delete (void *p) +{ + FreeAlignedMemory(p); +} + +static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader) +{ + BoundingBox::posMtxIdx = loader->m_curposmtx = DataReadU8() & 0x3f; + PRIM_LOG("posmtx: %d, ", loader->m_curposmtx); +} + +static void LOADERDECL PosMtx_Write(VertexLoader* loader) { // u8, 0, 0, 0 - DataWrite(s_curposmtx); + DataWrite(loader->m_curposmtx); } -static void LOADERDECL TexMtx_ReadDirect_UByte() +static void LOADERDECL TexMtx_ReadDirect_UByte(VertexLoader* loader) { - BoundingBox::texMtxIdx[s_texmtxread] = s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f; + BoundingBox::texMtxIdx[loader->m_texmtxread] = loader->m_curtexmtx[loader->m_texmtxread] = DataReadU8() & 0x3f; - PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]); - s_texmtxread++; + PRIM_LOG("texmtx%d: %d, ", loader->m_texmtxread, loader->m_curtexmtx[loader->m_texmtxread]); + loader->m_texmtxread++; } -static void LOADERDECL TexMtx_Write_Float() +static void LOADERDECL TexMtx_Write_Float(VertexLoader* loader) { - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); } -static void LOADERDECL TexMtx_Write_Float2() +static void LOADERDECL TexMtx_Write_Float2(VertexLoader* loader) { DataWrite(0.f); - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); } -static void LOADERDECL TexMtx_Write_Float4() +static void LOADERDECL TexMtx_Write_Float4(VertexLoader* loader) { #if _M_SSE >= 0x200 - __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]); + __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), loader->m_curtexmtx[loader->m_texmtxwrite++]); _mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */)); g_vertex_manager_write_ptr 
+= sizeof(float) * 4; #else DataWrite(0.f); DataWrite(0.f); - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); // Just to fill out with 0. DataWrite(0.f); #endif @@ -123,6 +108,14 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) m_numPipelineStages = 0; CompileVertexTranslator(); #endif + + // generate frac factors + m_posScale[0] = m_posScale[1] = m_posScale[2] = m_posScale[3] = 1.0f / (1U << m_VtxAttr.PosFrac); + for (int i = 0; i < 8; i++) + m_tcScale[i][0] = m_tcScale[i][1] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac); + + for (int i = 0; i < 2; i++) + m_colElements[i] = m_VtxAttr.color[i].Elements; } VertexLoader::~VertexLoader() @@ -143,11 +136,14 @@ void VertexLoader::CompileVertexTranslator() m_compiledCode = GetCodePtr(); // We only use RAX (caller saved) and RBX (callee saved). - ABI_PushRegistersAndAdjustStack({RBX}, 8); + ABI_PushRegistersAndAdjustStack({RBX, RBP}, 8); // save count MOV(64, R(RBX), R(ABI_PARAM1)); + // save loader + MOV(64, R(RBP), R(ABI_PARAM2)); + // Start loop here const u8 *loop_start = GetCodePtr(); @@ -155,17 +151,17 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord || m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) { - WriteSetVariable(32, &tcIndex, Imm32(0)); + WriteSetVariable(32, &m_tcIndex, Imm32(0)); } if (m_VtxDesc.Color0 || m_VtxDesc.Color1) { - WriteSetVariable(32, &colIndex, Imm32(0)); + WriteSetVariable(32, &m_colIndex, Imm32(0)); } if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx || m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) { - WriteSetVariable(32, &s_texmtxwrite, Imm32(0)); - WriteSetVariable(32, &s_texmtxread, Imm32(0)); + WriteSetVariable(32, &m_texmtxwrite, Imm32(0)); + WriteSetVariable(32, 
&m_texmtxread, Imm32(0)); } #else // Reset pipeline @@ -405,7 +401,7 @@ void VertexLoader::CompileVertexTranslator() SUB(64, R(RBX), Imm8(1)); J_CC(CC_NZ, loop_start); - ABI_PopRegistersAndAdjustStack({RBX}, 8); + ABI_PopRegistersAndAdjustStack({RBX, RBP}, 8); RET(); #endif } @@ -413,6 +409,7 @@ void VertexLoader::CompileVertexTranslator() void VertexLoader::WriteCall(TPipelineFunction func) { #ifdef USE_VERTEX_LOADER_JIT + MOV(64, R(ABI_PARAM1), R(RBP)); ABI_CallFunction((const void*)func); #else m_PipelineStages[m_numPipelineStages++] = func; @@ -441,13 +438,6 @@ void VertexLoader::SetupRunVertices(int primitive, int const count) { m_numLoadedVertices += count; - posScale[0] = posScale[1] = posScale[2] = posScale[3] = fractionTable[m_VtxAttr.PosFrac]; - if (m_native_components & VB_HAS_UVALL) - for (int i = 0; i < 8; i++) - tcScale[i][0] = tcScale[i][1] = fractionTable[m_VtxAttr.texCoord[i].Frac]; - for (int i = 0; i < 2; i++) - colElements[i] = m_VtxAttr.color[i].Elements; - // Prepare bounding box if (!g_ActiveConfig.backend_info.bSupportsBBox) BoundingBox::Prepare(m_vat, primitive, m_VtxDesc, m_native_vtx_decl); @@ -458,16 +448,16 @@ void VertexLoader::ConvertVertices ( int count ) #ifdef USE_VERTEX_LOADER_JIT if (count > 0) { - ((void (*)(int))(void*)m_compiledCode)(count); + ((void (*)(int, VertexLoader* loader))(void*)m_compiledCode)(count, this); } #else for (int s = 0; s < count; s++) { - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; + m_tcIndex = 0; + m_colIndex = 0; + m_texmtxwrite = m_texmtxread = 0; for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](); + m_PipelineStages[i](this); PRIM_LOG("\n"); } #endif diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index 0d886735ca..749f738b25 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -36,14 +36,8 @@ #define LOADERDECL #endif -typedef void (LOADERDECL *TPipelineFunction)(); - 
-// They are used for the communication with the loader functions -extern int tcIndex; -extern int colIndex; -extern int colElements[2]; -GC_ALIGNED128(extern float posScale[4]); -GC_ALIGNED64(extern float tcScale[8][2]); +class VertexLoader; +typedef void (LOADERDECL *TPipelineFunction)(VertexLoader* loader); // ARMTODO: This should be done in a better way #ifndef _M_GENERIC @@ -53,6 +47,11 @@ class VertexLoader : public VertexLoaderBase #endif { public: + // This class needs a 16 byte alignment. As this is broken on + // MSVC right now (Dec 2014), we use custom allocation. + void* operator new (size_t size); + void operator delete (void *p); + VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); ~VertexLoader(); @@ -60,6 +59,21 @@ public: std::string GetName() const override { return "OldLoader"; } bool IsInitialized() override { return true; } // This vertex loader supports all formats + // They are used for the communication with the loader functions + // Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT + GC_ALIGNED128(float m_posScale[4]); + GC_ALIGNED64(float m_tcScale[8][2]); + int m_tcIndex; + int m_colIndex; + int m_colElements[2]; + + // Matrix components are first in GC format but later in PC format - we need to store it temporarily + // when decoding each vertex. + u8 m_curposmtx; + u8 m_curtexmtx[8]; + int m_texmtxwrite; + int m_texmtxread; + private: #ifndef USE_VERTEX_LOADER_JIT // Pipeline. 
diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 0120e3bea5..58b1b96f31 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -16,46 +16,46 @@ #define ASHIFT 24 #define AMASK 0xFF000000 -__forceinline void _SetCol(u32 val) +__forceinline void _SetCol(VertexLoader* loader, u32 val) { DataWrite(val); - colIndex++; + loader->m_colIndex++; } //color comes in format BARG in 16 bits //BARG -> AABBGGRR -__forceinline void _SetCol4444(u16 val) +__forceinline void _SetCol4444(VertexLoader* loader, u16 val) { u32 col = (val & 0xF0); // col = 000000R0; col |= (val & 0xF ) << 12; // col |= 0000G000; col |= (((u32)val) & 0xF000) << 8; // col |= 00B00000; col |= (((u32)val) & 0x0F00) << 20; // col |= A0000000; col |= col >> 4; // col = A0B0G0R0 | 0A0B0G0R; - _SetCol(col); + _SetCol(loader, col); } //color comes in format RGBA //RRRRRRGG GGGGBBBB BBAAAAAA -__forceinline void _SetCol6666(u32 val) +__forceinline void _SetCol6666(VertexLoader* loader, u32 val) { u32 col = (val >> 16) & 0xFC; col |= (val >> 2) & 0xFC00; col |= (val << 12) & 0xFC0000; col |= (val << 26) & 0xFC000000; col |= (col >> 6) & 0x03030303; - _SetCol(col); + _SetCol(loader, col); } //color comes in RGB //RRRRRGGG GGGBBBBB -__forceinline void _SetCol565(u16 val) +__forceinline void _SetCol565(VertexLoader* loader, u16 val) { u32 col = (val >> 8) & 0xF8; col |= (val << 5) & 0xFC00; col |=(((u32)val) << 19) & 0xF80000; col |= (col >> 5) & 0x070007; col |= (col >> 6) & 0x000300; - _SetCol(col | AMASK); + _SetCol(loader, col | AMASK); } __forceinline u32 _Read24(const u8 *addr) @@ -69,29 +69,29 @@ __forceinline u32 _Read32(const u8 *addr) } -void LOADERDECL Color_ReadDirect_24b_888() +void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader) { - _SetCol(_Read24(DataGetPosition())); + _SetCol(loader, _Read24(DataGetPosition())); DataSkip(3); } -void LOADERDECL 
Color_ReadDirect_32b_888x() +void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader) { - _SetCol(_Read24(DataGetPosition())); + _SetCol(loader, _Read24(DataGetPosition())); DataSkip(4); } -void LOADERDECL Color_ReadDirect_16b_565() +void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader) { - _SetCol565(DataReadU16()); + _SetCol565(loader, DataReadU16()); } -void LOADERDECL Color_ReadDirect_16b_4444() +void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader) { - _SetCol4444(*(u16*)DataGetPosition()); + _SetCol4444(loader, *(u16*)DataGetPosition()); DataSkip(2); } -void LOADERDECL Color_ReadDirect_24b_6666() +void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader) { - _SetCol6666(Common::swap32(DataGetPosition() - 1)); + _SetCol6666(loader, Common::swap32(DataGetPosition() - 1)); DataSkip(3); } // F|RES: i am not 100 percent sure, but the colElements seems to be important for rendering only @@ -101,77 +101,77 @@ void LOADERDECL Color_ReadDirect_24b_6666() // else // col |= 0xFF<m_colElements[loader->m_colIndex]) col |= 0xFF << ASHIFT; - _SetCol(col); + _SetCol(loader, col); } template -void Color_ReadIndex_16b_565() +void Color_ReadIndex_16b_565(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]))); - _SetCol565(val); + u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]))); + _SetCol565(loader, val); } template -void Color_ReadIndex_24b_888() +void Color_ReadIndex_24b_888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * 
g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, _Read24(iAddress)); } template -void Color_ReadIndex_32b_888x() +void Color_ReadIndex_32b_888x(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, _Read24(iAddress)); } template -void Color_ReadIndex_16b_4444() +void Color_ReadIndex_16b_4444(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])); - _SetCol4444(val); + u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex])); + _SetCol4444(loader, val); } template -void Color_ReadIndex_24b_6666() +void Color_ReadIndex_24b_6666(VertexLoader* loader) { auto const Index = DataRead(); - const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1; + const u8* pData = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]) - 1; u32 val = Common::swap32(pData); - _SetCol6666(val); + _SetCol6666(loader, val); } template -void Color_ReadIndex_32b_8888() +void Color_ReadIndex_32b_8888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read32(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, 
_Read32(iAddress)); } -void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565(); } -void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888(); } -void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x(); } -void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444(); } -void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666(); } -void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888(); } +void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565(loader); } +void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888(loader); } +void LOADERDECL Color_ReadIndex8_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x(loader); } +void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444(loader); } +void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666(loader); } +void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888(loader); } -void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565(); } -void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888(); } -void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x(); } -void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444(); } -void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666(); } -void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888(); } +void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565(loader); } +void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888(loader); } +void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x(loader); } +void LOADERDECL Color_ReadIndex16_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444(loader); } +void LOADERDECL 
Color_ReadIndex16_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666(loader); } +void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888(loader); } diff --git a/Source/Core/VideoCommon/VertexLoader_Color.h b/Source/Core/VideoCommon/VertexLoader_Color.h index 90b267064e..b3bee00c13 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.h +++ b/Source/Core/VideoCommon/VertexLoader_Color.h @@ -6,23 +6,25 @@ #include "VideoCommon/NativeVertexFormat.h" -void LOADERDECL Color_ReadDirect_24b_888(); -void LOADERDECL Color_ReadDirect_32b_888x(); -void LOADERDECL Color_ReadDirect_16b_565(); -void LOADERDECL Color_ReadDirect_16b_4444(); -void LOADERDECL Color_ReadDirect_24b_6666(); -void LOADERDECL Color_ReadDirect_32b_8888(); +class VertexLoader; -void LOADERDECL Color_ReadIndex8_16b_565(); -void LOADERDECL Color_ReadIndex8_24b_888(); -void LOADERDECL Color_ReadIndex8_32b_888x(); -void LOADERDECL Color_ReadIndex8_16b_4444(); -void LOADERDECL Color_ReadIndex8_24b_6666(); -void LOADERDECL Color_ReadIndex8_32b_8888(); +void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_32b_8888(VertexLoader* loader); -void LOADERDECL Color_ReadIndex16_16b_565(); -void LOADERDECL Color_ReadIndex16_24b_888(); -void LOADERDECL Color_ReadIndex16_32b_888x(); -void LOADERDECL Color_ReadIndex16_16b_4444(); -void LOADERDECL Color_ReadIndex16_24b_6666(); -void LOADERDECL Color_ReadIndex16_32b_8888(); +void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* 
loader); +void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader); + +void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_16b_4444(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader); diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index 1e428b66a2..70081ead11 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -57,7 +57,7 @@ __forceinline void ReadIndirect(const T* data) template struct Normal_Direct { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { auto const source = reinterpret_cast(DataGetPosition()); ReadIndirect(source); @@ -81,7 +81,7 @@ __forceinline void Normal_Index_Offset() template struct Normal_Index { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset(); } @@ -92,7 +92,7 @@ struct Normal_Index template struct Normal_Index_Indices3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset(); Normal_Index_Offset(); @@ -106,7 +106,7 @@ struct Normal_Index_Indices3 template struct Normal_Direct_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { const T* pData = reinterpret_cast(DataGetPosition()); const float frac = 1. 
/ float(1u << (sizeof(T) * 8 - std::is_signed::value - 1)); @@ -136,7 +136,7 @@ __forceinline void Normal_Index_Offset_SSSE3() template struct Normal_Index_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset_SSSE3(); } @@ -147,7 +147,7 @@ struct Normal_Index_SSSE3 template struct Normal_Index_Indices3_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset_SSSE3(); Normal_Index_Offset_SSSE3(); diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 60a6115ee0..c0fac7ef93 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -71,10 +71,10 @@ float PosScale(float val, float scale) } template -void LOADERDECL Pos_ReadDirect() +void LOADERDECL Pos_ReadDirect(VertexLoader* loader) { static_assert(N <= 3, "N > 3 is not sane!"); - auto const scale = posScale[0]; + auto const scale = loader->m_posScale[0];; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -87,14 +87,14 @@ void LOADERDECL Pos_ReadDirect() } template -void LOADERDECL Pos_ReadIndex() +void LOADERDECL Pos_ReadIndex(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); static_assert(N <= 3, "N > 3 is not sane!"); auto const index = DataRead(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - auto const scale = posScale[0]; + auto const scale = loader->m_posScale[0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i < 3; ++i) @@ -106,21 +106,21 @@ void LOADERDECL Pos_ReadIndex() #if _M_SSE >= 0x301 template -void LOADERDECL Pos_ReadDirect_SSSE3() +void LOADERDECL Pos_ReadDirect_SSSE3(VertexLoader* loader) { const T* pData = 
reinterpret_cast(DataGetPosition()); - Vertex_Read_SSSE3(pData, *(__m128*)posScale); + Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); DataSkip<(2 + three) * sizeof(T)>(); LOG_VTX(); } template -void LOADERDECL Pos_ReadIndex_SSSE3() +void LOADERDECL Pos_ReadIndex_SSSE3(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - Vertex_Read_SSSE3(pData, *(__m128*)posScale); + Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); LOG_VTX(); } #endif diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index 002ec13e55..4a858c84f9 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -29,9 +29,9 @@ __forceinline void LOG_TEX<2>() // PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]); } -static void LOADERDECL TexCoord_Read_Dummy() +static void LOADERDECL TexCoord_Read_Dummy(VertexLoader* loader) { - tcIndex++; + loader->m_tcIndex++; } template @@ -47,9 +47,9 @@ float TCScale(float val, float scale) } template -void LOADERDECL TexCoord_ReadDirect() +void LOADERDECL TexCoord_ReadDirect(VertexLoader* loader) { - auto const scale = tcScale[tcIndex][0]; + auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -60,18 +60,18 @@ void LOADERDECL TexCoord_ReadDirect() src.WritePointer(&g_video_buffer_read_ptr); LOG_TEX(); - ++tcIndex; + ++loader->m_tcIndex; } template -void LOADERDECL TexCoord_ReadIndex() +void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - auto const data = 
reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] - + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); - auto const scale = tcScale[tcIndex][0]; + auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i != N; ++i) @@ -79,32 +79,32 @@ void LOADERDECL TexCoord_ReadIndex() dst.WritePointer(&g_vertex_manager_write_ptr); LOG_TEX(); - ++tcIndex; + ++loader->m_tcIndex; } #if _M_SSE >= 0x301 template -void LOADERDECL TexCoord_ReadDirect2_SSSE3() +void LOADERDECL TexCoord_ReadDirect2_SSSE3(VertexLoader* loader) { const T* pData = reinterpret_cast(DataGetPosition()); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); + __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); Vertex_Read_SSSE3(pData, scale); DataSkip<2 * sizeof(T)>(); LOG_TEX<2>(); - tcIndex++; + loader->m_tcIndex++; } template -void LOADERDECL TexCoord_ReadIndex2_SSSE3() +void LOADERDECL TexCoord_ReadIndex2_SSSE3(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); + const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); Vertex_Read_SSSE3(pData, scale); LOG_TEX<2>(); - tcIndex++; + loader->m_tcIndex++; } #endif