diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index af344c344c..a03f8cd61d 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -43,7 +43,6 @@ #include "VideoCommon/OnScreenDisplay.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/Statistics.h" -#include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderGen.h" #include "VideoCommon/VertexShaderManager.h" diff --git a/Source/Core/VideoBackends/OGL/main.cpp b/Source/Core/VideoBackends/OGL/main.cpp index 8642d78cc1..6969b97ba4 100644 --- a/Source/Core/VideoBackends/OGL/main.cpp +++ b/Source/Core/VideoBackends/OGL/main.cpp @@ -74,7 +74,6 @@ Make AA apply instantly during gameplay if possible #include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/PixelEngine.h" #include "VideoCommon/PixelShaderManager.h" -#include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoConfig.h" diff --git a/Source/Core/VideoBackends/Software/NativeVertexFormat.h b/Source/Core/VideoBackends/Software/NativeVertexFormat.h index ad71e2caf9..c4f140cac4 100644 --- a/Source/Core/VideoBackends/Software/NativeVertexFormat.h +++ b/Source/Core/VideoBackends/Software/NativeVertexFormat.h @@ -7,14 +7,6 @@ #include "Common/ChunkFile.h" #include "VideoBackends/Software/Vec3.h" -#ifdef WIN32 -#define LOADERDECL __cdecl -#else -#define LOADERDECL -#endif - -typedef void (LOADERDECL *TPipelineFunction)(); - struct Vec4 { float x; diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index ec53bc6c3b..2523b5b01a 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -13,12 +13,8 @@ #include "VideoBackends/Software/TransformUnit.h" #include 
"VideoBackends/Software/XFMemLoader.h" -#include "VideoCommon/VertexLoader.h" -#include "VideoCommon/VertexLoader_Color.h" -#include "VideoCommon/VertexLoader_Normal.h" -#include "VideoCommon/VertexLoader_Position.h" -#include "VideoCommon/VertexLoader_TextCoord.h" -#include "VideoCommon/VertexManagerBase.h" +#include "VideoCommon/VertexLoaderBase.h" +#include "VideoCommon/VertexLoaderUtils.h" SWVertexLoader::SWVertexLoader() : m_VertexSize(0) @@ -42,11 +38,11 @@ void SWVertexLoader::SetFormat(u8 attributeIndex, u8 primitiveType) if (!m_CurrentLoader) { - m_CurrentLoader = new VertexLoader(g_main_cp_state.vtx_desc, g_main_cp_state.vtx_attr[m_attributeIndex]); - m_VertexLoaderMap[uid] = std::unique_ptr(m_CurrentLoader); + m_CurrentLoader = VertexLoaderBase::CreateVertexLoader(g_main_cp_state.vtx_desc, g_main_cp_state.vtx_attr[m_attributeIndex]); + m_VertexLoaderMap[uid] = std::unique_ptr(m_CurrentLoader); } - m_VertexSize = m_CurrentLoader->GetVertexSize(); + m_VertexSize = m_CurrentLoader->m_VertexSize; m_CurrentVat = &g_main_cp_state.vtx_attr[m_attributeIndex]; @@ -168,7 +164,7 @@ void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec) void SWVertexLoader::LoadVertex() { - const PortableVertexDeclaration& vdec = m_CurrentLoader->GetNativeVertexDeclaration(); + const PortableVertexDeclaration& vdec = m_CurrentLoader->m_native_vtx_decl; // reserve memory for the destination of the vertex loader m_LoadedVertices.resize(vdec.stride + 4); @@ -176,11 +172,11 @@ void SWVertexLoader::LoadVertex() // convert the vertex from the gc format to the videocommon (hardware optimized) format u8* old = g_video_buffer_read_ptr; m_CurrentLoader->RunVertices( - g_main_cp_state.vtx_attr[m_attributeIndex], m_primitiveType, 1, + m_primitiveType, 1, DataReader(g_video_buffer_read_ptr, nullptr), // src DataReader(m_LoadedVertices.data(), m_LoadedVertices.data() + m_LoadedVertices.size()) // dst ); - g_video_buffer_read_ptr = old + m_CurrentLoader->GetVertexSize(); + 
g_video_buffer_read_ptr = old + m_CurrentLoader->m_VertexSize; // parse the videocommon format to our own struct format (m_Vertex) ParseVertex(vdec); diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.h b/Source/Core/VideoBackends/Software/SWVertexLoader.h index 4a5d38d52a..890d1fa52d 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.h +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.h @@ -9,7 +9,7 @@ #include "VideoBackends/Software/CPMemLoader.h" #include "VideoBackends/Software/NativeVertexFormat.h" -#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" class PointerWrap; class SetupUnit; @@ -28,9 +28,9 @@ class SWVertexLoader bool m_TexGenSpecialCase; - std::map> m_VertexLoaderMap; + std::unordered_map> m_VertexLoaderMap; std::vector m_LoadedVertices; - VertexLoader* m_CurrentLoader; + VertexLoaderBase* m_CurrentLoader; u8 m_attributeIndex; u8 m_primitiveType; diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 6c4ad70331..44f6964cfb 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -21,7 +21,6 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureDecoder.h" -#include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" diff --git a/Source/Core/VideoCommon/BoundingBox.cpp b/Source/Core/VideoCommon/BoundingBox.cpp index 4b4400f2e7..9ea0fa5689 100644 --- a/Source/Core/VideoCommon/BoundingBox.cpp +++ b/Source/Core/VideoCommon/BoundingBox.cpp @@ -30,7 +30,7 @@ static TVtxDesc vertexDesc; static PortableVertexDeclaration vertexDecl; // Gets the pointer to the current buffer position -void LOADERDECL SetVertexBufferPosition() +void LOADERDECL SetVertexBufferPosition(VertexLoader* loader) { bufferPos = g_vertex_manager_write_ptr; } @@ -76,7 +76,7 @@ void Prepare(const VAT & 
vat, int primitive, const TVtxDesc & vtxDesc, const Por } // Updates the bounding box -void LOADERDECL Update() +void LOADERDECL Update(VertexLoader* loader) { if (!active) return; diff --git a/Source/Core/VideoCommon/BoundingBox.h b/Source/Core/VideoCommon/BoundingBox.h index d6952b184e..afff0e3e72 100644 --- a/Source/Core/VideoCommon/BoundingBox.h +++ b/Source/Core/VideoCommon/BoundingBox.h @@ -31,8 +31,8 @@ extern u8 posMtxIdx; // Texture matrix indexes extern u8 texMtxIdx[8]; -void LOADERDECL SetVertexBufferPosition(); -void LOADERDECL Update(); +void LOADERDECL SetVertexBufferPosition(VertexLoader* loader); +void LOADERDECL Update(VertexLoader* loader); void Prepare(const VAT & vat, int primitive, const TVtxDesc & vtxDesc, const PortableVertexDeclaration & vtxDecl); // Save state diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index 95ebaf7433..a450a8e379 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -28,6 +28,7 @@ set(SRCS BoundingBox.cpp TextureConversionShader.cpp TextureDecoder_Common.cpp VertexLoader.cpp + VertexLoaderBase.cpp VertexLoaderManager.cpp VertexLoader_Color.cpp VertexLoader_Normal.cpp diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index a256fc6849..e348f7f38d 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -45,13 +45,6 @@ enum FORMAT_32B_8888 = 5, }; -enum -{ - VAT_0_FRACBITS = 0x3e0001f0, - VAT_1_FRACBITS = 0x07c3e1f0, - VAT_2_FRACBITS = 0xf87c3e1f, -}; - #pragma pack(4) union TVtxDesc { @@ -239,7 +232,7 @@ struct VAT UVAT_group2 g2; }; -class VertexLoader; +class VertexLoaderBase; // STATE_TO_SAVE struct CPState final @@ -254,7 +247,7 @@ struct CPState final // Attributes that actually belong to VertexLoaderManager: BitSet32 attr_dirty; - VertexLoader* vertex_loaders[8]; + VertexLoaderBase* vertex_loaders[8]; }; class PointerWrap; diff --git 
a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index 612f671f5d..024f4f070d 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -44,14 +44,6 @@ enum VB_HAS_UVTEXMTXSHIFT=13, }; -#ifdef WIN32 -#define LOADERDECL __cdecl -#else -#define LOADERDECL -#endif - -typedef void (LOADERDECL *TPipelineFunction)(); - enum VarType { VAR_UNSIGNED_BYTE, // GX_U8 = 0 diff --git a/Source/Core/VideoCommon/VertexLoader.cpp b/Source/Core/VideoCommon/VertexLoader.cpp index f293564e85..e2a2073ccf 100644 --- a/Source/Core/VideoCommon/VertexLoader.cpp +++ b/Source/Core/VideoCommon/VertexLoader.cpp @@ -4,7 +4,6 @@ #include "Common/CommonTypes.h" #include "Common/MemoryUtil.h" -#include "Common/StringUtil.h" #include "Common/x64ABI.h" #include "Common/x64Emitter.h" @@ -30,97 +29,88 @@ #define inline #endif -// Matrix components are first in GC format but later in PC format - we need to store it temporarily -// when decoding each vertex. -static u8 s_curposmtx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx; -static u8 s_curtexmtx[8]; -static int s_texmtxwrite = 0; -static int s_texmtxread = 0; - -// Vertex loaders read these. Although the scale ones should be baked into the shader. 
-int tcIndex; -int colIndex; -int colElements[2]; -// Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT -GC_ALIGNED128(float posScale[4]); -GC_ALIGNED64(float tcScale[8][2]); - // This pointer is used as the source/dst for all fixed function loader calls u8* g_video_buffer_read_ptr; u8* g_vertex_manager_write_ptr; -static const float fractionTable[32] = { - 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), - 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), - 1.0f / (1U << 8), 1.0f / (1U << 9), 1.0f / (1U << 10), 1.0f / (1U << 11), - 1.0f / (1U << 12), 1.0f / (1U << 13), 1.0f / (1U << 14), 1.0f / (1U << 15), - 1.0f / (1U << 16), 1.0f / (1U << 17), 1.0f / (1U << 18), 1.0f / (1U << 19), - 1.0f / (1U << 20), 1.0f / (1U << 21), 1.0f / (1U << 22), 1.0f / (1U << 23), - 1.0f / (1U << 24), 1.0f / (1U << 25), 1.0f / (1U << 26), 1.0f / (1U << 27), - 1.0f / (1U << 28), 1.0f / (1U << 29), 1.0f / (1U << 30), 1.0f / (1U << 31), -}; - using namespace Gen; -static void LOADERDECL PosMtx_ReadDirect_UByte() + +void* VertexLoader::operator new (size_t size) { - BoundingBox::posMtxIdx = s_curposmtx = DataReadU8() & 0x3f; - PRIM_LOG("posmtx: %d, ", s_curposmtx); + return AllocateAlignedMemory(size, 16); } -static void LOADERDECL PosMtx_Write() +void VertexLoader::operator delete (void *p) +{ + FreeAlignedMemory(p); +} + +static void LOADERDECL PosMtx_ReadDirect_UByte(VertexLoader* loader) +{ + BoundingBox::posMtxIdx = loader->m_curposmtx = DataReadU8() & 0x3f; + PRIM_LOG("posmtx: %d, ", loader->m_curposmtx); +} + +static void LOADERDECL PosMtx_Write(VertexLoader* loader) { // u8, 0, 0, 0 - DataWrite(s_curposmtx); + DataWrite(loader->m_curposmtx); } -static void LOADERDECL TexMtx_ReadDirect_UByte() +static void LOADERDECL TexMtx_ReadDirect_UByte(VertexLoader* loader) { - BoundingBox::texMtxIdx[s_texmtxread] = s_curtexmtx[s_texmtxread] = DataReadU8() & 0x3f; + BoundingBox::texMtxIdx[loader->m_texmtxread] = 
loader->m_curtexmtx[loader->m_texmtxread] = DataReadU8() & 0x3f; - PRIM_LOG("texmtx%d: %d, ", s_texmtxread, s_curtexmtx[s_texmtxread]); - s_texmtxread++; + PRIM_LOG("texmtx%d: %d, ", loader->m_texmtxread, loader->m_curtexmtx[loader->m_texmtxread]); + loader->m_texmtxread++; } -static void LOADERDECL TexMtx_Write_Float() +static void LOADERDECL TexMtx_Write_Float(VertexLoader* loader) { - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); } -static void LOADERDECL TexMtx_Write_Float2() +static void LOADERDECL TexMtx_Write_Float2(VertexLoader* loader) { DataWrite(0.f); - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); } -static void LOADERDECL TexMtx_Write_Float4() +static void LOADERDECL TexMtx_Write_Float4(VertexLoader* loader) { #if _M_SSE >= 0x200 - __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]); + __m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), loader->m_curtexmtx[loader->m_texmtxwrite++]); _mm_storeu_ps((float*)g_vertex_manager_write_ptr, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */)); g_vertex_manager_write_ptr += sizeof(float) * 4; #else DataWrite(0.f); DataWrite(0.f); - DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + DataWrite(float(loader->m_curtexmtx[loader->m_texmtxwrite++])); // Just to fill out with 0. 
DataWrite(0.f); #endif } +static void LOADERDECL SkipVertex(VertexLoader* loader) +{ + if (loader->m_vertexSkip) + { + // reset the output buffer + g_vertex_manager_write_ptr -= loader->m_native_vtx_decl.stride; + + loader->m_skippedVertices++; + } +} + VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) +: VertexLoaderBase(vtx_desc, vtx_attr) { m_compiledCode = nullptr; - m_numLoadedVertices = 0; - m_VertexSize = 0; - m_native_vertex_format = nullptr; VertexLoader_Normal::Init(); VertexLoader_Position::Init(); VertexLoader_TextCoord::Init(); - m_VtxDesc = vtx_desc; - SetVAT(vtx_attr); - #ifdef USE_VERTEX_LOADER_JIT AllocCodeSpace(COMPILED_CODE_SIZE); CompileVertexTranslator(); @@ -130,6 +120,13 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) CompileVertexTranslator(); #endif + // generate frac factors + m_posScale[0] = m_posScale[1] = m_posScale[2] = m_posScale[3] = 1.0f / (1U << m_VtxAttr.PosFrac); + for (int i = 0; i < 8; i++) + m_tcScale[i][0] = m_tcScale[i][1] = 1.0f / (1U << m_VtxAttr.texCoord[i].Frac); + + for (int i = 0; i < 2; i++) + m_colElements[i] = m_VtxAttr.color[i].Elements; } VertexLoader::~VertexLoader() @@ -150,11 +147,14 @@ void VertexLoader::CompileVertexTranslator() m_compiledCode = GetCodePtr(); // We only use RAX (caller saved) and RBX (callee saved). 
- ABI_PushRegistersAndAdjustStack({RBX}, 8); + ABI_PushRegistersAndAdjustStack({RBX, RBP}, 8); // save count MOV(64, R(RBX), R(ABI_PARAM1)); + // save loader + MOV(64, R(RBP), R(ABI_PARAM2)); + // Start loop here const u8 *loop_start = GetCodePtr(); @@ -162,17 +162,17 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord || m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) { - WriteSetVariable(32, &tcIndex, Imm32(0)); + WriteSetVariable(32, &m_tcIndex, Imm32(0)); } if (m_VtxDesc.Color0 || m_VtxDesc.Color1) { - WriteSetVariable(32, &colIndex, Imm32(0)); + WriteSetVariable(32, &m_colIndex, Imm32(0)); } if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx || m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) { - WriteSetVariable(32, &s_texmtxwrite, Imm32(0)); - WriteSetVariable(32, &s_texmtxread, Imm32(0)); + WriteSetVariable(32, &m_texmtxwrite, Imm32(0)); + WriteSetVariable(32, &m_texmtxread, Imm32(0)); } #else // Reset pipeline @@ -404,6 +404,12 @@ void VertexLoader::CompileVertexTranslator() nat_offset += 4; } + // indexed position formats may skip the vertex + if (m_VtxDesc.Position & 2) + { + WriteCall(SkipVertex); + } + m_native_components = components; m_native_vtx_decl.stride = nat_offset; @@ -412,7 +418,7 @@ void VertexLoader::CompileVertexTranslator() SUB(64, R(RBX), Imm8(1)); J_CC(CC_NZ, loop_start); - ABI_PopRegistersAndAdjustStack({RBX}, 8); + ABI_PopRegistersAndAdjustStack({RBX, RBP}, 8); RET(); #endif } @@ -420,6 +426,7 @@ void VertexLoader::CompileVertexTranslator() void VertexLoader::WriteCall(TPipelineFunction func) { #ifdef USE_VERTEX_LOADER_JIT + MOV(64, R(ABI_PARAM1), R(RBP)); ABI_CallFunction((const void*)func); #else m_PipelineStages[m_numPipelineStages++] = func; @@ -444,179 +451,34 @@ void VertexLoader::WriteSetVariable(int bits, 
void *address, OpArg value) } #endif -void VertexLoader::SetupRunVertices(const VAT& vat, int primitive, int const count) +int VertexLoader::RunVertices(int primitive, int count, DataReader src, DataReader dst) { + dst.WritePointer(&g_vertex_manager_write_ptr); + src.WritePointer(&g_video_buffer_read_ptr); + m_numLoadedVertices += count; - - // Load position and texcoord scale factors. - m_VtxAttr.PosFrac = vat.g0.PosFrac; - m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac; - m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac; - m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac; - m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac; - m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac; - m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac; - m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac; - m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac; - - posScale[0] = posScale[1] = posScale[2] = posScale[3] = fractionTable[m_VtxAttr.PosFrac]; - if (m_native_components & VB_HAS_UVALL) - for (int i = 0; i < 8; i++) - tcScale[i][0] = tcScale[i][1] = fractionTable[m_VtxAttr.texCoord[i].Frac]; - for (int i = 0; i < 2; i++) - colElements[i] = m_VtxAttr.color[i].Elements; + m_skippedVertices = 0; // Prepare bounding box if (!g_ActiveConfig.backend_info.bSupportsBBox) - BoundingBox::Prepare(vat, primitive, m_VtxDesc, m_native_vtx_decl); -} + BoundingBox::Prepare(m_vat, primitive, m_VtxDesc, m_native_vtx_decl); -void VertexLoader::ConvertVertices ( int count ) -{ #ifdef USE_VERTEX_LOADER_JIT if (count > 0) { - ((void (*)(int))(void*)m_compiledCode)(count); + ((void (*)(int, VertexLoader* loader))(void*)m_compiledCode)(count, this); } #else for (int s = 0; s < count; s++) { - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; + m_tcIndex = 0; + m_colIndex = 0; + m_texmtxwrite = m_texmtxread = 0; for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](); + m_PipelineStages[i](this); PRIM_LOG("\n"); } #endif + + return count - m_skippedVertices; } - -int VertexLoader::RunVertices(const VAT& vat, int 
primitive, int count, DataReader src, DataReader dst) -{ - dst.WritePointer(&g_vertex_manager_write_ptr); - src.WritePointer(&g_video_buffer_read_ptr); - SetupRunVertices(vat, primitive, count); - ConvertVertices(count); - return count; -} - -void VertexLoader::SetVAT(const VAT& vat) -{ - m_VtxAttr.PosElements = vat.g0.PosElements; - m_VtxAttr.PosFormat = vat.g0.PosFormat; - m_VtxAttr.PosFrac = vat.g0.PosFrac; - m_VtxAttr.NormalElements = vat.g0.NormalElements; - m_VtxAttr.NormalFormat = vat.g0.NormalFormat; - m_VtxAttr.color[0].Elements = vat.g0.Color0Elements; - m_VtxAttr.color[0].Comp = vat.g0.Color0Comp; - m_VtxAttr.color[1].Elements = vat.g0.Color1Elements; - m_VtxAttr.color[1].Comp = vat.g0.Color1Comp; - m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements; - m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat; - m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac; - m_VtxAttr.ByteDequant = vat.g0.ByteDequant; - m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3; - - m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements; - m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat; - m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac; - m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements; - m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat; - m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac; - m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements; - m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat; - m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac; - m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements; - m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat; - - m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac; - m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements; - m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat; - m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac; - m_VtxAttr.texCoord[6].Elements = vat.g2.Tex6CoordElements; - m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat; - m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac; - 
m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements; - m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat; - m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac; - - if (!m_VtxAttr.ByteDequant) - { - ERROR_LOG(VIDEO, "ByteDequant is set to zero"); - } -}; - -void VertexLoader::AppendToString(std::string *dest) const -{ - dest->reserve(250); - static const char *posMode[4] = { - "Inv", - "Dir", - "I8", - "I16", - }; - static const char *posFormats[5] = { - "u8", "s8", "u16", "s16", "flt", - }; - static const char *colorFormat[8] = { - "565", - "888", - "888x", - "4444", - "6666", - "8888", - "Inv", - "Inv", - }; - - dest->append(StringFromFormat("%ib skin: %i P: %i %s-%s ", - m_VertexSize, (u32)m_VtxDesc.PosMatIdx, - m_VtxAttr.PosElements ? 3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat])); - - if (m_VtxDesc.Normal) - { - dest->append(StringFromFormat("Nrm: %i %s-%s ", - m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat])); - } - - u64 color_mode[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1}; - for (int i = 0; i < 2; i++) - { - if (color_mode[i]) - { - dest->append(StringFromFormat("C%i: %i %s-%s ", i, m_VtxAttr.color[i].Elements, posMode[color_mode[i]], colorFormat[m_VtxAttr.color[i].Comp])); - } - } - u64 tex_mode[8] = { - m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord, - m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord - }; - for (int i = 0; i < 8; i++) - { - if (tex_mode[i]) - { - dest->append(StringFromFormat("T%i: %i %s-%s ", - i, m_VtxAttr.texCoord[i].Elements, posMode[tex_mode[i]], posFormats[m_VtxAttr.texCoord[i].Format])); - } - } - dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices)); -} - -NativeVertexFormat* VertexLoader::GetNativeVertexFormat() -{ - if (m_native_vertex_format) - return m_native_vertex_format; - auto& native = s_native_vertex_map[m_native_vtx_decl]; - if (!native) - { - auto raw_pointer = 
g_vertex_manager->CreateNativeVertexFormat(); - native = std::unique_ptr(raw_pointer); - native->Initialize(m_native_vtx_decl); - native->m_components = m_native_components; - } - m_native_vertex_format = native.get(); - return native.get(); - -} - -std::unordered_map> VertexLoader::s_native_vertex_map; diff --git a/Source/Core/VideoCommon/VertexLoader.h b/Source/Core/VideoCommon/VertexLoader.h index df1bb445a5..4c883a3242 100644 --- a/Source/Core/VideoCommon/VertexLoader.h +++ b/Source/Core/VideoCommon/VertexLoader.h @@ -8,9 +8,7 @@ // Metroid Prime: P I16-flt N I16-s16 T0 I16-u16 T1 i16-flt #include -#include #include -#include #include "Common/CommonTypes.h" #include "Common/x64Emitter.h" @@ -18,6 +16,7 @@ #include "VideoCommon/CPMemory.h" #include "VideoCommon/DataReader.h" #include "VideoCommon/NativeVertexFormat.h" +#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderUtils.h" #if _M_SSE >= 0x401 @@ -31,130 +30,60 @@ #define USE_VERTEX_LOADER_JIT #endif -// They are used for the communication with the loader functions -extern int tcIndex; -extern int colIndex; -extern int colElements[2]; -GC_ALIGNED128(extern float posScale[4]); -GC_ALIGNED64(extern float tcScale[8][2]); +#ifdef WIN32 +#define LOADERDECL __cdecl +#else +#define LOADERDECL +#endif -class VertexLoaderUID -{ - u32 vid[5]; - size_t hash; -public: - VertexLoaderUID() - { - } - - VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat) - { - vid[0] = vtx_desc.Hex & 0xFFFFFFFF; - vid[1] = vtx_desc.Hex >> 32; - vid[2] = vat.g0.Hex & ~VAT_0_FRACBITS; - vid[3] = vat.g1.Hex & ~VAT_1_FRACBITS; - vid[4] = vat.g2.Hex & ~VAT_2_FRACBITS; - hash = CalculateHash(); - } - - bool operator < (const VertexLoaderUID &other) const - { - // This is complex because of speed. 
- if (vid[0] < other.vid[0]) - return true; - else if (vid[0] > other.vid[0]) - return false; - - for (int i = 1; i < 5; ++i) - { - if (vid[i] < other.vid[i]) - return true; - else if (vid[i] > other.vid[i]) - return false; - } - - return false; - } - - bool operator == (const VertexLoaderUID& rh) const - { - return hash == rh.hash && std::equal(vid, vid + sizeof(vid) / sizeof(vid[0]), rh.vid); - } - - size_t GetHash() const - { - return hash; - } - -private: - - size_t CalculateHash() - { - size_t h = -1; - - for (auto word : vid) - { - h = h * 137 + word; - } - - return h; - } -}; +class VertexLoader; +typedef void (LOADERDECL *TPipelineFunction)(VertexLoader* loader); // ARMTODO: This should be done in a better way #ifndef _M_GENERIC -class VertexLoader : public Gen::X64CodeBlock +class VertexLoader : public Gen::X64CodeBlock, public VertexLoaderBase #else -class VertexLoader +class VertexLoader : public VertexLoaderBase #endif { public: + // This class needs a 16 byte alignment. As this is broken on 
+ void* operator new (size_t size); + void operator delete (void *p); + VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); ~VertexLoader(); - int GetVertexSize() const {return m_VertexSize;} - u32 GetNativeComponents() const { return m_native_components; } - const PortableVertexDeclaration& GetNativeVertexDeclaration() const - { return m_native_vtx_decl; } + int RunVertices(int primitive, int count, DataReader src, DataReader dst) override; + std::string GetName() const override { return "OldLoader"; } + bool IsInitialized() override { return true; } // This vertex loader supports all formats - void SetupRunVertices(const VAT& vat, int primitive, int const count); - int RunVertices(const VAT& vat, int primitive, int count, DataReader src, DataReader dst); + // They are used for the communication with the loader functions + // Duplicated (4x and 2x respectively) and used in SSE code in the vertex loader JIT + GC_ALIGNED128(float m_posScale[4]); + GC_ALIGNED64(float m_tcScale[8][2]); + int m_tcIndex; + int m_colIndex; + int m_colElements[2]; - // For debugging / profiling - void AppendToString(std::string *dest) const; - int GetNumLoadedVerts() const { return m_numLoadedVertices; } - - NativeVertexFormat* GetNativeVertexFormat(); - static void ClearNativeVertexFormatCache() { s_native_vertex_map.clear(); } + // Matrix components are first in GC format but later in PC format - we need to store it temporarily + // when decoding each vertex. + u8 m_curposmtx; + u8 m_curtexmtx[8]; + int m_texmtxwrite; + int m_texmtxread; + bool m_vertexSkip; + int m_skippedVertices; private: - int m_VertexSize; // number of bytes of a raw GC vertex. Computed by CompileVertexTranslator. - - // GC vertex format - TVtxAttr m_VtxAttr; // VAT decoded into easy format - TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided. 
- - // PC vertex format - u32 m_native_components; - PortableVertexDeclaration m_native_vtx_decl; - #ifndef USE_VERTEX_LOADER_JIT // Pipeline. TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower. int m_numPipelineStages; #endif - const u8 *m_compiledCode; - - int m_numLoadedVertices; - - NativeVertexFormat* m_native_vertex_format; - static std::unordered_map> s_native_vertex_map; - - void SetVAT(const VAT& vat); - void CompileVertexTranslator(); - void ConvertVertices(int count); void WriteCall(TPipelineFunction); @@ -162,6 +91,8 @@ private: void WriteGetVariable(int bits, Gen::OpArg dest, void *address); void WriteSetVariable(int bits, void *address, Gen::OpArg dest); #endif + + const u8 *m_compiledCode; }; #if _M_SSE >= 0x301 diff --git a/Source/Core/VideoCommon/VertexLoaderBase.cpp b/Source/Core/VideoCommon/VertexLoaderBase.cpp new file mode 100644 index 0000000000..ed56db0655 --- /dev/null +++ b/Source/Core/VideoCommon/VertexLoaderBase.cpp @@ -0,0 +1,199 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#include + +#include "Common/StringUtil.h" + +#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" + +VertexLoaderBase::VertexLoaderBase(const TVtxDesc &vtx_desc, const VAT &vtx_attr) +{ + m_numLoadedVertices = 0; + m_VertexSize = 0; + m_native_vertex_format = nullptr; + + SetVAT(vtx_attr); + m_VtxDesc = vtx_desc; + m_vat = vtx_attr; +} + +void VertexLoaderBase::SetVAT(const VAT& vat) +{ + m_VtxAttr.PosElements = vat.g0.PosElements; + m_VtxAttr.PosFormat = vat.g0.PosFormat; + m_VtxAttr.PosFrac = vat.g0.PosFrac; + m_VtxAttr.NormalElements = vat.g0.NormalElements; + m_VtxAttr.NormalFormat = vat.g0.NormalFormat; + m_VtxAttr.color[0].Elements = vat.g0.Color0Elements; + m_VtxAttr.color[0].Comp = vat.g0.Color0Comp; + m_VtxAttr.color[1].Elements = vat.g0.Color1Elements; + m_VtxAttr.color[1].Comp = vat.g0.Color1Comp; + m_VtxAttr.texCoord[0].Elements = vat.g0.Tex0CoordElements; + m_VtxAttr.texCoord[0].Format = vat.g0.Tex0CoordFormat; + m_VtxAttr.texCoord[0].Frac = vat.g0.Tex0Frac; + m_VtxAttr.ByteDequant = vat.g0.ByteDequant; + m_VtxAttr.NormalIndex3 = vat.g0.NormalIndex3; + + m_VtxAttr.texCoord[1].Elements = vat.g1.Tex1CoordElements; + m_VtxAttr.texCoord[1].Format = vat.g1.Tex1CoordFormat; + m_VtxAttr.texCoord[1].Frac = vat.g1.Tex1Frac; + m_VtxAttr.texCoord[2].Elements = vat.g1.Tex2CoordElements; + m_VtxAttr.texCoord[2].Format = vat.g1.Tex2CoordFormat; + m_VtxAttr.texCoord[2].Frac = vat.g1.Tex2Frac; + m_VtxAttr.texCoord[3].Elements = vat.g1.Tex3CoordElements; + m_VtxAttr.texCoord[3].Format = vat.g1.Tex3CoordFormat; + m_VtxAttr.texCoord[3].Frac = vat.g1.Tex3Frac; + m_VtxAttr.texCoord[4].Elements = vat.g1.Tex4CoordElements; + m_VtxAttr.texCoord[4].Format = vat.g1.Tex4CoordFormat; + + m_VtxAttr.texCoord[4].Frac = vat.g2.Tex4Frac; + m_VtxAttr.texCoord[5].Elements = vat.g2.Tex5CoordElements; + m_VtxAttr.texCoord[5].Format = vat.g2.Tex5CoordFormat; + m_VtxAttr.texCoord[5].Frac = vat.g2.Tex5Frac; + m_VtxAttr.texCoord[6].Elements = 
vat.g2.Tex6CoordElements; + m_VtxAttr.texCoord[6].Format = vat.g2.Tex6CoordFormat; + m_VtxAttr.texCoord[6].Frac = vat.g2.Tex6Frac; + m_VtxAttr.texCoord[7].Elements = vat.g2.Tex7CoordElements; + m_VtxAttr.texCoord[7].Format = vat.g2.Tex7CoordFormat; + m_VtxAttr.texCoord[7].Frac = vat.g2.Tex7Frac; + + if (!m_VtxAttr.ByteDequant) + { + ERROR_LOG(VIDEO, "ByteDequant is set to zero"); + } +}; + +void VertexLoaderBase::AppendToString(std::string *dest) const +{ + dest->reserve(250); + + dest->append(GetName()); + dest->append(": "); + + static const char *posMode[4] = { + "Inv", + "Dir", + "I8", + "I16", + }; + static const char *posFormats[5] = { + "u8", "s8", "u16", "s16", "flt", + }; + static const char *colorFormat[8] = { + "565", + "888", + "888x", + "4444", + "6666", + "8888", + "Inv", + "Inv", + }; + + dest->append(StringFromFormat("%ib skin: %i P: %i %s-%s ", + m_VertexSize, (u32)m_VtxDesc.PosMatIdx, + m_VtxAttr.PosElements ? 3 : 2, posMode[m_VtxDesc.Position], posFormats[m_VtxAttr.PosFormat])); + + if (m_VtxDesc.Normal) + { + dest->append(StringFromFormat("Nrm: %i %s-%s ", + m_VtxAttr.NormalElements, posMode[m_VtxDesc.Normal], posFormats[m_VtxAttr.NormalFormat])); + } + + u64 color_mode[2] = {m_VtxDesc.Color0, m_VtxDesc.Color1}; + for (int i = 0; i < 2; i++) + { + if (color_mode[i]) + { + dest->append(StringFromFormat("C%i: %i %s-%s ", i, m_VtxAttr.color[i].Elements, posMode[color_mode[i]], colorFormat[m_VtxAttr.color[i].Comp])); + } + } + u64 tex_mode[8] = { + m_VtxDesc.Tex0Coord, m_VtxDesc.Tex1Coord, m_VtxDesc.Tex2Coord, m_VtxDesc.Tex3Coord, + m_VtxDesc.Tex4Coord, m_VtxDesc.Tex5Coord, m_VtxDesc.Tex6Coord, m_VtxDesc.Tex7Coord + }; + for (int i = 0; i < 8; i++) + { + if (tex_mode[i]) + { + dest->append(StringFromFormat("T%i: %i %s-%s ", + i, m_VtxAttr.texCoord[i].Elements, posMode[tex_mode[i]], posFormats[m_VtxAttr.texCoord[i].Format])); + } + } + dest->append(StringFromFormat(" - %i v\n", m_numLoadedVertices)); +} + +// a hacky implementation to compare two 
vertex loaders +class VertexLoaderTester : public VertexLoaderBase +{ +public: + VertexLoaderTester(VertexLoaderBase* _a, VertexLoaderBase* _b, const TVtxDesc& vtx_desc, const VAT& vtx_attr) + : VertexLoaderBase(vtx_desc, vtx_attr) + { + a = _a; + b = _b; + m_initialized = a && b && a->IsInitialized() && b->IsInitialized(); + m_initialized = m_initialized && (a->m_VertexSize == b->m_VertexSize); + m_initialized = m_initialized && (a->m_native_vtx_decl.stride == b->m_native_vtx_decl.stride); + } + ~VertexLoaderTester() + { + delete a; + delete b; + } + + int RunVertices(int primitive, int count, DataReader src, DataReader dst) override + { + buffer_a.resize(count * a->m_native_vtx_decl.stride); + buffer_b.resize(count * b->m_native_vtx_decl.stride); + + int count_a = a->RunVertices(primitive, count, src, DataReader(buffer_a.data(), buffer_a.data()+buffer_a.size())); + int count_b = b->RunVertices(primitive, count, src, DataReader(buffer_b.data(), buffer_b.data()+buffer_b.size())); + + if (count_a != count_b) + ERROR_LOG(VIDEO, "Both vertexloaders have loaded a different amount of vertices."); + + if (memcmp(buffer_a.data(), buffer_b.data(), std::min(count_a, count_b) * a->m_native_vtx_decl.stride)) // byte length, not vertex count + ERROR_LOG(VIDEO, "Both vertexloaders have loaded different data."); + + u8* dstptr; + dst.WritePointer(&dstptr); + memcpy(dstptr, buffer_a.data(), count_a * a->m_native_vtx_decl.stride); // copy whole vertices, not count_a bytes + return count_a; + } + std::string GetName() const override { return "CompareLoader"; } + bool IsInitialized() override { return m_initialized; } + +private: + VertexLoaderBase *a, *b; + bool m_initialized; + std::vector buffer_a, buffer_b; +}; + +VertexLoaderBase* VertexLoaderBase::CreateVertexLoader(const TVtxDesc& vtx_desc, const VAT& vtx_attr) +{ + VertexLoaderBase* loader; + +#if 0 + // first try: Any new VertexLoader vs the old one + loader = new VertexLoaderTester( + new VertexLoader(vtx_desc, vtx_attr), // the software one + new VertexLoader(vtx_desc, vtx_attr), // the new one to compare + vtx_desc, vtx_attr); + if 
(loader->IsInitialized()) + return loader; + delete loader; +#endif + + // last try: The old VertexLoader + loader = new VertexLoader(vtx_desc, vtx_attr); + if (loader->IsInitialized()) + return loader; + delete loader; + + PanicAlert("No Vertex Loader found."); + return nullptr; +} diff --git a/Source/Core/VideoCommon/VertexLoaderBase.h b/Source/Core/VideoCommon/VertexLoaderBase.h new file mode 100644 index 0000000000..7da2d866f9 --- /dev/null +++ b/Source/Core/VideoCommon/VertexLoaderBase.h @@ -0,0 +1,103 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "Common/CommonTypes.h" + +#include "VideoCommon/CPMemory.h" +#include "VideoCommon/DataReader.h" +#include "VideoCommon/NativeVertexFormat.h" + +class VertexLoaderUID +{ + std::array vid; + size_t hash; +public: + VertexLoaderUID() + { + } + + VertexLoaderUID(const TVtxDesc& vtx_desc, const VAT& vat) + { + vid[0] = vtx_desc.Hex & 0xFFFFFFFF; + vid[1] = vtx_desc.Hex >> 32; + vid[2] = vat.g0.Hex; + vid[3] = vat.g1.Hex; + vid[4] = vat.g2.Hex; + hash = CalculateHash(); + } + + bool operator == (const VertexLoaderUID& rh) const + { + return vid == rh.vid; + } + + size_t GetHash() const + { + return hash; + } + +private: + + size_t CalculateHash() const + { + size_t h = -1; + + for (auto word : vid) + { + h = h * 137 + word; + } + + return h; + } +}; + +namespace std +{ +template <> struct hash +{ + size_t operator()(const VertexLoaderUID& uid) const + { + return uid.GetHash(); + } +}; +} + +class VertexLoaderBase +{ +public: + static VertexLoaderBase* CreateVertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr); + virtual ~VertexLoaderBase() {}; + + virtual int RunVertices(int primitive, int count, DataReader src, DataReader dst) = 0; + + virtual bool IsInitialized() = 0; + + // For debugging / profiling + void AppendToString(std::string *dest) const; + + virtual std::string GetName() const = 0; + + 
// per loader public state + int m_VertexSize; // number of bytes of a raw GC vertex + PortableVertexDeclaration m_native_vtx_decl; + u32 m_native_components; + + // used by VertexLoaderManager + NativeVertexFormat* m_native_vertex_format; + int m_numLoadedVertices; + +protected: + VertexLoaderBase(const TVtxDesc &vtx_desc, const VAT &vtx_attr); + void SetVAT(const VAT& vat); + + // GC vertex format + TVtxAttr m_VtxAttr; // VAT decoded into easy format + TVtxDesc m_VtxDesc; // Not really used currently - or well it is, but could be easily avoided. + VAT m_vat; +}; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index cf638c2495..0ab87b323d 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -15,33 +15,22 @@ #include "VideoCommon/BPMemory.h" #include "VideoCommon/IndexGenerator.h" #include "VideoCommon/Statistics.h" -#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" #include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VertexShaderManager.h" #include "VideoCommon/VideoCommon.h" -static NativeVertexFormat* s_current_vtx_fmt; -namespace std -{ - -template <> -struct hash -{ - size_t operator()(const VertexLoaderUID& uid) const - { - return uid.GetHash(); - } -}; - -} - -typedef std::unordered_map> VertexLoaderMap; namespace VertexLoaderManager { +typedef std::unordered_map> NativeVertexFormatMap; +static NativeVertexFormatMap s_native_vertex_map; +static NativeVertexFormat* s_current_vtx_fmt; + +typedef std::unordered_map> VertexLoaderMap; static std::mutex s_vertex_loader_map_lock; static VertexLoaderMap s_vertex_loader_map; // TODO - change into array of pointers. Keep a map of all seen so far. 
@@ -60,7 +49,7 @@ void Shutdown() { std::lock_guard lk(s_vertex_loader_map_lock); s_vertex_loader_map.clear(); - VertexLoader::ClearNativeVertexFormatCache(); + s_native_vertex_map.clear(); } namespace @@ -86,7 +75,7 @@ void AppendListToString(std::string *dest) { entry e; map_entry.second->AppendToString(&e.text); - e.num_verts = map_entry.second->GetNumLoadedVerts(); + e.num_verts = map_entry.second->m_numLoadedVertices; entries.push_back(e); total_size += e.text.size() + 1; } @@ -104,9 +93,9 @@ void MarkAllDirty() g_preprocess_cp_state.attr_dirty = BitSet32::AllTrue(8); } -static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) +static VertexLoaderBase* RefreshLoader(int vtx_attr_group, CPState* state) { - VertexLoader* loader; + VertexLoaderBase* loader; if (state->attr_dirty[vtx_attr_group]) { VertexLoaderUID uid(state->vtx_desc, state->vtx_attr[vtx_attr_group]); @@ -118,8 +107,21 @@ static VertexLoader* RefreshLoader(int vtx_attr_group, CPState* state) } else { - loader = new VertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]); - s_vertex_loader_map[uid] = std::unique_ptr(loader); + loader = VertexLoaderBase::CreateVertexLoader(state->vtx_desc, state->vtx_attr[vtx_attr_group]); + s_vertex_loader_map[uid] = std::unique_ptr(loader); + + // search for a cached native vertex format + const PortableVertexDeclaration& format = loader->m_native_vtx_decl; + auto& native = s_native_vertex_map[format]; + if (!native) + { + auto raw_pointer = g_vertex_manager->CreateNativeVertexFormat(); + native = std::unique_ptr(raw_pointer); + native->Initialize(format); + native->m_components = loader->m_native_components; + } + loader->m_native_vertex_format = native.get(); + INCSTAT(stats.numVertexLoaders); } state->vertex_loaders[vtx_attr_group] = loader; @@ -137,9 +139,9 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo CPState* state = &g_main_cp_state; - VertexLoader* loader = RefreshLoader(vtx_attr_group, state); + 
VertexLoaderBase* loader = RefreshLoader(vtx_attr_group, state); - int size = count * loader->GetVertexSize(); + int size = count * loader->m_VertexSize; if ((int)src.size() < size) return -1; @@ -149,21 +151,19 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo return size; } - NativeVertexFormat* native = loader->GetNativeVertexFormat(); - // If the native vertex format changed, force a flush. - if (native != s_current_vtx_fmt) + if (loader->m_native_vertex_format != s_current_vtx_fmt) VertexManager::Flush(); - s_current_vtx_fmt = native; + s_current_vtx_fmt = loader->m_native_vertex_format; DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count, - loader->GetNativeVertexDeclaration().stride); + loader->m_native_vtx_decl.stride); - count = loader->RunVertices(state->vtx_attr[vtx_attr_group], primitive, count, src, dst); + count = loader->RunVertices(primitive, count, src, dst); IndexGenerator::AddIndices(primitive, count); - VertexManager::FlushData(count, loader->GetNativeVertexDeclaration().stride); + VertexManager::FlushData(count, loader->m_native_vtx_decl.stride); ADDSTAT(stats.thisFrame.numPrims, count); INCSTAT(stats.thisFrame.numPrimitiveJoins); @@ -172,7 +172,7 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo int GetVertexSize(int vtx_attr_group, bool preprocess) { - return RefreshLoader(vtx_attr_group, preprocess ? &g_preprocess_cp_state : &g_main_cp_state)->GetVertexSize(); + return RefreshLoader(vtx_attr_group, preprocess ? 
&g_preprocess_cp_state : &g_main_cp_state)->m_VertexSize; } NativeVertexFormat* GetCurrentVertexFormat() diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 0120e3bea5..58b1b96f31 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -16,46 +16,46 @@ #define ASHIFT 24 #define AMASK 0xFF000000 -__forceinline void _SetCol(u32 val) +__forceinline void _SetCol(VertexLoader* loader, u32 val) { DataWrite(val); - colIndex++; + loader->m_colIndex++; } //color comes in format BARG in 16 bits //BARG -> AABBGGRR -__forceinline void _SetCol4444(u16 val) +__forceinline void _SetCol4444(VertexLoader* loader, u16 val) { u32 col = (val & 0xF0); // col = 000000R0; col |= (val & 0xF ) << 12; // col |= 0000G000; col |= (((u32)val) & 0xF000) << 8; // col |= 00B00000; col |= (((u32)val) & 0x0F00) << 20; // col |= A0000000; col |= col >> 4; // col = A0B0G0R0 | 0A0B0G0R; - _SetCol(col); + _SetCol(loader, col); } //color comes in format RGBA //RRRRRRGG GGGGBBBB BBAAAAAA -__forceinline void _SetCol6666(u32 val) +__forceinline void _SetCol6666(VertexLoader* loader, u32 val) { u32 col = (val >> 16) & 0xFC; col |= (val >> 2) & 0xFC00; col |= (val << 12) & 0xFC0000; col |= (val << 26) & 0xFC000000; col |= (col >> 6) & 0x03030303; - _SetCol(col); + _SetCol(loader, col); } //color comes in RGB //RRRRRGGG GGGBBBBB -__forceinline void _SetCol565(u16 val) +__forceinline void _SetCol565(VertexLoader* loader, u16 val) { u32 col = (val >> 8) & 0xF8; col |= (val << 5) & 0xFC00; col |=(((u32)val) << 19) & 0xF80000; col |= (col >> 5) & 0x070007; col |= (col >> 6) & 0x000300; - _SetCol(col | AMASK); + _SetCol(loader, col | AMASK); } __forceinline u32 _Read24(const u8 *addr) @@ -69,29 +69,29 @@ __forceinline u32 _Read32(const u8 *addr) } -void LOADERDECL Color_ReadDirect_24b_888() +void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader) { - _SetCol(_Read24(DataGetPosition())); 
+ _SetCol(loader, _Read24(DataGetPosition())); DataSkip(3); } -void LOADERDECL Color_ReadDirect_32b_888x() +void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader) { - _SetCol(_Read24(DataGetPosition())); + _SetCol(loader, _Read24(DataGetPosition())); DataSkip(4); } -void LOADERDECL Color_ReadDirect_16b_565() +void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader) { - _SetCol565(DataReadU16()); + _SetCol565(loader, DataReadU16()); } -void LOADERDECL Color_ReadDirect_16b_4444() +void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader) { - _SetCol4444(*(u16*)DataGetPosition()); + _SetCol4444(loader, *(u16*)DataGetPosition()); DataSkip(2); } -void LOADERDECL Color_ReadDirect_24b_6666() +void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader) { - _SetCol6666(Common::swap32(DataGetPosition() - 1)); + _SetCol6666(loader, Common::swap32(DataGetPosition() - 1)); DataSkip(3); } // F|RES: i am not 100 percent sure, but the colElements seems to be important for rendering only @@ -101,77 +101,77 @@ void LOADERDECL Color_ReadDirect_24b_6666() // else // col |= 0xFF<m_colElements[loader->m_colIndex]) col |= 0xFF << ASHIFT; - _SetCol(col); + _SetCol(loader, col); } template -void Color_ReadIndex_16b_565() +void Color_ReadIndex_16b_565(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]))); - _SetCol565(val); + u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]))); + _SetCol565(loader, val); } template -void Color_ReadIndex_24b_888() +void Color_ReadIndex_24b_888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); + const u8 *iAddress = 
cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, _Read24(iAddress)); } template -void Color_ReadIndex_32b_888x() +void Color_ReadIndex_32b_888x(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, _Read24(iAddress)); } template -void Color_ReadIndex_16b_4444() +void Color_ReadIndex_16b_4444(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex])); - _SetCol4444(val); + u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex])); + _SetCol4444(loader, val); } template -void Color_ReadIndex_24b_6666() +void Color_ReadIndex_24b_6666(VertexLoader* loader) { auto const Index = DataRead(); - const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]) - 1; + const u8* pData = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]) - 1; u32 val = Common::swap32(pData); - _SetCol6666(val); + _SetCol6666(loader, val); } template -void Color_ReadIndex_32b_8888() +void Color_ReadIndex_32b_8888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR+colIndex]); - _SetCol(_Read32(iAddress)); + const u8 *iAddress = cached_arraybases[ARRAY_COLOR+loader->m_colIndex] + (Index * 
g_main_cp_state.array_strides[ARRAY_COLOR+loader->m_colIndex]); + _SetCol(loader, _Read32(iAddress)); } -void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565(); } -void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888(); } -void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x(); } -void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444(); } -void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666(); } -void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888(); } +void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565(loader); } +void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888(loader); } +void LOADERDECL Color_ReadIndex8_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x(loader); } +void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444(loader); } +void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666(loader); } +void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888(loader); } -void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565(); } -void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888(); } -void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x(); } -void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444(); } -void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666(); } -void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888(); } +void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader) { Color_ReadIndex_16b_565(loader); } +void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader) { Color_ReadIndex_24b_888(loader); } +void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader) { Color_ReadIndex_32b_888x(loader); } +void LOADERDECL 
Color_ReadIndex16_16b_4444(VertexLoader* loader) { Color_ReadIndex_16b_4444(loader); } +void LOADERDECL Color_ReadIndex16_24b_6666(VertexLoader* loader) { Color_ReadIndex_24b_6666(loader); } +void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader) { Color_ReadIndex_32b_8888(loader); } diff --git a/Source/Core/VideoCommon/VertexLoader_Color.h b/Source/Core/VideoCommon/VertexLoader_Color.h index 90b267064e..b3bee00c13 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.h +++ b/Source/Core/VideoCommon/VertexLoader_Color.h @@ -6,23 +6,25 @@ #include "VideoCommon/NativeVertexFormat.h" -void LOADERDECL Color_ReadDirect_24b_888(); -void LOADERDECL Color_ReadDirect_32b_888x(); -void LOADERDECL Color_ReadDirect_16b_565(); -void LOADERDECL Color_ReadDirect_16b_4444(); -void LOADERDECL Color_ReadDirect_24b_6666(); -void LOADERDECL Color_ReadDirect_32b_8888(); +class VertexLoader; -void LOADERDECL Color_ReadIndex8_16b_565(); -void LOADERDECL Color_ReadIndex8_24b_888(); -void LOADERDECL Color_ReadIndex8_32b_888x(); -void LOADERDECL Color_ReadIndex8_16b_4444(); -void LOADERDECL Color_ReadIndex8_24b_6666(); -void LOADERDECL Color_ReadIndex8_32b_8888(); +void LOADERDECL Color_ReadDirect_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_16b_4444(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadDirect_32b_8888(VertexLoader* loader); -void LOADERDECL Color_ReadIndex16_16b_565(); -void LOADERDECL Color_ReadIndex16_24b_888(); -void LOADERDECL Color_ReadIndex16_32b_888x(); -void LOADERDECL Color_ReadIndex16_16b_4444(); -void LOADERDECL Color_ReadIndex16_24b_6666(); -void LOADERDECL Color_ReadIndex16_32b_8888(); +void LOADERDECL Color_ReadIndex8_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_24b_888(VertexLoader* loader); +void LOADERDECL 
Color_ReadIndex8_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_16b_4444(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadIndex8_32b_8888(VertexLoader* loader); + +void LOADERDECL Color_ReadIndex16_16b_565(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_24b_888(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_32b_888x(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_16b_4444(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_24b_6666(VertexLoader* loader); +void LOADERDECL Color_ReadIndex16_32b_8888(VertexLoader* loader); diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index 1e428b66a2..70081ead11 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -57,7 +57,7 @@ __forceinline void ReadIndirect(const T* data) template struct Normal_Direct { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { auto const source = reinterpret_cast(DataGetPosition()); ReadIndirect(source); @@ -81,7 +81,7 @@ __forceinline void Normal_Index_Offset() template struct Normal_Index { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset(); } @@ -92,7 +92,7 @@ struct Normal_Index template struct Normal_Index_Indices3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset(); Normal_Index_Offset(); @@ -106,7 +106,7 @@ struct Normal_Index_Indices3 template struct Normal_Direct_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { const T* pData = reinterpret_cast(DataGetPosition()); const float frac = 1. 
/ float(1u << (sizeof(T) * 8 - std::is_signed::value - 1)); @@ -136,7 +136,7 @@ __forceinline void Normal_Index_Offset_SSSE3() template struct Normal_Index_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset_SSSE3(); } @@ -147,7 +147,7 @@ struct Normal_Index_SSSE3 template struct Normal_Index_Indices3_SSSE3 { - static void LOADERDECL function() + static void LOADERDECL function(VertexLoader* loader) { Normal_Index_Offset_SSSE3(); Normal_Index_Offset_SSSE3(); diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index 60a6115ee0..419c041b5b 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -71,10 +71,10 @@ float PosScale(float val, float scale) } template -void LOADERDECL Pos_ReadDirect() +void LOADERDECL Pos_ReadDirect(VertexLoader* loader) { static_assert(N <= 3, "N > 3 is not sane!"); - auto const scale = posScale[0]; + auto const scale = loader->m_posScale[0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -87,14 +87,15 @@ void LOADERDECL Pos_ReadDirect() } template -void LOADERDECL Pos_ReadIndex() +void LOADERDECL Pos_ReadIndex(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); static_assert(N <= 3, "N > 3 is not sane!"); auto const index = DataRead(); + loader->m_vertexSkip = index == std::numeric_limits::max(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - auto const scale = posScale[0]; + auto const scale = loader->m_posScale[0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i < 3; ++i) @@ -106,21 +107,22 @@ void LOADERDECL Pos_ReadIndex() #if _M_SSE >= 0x301 template -void LOADERDECL 
Pos_ReadDirect_SSSE3(VertexLoader* loader) { const T* pData = reinterpret_cast(DataGetPosition()); - Vertex_Read_SSSE3(pData, *(__m128*)posScale); + Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); DataSkip<(2 + three) * sizeof(T)>(); LOG_VTX(); } template -void LOADERDECL Pos_ReadIndex_SSSE3() +void LOADERDECL Pos_ReadIndex_SSSE3(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); + loader->m_vertexSkip = index == std::numeric_limits::max(); const T* pData = (const T*)(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); - Vertex_Read_SSSE3(pData, *(__m128*)posScale); + Vertex_Read_SSSE3(pData, *(__m128*)loader->m_posScale); LOG_VTX(); } #endif diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index 002ec13e55..4a858c84f9 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -29,9 +29,9 @@ __forceinline void LOG_TEX<2>() // PRIM_LOG("tex: %f %f, ", ((float*)g_vertex_manager_write_ptr)[-2], ((float*)g_vertex_manager_write_ptr)[-1]); } -static void LOADERDECL TexCoord_Read_Dummy() +static void LOADERDECL TexCoord_Read_Dummy(VertexLoader* loader) { - tcIndex++; + loader->m_tcIndex++; } template @@ -47,9 +47,9 @@ float TCScale(float val, float scale) } template -void LOADERDECL TexCoord_ReadDirect() +void LOADERDECL TexCoord_ReadDirect(VertexLoader* loader) { - auto const scale = tcScale[tcIndex][0]; + auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); DataReader src(g_video_buffer_read_ptr, nullptr); @@ -60,18 +60,18 @@ void LOADERDECL TexCoord_ReadDirect() src.WritePointer(&g_video_buffer_read_ptr); LOG_TEX(); - ++tcIndex; + ++loader->m_tcIndex; } template -void LOADERDECL TexCoord_ReadIndex() +void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader) { 
static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] - + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); - auto const scale = tcScale[tcIndex][0]; + auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + auto const scale = loader->m_tcScale[loader->m_tcIndex][0]; DataReader dst(g_vertex_manager_write_ptr, nullptr); for (int i = 0; i != N; ++i) @@ -79,32 +79,32 @@ void LOADERDECL TexCoord_ReadIndex() dst.WritePointer(&g_vertex_manager_write_ptr); LOG_TEX(); - ++tcIndex; + ++loader->m_tcIndex; } #if _M_SSE >= 0x301 template -void LOADERDECL TexCoord_ReadDirect2_SSSE3() +void LOADERDECL TexCoord_ReadDirect2_SSSE3(VertexLoader* loader) { const T* pData = reinterpret_cast(DataGetPosition()); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); + __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); Vertex_Read_SSSE3(pData, scale); DataSkip<2 * sizeof(T)>(); LOG_TEX<2>(); - tcIndex++; + loader->m_tcIndex++; } template -void LOADERDECL TexCoord_ReadIndex2_SSSE3() +void LOADERDECL TexCoord_ReadIndex2_SSSE3(VertexLoader* loader) { static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + tcIndex])); - __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)tcScale[tcIndex])); + const T* pData = (const T*)(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); + __m128 scale = _mm_castsi128_ps(_mm_loadl_epi64((__m128i*)loader->m_tcScale[loader->m_tcIndex])); Vertex_Read_SSSE3(pData, 
scale); LOG_TEX<2>(); - tcIndex++; + loader->m_tcIndex++; } #endif diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index 65488aa4fa..b4db374e28 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -65,6 +65,7 @@ + @@ -118,6 +119,7 @@ + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index cd4901f303..d00ee5518c 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -119,6 +119,9 @@ Vertex Loading + + Vertex Loading + Vertex Loading @@ -263,6 +266,9 @@ Vertex Loading + + Vertex Loading + Vertex Loading diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index 45d8a23ab1..601e90dcc2 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -1,8 +1,8 @@ -#include +#include #include "Common/Common.h" #include "VideoCommon/DataReader.h" -#include "VideoCommon/VertexLoader.h" +#include "VideoCommon/VertexLoaderBase.h" // Needs to be included later because it defines a TEST macro that conflicts // with a TEST method definition in x64Emitter.h. 
@@ -10,7 +10,7 @@ TEST(VertexLoaderUID, UniqueEnough) { - std::set uids; + std::unordered_set uids; TVtxDesc vtx_desc; memset(&vtx_desc, 0, sizeof (vtx_desc)); @@ -93,10 +93,10 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) m_vtx_attr.g0.PosElements = 1; // XYZ m_vtx_attr.g0.PosFormat = 4; // Float - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_VertexSize); // Write some vertices. Input(0.0f); Input(0.0f); Input(0.0f); @@ -105,9 +105,10 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) Input(0.0f); Input(0.0f); Input(1.0f); // Convert 4 points. "7" -> primitive are points. - int count = loader.RunVertices(m_vtx_attr, 7, 4, src, dst); - src.Skip(4 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + int count = loader->RunVertices(7, 4, src, dst); + src.Skip(4 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); + delete loader; ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(1.0f); ExpectOut(0.0f); ExpectOut(0.0f); @@ -117,10 +118,12 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZ) // Test that scale does nothing for floating point inputs. 
Input(1.0f); Input(2.0f); Input(4.0f); m_vtx_attr.g0.PosFrac = 1; - count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst); - src.Skip(1 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); + count = loader->RunVertices(7, 1, src, dst); + src.Skip(1 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(4.0f); + delete loader; } TEST_F(VertexLoaderTest, PositionDirectU16XY) @@ -129,10 +132,10 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) m_vtx_attr.g0.PosElements = 0; // XY m_vtx_attr.g0.PosFormat = 2; // U16 - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(2 * sizeof (u16), (u32)loader.GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(2 * sizeof (u16), (u32)loader->m_VertexSize); // Write some vertices. Input(0); Input(0); @@ -142,9 +145,10 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) Input(12345); Input(54321); // Convert 5 points. "7" -> primitive are points. - int count = loader.RunVertices(m_vtx_attr, 7, 5, src, dst); - src.Skip(5 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + int count = loader->RunVertices(7, 5, src, dst); + src.Skip(5 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); + delete loader; ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(0.0f); ExpectOut(1.0f); ExpectOut(2.0f); ExpectOut(0.0f); @@ -155,10 +159,12 @@ TEST_F(VertexLoaderTest, PositionDirectU16XY) // Test that scale works on U16 inputs. 
Input(42); Input(24); m_vtx_attr.g0.PosFrac = 1; - count = loader.RunVertices(m_vtx_attr, 7, 1, src, dst); - src.Skip(1 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); + count = loader->RunVertices(7, 1, src, dst); + src.Skip(1 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); ExpectOut(21.0f); ExpectOut(12.0f); ExpectOut(0.0f); + delete loader; } TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed) @@ -167,18 +173,19 @@ TEST_F(VertexLoaderTest, PositionDirectFloatXYZSpeed) m_vtx_attr.g0.PosElements = 1; // XYZ m_vtx_attr.g0.PosFormat = 4; // Float - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetVertexSize()); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(3 * sizeof (float), (u32)loader->m_VertexSize); for (int i = 0; i < 1000; ++i) { ResetPointers(); - int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); - src.Skip(100000 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + int count = loader->RunVertices(7, 100000, src, dst); + src.Skip(100000 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); } + delete loader; } TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed) @@ -187,18 +194,19 @@ TEST_F(VertexLoaderTest, PositionDirectU16XYSpeed) m_vtx_attr.g0.PosElements = 0; // XY m_vtx_attr.g0.PosFormat = 2; // U16 - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); - ASSERT_EQ(3 * sizeof (float), (u32)loader.GetNativeVertexDeclaration().stride); - ASSERT_EQ(2 * sizeof (u16), (u32)loader.GetVertexSize()); + 
ASSERT_EQ(3 * sizeof (float), (u32)loader->m_native_vtx_decl.stride); + ASSERT_EQ(2 * sizeof (u16), (u32)loader->m_VertexSize); for (int i = 0; i < 1000; ++i) { ResetPointers(); - int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); - src.Skip(100000 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + int count = loader->RunVertices(7, 100000, src, dst); + src.Skip(100000 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); } + delete loader; } TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) @@ -251,15 +259,16 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) m_vtx_attr.g2.Tex7CoordElements = 1; // ST m_vtx_attr.g2.Tex7CoordFormat = 4; // Float - VertexLoader loader(m_vtx_desc, m_vtx_attr); + VertexLoaderBase* loader = VertexLoaderBase::CreateVertexLoader(m_vtx_desc, m_vtx_attr); // This test is only done 100x in a row since it's ~20x slower using the // current vertex loader implementation. for (int i = 0; i < 100; ++i) { ResetPointers(); - int count = loader.RunVertices(m_vtx_attr, 7, 100000, src, dst); - src.Skip(100000 * loader.GetVertexSize()); - dst.Skip(count * loader.GetNativeVertexDeclaration().stride); + int count = loader->RunVertices(7, 100000, src, dst); + src.Skip(100000 * loader->m_VertexSize); + dst.Skip(count * loader->m_native_vtx_decl.stride); } + delete loader; }