Move the vertex loop into the JIT-compiled code in the vertex loader. This gives a small (yes, only small) speedup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1296 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-11-25 21:57:02 +00:00
parent 30a5c91c99
commit 72af432491
2 changed files with 31 additions and 13 deletions

View File

@ -38,6 +38,7 @@
NativeVertexFormat *g_nativeVertexFmt; NativeVertexFormat *g_nativeVertexFmt;
//these don't need to be saved //these don't need to be saved
static float posScale; static float posScale;
static int colElements[2]; static int colElements[2];
@ -57,6 +58,7 @@ static u8 s_curtexmtx[8];
static int s_texmtxwrite = 0; static int s_texmtxwrite = 0;
static int s_texmtxread = 0; static int s_texmtxread = 0;
static TVtxAttr* pVtxAttr; static TVtxAttr* pVtxAttr;
static int loop_counter;
using namespace Gen; using namespace Gen;
@ -107,7 +109,7 @@ void LOADERDECL TexMtx_Write_Short3()
#include "VertexLoader_Color.h" #include "VertexLoader_Color.h"
#include "VertexLoader_TextCoord.h" #include "VertexLoader_TextCoord.h"
#define COMPILED_CODE_SIZE 4096 #define COMPILED_CODE_SIZE 4096*4
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
{ {
@ -115,6 +117,7 @@ VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
m_VertexSize = 0; m_VertexSize = 0;
m_numPipelineStages = 0; m_numPipelineStages = 0;
m_NativeFmt = new NativeVertexFormat(); m_NativeFmt = new NativeVertexFormat();
loop_counter = 0;
VertexLoader_Normal::Init(); VertexLoader_Normal::Init();
m_VtxDesc = vtx_desc; m_VtxDesc = vtx_desc;
@ -143,10 +146,21 @@ void VertexLoader::CompileVertexTranslator()
ABI_EmitPrologue(4); ABI_EmitPrologue(4);
// Start loop here // Start loop here
MOV(32, M(&tcIndex), Imm32(0)); const u8 *loop_start = GetCodePtr();
MOV(32, M(&colIndex), Imm32(0));
MOV(32, M(&s_texmtxwrite), Imm32(0)); // Reset component counters if present in vertex format only.
MOV(32, M(&s_texmtxread), Imm32(0)); if (m_VtxDesc.Tex0Coord || m_VtxDesc.Tex1Coord || m_VtxDesc.Tex2Coord || m_VtxDesc.Tex3Coord ||
m_VtxDesc.Tex4Coord || m_VtxDesc.Tex5Coord || m_VtxDesc.Tex6Coord || m_VtxDesc.Tex7Coord) {
MOV(32, M(&tcIndex), Imm32(0));
}
if (m_VtxDesc.Color0 || m_VtxDesc.Color1) {
MOV(32, M(&colIndex), Imm32(0));
}
if (m_VtxDesc.Tex0MatIdx || m_VtxDesc.Tex1MatIdx || m_VtxDesc.Tex2MatIdx || m_VtxDesc.Tex3MatIdx ||
m_VtxDesc.Tex4MatIdx || m_VtxDesc.Tex5MatIdx || m_VtxDesc.Tex6MatIdx || m_VtxDesc.Tex7MatIdx) {
MOV(32, M(&s_texmtxwrite), Imm32(0));
MOV(32, M(&s_texmtxread), Imm32(0));
}
#endif #endif
// Colors // Colors
@ -460,13 +474,14 @@ void VertexLoader::CompileVertexTranslator()
vtx_decl.stride = native_stride; vtx_decl.stride = native_stride;
if (vtx_decl.stride != offset) if (vtx_decl.stride != offset)
PanicAlert("offset/stride mismatch, %i %i", vtx_decl.stride, offset); PanicAlert("offset/stride mismatch, %i %i", vtx_decl.stride, offset);
#ifdef USE_JIT #ifdef USE_JIT
// End loop here // End loop here
// SUB(32, M(&vtxCounter), Imm8(1)); SUB(32, M(&loop_counter), Imm8(1));
// J_CC(CC_NZ, loop); J_CC(CC_NZ, loop_start, true);
ABI_EmitEpilogue(4); ABI_EmitEpilogue(4);
#endif
SetCodePtr(old_code_ptr); SetCodePtr(old_code_ptr);
#endif
m_NativeFmt->Initialize(vtx_decl); m_NativeFmt->Initialize(vtx_decl);
} }
@ -692,19 +707,22 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
remainingVerts = count - v; remainingVerts = count - v;
// Clean tight loader loop. Todo - build the loop into the JIT code. // Clean tight loader loop. Todo - build the loop into the JIT code.
#ifdef USE_JIT
if (remainingVerts > 0) {
loop_counter = remainingVerts;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < remainingVerts; s++) for (int s = 0; s < remainingVerts; s++)
{ {
#ifdef USE_JIT
((void (*)())(void*)m_compiledCode)();
#else
tcIndex = 0; tcIndex = 0;
colIndex = 0; colIndex = 0;
s_texmtxwrite = s_texmtxread = 0; s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++) for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i](); m_PipelineStages[i]();
#endif
PRIM_LOG("\n"); PRIM_LOG("\n");
} }
#endif
v += remainingVerts; v += remainingVerts;
} }

View File

@ -83,7 +83,7 @@ private:
int native_stride; int native_stride;
// Pipeline. To be JIT compiled in the future. // Pipeline. To be JIT compiled in the future.
TPipelineFunction m_PipelineStages[32]; // TODO - figure out real max. it's lower. TPipelineFunction m_PipelineStages[64]; // TODO - figure out real max. it's lower.
int m_numPipelineStages; int m_numPipelineStages;
u8 *m_compiledCode; u8 *m_compiledCode;