Merge branch 'vertexloader_cleanup' into vertex-loader-cleanup

Conflicts:
	Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp
This commit is contained in:
degasus 2013-02-21 12:18:50 +01:00
commit a849bb5242
6 changed files with 73 additions and 45 deletions

View File

@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices); numVertices);
u8* EndAddress = VertexManager::s_pCurBufferPointer; u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress);
u32 Vdatasize = (u32)(EndAddress - StartAddress);
if (Vdatasize > 0) if (Vdatasize > 0)
{ {
// Compile // Compile

View File

@ -72,6 +72,10 @@ int colElements[2];
float posScale; float posScale;
float tcScale[8]; float tcScale[8];
// bbox must read vertex position, so convert it to this buffer.
// UpdateBoundingBoxPrepare() points VertexManager::s_pCurBufferPointer at this
// scratch buffer; UpdateBoundingBox() then reads the three floats (12 bytes)
// from it and copies them on to the real vertex buffer.
static float s_bbox_vertex_buffer[3];
// Value of VertexManager::s_pCurBufferPointer saved by UpdateBoundingBoxPrepare()
// and restored by UpdateBoundingBox().
static u8 *s_bbox_pCurBufferPointer_orig;
static const float fractionTable[32] = { static const float fractionTable[32] = {
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
@ -99,17 +103,32 @@ void LOADERDECL PosMtx_Write()
*VertexManager::s_pCurBufferPointer++ = 0; *VertexManager::s_pCurBufferPointer++ = 0;
} }
// Pipeline stage that is queued directly before the position loader when
// bounding box support is enabled. Does nothing unless PixelEngine::bbox_active
// is set.
//
// While active, it saves the current vertex buffer write pointer and redirects
// it to s_bbox_vertex_buffer, so that the regular position conversion function
// writes its output into our scratch buffer. This is a hack, but it lets the
// bounding box path reuse exactly the same conversion functions as the
// non-bbox path. UpdateBoundingBox() restores the saved pointer afterwards.
void LOADERDECL UpdateBoundingBoxPrepare()
{
	if (PixelEngine::bbox_active)
	{
		// Remember where the real vertex data has to continue.
		s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer;

		// From now on the next writes land in the bbox scratch buffer.
		VertexManager::s_pCurBufferPointer = reinterpret_cast<u8*>(s_bbox_vertex_buffer);
	}
}
void LOADERDECL UpdateBoundingBox() void LOADERDECL UpdateBoundingBox()
{ {
if (!PixelEngine::bbox_active) if (!PixelEngine::bbox_active)
return; return;
// reset videodata pointer
VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig;
// copy the converted vertex position (3 floats, 12 bytes) into the real vertex buffer
memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12);
VertexManager::s_pCurBufferPointer += 12;
// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
// memory like we might have been with a D3D vertex buffer, this would have been a bad idea.
float *data = (float *)(VertexManager::s_pCurBufferPointer - 12);
// We must transform the just loaded point by the current world and projection matrix - in software. // We must transform the just loaded point by the current world and projection matrix - in software.
// Then convert to screen space and update the bounding box. // Then convert to screen space and update the bounding box.
float p[3] = {data[0], data[1], data[2]}; float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]};
const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
const float *proj_matrix = &g_fProjectionMatrix[0]; const float *proj_matrix = &g_fProjectionMatrix[0];
@ -267,15 +286,16 @@ void VertexLoader::CompileVertexTranslator()
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); } if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
// Write vertex position loader // Write vertex position loader
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); if(g_ActiveConfig.bUseBBox) {
WriteCall(UpdateBoundingBoxPrepare);
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
WriteCall(UpdateBoundingBox);
} else {
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
}
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements); m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
nat_offset += 12; nat_offset += 12;
// OK, so we just got a point. Let's go back and read it for the bounding box.
if(g_ActiveConfig.bUseBBox)
WriteCall(UpdateBoundingBox);
// Normals // Normals
vtx_decl.num_normals = 0; vtx_decl.num_normals = 0;
if (m_VtxDesc.Normal != NOT_PRESENT) if (m_VtxDesc.Normal != NOT_PRESENT)
@ -571,7 +591,6 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
if (remainingVerts < granularity) { if (remainingVerts < granularity) {
INCSTAT(stats.thisFrame.numBufferSplits); INCSTAT(stats.thisFrame.numBufferSplits);
// This buffer full - break current primitive and flush, to switch to the next buffer. // This buffer full - break current primitive and flush, to switch to the next buffer.
u8* plastptr = VertexManager::s_pCurBufferPointer;
if (v - startv > 0) if (v - startv > 0)
VertexManager::AddVertices(primitive, v - startv + extraverts); VertexManager::AddVertices(primitive, v - startv + extraverts);
VertexManager::Flush(); VertexManager::Flush();
@ -581,27 +600,28 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
case 3: // triangle strip, copy last two vertices case 3: // triangle strip, copy last two vertices
// a little trick since we have to keep track of signs // a little trick since we have to keep track of signs
if (v & 1) { if (v & 1) {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride); g_pVideoData -= m_VertexSize*2;
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride); ConvertVertices(1);
VertexManager::s_pCurBufferPointer += native_stride*3; g_pVideoData -= m_VertexSize;
ConvertVertices(2);
extraverts = 3; extraverts = 3;
} }
else { else {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2); g_pVideoData -= m_VertexSize*2;
VertexManager::s_pCurBufferPointer += native_stride*2; ConvertVertices(2);
extraverts = 2; extraverts = 2;
} }
break; break;
case 4: // tri fan, copy first and last vert case 4: // tri fan, copy first and last vert
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride); g_pVideoData -= m_VertexSize*(v-startv+extraverts);
VertexManager::s_pCurBufferPointer += native_stride; ConvertVertices(1);
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); g_pVideoData += m_VertexSize*(v-startv+extraverts-2);
VertexManager::s_pCurBufferPointer += native_stride; ConvertVertices(1);
extraverts = 2; extraverts = 2;
break; break;
case 6: // line strip case 6: // line strip
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); g_pVideoData -= m_VertexSize*1;
VertexManager::s_pCurBufferPointer += native_stride; ConvertVertices(1);
extraverts = 1; extraverts = 1;
break; break;
default: default:
@ -615,22 +635,8 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
if (count - v < remainingVerts) if (count - v < remainingVerts)
remainingVerts = count - v; remainingVerts = count - v;
#ifdef USE_JIT ConvertVertices(remainingVerts);
if (remainingVerts > 0) {
loop_counter = remainingVerts;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < remainingVerts; s++)
{
tcIndex = 0;
colIndex = 0;
s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i]();
PRIM_LOG("\n");
}
#endif
v += remainingVerts; v += remainingVerts;
} }
@ -639,6 +645,27 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
} }
// Runs the vertex translator `count` times.
//
// With USE_JIT defined, loop_counter is set to `count` and the compiled code
// in m_compiledCode is invoked once; nothing happens for count <= 0.
// Otherwise every iteration resets the per-vertex indices (tcIndex, colIndex,
// s_texmtxread, s_texmtxwrite) and then calls each entry of m_PipelineStages
// in order.
void VertexLoader::ConvertVertices(int count)
{
#ifdef USE_JIT
	if (count <= 0)
		return;

	loop_counter = count;
	((void (*)())(void*)m_compiledCode)();
#else
	for (int remaining = count; remaining > 0; --remaining)
	{
		// Reset the per-vertex state shared by the pipeline stages.
		tcIndex = 0;
		colIndex = 0;
		s_texmtxread = 0;
		s_texmtxwrite = 0;

		int stage = 0;
		while (stage < m_numPipelineStages)
		{
			m_PipelineStages[stage]();
			++stage;
		}
		PRIM_LOG("\n");
	}
#endif
}
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data) void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)

View File

@ -119,6 +119,7 @@ private:
void SetVAT(u32 _group0, u32 _group1, u32 _group2); void SetVAT(u32 _group0, u32 _group1, u32 _group2);
void CompileVertexTranslator(); void CompileVertexTranslator();
void ConvertVertices(int count);
void WriteCall(TPipelineFunction); void WriteCall(TPipelineFunction);

View File

@ -31,6 +31,7 @@
#include <tmmintrin.h> #include <tmmintrin.h>
#endif #endif
// warning: mapping buffer should be disabled to use this
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); #define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];

View File

@ -34,12 +34,14 @@ void LOG_TEX();
template <> template <>
__forceinline void LOG_TEX<1>() __forceinline void LOG_TEX<1>()
{ {
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]);
} }
template <> template <>
__forceinline void LOG_TEX<2>() __forceinline void LOG_TEX<2>()
{ {
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]);
} }
@ -125,11 +127,9 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3()
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
u8* p = VertexManager::s_pCurBufferPointer; _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
_mm_storel_epi64((__m128i*)p, b);
LOG_TEX2(); LOG_TEX2();
p += 8; VertexManager::s_pCurBufferPointer += 8;
VertexManager::s_pCurBufferPointer = p;
tcIndex++; tcIndex++;
} }
#endif #endif

View File

@ -90,7 +90,7 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
#define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__) #define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__)
#endif #endif
// warning: mapping buffer should be disabled to use this
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); // #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]);
#define LOG_VTX() #define LOG_VTX()