Some cleanup in the display list cache, RunVertices and RunCompiledVertices

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7246 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2011-02-25 20:27:57 +00:00
parent c40ca6553f
commit ac6c18a5e4
2 changed files with 33 additions and 150 deletions

View File

@ -41,7 +41,7 @@
#include "VideoConfig.h" #include "VideoConfig.h"
#define DL_CODE_CACHE_SIZE (1024*1024*16) #define DL_CODE_CACHE_SIZE (1024*1024*16)
#define DL_CODE_CLEAR_THRESHOLD (128 * 1024) #define DL_CODE_CLEAR_THRESHOLD (16 * 1024)
extern int frameCount; extern int frameCount;
static u32 CheckContextId; static u32 CheckContextId;
using namespace Gen; using namespace Gen;
@ -64,17 +64,13 @@ struct ReferencedDataRegion
start_address(NULL), start_address(NULL),
NextRegion(NULL), NextRegion(NULL),
size(0), size(0),
MustClean(0), MustClean(0)
ReferencedArray(0xFFFFFFFF),
ReferencedArrayStride(0)
{} {}
u64 hash; u64 hash;
u8* start_address; u8* start_address;
ReferencedDataRegion* NextRegion; ReferencedDataRegion* NextRegion;
u32 size; u32 size;
u32 MustClean; u32 MustClean;
u32 ReferencedArray;
u32 ReferencedArrayStride;
int IntersectsMemoryRange(u8* range_address, u32 range_size) int IntersectsMemoryRange(u8* range_address, u32 range_size)
@ -104,8 +100,10 @@ struct CachedDisplayList
frame_count = frameCount; frame_count = frameCount;
} }
u64 dl_hash; u64 dl_hash;
// ... Something containing cached vertex buffers here ...
ReferencedDataRegion* Regions; ReferencedDataRegion* Regions;
ReferencedDataRegion* LastRegion; ReferencedDataRegion* LastRegion;
// Compile the commands themselves down to native code.
const u8* compiled_code; const u8* compiled_code;
u32 uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes. u32 uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes.
// Analytic data // Analytic data
@ -114,22 +112,11 @@ struct CachedDisplayList
u32 num_bp_reg; u32 num_bp_reg;
u32 num_index_xf; u32 num_index_xf;
u32 num_draw_call; u32 num_draw_call;
u32 pass; u32 pass;
u32 check; u32 check;
int frame_count; int frame_count;
// ... Something containing cached vertex buffers here ...
u32 BufferCount; u32 BufferCount;
// Compile the commands themselves down to native code.
void InsertRegion(ReferencedDataRegion* NewRegion) void InsertRegion(ReferencedDataRegion* NewRegion)
{ {
if(LastRegion) if(LastRegion)
@ -144,7 +131,7 @@ struct CachedDisplayList
BufferCount++; BufferCount++;
} }
void InsertOverlapingRegion(u8* RegionStartAddress, u32 Size,u32 referencedArray,u32 referencedArrayStride) void InsertOverlapingRegion(u8* RegionStartAddress, u32 Size)
{ {
ReferencedDataRegion* NewRegion = FindOverlapingRegion(RegionStartAddress, Size); ReferencedDataRegion* NewRegion = FindOverlapingRegion(RegionStartAddress, Size);
if(NewRegion) if(NewRegion)
@ -170,9 +157,6 @@ struct CachedDisplayList
NewRegion->size = Size; NewRegion->size = Size;
NewRegion->start_address = RegionStartAddress; NewRegion->start_address = RegionStartAddress;
NewRegion->hash = GetHash64(RegionStartAddress, Size, DL_HASH_STEPS); NewRegion->hash = GetHash64(RegionStartAddress, Size, DL_HASH_STEPS);
NewRegion->ReferencedArray = referencedArray;
NewRegion->ReferencedArrayStride = referencedArrayStride;
InsertRegion(NewRegion); InsertRegion(NewRegion);
} }
} }
@ -184,13 +168,6 @@ struct CachedDisplayList
{ {
if(Current->hash) if(Current->hash)
{ {
// This test guarantees that the arrays referenced by the dlist have not changed.
// In the current implementation it is not necessary, because the id of the dlist is calculated from the arrays it refers to,
// but the test is kept for extra safety.
if(Current->ReferencedArray != 0xFFFFFFFF && (cached_arraybases[Current->ReferencedArray] != Current->start_address || arraystrides[Current->ReferencedArray] != Current->ReferencedArrayStride))
{
return false;
}
if(Current->hash != GetHash64(Current->start_address, Current->size, DL_HASH_STEPS)) if(Current->hash != GetHash64(Current->start_address, Current->size, DL_HASH_STEPS))
return false; return false;
} }
@ -600,7 +577,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
{ {
u8* saddr = cached_arraybases[i]; u8* saddr = cached_arraybases[i];
int arraySize = arraystrides[i] * ((tc[i] == 2)? numVertices : ((numVertices < 1024)? 2 * numVertices : numVertices)); int arraySize = arraystrides[i] * ((tc[i] == 2)? numVertices : ((numVertices < 1024)? 2 * numVertices : numVertices));
dl->InsertOverlapingRegion(saddr, arraySize,i,arraystrides[i]); dl->InsertOverlapingRegion(saddr, arraySize);
} }
} }
} }

View File

@ -514,18 +514,20 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{ {
if(count == 0)
return;
m_numLoadedVertices += count; m_numLoadedVertices += count;
INCSTAT(stats.thisFrame.numDrawCalls);
// Flush if our vertex format is different from the currently set. // Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt) if (g_nativeVertexFmt != m_NativeFmt)
{ {
// We really must flush here. It's possible that the native representations // We really must flush here. It's possible that the native representations
// of the two vtx formats are the same, but we have no way to easily check that // of the two vtx formats are the same, but we have no way to easily check that
// now. // now.
VertexManager::Flush(); VertexManager::Flush();
// Also move the Set() here? g_nativeVertexFmt = m_NativeFmt;
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
} }
g_nativeVertexFmt = m_NativeFmt;
if (bpmem.genMode.cullmode == 3 && primitive < 5) if (bpmem.genMode.cullmode == 3 && primitive < 5)
{ {
@ -534,8 +536,6 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
return; return;
} }
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
// Load position and texcoord scale factors. // Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac; m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac; m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
@ -555,86 +555,18 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements; colElements[i] = m_VtxAttr.color[i].Elements;
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush if(VertexManager::GetRemainingSize() < native_stride * count)
int granularity = 1; VertexManager::Flush();
switch (primitive) {
case 3: // strip .. hm, weird
case 4: // fan
if (VertexManager::GetRemainingSize() < 3 * native_stride)
VertexManager::Flush();
break;
case 6: // line strip
if (VertexManager::GetRemainingSize() < 2 * native_stride)
VertexManager::Flush();
break;
case 0: granularity = 4; break; // quads
case 2: granularity = 3; break; // tris
case 5: granularity = 2; break; // lines
}
int startv = 0, extraverts = 0; VertexManager::AddVertices(primitive,count);
int v = 0;
//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
while (v < count)
{
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
//if (remainingVerts2 - v + startv < remainingVerts)
//remainingVerts = remainingVerts2 - v + startv;
if (remainingVerts < granularity) {
INCSTAT(stats.thisFrame.numBufferSplits);
// This buffer full - break current primitive and flush, to switch to the next buffer.
u8* plastptr = VertexManager::s_pCurBufferPointer;
if (v - startv > 0)
VertexManager::AddVertices(primitive, v - startv + extraverts);
VertexManager::Flush();
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
// Why does this need to be so complicated?
switch (primitive) {
case 3: // triangle strip, copy last two vertices
// a little trick since we have to keep track of signs
if (v & 1) {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
VertexManager::s_pCurBufferPointer += native_stride*3;
extraverts = 3;
}
else {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
VertexManager::s_pCurBufferPointer += native_stride*2;
extraverts = 2;
}
break;
case 4: // tri fan, copy first and last vert
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 2;
break;
case 6: // line strip
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 1;
break;
default:
extraverts = 0;
break;
}
startv = v;
}
int remainingPrims = remainingVerts / granularity;
remainingVerts = remainingPrims * granularity;
if (count - v < remainingVerts)
remainingVerts = count - v;
#ifdef USE_JIT #ifdef USE_JIT
if (remainingVerts > 0) { if (count > 0) {
loop_counter = remainingVerts; loop_counter = count;
((void (*)())(void*)m_compiledCode)(); ((void (*)())(void*)m_compiledCode)();
} }
#else #else
for (int s = 0; s < remainingVerts; s++) for (int s = 0; s < count; s++)
{ {
tcIndex = 0; tcIndex = 0;
colIndex = 0; colIndex = 0;
@ -644,11 +576,6 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
PRIM_LOG("\n"); PRIM_LOG("\n");
} }
#endif #endif
v += remainingVerts;
}
if (startv < count)
VertexManager::AddVertices(primitive, count - startv + extraverts);
} }
@ -657,17 +584,17 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data) void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
{ {
m_numLoadedVertices += count; m_numLoadedVertices += count;
INCSTAT(stats.thisFrame.numDrawCalls);
// Flush if our vertex format is different from the currently set. // Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt) if (g_nativeVertexFmt != m_NativeFmt)
{ {
// We really must flush here. It's possible that the native representations // We really must flush here. It's possible that the native representations
// of the two vtx formats are the same, but we have no way to easily check that // of the two vtx formats are the same, but we have no way to easily check that
// now. // now.
VertexManager::Flush(); VertexManager::Flush();
// Also move the Set() here? g_nativeVertexFmt = m_NativeFmt;
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
} }
g_nativeVertexFmt = m_NativeFmt;
if (bpmem.genMode.cullmode == 3 && primitive < 5) if (bpmem.genMode.cullmode == 3 && primitive < 5)
{ {
@ -676,27 +603,6 @@ void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int co
return; return;
} }
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
// Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
pVtxAttr = &m_VtxAttr;
posScale = fractionTable[m_VtxAttr.PosFrac];
if (m_NativeFmt->m_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++)
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
if(VertexManager::GetRemainingSize() < native_stride * count) if(VertexManager::GetRemainingSize() < native_stride * count)
VertexManager::Flush(); VertexManager::Flush();
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count); memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);