Some cleanup in the display list (dlist) cache, RunVertices and RunCompiledVertices

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7246 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2011-02-25 20:27:57 +00:00
parent c40ca6553f
commit ac6c18a5e4
2 changed files with 33 additions and 150 deletions

View File

@ -41,7 +41,7 @@
#include "VideoConfig.h"
#define DL_CODE_CACHE_SIZE (1024*1024*16)
#define DL_CODE_CLEAR_THRESHOLD (128 * 1024)
#define DL_CODE_CLEAR_THRESHOLD (16 * 1024)
extern int frameCount;
static u32 CheckContextId;
using namespace Gen;
@ -64,17 +64,13 @@ struct ReferencedDataRegion
start_address(NULL),
NextRegion(NULL),
size(0),
MustClean(0),
ReferencedArray(0xFFFFFFFF),
ReferencedArrayStride(0)
MustClean(0)
{}
u64 hash;
u8* start_address;
ReferencedDataRegion* NextRegion;
u32 size;
u32 MustClean;
u32 ReferencedArray;
u32 ReferencedArrayStride;
int IntersectsMemoryRange(u8* range_address, u32 range_size)
@ -104,8 +100,10 @@ struct CachedDisplayList
frame_count = frameCount;
}
u64 dl_hash;
// ... Something containing cached vertex buffers here ...
ReferencedDataRegion* Regions;
ReferencedDataRegion* LastRegion;
// Compile the commands themselves down to native code.
const u8* compiled_code;
u32 uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes.
// Analytic data
@ -113,22 +111,11 @@ struct CachedDisplayList
u32 num_cp_reg;
u32 num_bp_reg;
u32 num_index_xf;
u32 num_draw_call;
u32 num_draw_call;
u32 pass;
u32 check;
int frame_count;
// ... Something containing cached vertex buffers here ...
int frame_count;
u32 BufferCount;
// Compile the commands themselves down to native code.
void InsertRegion(ReferencedDataRegion* NewRegion)
{
@ -144,7 +131,7 @@ struct CachedDisplayList
BufferCount++;
}
void InsertOverlapingRegion(u8* RegionStartAddress, u32 Size,u32 referencedArray,u32 referencedArrayStride)
void InsertOverlapingRegion(u8* RegionStartAddress, u32 Size)
{
ReferencedDataRegion* NewRegion = FindOverlapingRegion(RegionStartAddress, Size);
if(NewRegion)
@ -169,10 +156,7 @@ struct CachedDisplayList
NewRegion->MustClean = false;
NewRegion->size = Size;
NewRegion->start_address = RegionStartAddress;
NewRegion->hash = GetHash64(RegionStartAddress, Size, DL_HASH_STEPS);
NewRegion->ReferencedArray = referencedArray;
NewRegion->ReferencedArrayStride = referencedArrayStride;
NewRegion->hash = GetHash64(RegionStartAddress, Size, DL_HASH_STEPS);
InsertRegion(NewRegion);
}
}
@ -184,13 +168,6 @@ struct CachedDisplayList
{
if(Current->hash)
{
// This test guarantees that the arrays referenced by the dlist do not change.
// In the current implementation it is not necessary, as the id of the dlist is calculated based on the arrays it refers to,
// but I left this test in for extra safety.
if(Current->ReferencedArray != 0xFFFFFFFF && (cached_arraybases[Current->ReferencedArray] != Current->start_address || arraystrides[Current->ReferencedArray] != Current->ReferencedArrayStride))
{
return false;
}
if(Current->hash != GetHash64(Current->start_address, Current->size, DL_HASH_STEPS))
return false;
}
@ -600,7 +577,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
{
u8* saddr = cached_arraybases[i];
int arraySize = arraystrides[i] * ((tc[i] == 2)? numVertices : ((numVertices < 1024)? 2 * numVertices : numVertices));
dl->InsertOverlapingRegion(saddr, arraySize,i,arraystrides[i]);
dl->InsertOverlapingRegion(saddr, arraySize);
}
}
}

View File

@ -514,27 +514,27 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{
if(count == 0)
return;
m_numLoadedVertices += count;
INCSTAT(stats.thisFrame.numDrawCalls);
// Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
if (g_nativeVertexFmt != m_NativeFmt)
{
// We really must flush here. It's possible that the native representations
// of the two vtx formats are the same, but we have no way to easily check that
// now.
VertexManager::Flush();
// Also move the Set() here?
}
g_nativeVertexFmt = m_NativeFmt;
g_nativeVertexFmt = m_NativeFmt;
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
}
if (bpmem.genMode.cullmode == 3 && primitive < 5)
{
// if cull mode is "cull all" (3), triangles and quads are never drawn, so skip their data
DataSkip(count * m_VertexSize);
return;
}
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
}
// Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
@ -554,87 +554,19 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
int granularity = 1;
switch (primitive) {
case 3: // strip .. hm, weird
case 4: // fan
if (VertexManager::GetRemainingSize() < 3 * native_stride)
VertexManager::Flush();
break;
case 6: // line strip
if (VertexManager::GetRemainingSize() < 2 * native_stride)
VertexManager::Flush();
break;
case 0: granularity = 4; break; // quads
case 2: granularity = 3; break; // tris
case 5: granularity = 2; break; // lines
}
int startv = 0, extraverts = 0;
int v = 0;
//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
while (v < count)
{
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
//if (remainingVerts2 - v + startv < remainingVerts)
//remainingVerts = remainingVerts2 - v + startv;
if (remainingVerts < granularity) {
INCSTAT(stats.thisFrame.numBufferSplits);
// This buffer full - break current primitive and flush, to switch to the next buffer.
u8* plastptr = VertexManager::s_pCurBufferPointer;
if (v - startv > 0)
VertexManager::AddVertices(primitive, v - startv + extraverts);
VertexManager::Flush();
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
// Why does this need to be so complicated?
switch (primitive) {
case 3: // triangle strip, copy last two vertices
// a little trick since we have to keep track of signs
if (v & 1) {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
VertexManager::s_pCurBufferPointer += native_stride*3;
extraverts = 3;
}
else {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
VertexManager::s_pCurBufferPointer += native_stride*2;
extraverts = 2;
}
break;
case 4: // tri fan, copy first and last vert
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 2;
break;
case 6: // line strip
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 1;
break;
default:
extraverts = 0;
break;
}
startv = v;
}
int remainingPrims = remainingVerts / granularity;
remainingVerts = remainingPrims * granularity;
if (count - v < remainingVerts)
remainingVerts = count - v;
if(VertexManager::GetRemainingSize() < native_stride * count)
VertexManager::Flush();
VertexManager::AddVertices(primitive,count);
#ifdef USE_JIT
if (remainingVerts > 0) {
loop_counter = remainingVerts;
if (count > 0) {
loop_counter = count;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < remainingVerts; s++)
for (int s = 0; s < count; s++)
{
tcIndex = 0;
colIndex = 0;
@ -643,12 +575,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
m_PipelineStages[i]();
PRIM_LOG("\n");
}
#endif
v += remainingVerts;
}
if (startv < count)
VertexManager::AddVertices(primitive, count - startv + extraverts);
#endif
}
@ -657,18 +584,18 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
{
m_numLoadedVertices += count;
INCSTAT(stats.thisFrame.numDrawCalls);
// Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
if (g_nativeVertexFmt != m_NativeFmt)
{
// We really must flush here. It's possible that the native representations
// of the two vtx formats are the same, but we have no way to easily check that
// now.
VertexManager::Flush();
// Also move the Set() here?
}
g_nativeVertexFmt = m_NativeFmt;
g_nativeVertexFmt = m_NativeFmt;
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
}
if (bpmem.genMode.cullmode == 3 && primitive < 5)
{
// if cull mode is "cull all" (3), ignore triangles and quads
@ -676,27 +603,6 @@ void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int co
return;
}
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
// Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
pVtxAttr = &m_VtxAttr;
posScale = fractionTable[m_VtxAttr.PosFrac];
if (m_NativeFmt->m_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++)
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
if(VertexManager::GetRemainingSize() < native_stride * count)
VertexManager::Flush();
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);