Some cleanup in the display list (DList) cache, RunVertices and RunCompiledVertices
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@7246 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
c40ca6553f
commit
ac6c18a5e4
|
@ -41,7 +41,7 @@
|
|||
#include "VideoConfig.h"
|
||||
|
||||
#define DL_CODE_CACHE_SIZE (1024*1024*16)
|
||||
#define DL_CODE_CLEAR_THRESHOLD (128 * 1024)
|
||||
#define DL_CODE_CLEAR_THRESHOLD (16 * 1024)
|
||||
extern int frameCount;
|
||||
static u32 CheckContextId;
|
||||
using namespace Gen;
|
||||
|
@ -64,17 +64,13 @@ struct ReferencedDataRegion
|
|||
start_address(NULL),
|
||||
NextRegion(NULL),
|
||||
size(0),
|
||||
MustClean(0),
|
||||
ReferencedArray(0xFFFFFFFF),
|
||||
ReferencedArrayStride(0)
|
||||
MustClean(0)
|
||||
{}
|
||||
u64 hash;
|
||||
u8* start_address;
|
||||
ReferencedDataRegion* NextRegion;
|
||||
u32 size;
|
||||
u32 MustClean;
|
||||
u32 ReferencedArray;
|
||||
u32 ReferencedArrayStride;
|
||||
|
||||
|
||||
int IntersectsMemoryRange(u8* range_address, u32 range_size)
|
||||
|
@ -104,8 +100,10 @@ struct CachedDisplayList
|
|||
frame_count = frameCount;
|
||||
}
|
||||
u64 dl_hash;
|
||||
// ... Something containing cached vertex buffers here ...
|
||||
ReferencedDataRegion* Regions;
|
||||
ReferencedDataRegion* LastRegion;
|
||||
// Compile the commands themselves down to native code.
|
||||
const u8* compiled_code;
|
||||
u32 uncachable; // if set, this DL will always be interpreted. This gets set if hash ever changes.
|
||||
// Analitic data
|
||||
|
@ -113,22 +111,11 @@ struct CachedDisplayList
|
|||
u32 num_cp_reg;
|
||||
u32 num_bp_reg;
|
||||
u32 num_index_xf;
|
||||
u32 num_draw_call;
|
||||
|
||||
u32 num_draw_call;
|
||||
u32 pass;
|
||||
|
||||
|
||||
u32 check;
|
||||
|
||||
int frame_count;
|
||||
|
||||
// ... Something containing cached vertex buffers here ...
|
||||
int frame_count;
|
||||
u32 BufferCount;
|
||||
|
||||
|
||||
// Compile the commands themselves down to native code.
|
||||
|
||||
|
||||
|
||||
void InsertRegion(ReferencedDataRegion* NewRegion)
|
||||
{
|
||||
|
@ -144,7 +131,7 @@ struct CachedDisplayList
|
|||
BufferCount++;
|
||||
}
|
||||
|
||||
void InsertOverlapingRegion(u8* RegionStartAddress, u32 Size,u32 referencedArray,u32 referencedArrayStride)
|
||||
void InsertOverlapingRegion(u8* RegionStartAddress, u32 Size)
|
||||
{
|
||||
ReferencedDataRegion* NewRegion = FindOverlapingRegion(RegionStartAddress, Size);
|
||||
if(NewRegion)
|
||||
|
@ -169,10 +156,7 @@ struct CachedDisplayList
|
|||
NewRegion->MustClean = false;
|
||||
NewRegion->size = Size;
|
||||
NewRegion->start_address = RegionStartAddress;
|
||||
NewRegion->hash = GetHash64(RegionStartAddress, Size, DL_HASH_STEPS);
|
||||
NewRegion->ReferencedArray = referencedArray;
|
||||
NewRegion->ReferencedArrayStride = referencedArrayStride;
|
||||
|
||||
NewRegion->hash = GetHash64(RegionStartAddress, Size, DL_HASH_STEPS);
|
||||
InsertRegion(NewRegion);
|
||||
}
|
||||
}
|
||||
|
@ -184,13 +168,6 @@ struct CachedDisplayList
|
|||
{
|
||||
if(Current->hash)
|
||||
{
|
||||
//this test is here to grant that the referenced arrays by the dlist dont change
|
||||
// in the current implementation is not necesary as the id of the dlist is calculated based on the arrays it refers
|
||||
// but i let this test for extra safety
|
||||
if(Current->ReferencedArray != 0xFFFFFFFF && (cached_arraybases[Current->ReferencedArray] != Current->start_address || arraystrides[Current->ReferencedArray] != Current->ReferencedArrayStride))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if(Current->hash != GetHash64(Current->start_address, Current->size, DL_HASH_STEPS))
|
||||
return false;
|
||||
}
|
||||
|
@ -600,7 +577,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
|
|||
{
|
||||
u8* saddr = cached_arraybases[i];
|
||||
int arraySize = arraystrides[i] * ((tc[i] == 2)? numVertices : ((numVertices < 1024)? 2 * numVertices : numVertices));
|
||||
dl->InsertOverlapingRegion(saddr, arraySize,i,arraystrides[i]);
|
||||
dl->InsertOverlapingRegion(saddr, arraySize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -514,27 +514,27 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
|
|||
|
||||
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
||||
{
|
||||
if(count == 0)
|
||||
return;
|
||||
m_numLoadedVertices += count;
|
||||
|
||||
INCSTAT(stats.thisFrame.numDrawCalls);
|
||||
// Flush if our vertex format is different from the currently set.
|
||||
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
|
||||
if (g_nativeVertexFmt != m_NativeFmt)
|
||||
{
|
||||
// We really must flush here. It's possible that the native representations
|
||||
// of the two vtx formats are the same, but we have no way to easily check that
|
||||
// now.
|
||||
VertexManager::Flush();
|
||||
// Also move the Set() here?
|
||||
}
|
||||
g_nativeVertexFmt = m_NativeFmt;
|
||||
|
||||
g_nativeVertexFmt = m_NativeFmt;
|
||||
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
||||
}
|
||||
|
||||
if (bpmem.genMode.cullmode == 3 && primitive < 5)
|
||||
{
|
||||
// if cull mode is none, ignore triangles and quads
|
||||
DataSkip(count * m_VertexSize);
|
||||
return;
|
||||
}
|
||||
|
||||
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
||||
}
|
||||
|
||||
// Load position and texcoord scale factors.
|
||||
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
|
||||
|
@ -554,87 +554,19 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
|||
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
|
||||
for (int i = 0; i < 2; i++)
|
||||
colElements[i] = m_VtxAttr.color[i].Elements;
|
||||
|
||||
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
|
||||
int granularity = 1;
|
||||
switch (primitive) {
|
||||
case 3: // strip .. hm, weird
|
||||
case 4: // fan
|
||||
if (VertexManager::GetRemainingSize() < 3 * native_stride)
|
||||
VertexManager::Flush();
|
||||
break;
|
||||
case 6: // line strip
|
||||
if (VertexManager::GetRemainingSize() < 2 * native_stride)
|
||||
VertexManager::Flush();
|
||||
break;
|
||||
case 0: granularity = 4; break; // quads
|
||||
case 2: granularity = 3; break; // tris
|
||||
case 5: granularity = 2; break; // lines
|
||||
}
|
||||
|
||||
int startv = 0, extraverts = 0;
|
||||
int v = 0;
|
||||
|
||||
//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
|
||||
while (v < count)
|
||||
{
|
||||
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
|
||||
//if (remainingVerts2 - v + startv < remainingVerts)
|
||||
//remainingVerts = remainingVerts2 - v + startv;
|
||||
if (remainingVerts < granularity) {
|
||||
INCSTAT(stats.thisFrame.numBufferSplits);
|
||||
// This buffer full - break current primitive and flush, to switch to the next buffer.
|
||||
u8* plastptr = VertexManager::s_pCurBufferPointer;
|
||||
if (v - startv > 0)
|
||||
VertexManager::AddVertices(primitive, v - startv + extraverts);
|
||||
VertexManager::Flush();
|
||||
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
|
||||
// Why does this need to be so complicated?
|
||||
switch (primitive) {
|
||||
case 3: // triangle strip, copy last two vertices
|
||||
// a little trick since we have to keep track of signs
|
||||
if (v & 1) {
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride*3;
|
||||
extraverts = 3;
|
||||
}
|
||||
else {
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
|
||||
VertexManager::s_pCurBufferPointer += native_stride*2;
|
||||
extraverts = 2;
|
||||
}
|
||||
break;
|
||||
case 4: // tri fan, copy first and last vert
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
extraverts = 2;
|
||||
break;
|
||||
case 6: // line strip
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
|
||||
VertexManager::s_pCurBufferPointer += native_stride;
|
||||
extraverts = 1;
|
||||
break;
|
||||
default:
|
||||
extraverts = 0;
|
||||
break;
|
||||
}
|
||||
startv = v;
|
||||
}
|
||||
int remainingPrims = remainingVerts / granularity;
|
||||
remainingVerts = remainingPrims * granularity;
|
||||
if (count - v < remainingVerts)
|
||||
remainingVerts = count - v;
|
||||
|
||||
if(VertexManager::GetRemainingSize() < native_stride * count)
|
||||
VertexManager::Flush();
|
||||
|
||||
VertexManager::AddVertices(primitive,count);
|
||||
|
||||
#ifdef USE_JIT
|
||||
if (remainingVerts > 0) {
|
||||
loop_counter = remainingVerts;
|
||||
if (count > 0) {
|
||||
loop_counter = count;
|
||||
((void (*)())(void*)m_compiledCode)();
|
||||
}
|
||||
#else
|
||||
for (int s = 0; s < remainingVerts; s++)
|
||||
for (int s = 0; s < count; s++)
|
||||
{
|
||||
tcIndex = 0;
|
||||
colIndex = 0;
|
||||
|
@ -643,12 +575,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
|||
m_PipelineStages[i]();
|
||||
PRIM_LOG("\n");
|
||||
}
|
||||
#endif
|
||||
v += remainingVerts;
|
||||
}
|
||||
|
||||
if (startv < count)
|
||||
VertexManager::AddVertices(primitive, count - startv + extraverts);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -657,18 +584,18 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
|||
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
|
||||
{
|
||||
m_numLoadedVertices += count;
|
||||
|
||||
INCSTAT(stats.thisFrame.numDrawCalls);
|
||||
// Flush if our vertex format is different from the currently set.
|
||||
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
|
||||
if (g_nativeVertexFmt != m_NativeFmt)
|
||||
{
|
||||
// We really must flush here. It's possible that the native representations
|
||||
// of the two vtx formats are the same, but we have no way to easily check that
|
||||
// now.
|
||||
VertexManager::Flush();
|
||||
// Also move the Set() here?
|
||||
}
|
||||
g_nativeVertexFmt = m_NativeFmt;
|
||||
|
||||
g_nativeVertexFmt = m_NativeFmt;
|
||||
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
||||
}
|
||||
|
||||
if (bpmem.genMode.cullmode == 3 && primitive < 5)
|
||||
{
|
||||
// if cull mode is none, ignore triangles and quads
|
||||
|
@ -676,27 +603,6 @@ void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int co
|
|||
return;
|
||||
}
|
||||
|
||||
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
|
||||
|
||||
// Load position and texcoord scale factors.
|
||||
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
|
||||
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
|
||||
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
|
||||
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
|
||||
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
|
||||
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
|
||||
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
|
||||
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
|
||||
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
|
||||
|
||||
pVtxAttr = &m_VtxAttr;
|
||||
posScale = fractionTable[m_VtxAttr.PosFrac];
|
||||
if (m_NativeFmt->m_components & VB_HAS_UVALL)
|
||||
for (int i = 0; i < 8; i++)
|
||||
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
|
||||
for (int i = 0; i < 2; i++)
|
||||
colElements[i] = m_VtxAttr.color[i].Elements;
|
||||
|
||||
if(VertexManager::GetRemainingSize() < native_stride * count)
|
||||
VertexManager::Flush();
|
||||
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);
|
||||
|
|
Loading…
Reference in New Issue