More work on display list (dlist) caching; it should now be a little faster.
A small bugfix in vertex loading and some other fixes. Not much time to work these days, but at least I can spend a little time fixing things. Please test for regressions. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6409 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
b2e1ea3279
commit
c21f804c48
|
@ -40,7 +40,7 @@
|
||||||
#include "VideoConfig.h"
|
#include "VideoConfig.h"
|
||||||
|
|
||||||
#define DL_CODE_CACHE_SIZE (1024*1024*16)
|
#define DL_CODE_CACHE_SIZE (1024*1024*16)
|
||||||
#define DL_CODE_CLEAR_THRESHOLD (256 * 1024)
|
#define DL_CODE_CLEAR_THRESHOLD (128 * 1024)
|
||||||
extern int frameCount;
|
extern int frameCount;
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
@ -222,22 +222,54 @@ inline u64 CreateMapId(u32 address, u32 size)
|
||||||
return ((u64)address << 32) | size;
|
return ((u64)address << 32) | size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a 64-bit key from the vertex attribute tables selected by VATUSED.
//
// Bit i of VATUSED selects g_VtxAttr[i]. For every selected entry the g0, g1
// and g2 register words are combined as (g0 | g1 << 32) ^ (g2 << 16) and the
// results are XOR-folded together. Returns 0 when no bit of VATUSED is set.
inline u64 CreateVMapId(u8 VATUSED)
{
	u64 id = 0;
	for (int i = 0; i < 8; i++)
	{
		if (VATUSED & (1 << i))
		{
			// Widen each word to 64 bits *before* shifting. If Hex is a 32-bit
			// field, shifting it by 32 is undefined behaviour and shifting it
			// by 16 silently discards its upper half.
			const u64 g0 = (u64)g_VtxAttr[i].g0.Hex;
			const u64 g1 = (u64)g_VtxAttr[i].g1.Hex;
			const u64 g2 = (u64)g_VtxAttr[i].g2.Hex;

			// XOR into a zero id is the same as assignment, so the first
			// selected entry needs no special case.
			id ^= (g0 | (g1 << 32)) ^ (g2 << 16);
		}
	}
	return id;
}
|
||||||
|
|
||||||
typedef std::map<u64, CachedDisplayList> DLMap;
|
typedef std::map<u64, CachedDisplayList> DLMap;
|
||||||
|
|
||||||
static DLMap dl_map;
|
struct VDlist
|
||||||
|
{
|
||||||
|
DLMap dl_map;
|
||||||
|
u8 VATUsed;
|
||||||
|
int count;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::map<u64, VDlist> VDLMap;
|
||||||
|
|
||||||
|
static VDLMap dl_map;
|
||||||
static u8* dlcode_cache;
|
static u8* dlcode_cache;
|
||||||
|
|
||||||
static Gen::XEmitter emitter;
|
static Gen::XEmitter emitter;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// First pass - analyze
|
// First pass - analyze
|
||||||
bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
|
u8 AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
|
||||||
{
|
{
|
||||||
int num_xf_reg = 0;
|
int num_xf_reg = 0;
|
||||||
int num_cp_reg = 0;
|
int num_cp_reg = 0;
|
||||||
int num_bp_reg = 0;
|
int num_bp_reg = 0;
|
||||||
int num_index_xf = 0;
|
int num_index_xf = 0;
|
||||||
int num_draw_call = 0;
|
int num_draw_call = 0;
|
||||||
|
u8 result = 0;
|
||||||
u8* old_pVideoData = g_pVideoData;
|
u8* old_pVideoData = g_pVideoData;
|
||||||
u8* startAddress = Memory_GetPtr(address);
|
u8* startAddress = Memory_GetPtr(address);
|
||||||
|
|
||||||
|
@ -336,7 +368,7 @@ bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
|
||||||
{
|
{
|
||||||
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
// load vertices (use computed vertex size from FifoCommandRunnable above)
|
||||||
u16 numVertices = DataReadU16();
|
u16 numVertices = DataReadU16();
|
||||||
|
result |= 1 << (cmd_byte & GX_VAT_MASK);
|
||||||
VertexLoaderManager::RunVertices(
|
VertexLoaderManager::RunVertices(
|
||||||
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
|
||||||
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
|
||||||
|
@ -363,7 +395,7 @@ bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
|
||||||
dl->num_xf_reg = num_xf_reg;
|
dl->num_xf_reg = num_xf_reg;
|
||||||
// reset to the old pointer
|
// reset to the old pointer
|
||||||
g_pVideoData = old_pVideoData;
|
g_pVideoData = old_pVideoData;
|
||||||
return true;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The only sensible way to detect changes to vertex data is to convert several times
|
// The only sensible way to detect changes to vertex data is to convert several times
|
||||||
|
@ -532,6 +564,12 @@ bool CompileAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
|
||||||
dl->InsertRegion(NewRegion);
|
dl->InsertRegion(NewRegion);
|
||||||
memcpy(NewRegion->start_address, StartAddress, Vdatasize);
|
memcpy(NewRegion->start_address, StartAddress, Vdatasize);
|
||||||
emitter.ABI_CallFunctionCCCP((void *)&VertexLoaderManager::RunCompiledVertices, cmd_byte & GX_VAT_MASK, (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices, NewRegion->start_address);
|
emitter.ABI_CallFunctionCCCP((void *)&VertexLoaderManager::RunCompiledVertices, cmd_byte & GX_VAT_MASK, (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices, NewRegion->start_address);
|
||||||
|
/*ReferencedDataRegion* VatRegion = new ReferencedDataRegion;
|
||||||
|
VatRegion->MustClean = false;
|
||||||
|
VatRegion->size = sizeof(VAT);
|
||||||
|
VatRegion->start_address = (u8*)(&g_VtxAttr[cmd_byte & GX_VAT_MASK]);
|
||||||
|
VatRegion->hash = GetHash64(NewRegion->start_address, VatRegion->size, 0);
|
||||||
|
dl->InsertRegion(VatRegion);*/
|
||||||
}
|
}
|
||||||
const int tc[12] = {
|
const int tc[12] = {
|
||||||
g_VtxDesc.Position, g_VtxDesc.Normal, g_VtxDesc.Color0, g_VtxDesc.Color1, g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord,
|
g_VtxDesc.Position, g_VtxDesc.Normal, g_VtxDesc.Color0, g_VtxDesc.Color1, g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord,
|
||||||
|
@ -542,7 +580,7 @@ bool CompileAndRunDisplayList(u32 address, int size, CachedDisplayList *dl)
|
||||||
if(tc[i] > 1)
|
if(tc[i] > 1)
|
||||||
{
|
{
|
||||||
u8* saddr = cached_arraybases[i];
|
u8* saddr = cached_arraybases[i];
|
||||||
int arraySize = arraystrides[i] * ((tc[i] == 2)? 256 : 8192);
|
int arraySize = arraystrides[i] * ((tc[i] == 2)? numVertices : ((numVertices < 1024)? 2 * numVertices : numVertices));
|
||||||
dl->InsertOverlapingRegion(saddr, arraySize);
|
dl->InsertOverlapingRegion(saddr, arraySize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -579,10 +617,16 @@ void Shutdown()
|
||||||
|
|
||||||
void Clear()
|
void Clear()
|
||||||
{
|
{
|
||||||
DLMap::iterator iter = dl_map.begin();
|
VDLMap::iterator iter = dl_map.begin();
|
||||||
while (iter != dl_map.end()) {
|
while (iter != dl_map.end()) {
|
||||||
CachedDisplayList &entry = iter->second;
|
VDlist &ParentEntry = iter->second;
|
||||||
entry.ClearRegions();
|
DLMap::iterator childiter = ParentEntry.dl_map.begin();
|
||||||
|
while (childiter != ParentEntry.dl_map.end()) {
|
||||||
|
CachedDisplayList &entry = childiter->second;
|
||||||
|
entry.ClearRegions();
|
||||||
|
childiter++;
|
||||||
|
}
|
||||||
|
ParentEntry.dl_map.clear();
|
||||||
iter++;
|
iter++;
|
||||||
}
|
}
|
||||||
dl_map.clear();
|
dl_map.clear();
|
||||||
|
@ -592,17 +636,28 @@ void Clear()
|
||||||
|
|
||||||
void ProgressiveCleanup()
|
void ProgressiveCleanup()
|
||||||
{
|
{
|
||||||
DLMap::iterator iter = dl_map.begin();
|
VDLMap::iterator iter = dl_map.begin();
|
||||||
while (iter != dl_map.end()) {
|
while (iter != dl_map.end()) {
|
||||||
CachedDisplayList &entry = iter->second;
|
VDlist &ParentEntry = iter->second;
|
||||||
int limit = iter->second.uncachable ? 1200 : 400;
|
DLMap::iterator childiter = ParentEntry.dl_map.begin();
|
||||||
if (entry.frame_count < frameCount - limit) {
|
while (childiter != ParentEntry.dl_map.end())
|
||||||
// entry.Destroy();
|
{
|
||||||
entry.ClearRegions();
|
CachedDisplayList &entry = childiter->second;
|
||||||
dl_map.erase(iter++); // (this is gcc standard!)
|
int limit = 3600;
|
||||||
|
if (entry.frame_count < frameCount - limit) {
|
||||||
|
// entry.Destroy();
|
||||||
|
entry.ClearRegions();
|
||||||
|
ParentEntry.dl_map.erase(childiter++); // (this is gcc standard!)
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++childiter;
|
||||||
|
}
|
||||||
|
if(ParentEntry.dl_map.empty())
|
||||||
|
{
|
||||||
|
dl_map.erase(iter++);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
++iter;
|
iter++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -627,10 +682,19 @@ bool HandleDisplayList(u32 address, u32 size)
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 dl_id = DLCache::CreateMapId(address, size);
|
u64 dl_id = DLCache::CreateMapId(address, size);
|
||||||
DLCache::DLMap::iterator iter = DLCache::dl_map.find(dl_id);
|
u64 vhash = 0;
|
||||||
|
DLCache::VDLMap::iterator Parentiter = DLCache::dl_map.find(dl_id);
|
||||||
stats.numDListsAlive = (int)DLCache::dl_map.size();
|
DLCache::DLMap::iterator iter;
|
||||||
if (iter != DLCache::dl_map.end())
|
bool childexist = false;
|
||||||
|
if (Parentiter != DLCache::dl_map.end())
|
||||||
|
{
|
||||||
|
vhash = DLCache::CreateVMapId(Parentiter->second.VATUsed);
|
||||||
|
DLCache::VDlist &tvdl = Parentiter->second;
|
||||||
|
iter = Parentiter->second.dl_map.find(vhash);
|
||||||
|
childexist = iter != Parentiter->second.dl_map.end();
|
||||||
|
}
|
||||||
|
//INCSTAT(stats.numDListsAlive);
|
||||||
|
if (Parentiter != DLCache::dl_map.end() && childexist)
|
||||||
{
|
{
|
||||||
DLCache::CachedDisplayList &dl = iter->second;
|
DLCache::CachedDisplayList &dl = iter->second;
|
||||||
if (dl.uncachable)
|
if (dl.uncachable)
|
||||||
|
@ -641,18 +705,6 @@ bool HandleDisplayList(u32 address, u32 size)
|
||||||
// Got one! And it's been compiled too, so let's run the compiled code!
|
// Got one! And it's been compiled too, so let's run the compiled code!
|
||||||
switch (dl.pass)
|
switch (dl.pass)
|
||||||
{
|
{
|
||||||
case DLCache::DLPASS_ANALYZE:
|
|
||||||
if (DLCache::AnalyzeAndRunDisplayList(address, size, &dl)) {
|
|
||||||
dl.dl_hash = GetHash64(Memory_GetPtr(address), size, 0);
|
|
||||||
dl.pass = DLCache::DLPASS_COMPILE;
|
|
||||||
dl.check = 1;
|
|
||||||
dl.next_check = 1;
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
dl.uncachable = true;
|
|
||||||
return true; // don't also interpret the list.
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DLCache::DLPASS_COMPILE:
|
case DLCache::DLPASS_COMPILE:
|
||||||
// First, check that the hash is the same as the last time.
|
// First, check that the hash is the same as the last time.
|
||||||
if (dl.dl_hash != GetHash64(Memory_GetPtr(address), size, 0))
|
if (dl.dl_hash != GetHash64(Memory_GetPtr(address), size, 0))
|
||||||
|
@ -707,16 +759,28 @@ bool HandleDisplayList(u32 address, u32 size)
|
||||||
|
|
||||||
DLCache::CachedDisplayList dl;
|
DLCache::CachedDisplayList dl;
|
||||||
|
|
||||||
if (DLCache::AnalyzeAndRunDisplayList(address, size, &dl)) {
|
u8 dlvatused = DLCache::AnalyzeAndRunDisplayList(address, size, &dl);
|
||||||
dl.dl_hash = GetHash64(Memory_GetPtr(address), size,0);
|
dl.dl_hash = GetHash64(Memory_GetPtr(address), size,0);
|
||||||
dl.pass = DLCache::DLPASS_COMPILE;
|
dl.pass = DLCache::DLPASS_COMPILE;
|
||||||
dl.check = 1;
|
dl.check = 1;
|
||||||
dl.next_check = 1;
|
dl.next_check = 1;
|
||||||
DLCache::dl_map[dl_id] = dl;
|
|
||||||
return true;
|
if(Parentiter != DLCache::dl_map.end())
|
||||||
} else {
|
{
|
||||||
dl.uncachable = true;
|
DLCache::VDlist &vdl = Parentiter->second;
|
||||||
DLCache::dl_map[dl_id] = dl;
|
vdl.dl_map[vhash] = dl;
|
||||||
return true; // don't also interpret the list.
|
vdl.VATUsed = dlvatused;
|
||||||
|
vdl.count++;
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
DLCache::VDlist vdl;
|
||||||
|
vdl.dl_map[vhash] = dl;
|
||||||
|
vdl.VATUsed = dlvatused;
|
||||||
|
vdl.count = 1;
|
||||||
|
DLCache::dl_map[dl_id] = vdl;
|
||||||
|
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -308,27 +308,27 @@ void VertexLoader::CompileVertexTranslator()
|
||||||
case FORMAT_UBYTE:
|
case FORMAT_UBYTE:
|
||||||
case FORMAT_BYTE:
|
case FORMAT_BYTE:
|
||||||
{
|
{
|
||||||
vtx_decl.normal_gl_type = VAR_BYTE;
|
vtx_decl.normal_gl_type = (vtx_attr.NormalFormat == FORMAT_BYTE)? VAR_BYTE : VAR_UNSIGNED_BYTE;
|
||||||
int native_size = 4;
|
int native_size = 4;
|
||||||
if (vtx_attr.NormalFormat == FORMAT_BYTE && !g_Config.bAllowSignedBytes)
|
if (vtx_attr.NormalFormat == FORMAT_BYTE && !g_Config.bAllowSignedBytes)
|
||||||
{
|
{
|
||||||
vtx_decl.normal_gl_type = VAR_SHORT;
|
vtx_decl.normal_gl_type = VAR_SHORT;
|
||||||
native_size = 8;
|
native_size = 8;
|
||||||
}
|
}
|
||||||
vtx_decl.normal_gl_size = 4;
|
vtx_decl.normal_gl_size = 4;
|
||||||
vtx_decl.normal_offset[0] = nat_offset;
|
vtx_decl.normal_offset[0] = nat_offset;
|
||||||
nat_offset += native_size;
|
|
||||||
if (vtx_attr.NormalElements) {
|
|
||||||
vtx_decl.normal_offset[1] = nat_offset;
|
|
||||||
nat_offset += native_size;
|
nat_offset += native_size;
|
||||||
vtx_decl.normal_offset[2] = nat_offset;
|
if (vtx_attr.NormalElements) {
|
||||||
nat_offset += native_size;
|
vtx_decl.normal_offset[1] = nat_offset;
|
||||||
}
|
nat_offset += native_size;
|
||||||
break;
|
vtx_decl.normal_offset[2] = nat_offset;
|
||||||
|
nat_offset += native_size;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
case FORMAT_USHORT:
|
case FORMAT_USHORT:
|
||||||
case FORMAT_SHORT:
|
case FORMAT_SHORT:
|
||||||
vtx_decl.normal_gl_type = VAR_SHORT;
|
vtx_decl.normal_gl_type = (vtx_attr.NormalFormat == FORMAT_SHORT)? VAR_SHORT : VAR_UNSIGNED_SHORT;
|
||||||
vtx_decl.normal_gl_size = 4;
|
vtx_decl.normal_gl_size = 4;
|
||||||
vtx_decl.normal_offset[0] = nat_offset;
|
vtx_decl.normal_offset[0] = nat_offset;
|
||||||
nat_offset += 8;
|
nat_offset += 8;
|
||||||
|
|
|
@ -122,61 +122,73 @@ void LOADERDECL Pos_ReadDirect_Float2()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class T, bool three>
|
template<class T, bool three,int MaxSize>
|
||||||
inline void Pos_ReadIndex_Byte(int Index)
|
inline void Pos_ReadIndex_Byte(int Index)
|
||||||
{
|
{
|
||||||
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
|
if(Index < MaxSize)
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
|
{
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
|
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
|
||||||
if (three)
|
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
|
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
|
||||||
else
|
if (three)
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
|
||||||
LOG_VTX();
|
else
|
||||||
VertexManager::s_pCurBufferPointer += 12;
|
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||||
|
LOG_VTX();
|
||||||
|
VertexManager::s_pCurBufferPointer += 12;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class T, bool three>
|
template<class T, bool three,int MaxSize>
|
||||||
inline void Pos_ReadIndex_Short(int Index)
|
inline void Pos_ReadIndex_Short(int Index)
|
||||||
{
|
{
|
||||||
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
|
if(Index < MaxSize)
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
|
{
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
|
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
|
||||||
if (three)
|
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
|
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
|
||||||
else
|
if (three)
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
|
||||||
LOG_VTX();
|
else
|
||||||
VertexManager::s_pCurBufferPointer += 12;
|
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||||
|
LOG_VTX();
|
||||||
|
VertexManager::s_pCurBufferPointer += 12;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<bool three>
|
template<bool three,int MaxSize>
|
||||||
void Pos_ReadIndex_Float(int Index)
|
void Pos_ReadIndex_Float(int Index)
|
||||||
{
|
{
|
||||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
if(Index < MaxSize)
|
||||||
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
|
{
|
||||||
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
|
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||||
if (three)
|
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
|
||||||
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
|
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
|
||||||
else
|
if (three)
|
||||||
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
|
||||||
LOG_VTX();
|
else
|
||||||
VertexManager::s_pCurBufferPointer += 12;
|
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
|
||||||
|
LOG_VTX();
|
||||||
|
VertexManager::s_pCurBufferPointer += 12;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
#if _M_SSE >= 0x301
|
||||||
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
|
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
|
||||||
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
|
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
|
||||||
|
|
||||||
template<bool three>
|
template<bool three,int MaxSize>
|
||||||
void Pos_ReadIndex_Float_SSSE3(int Index)
|
void Pos_ReadIndex_Float_SSSE3(int Index)
|
||||||
{
|
{
|
||||||
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
if(Index < MaxSize)
|
||||||
const __m128i a = _mm_loadu_si128((__m128i*)pData);
|
{
|
||||||
__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2);
|
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
|
||||||
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
const __m128i a = _mm_loadu_si128((__m128i*)pData);
|
||||||
LOG_VTX();
|
__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2);
|
||||||
VertexManager::s_pCurBufferPointer += 12;
|
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
|
||||||
|
LOG_VTX();
|
||||||
|
VertexManager::s_pCurBufferPointer += 12;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -190,50 +202,60 @@ template void Pos_ReadDirect<u8, false>();
|
||||||
template void Pos_ReadDirect<s8, false>();
|
template void Pos_ReadDirect<s8, false>();
|
||||||
template void Pos_ReadDirect<u16, false>();
|
template void Pos_ReadDirect<u16, false>();
|
||||||
template void Pos_ReadDirect<s16, false>();
|
template void Pos_ReadDirect<s16, false>();
|
||||||
template void Pos_ReadIndex_Byte<u8, true>(int Index);
|
template void Pos_ReadIndex_Byte<u8, true, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Byte<s8, true>(int Index);
|
template void Pos_ReadIndex_Byte<s8, true, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Short<u16, true>(int Index);
|
template void Pos_ReadIndex_Short<u16, true, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Short<s16, true>(int Index);
|
template void Pos_ReadIndex_Short<s16, true, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Float<true>(int Index);
|
template void Pos_ReadIndex_Float<true, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Byte<u8, false>(int Index);
|
template void Pos_ReadIndex_Byte<u8, false, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Byte<s8, false>(int Index);
|
template void Pos_ReadIndex_Byte<s8, false, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Short<u16, false>(int Index);
|
template void Pos_ReadIndex_Short<u16, false, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Short<s16, false>(int Index);
|
template void Pos_ReadIndex_Short<s16, false, 255>(int Index);
|
||||||
template void Pos_ReadIndex_Float<false>(int Index);
|
template void Pos_ReadIndex_Float<false, 255>(int Index);
|
||||||
|
template void Pos_ReadIndex_Byte<u8, true, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Byte<s8, true, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Short<u16, true, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Short<s16, true, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Float<true, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Byte<u8, false, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Byte<s8, false, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Short<u16, false, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Short<s16, false, 65535>(int Index);
|
||||||
|
template void Pos_ReadIndex_Float<false, 65535>(int Index);
|
||||||
|
|
||||||
// ==============================================================================
|
// ==============================================================================
|
||||||
// Index 8
|
// Index 8
|
||||||
// ==============================================================================
|
// ==============================================================================
|
||||||
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true, 255> (DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true, 255> (DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true, 255> (DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true, 255> (DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true, 255> (DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false, 255>(DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false, 255>(DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false, 255>(DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false, 255>(DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false, 255> (DataReadU8());}
|
||||||
|
|
||||||
// ==============================================================================
|
// ==============================================================================
|
||||||
// Index 16
|
// Index 16
|
||||||
// ==============================================================================
|
// ==============================================================================
|
||||||
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true, 65535> (DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true, 65535> (DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true, 65535> (DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true, 65535> (DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true, 65535> (DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false, 65535>(DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false, 65535>(DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false, 65535>(DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false, 65535>(DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false, 65535> (DataReadU16());}
|
||||||
|
|
||||||
#if _M_SSE >= 0x301
|
#if _M_SSE >= 0x301
|
||||||
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 255> (DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false> (DataReadU8());}
|
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 255> (DataReadU8());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 65535> (DataReadU16());}
|
||||||
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false> (DataReadU16());}
|
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 65535> (DataReadU16());}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static TPipelineFunction tableReadPosition[4][8][2] = {
|
static TPipelineFunction tableReadPosition[4][8][2] = {
|
||||||
|
|
|
@ -147,7 +147,8 @@ typedef enum
|
||||||
{
|
{
|
||||||
API_OPENGL,
|
API_OPENGL,
|
||||||
API_D3D9,
|
API_D3D9,
|
||||||
API_D3D11
|
API_D3D11,
|
||||||
|
API_GLSL
|
||||||
} API_TYPE;
|
} API_TYPE;
|
||||||
|
|
||||||
#endif // _VIDEOCOMMON_H
|
#endif // _VIDEOCOMMON_H
|
||||||
|
|
|
@ -82,46 +82,6 @@ struct XFBSource
|
||||||
|
|
||||||
class FramebufferManager
|
class FramebufferManager
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
FramebufferManager()
|
|
||||||
{
|
|
||||||
s_efb_color_texture = NULL;
|
|
||||||
LPDIRECT3DTEXTURE9 s_efb_colorRead_texture = NULL;
|
|
||||||
LPDIRECT3DTEXTURE9 s_efb_depth_texture = NULL;
|
|
||||||
LPDIRECT3DTEXTURE9 s_efb_depthRead_texture = NULL;
|
|
||||||
|
|
||||||
LPDIRECT3DSURFACE9 s_efb_depth_surface = NULL;
|
|
||||||
LPDIRECT3DSURFACE9 s_efb_color_surface = NULL;
|
|
||||||
LPDIRECT3DSURFACE9 s_efb_color_ReadBuffer = NULL;
|
|
||||||
LPDIRECT3DSURFACE9 s_efb_depth_ReadBuffer = NULL;
|
|
||||||
LPDIRECT3DSURFACE9 s_efb_color_OffScreenReadBuffer = NULL;
|
|
||||||
LPDIRECT3DSURFACE9 s_efb_depth_OffScreenReadBuffer = NULL;
|
|
||||||
|
|
||||||
D3DFORMAT s_efb_color_surface_Format = D3DFMT_FORCE_DWORD;
|
|
||||||
D3DFORMAT s_efb_depth_surface_Format = D3DFMT_FORCE_DWORD;
|
|
||||||
D3DFORMAT s_efb_depth_ReadBuffer_Format = D3DFMT_FORCE_DWORD;
|
|
||||||
m_realXFBSource.texture = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Create();
|
|
||||||
void Destroy();
|
|
||||||
|
|
||||||
void CopyToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc);
|
|
||||||
const XFBSource** GetXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32 &xfbCount);
|
|
||||||
|
|
||||||
LPDIRECT3DTEXTURE9 GetEFBColorTexture();
|
|
||||||
LPDIRECT3DTEXTURE9 GetEFBDepthTexture();
|
|
||||||
|
|
||||||
LPDIRECT3DSURFACE9 GetEFBColorRTSurface();
|
|
||||||
LPDIRECT3DSURFACE9 GetEFBDepthRTSurface();
|
|
||||||
LPDIRECT3DSURFACE9 GetEFBColorOffScreenRTSurface();
|
|
||||||
LPDIRECT3DSURFACE9 GetEFBDepthOffScreenRTSurface();
|
|
||||||
D3DFORMAT GetEFBDepthRTSurfaceFormat();
|
|
||||||
D3DFORMAT GetEFBColorRTSurfaceFormat();
|
|
||||||
D3DFORMAT GetEFBDepthReadSurfaceFormat();
|
|
||||||
LPDIRECT3DSURFACE9 GetEFBColorReadSurface();
|
|
||||||
LPDIRECT3DSURFACE9 GetEFBDepthReadSurface();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
struct VirtualXFB
|
struct VirtualXFB
|
||||||
|
@ -155,7 +115,7 @@ private:
|
||||||
LPDIRECT3DTEXTURE9 s_efb_depth_texture;//Texture thats contains the depth data of the render target
|
LPDIRECT3DTEXTURE9 s_efb_depth_texture;//Texture thats contains the depth data of the render target
|
||||||
LPDIRECT3DTEXTURE9 s_efb_depthRead_texture;//4 pixel texture for temporal data store
|
LPDIRECT3DTEXTURE9 s_efb_depthRead_texture;//4 pixel texture for temporal data store
|
||||||
|
|
||||||
LPDIRECT3DSURFACE9 s_efb_depth_surface;//Depth Surface
|
LPDIRECT3DSURFACE9 s_efb_depth_surface;//Depth Surface
|
||||||
LPDIRECT3DSURFACE9 s_efb_color_surface;//Color Surface
|
LPDIRECT3DSURFACE9 s_efb_color_surface;//Color Surface
|
||||||
LPDIRECT3DSURFACE9 s_efb_color_ReadBuffer;//Surface 0 of s_efb_colorRead_texture
|
LPDIRECT3DSURFACE9 s_efb_color_ReadBuffer;//Surface 0 of s_efb_colorRead_texture
|
||||||
LPDIRECT3DSURFACE9 s_efb_depth_ReadBuffer;//Surface 0 of s_efb_depthRead_texture
|
LPDIRECT3DSURFACE9 s_efb_depth_ReadBuffer;//Surface 0 of s_efb_depthRead_texture
|
||||||
|
@ -165,6 +125,47 @@ private:
|
||||||
D3DFORMAT s_efb_color_surface_Format;//Format of the color Surface
|
D3DFORMAT s_efb_color_surface_Format;//Format of the color Surface
|
||||||
D3DFORMAT s_efb_depth_surface_Format;//Format of the Depth Surface
|
D3DFORMAT s_efb_depth_surface_Format;//Format of the Depth Surface
|
||||||
D3DFORMAT s_efb_depth_ReadBuffer_Format;//Format of the Depth color Read Surface
|
D3DFORMAT s_efb_depth_ReadBuffer_Format;//Format of the Depth color Read Surface
|
||||||
|
public:
|
||||||
|
FramebufferManager()
|
||||||
|
{
|
||||||
|
s_efb_color_texture = NULL;
|
||||||
|
s_efb_colorRead_texture = NULL;
|
||||||
|
s_efb_depth_texture = NULL;
|
||||||
|
s_efb_depthRead_texture = NULL;
|
||||||
|
|
||||||
|
s_efb_depth_surface = NULL;
|
||||||
|
s_efb_color_surface = NULL;
|
||||||
|
s_efb_color_ReadBuffer = NULL;
|
||||||
|
s_efb_depth_ReadBuffer = NULL;
|
||||||
|
s_efb_color_OffScreenReadBuffer = NULL;
|
||||||
|
s_efb_depth_OffScreenReadBuffer = NULL;
|
||||||
|
|
||||||
|
s_efb_color_surface_Format = D3DFMT_FORCE_DWORD;
|
||||||
|
s_efb_depth_surface_Format = D3DFMT_FORCE_DWORD;
|
||||||
|
s_efb_depth_ReadBuffer_Format = D3DFMT_FORCE_DWORD;
|
||||||
|
m_realXFBSource.texture = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Create();
|
||||||
|
void Destroy();
|
||||||
|
|
||||||
|
void CopyToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc);
|
||||||
|
const XFBSource** GetXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32 &xfbCount);
|
||||||
|
|
||||||
|
LPDIRECT3DTEXTURE9 GetEFBColorTexture();
|
||||||
|
LPDIRECT3DTEXTURE9 GetEFBDepthTexture();
|
||||||
|
|
||||||
|
LPDIRECT3DSURFACE9 GetEFBColorRTSurface();
|
||||||
|
LPDIRECT3DSURFACE9 GetEFBDepthRTSurface();
|
||||||
|
LPDIRECT3DSURFACE9 GetEFBColorOffScreenRTSurface();
|
||||||
|
LPDIRECT3DSURFACE9 GetEFBDepthOffScreenRTSurface();
|
||||||
|
D3DFORMAT GetEFBDepthRTSurfaceFormat();
|
||||||
|
D3DFORMAT GetEFBColorRTSurfaceFormat();
|
||||||
|
D3DFORMAT GetEFBDepthReadSurfaceFormat();
|
||||||
|
LPDIRECT3DSURFACE9 GetEFBColorReadSurface();
|
||||||
|
LPDIRECT3DSURFACE9 GetEFBDepthReadSurface();
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
extern FramebufferManager g_framebufferManager;
|
extern FramebufferManager g_framebufferManager;
|
||||||
|
|
|
@ -1314,7 +1314,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons
|
||||||
|
|
||||||
u32 newAA = g_ActiveConfig.iMultisampleMode;
|
u32 newAA = g_ActiveConfig.iMultisampleMode;
|
||||||
|
|
||||||
if (xfbchanged || WindowResized || s_LastEFBScale != g_ActiveConfig.iEFBScale)
|
if (xfbchanged || WindowResized || s_LastEFBScale != g_ActiveConfig.iEFBScale || s_LastAA != newAA)
|
||||||
{
|
{
|
||||||
s_LastAA = newAA;
|
s_LastAA = newAA;
|
||||||
|
|
||||||
|
|
|
@ -1614,7 +1614,7 @@ void Renderer::RestoreAPIState()
|
||||||
SetColorMask();
|
SetColorMask();
|
||||||
SetBlendMode(true);
|
SetBlendMode(true);
|
||||||
|
|
||||||
VertexShaderCache::SetCurrentShader(0);
|
VertexShaderCache::SetCurrentShader(1);
|
||||||
PixelShaderCache::SetCurrentShader(0);
|
PixelShaderCache::SetCurrentShader(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue