From c21f804c48ad0bf857ed9b5171f6e0cc704f609a Mon Sep 17 00:00:00 2001 From: Rodolfo Osvaldo Bogado Date: Sun, 14 Nov 2010 14:42:11 +0000 Subject: [PATCH] more work on dlist caching now should be a little faster a little bugfix in vertex loading and some fixes. not much time to work these days but at least i can spend a little time fixing thing. please test for regressions. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6409 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/VideoCommon/Src/DLCache.cpp | 154 +++++++++++----- Source/Core/VideoCommon/Src/VertexLoader.cpp | 34 ++-- .../VideoCommon/Src/VertexLoader_Position.cpp | 164 ++++++++++-------- Source/Core/VideoCommon/Src/VideoCommon.h | 3 +- .../Plugin_VideoDX9/Src/FramebufferManager.h | 83 ++++----- Source/Plugins/Plugin_VideoDX9/Src/Render.cpp | 2 +- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 2 +- 7 files changed, 265 insertions(+), 177 deletions(-) diff --git a/Source/Core/VideoCommon/Src/DLCache.cpp b/Source/Core/VideoCommon/Src/DLCache.cpp index 23895e050e..c2509f988c 100644 --- a/Source/Core/VideoCommon/Src/DLCache.cpp +++ b/Source/Core/VideoCommon/Src/DLCache.cpp @@ -40,7 +40,7 @@ #include "VideoConfig.h" #define DL_CODE_CACHE_SIZE (1024*1024*16) -#define DL_CODE_CLEAR_THRESHOLD (256 * 1024) +#define DL_CODE_CLEAR_THRESHOLD (128 * 1024) extern int frameCount; using namespace Gen; @@ -222,22 +222,54 @@ inline u64 CreateMapId(u32 address, u32 size) return ((u64)address << 32) | size; } +inline u64 CreateVMapId(u8 VATUSED) +{ + u64 id = 0; + for(int i = 0; i < 8 ; i++) + { + if(VATUSED & (1 << i)) + { + //id ^= GetHash64((u8*)(&g_VtxAttr[i].g0.),sizeof(VAT),0); + if(id != 0) + { + id ^= (g_VtxAttr[i].g0.Hex | (g_VtxAttr[i].g1.Hex << 32)) ^ (g_VtxAttr[i].g2.Hex << 16); + } + else + { + id = (g_VtxAttr[i].g0.Hex | (g_VtxAttr[i].g1.Hex << 32)) ^ (g_VtxAttr[i].g2.Hex << 16); + } + } + } + return id; +} + typedef std::map DLMap; -static DLMap dl_map; +struct VDlist +{ + DLMap dl_map; + u8 VATUsed; + int count; +}; + +typedef std::map VDLMap; + +static VDLMap dl_map; static u8* dlcode_cache; static Gen::XEmitter emitter; + + // First pass - analyze -bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) +u8 AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) { int num_xf_reg = 0; int num_cp_reg = 0; int num_bp_reg = 0; int num_index_xf = 0; int num_draw_call = 0; - + u8 result = 0; u8* old_pVideoData = g_pVideoData; u8* startAddress = Memory_GetPtr(address); @@ -336,7 +368,7 @@ bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) { // load vertices (use computed vertex size from FifoCommandRunnable above) u16 numVertices = DataReadU16(); - + result |= 1 << (cmd_byte & GX_VAT_MASK); VertexLoaderManager::RunVertices( cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, @@ -363,7 +395,7 @@ bool AnalyzeAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) dl->num_xf_reg = num_xf_reg; // reset to the old pointer g_pVideoData = old_pVideoData; - return true; + return result; } // The only sensible way to detect changes to vertex data is to convert several times @@ -532,6 +564,12 @@ bool CompileAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) dl->InsertRegion(NewRegion); memcpy(NewRegion->start_address, StartAddress, Vdatasize); emitter.ABI_CallFunctionCCCP((void *)&VertexLoaderManager::RunCompiledVertices, cmd_byte & GX_VAT_MASK, (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices, NewRegion->start_address); + /*ReferencedDataRegion* VatRegion = new ReferencedDataRegion; + VatRegion->MustClean = false; + VatRegion->size = sizeof(VAT); + VatRegion->start_address = (u8*)(&g_VtxAttr[cmd_byte & GX_VAT_MASK]); + VatRegion->hash = GetHash64(NewRegion->start_address, VatRegion->size, 0); + dl->InsertRegion(VatRegion);*/ } const int tc[12] = { g_VtxDesc.Position, g_VtxDesc.Normal, g_VtxDesc.Color0, g_VtxDesc.Color1, g_VtxDesc.Tex0Coord, g_VtxDesc.Tex1Coord, @@ -542,7 +580,7 @@ bool CompileAndRunDisplayList(u32 address, int size, CachedDisplayList *dl) if(tc[i] > 1) { u8* saddr = cached_arraybases[i]; - int arraySize = arraystrides[i] * ((tc[i] == 2)? 256 : 8192); + int arraySize = arraystrides[i] * ((tc[i] == 2)? numVertices : ((numVertices < 1024)? 2 * numVertices : numVertices)); dl->InsertOverlapingRegion(saddr, arraySize); } } @@ -579,10 +617,16 @@ void Shutdown() void Clear() { - DLMap::iterator iter = dl_map.begin(); + VDLMap::iterator iter = dl_map.begin(); while (iter != dl_map.end()) { - CachedDisplayList &entry = iter->second; - entry.ClearRegions(); + VDlist &ParentEntry = iter->second; + DLMap::iterator childiter = ParentEntry.dl_map.begin(); + while (childiter != ParentEntry.dl_map.end()) { + CachedDisplayList &entry = childiter->second; + entry.ClearRegions(); + childiter++; + } + ParentEntry.dl_map.clear(); iter++; } dl_map.clear(); @@ -592,17 +636,28 @@ void Clear() void ProgressiveCleanup() { - DLMap::iterator iter = dl_map.begin(); + VDLMap::iterator iter = dl_map.begin(); while (iter != dl_map.end()) { - CachedDisplayList &entry = iter->second; - int limit = iter->second.uncachable ? 1200 : 400; - if (entry.frame_count < frameCount - limit) { - // entry.Destroy(); - entry.ClearRegions(); - dl_map.erase(iter++); // (this is gcc standard!) + VDlist &ParentEntry = iter->second; + DLMap::iterator childiter = ParentEntry.dl_map.begin(); + while (childiter != ParentEntry.dl_map.end()) + { + CachedDisplayList &entry = childiter->second; + int limit = 3600; + if (entry.frame_count < frameCount - limit) { + // entry.Destroy(); + entry.ClearRegions(); + ParentEntry.dl_map.erase(childiter++); // (this is gcc standard!) + } + else + ++childiter; + } + if(ParentEntry.dl_map.empty()) + { + dl_map.erase(iter++); } else - ++iter; + iter++; } } @@ -627,10 +682,19 @@ bool HandleDisplayList(u32 address, u32 size) } u64 dl_id = DLCache::CreateMapId(address, size); - DLCache::DLMap::iterator iter = DLCache::dl_map.find(dl_id); - - stats.numDListsAlive = (int)DLCache::dl_map.size(); - if (iter != DLCache::dl_map.end()) + u64 vhash = 0; + DLCache::VDLMap::iterator Parentiter = DLCache::dl_map.find(dl_id); + DLCache::DLMap::iterator iter; + bool childexist = false; + if (Parentiter != DLCache::dl_map.end()) + { + vhash = DLCache::CreateVMapId(Parentiter->second.VATUsed); + DLCache::VDlist &tvdl = Parentiter->second; + iter = Parentiter->second.dl_map.find(vhash); + childexist = iter != Parentiter->second.dl_map.end(); + } + //INCSTAT(stats.numDListsAlive); + if (Parentiter != DLCache::dl_map.end() && childexist) { DLCache::CachedDisplayList &dl = iter->second; if (dl.uncachable) @@ -641,18 +705,6 @@ bool HandleDisplayList(u32 address, u32 size) // Got one! And it's been compiled too, so let's run the compiled code! switch (dl.pass) { - case DLCache::DLPASS_ANALYZE: - if (DLCache::AnalyzeAndRunDisplayList(address, size, &dl)) { - dl.dl_hash = GetHash64(Memory_GetPtr(address), size, 0); - dl.pass = DLCache::DLPASS_COMPILE; - dl.check = 1; - dl.next_check = 1; - return true; - } else { - dl.uncachable = true; - return true; // don't also interpret the list. - } - break; case DLCache::DLPASS_COMPILE: // First, check that the hash is the same as the last time. if (dl.dl_hash != GetHash64(Memory_GetPtr(address), size, 0)) @@ -707,16 +759,28 @@ bool HandleDisplayList(u32 address, u32 size) DLCache::CachedDisplayList dl; - if (DLCache::AnalyzeAndRunDisplayList(address, size, &dl)) { - dl.dl_hash = GetHash64(Memory_GetPtr(address), size,0); - dl.pass = DLCache::DLPASS_COMPILE; - dl.check = 1; - dl.next_check = 1; - DLCache::dl_map[dl_id] = dl; - return true; - } else { - dl.uncachable = true; - DLCache::dl_map[dl_id] = dl; - return true; // don't also interpret the list. + u8 dlvatused = DLCache::AnalyzeAndRunDisplayList(address, size, &dl); + dl.dl_hash = GetHash64(Memory_GetPtr(address), size,0); + dl.pass = DLCache::DLPASS_COMPILE; + dl.check = 1; + dl.next_check = 1; + + if(Parentiter != DLCache::dl_map.end()) + { + DLCache::VDlist &vdl = Parentiter->second; + vdl.dl_map[vhash] = dl; + vdl.VATUsed = dlvatused; + vdl.count++; } + else + { + DLCache::VDlist vdl; + vdl.dl_map[vhash] = dl; + vdl.VATUsed = dlvatused; + vdl.count = 1; + DLCache::dl_map[dl_id] = vdl; + + } + return true; + } diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index fba141d4cc..2f146da617 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -308,27 +308,27 @@ void VertexLoader::CompileVertexTranslator() case FORMAT_UBYTE: case FORMAT_BYTE: { - vtx_decl.normal_gl_type = VAR_BYTE; - int native_size = 4; - if (vtx_attr.NormalFormat == FORMAT_BYTE && !g_Config.bAllowSignedBytes) - { - vtx_decl.normal_gl_type = VAR_SHORT; - native_size = 8; - } - vtx_decl.normal_gl_size = 4; - vtx_decl.normal_offset[0] = nat_offset; - nat_offset += native_size; - if (vtx_attr.NormalElements) { - vtx_decl.normal_offset[1] = nat_offset; + vtx_decl.normal_gl_type = (vtx_attr.NormalFormat == FORMAT_BYTE)? VAR_BYTE : VAR_UNSIGNED_BYTE; + int native_size = 4; + if (vtx_attr.NormalFormat == FORMAT_BYTE && !g_Config.bAllowSignedBytes) + { + vtx_decl.normal_gl_type = VAR_SHORT; + native_size = 8; + } + vtx_decl.normal_gl_size = 4; + vtx_decl.normal_offset[0] = nat_offset; nat_offset += native_size; - vtx_decl.normal_offset[2] = nat_offset; - nat_offset += native_size; - } - break; + if (vtx_attr.NormalElements) { + vtx_decl.normal_offset[1] = nat_offset; + nat_offset += native_size; + vtx_decl.normal_offset[2] = nat_offset; + nat_offset += native_size; + } + break; } case FORMAT_USHORT: case FORMAT_SHORT: - vtx_decl.normal_gl_type = VAR_SHORT; + vtx_decl.normal_gl_type = (vtx_attr.NormalFormat == FORMAT_SHORT)? VAR_SHORT : VAR_UNSIGNED_SHORT; vtx_decl.normal_gl_size = 4; vtx_decl.normal_offset[0] = nat_offset; nat_offset += 8; diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index 1ed6c6ef63..6f5d7e680c 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -122,61 +122,73 @@ void LOADERDECL Pos_ReadDirect_Float2() } -template +template inline void Pos_ReadIndex_Byte(int Index) { - const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; + if(Index < MaxSize) + { + const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); + ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale; + ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale; + if (three) + ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale; + else + ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; + LOG_VTX(); + VertexManager::s_pCurBufferPointer += 12; + } } -template +template inline void Pos_ReadIndex_Short(int Index) { - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; + if(Index < MaxSize) + { + const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION])); + ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale; + ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale; + if (three) + ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale; + else + ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; + LOG_VTX(); + VertexManager::s_pCurBufferPointer += 12; + } } -template +template void Pos_ReadIndex_Float(int Index) { - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - if (three) - ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; + if(Index < MaxSize) + { + const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); + ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); + ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); + if (three) + ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); + else + ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; + LOG_VTX(); + VertexManager::s_pCurBufferPointer += 12; + } } #if _M_SSE >= 0x301 static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L); static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); -template +template void Pos_ReadIndex_Float_SSSE3(int Index) { - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); - const __m128i a = _mm_loadu_si128((__m128i*)pData); - __m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2); - _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; + if(Index < MaxSize) + { + const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); + const __m128i a = _mm_loadu_si128((__m128i*)pData); + __m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2); + _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); + LOG_VTX(); + VertexManager::s_pCurBufferPointer += 12; + } } #endif @@ -190,50 +202,60 @@ template void Pos_ReadDirect(); template void Pos_ReadDirect(); template void Pos_ReadDirect(); template void Pos_ReadDirect(); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Float(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Float(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Float(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Byte(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Short(int Index); +template void Pos_ReadIndex_Float(int Index); // ============================================================================== // Index 8 // ============================================================================== -void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short(DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float (DataReadU8());} // ============================================================================== // Index 16 // ============================================================================== -void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short(DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float (DataReadU16());} #if _M_SSE >= 0x301 -void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} -void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} +void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} +void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} +void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} +void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} #endif static TPipelineFunction tableReadPosition[4][8][2] = { diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index e071bad2ba..cb79d92d7a 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -147,7 +147,8 @@ typedef enum { API_OPENGL, API_D3D9, - API_D3D11 + API_D3D11, + API_GLSL } API_TYPE; #endif // _VIDEOCOMMON_H diff --git a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.h b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.h index a3bcea3c09..23be22b507 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.h +++ b/Source/Plugins/Plugin_VideoDX9/Src/FramebufferManager.h @@ -82,46 +82,6 @@ struct XFBSource class FramebufferManager { -public: - FramebufferManager() - { - s_efb_color_texture = NULL; - LPDIRECT3DTEXTURE9 s_efb_colorRead_texture = NULL; - LPDIRECT3DTEXTURE9 s_efb_depth_texture = NULL; - LPDIRECT3DTEXTURE9 s_efb_depthRead_texture = NULL; - - LPDIRECT3DSURFACE9 s_efb_depth_surface = NULL; - LPDIRECT3DSURFACE9 s_efb_color_surface = NULL; - LPDIRECT3DSURFACE9 s_efb_color_ReadBuffer = NULL; - LPDIRECT3DSURFACE9 s_efb_depth_ReadBuffer = NULL; - LPDIRECT3DSURFACE9 s_efb_color_OffScreenReadBuffer = NULL; - LPDIRECT3DSURFACE9 s_efb_depth_OffScreenReadBuffer = NULL; - - D3DFORMAT s_efb_color_surface_Format = D3DFMT_FORCE_DWORD; - D3DFORMAT s_efb_depth_surface_Format = D3DFMT_FORCE_DWORD; - D3DFORMAT s_efb_depth_ReadBuffer_Format = D3DFMT_FORCE_DWORD; - m_realXFBSource.texture = NULL; - } - - void Create(); - void Destroy(); - - void CopyToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc); - const XFBSource** GetXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32 &xfbCount); - - LPDIRECT3DTEXTURE9 GetEFBColorTexture(); - LPDIRECT3DTEXTURE9 GetEFBDepthTexture(); - - LPDIRECT3DSURFACE9 GetEFBColorRTSurface(); - LPDIRECT3DSURFACE9 GetEFBDepthRTSurface(); - LPDIRECT3DSURFACE9 GetEFBColorOffScreenRTSurface(); - LPDIRECT3DSURFACE9 GetEFBDepthOffScreenRTSurface(); - D3DFORMAT GetEFBDepthRTSurfaceFormat(); - D3DFORMAT GetEFBColorRTSurfaceFormat(); - D3DFORMAT GetEFBDepthReadSurfaceFormat(); - LPDIRECT3DSURFACE9 GetEFBColorReadSurface(); - LPDIRECT3DSURFACE9 GetEFBDepthReadSurface(); - private: struct VirtualXFB @@ -155,7 +115,7 @@ private: LPDIRECT3DTEXTURE9 s_efb_depth_texture;//Texture thats contains the depth data of the render target LPDIRECT3DTEXTURE9 s_efb_depthRead_texture;//4 pixel texture for temporal data store - LPDIRECT3DSURFACE9 s_efb_depth_surface;//Depth Surface + LPDIRECT3DSURFACE9 s_efb_depth_surface;//Depth Surface LPDIRECT3DSURFACE9 s_efb_color_surface;//Color Surface LPDIRECT3DSURFACE9 s_efb_color_ReadBuffer;//Surface 0 of s_efb_colorRead_texture LPDIRECT3DSURFACE9 s_efb_depth_ReadBuffer;//Surface 0 of s_efb_depthRead_texture @@ -165,6 +125,47 @@ private: D3DFORMAT s_efb_color_surface_Format;//Format of the color Surface D3DFORMAT s_efb_depth_surface_Format;//Format of the Depth Surface D3DFORMAT s_efb_depth_ReadBuffer_Format;//Format of the Depth color Read Surface +public: + FramebufferManager() + { + s_efb_color_texture = NULL; + s_efb_colorRead_texture = NULL; + s_efb_depth_texture = NULL; + s_efb_depthRead_texture = NULL; + + s_efb_depth_surface = NULL; + s_efb_color_surface = NULL; + s_efb_color_ReadBuffer = NULL; + s_efb_depth_ReadBuffer = NULL; + s_efb_color_OffScreenReadBuffer = NULL; + s_efb_depth_OffScreenReadBuffer = NULL; + + s_efb_color_surface_Format = D3DFMT_FORCE_DWORD; + s_efb_depth_surface_Format = D3DFMT_FORCE_DWORD; + s_efb_depth_ReadBuffer_Format = D3DFMT_FORCE_DWORD; + m_realXFBSource.texture = NULL; + } + + void Create(); + void Destroy(); + + void CopyToXFB(u32 xfbAddr, u32 fbWidth, u32 fbHeight, const EFBRectangle& sourceRc); + const XFBSource** GetXFBSource(u32 xfbAddr, u32 fbWidth, u32 fbHeight, u32 &xfbCount); + + LPDIRECT3DTEXTURE9 GetEFBColorTexture(); + LPDIRECT3DTEXTURE9 GetEFBDepthTexture(); + + LPDIRECT3DSURFACE9 GetEFBColorRTSurface(); + LPDIRECT3DSURFACE9 GetEFBDepthRTSurface(); + LPDIRECT3DSURFACE9 GetEFBColorOffScreenRTSurface(); + LPDIRECT3DSURFACE9 GetEFBDepthOffScreenRTSurface(); + D3DFORMAT GetEFBDepthRTSurfaceFormat(); + D3DFORMAT GetEFBColorRTSurfaceFormat(); + D3DFORMAT GetEFBDepthReadSurfaceFormat(); + LPDIRECT3DSURFACE9 GetEFBColorReadSurface(); + LPDIRECT3DSURFACE9 GetEFBDepthReadSurface(); + + }; extern FramebufferManager g_framebufferManager; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp index 1fe95dfa88..d4c02ccd07 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/Render.cpp @@ -1314,7 +1314,7 @@ void Renderer::Swap(u32 xfbAddr, FieldType field, u32 fbWidth, u32 fbHeight,cons u32 newAA = g_ActiveConfig.iMultisampleMode; - if (xfbchanged || WindowResized || s_LastEFBScale != g_ActiveConfig.iEFBScale) + if (xfbchanged || WindowResized || s_LastEFBScale != g_ActiveConfig.iEFBScale || s_LastAA != newAA) { s_LastAA = newAA; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index d4dede65d7..aa443ae4e8 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -1614,7 +1614,7 @@ void Renderer::RestoreAPIState() SetColorMask(); SetBlendMode(true); - VertexShaderCache::SetCurrentShader(0); + VertexShaderCache::SetCurrentShader(1); PixelShaderCache::SetCurrentShader(0); }