From 6d916762fb52a85aa086ef0cb6516cc63fbe775b Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Fri, 29 May 2015 18:25:19 +1200 Subject: [PATCH 1/3] Fix invalid pointer errors in Burnout 2. Yet another story of games loading weird shit into registers. For some reason, Burnout 2 would (in rare situations) load invalid addresses into cp_state.array_bases. What would the real hardware do in this situation? Who knows, Burnout 2 doesn't actually enable the vertex array with the invalid address so nothing kinky happens. But dolphin tries to optimise things and starts using the address as soon as it is loaded into memory. This causes GetPointer (which is now much more vocal) to throw an error. The Fix: We don't call GetPointer until we are sure the vertex array has been enabled. --- Source/Core/VideoCommon/CPMemory.h | 7 +++++++ Source/Core/VideoCommon/MainBase.cpp | 1 - .../Core/VideoCommon/VertexLoaderManager.cpp | 19 +++++++++++++------ Source/Core/VideoCommon/VertexLoaderManager.h | 2 +- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 4626f35f33..89f790b9a1 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -88,6 +88,12 @@ union TVtxDesc { u32 Hex0, Hex1; }; + + // Easily index into the Position..Tex7Coord fields. + u32 GetVertexArrayStatus(int idx) + { + return (Hex >> (9 + idx * 2)) & 0x3; + } }; union UVAT_group0 @@ -239,6 +245,7 @@ class VertexLoaderBase; // STATE_TO_SAVE struct CPState final { + // Only 12 of these arrays are used. u32 array_bases[16]; u32 array_strides[16]; TMatrixIndexA matrix_index_a; diff --git a/Source/Core/VideoCommon/MainBase.cpp b/Source/Core/VideoCommon/MainBase.cpp index 0b9dd58f27..96aa34de90 100644 --- a/Source/Core/VideoCommon/MainBase.cpp +++ b/Source/Core/VideoCommon/MainBase.cpp @@ -212,7 +212,6 @@ void VideoBackendHardware::DoState(PointerWrap& p) if (p.GetMode() == PointerWrap::MODE_READ) { m_invalid = true; - RecomputeCachedArraybases(); // Clear all caches that touch RAM // (? these don't appear to touch any emulation state that gets saved. moved to on load only.) diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 4168f7c777..8d2acecc66 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -42,7 +42,6 @@ void Init() map_entry = nullptr; for (auto& map_entry : g_preprocess_cp_state.vertex_loaders) map_entry = nullptr; - RecomputeCachedArraybases(); SETSTAT(stats.numVertexLoaders, 0); } @@ -136,6 +135,11 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal } else { loader = state->vertex_loaders[vtx_attr_group]; } + + // Lookup pointers for any vertex arrays. + if (!preprocess) + ComputeCachedArrayBases(); + return loader; } @@ -232,8 +236,6 @@ void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess) // Pointers to vertex arrays in GC RAM case 0xA0: state->array_bases[sub_cmd & 0xF] = value; - if (update_global_state) - cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value); break; case 0xB0: @@ -263,10 +265,15 @@ void FillCPMemoryArray(u32 *memory) } } -void RecomputeCachedArraybases() +void ComputeCachedArrayBases() { - for (int i = 0; i < 16; i++) + // Some games such as Burnout 2 can put invalid addresses into + // the array base registers. (see issue 8591) + // But the vertex arrays with invalid addresses aren't actually enabled. + for (int i = 0; i < 12; i++) { - cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]); + // Only update the array base if the vertex description states we are going to use it. + if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) >= 0x2) + cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]); } } diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index 25c40cb440..f745ec26a7 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -26,4 +26,4 @@ namespace VertexLoaderManager NativeVertexFormat* GetCurrentVertexFormat(); } -void RecomputeCachedArraybases(); +void ComputeCachedArrayBases(); From f57517f1a05db1790f853bf43d6093c19478b4ce Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sat, 30 May 2015 00:42:45 +1200 Subject: [PATCH 2/3] Clean up cached_arraybases. Update VideoSW to new scheme. Move ownership of cached_arraybases from CPMemory to VertexLoaderManager to better match it usage. --- .../VideoBackends/Software/CPMemLoader.cpp | 1 - .../VideoBackends/Software/SWVertexLoader.cpp | 4 +++ Source/Core/VideoCommon/CPMemory.cpp | 2 -- Source/Core/VideoCommon/CPMemory.h | 2 -- Source/Core/VideoCommon/VertexLoaderARM64.cpp | 3 +- .../Core/VideoCommon/VertexLoaderManager.cpp | 32 +++++++++++-------- Source/Core/VideoCommon/VertexLoaderManager.h | 5 ++- Source/Core/VideoCommon/VertexLoaderX64.cpp | 3 +- .../Core/VideoCommon/VertexLoader_Color.cpp | 13 ++++---- .../Core/VideoCommon/VertexLoader_Normal.cpp | 3 +- .../VideoCommon/VertexLoader_Position.cpp | 3 +- .../VideoCommon/VertexLoader_TextCoord.cpp | 3 +- .../VideoCommon/VertexLoaderTest.cpp | 9 +++--- 13 files changed, 48 insertions(+), 35 deletions(-) diff --git a/Source/Core/VideoBackends/Software/CPMemLoader.cpp b/Source/Core/VideoBackends/Software/CPMemLoader.cpp index 39627f1953..d1ebe6cc87 100644 --- a/Source/Core/VideoBackends/Software/CPMemLoader.cpp +++ b/Source/Core/VideoBackends/Software/CPMemLoader.cpp @@ -48,7 +48,6 @@ void SWLoadCPReg(u32 sub_cmd, u32 value) // Pointers to vertex arrays in GC RAM case 0xA0: g_main_cp_state.array_bases[sub_cmd & 0xF] = value; - cached_arraybases[sub_cmd & 0xF] = Memory::GetPointer(value); break; case 0xB0: diff --git a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp index d77bbc810b..d745ed9507 100644 --- a/Source/Core/VideoBackends/Software/SWVertexLoader.cpp +++ b/Source/Core/VideoBackends/Software/SWVertexLoader.cpp @@ -16,8 +16,10 @@ #include "VideoBackends/Software/XFMemLoader.h" #include "VideoCommon/VertexLoaderBase.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderUtils.h" + SWVertexLoader::SWVertexLoader() : m_VertexSize(0) { @@ -176,6 +178,8 @@ void SWVertexLoader::LoadVertex() // reserve memory for the destination of the vertex loader m_LoadedVertices.resize(vdec.stride + 4); + VertexLoaderManager::UpdateVertexArrayPointers(); + // convert the vertex from the gc format to the videocommon (hardware optimized) format u8* old = g_video_buffer_read_ptr; int converted_vertices = m_CurrentLoader->RunVertices( diff --git a/Source/Core/VideoCommon/CPMemory.cpp b/Source/Core/VideoCommon/CPMemory.cpp index ca710ae738..55904f5684 100644 --- a/Source/Core/VideoCommon/CPMemory.cpp +++ b/Source/Core/VideoCommon/CPMemory.cpp @@ -7,8 +7,6 @@ #include "VideoCommon/CPMemory.h" // CP state -u8 *cached_arraybases[16]; - CPState g_main_cp_state; CPState g_preprocess_cp_state; diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 89f790b9a1..47b1e169ab 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -245,7 +245,6 @@ class VertexLoaderBase; // STATE_TO_SAVE struct CPState final { - // Only 12 of these arrays are used. u32 array_bases[16]; u32 array_strides[16]; TMatrixIndexA matrix_index_a; @@ -268,7 +267,6 @@ extern void CopyPreprocessCPStateFromMain(); extern CPState g_main_cp_state; extern CPState g_preprocess_cp_state; -extern u8 *cached_arraybases[16]; // Might move this into its own file later. void LoadCPReg(u32 SubCmd, u32 Value, bool is_preprocess = false); diff --git a/Source/Core/VideoCommon/VertexLoaderARM64.cpp b/Source/Core/VideoCommon/VertexLoaderARM64.cpp index 038a576e74..36b6aae0f5 100644 --- a/Source/Core/VideoCommon/VertexLoaderARM64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderARM64.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "VideoCommon/VertexLoaderARM64.h" +#include "VideoCommon/VertexLoaderManager.h" using namespace Arm64Gen; @@ -331,7 +332,7 @@ void VertexLoaderARM64::GenerateVertexLoader() MOV(saved_count, count_reg); MOVI2R(stride_reg, (u64)&g_main_cp_state.array_strides); - MOVI2R(arraybase_reg, (u64)&cached_arraybases); + MOVI2R(arraybase_reg, (u64)&VertexLoaderManager::cached_arraybases); MOVI2R(scale_reg, (u64)&scale_factors); const u8* loop_start = GetCodePtr(); diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 8d2acecc66..4ebb07788b 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -35,6 +35,8 @@ static std::mutex s_vertex_loader_map_lock; static VertexLoaderMap s_vertex_loader_map; // TODO - change into array of pointers. Keep a map of all seen so far. +u8 *cached_arraybases[12]; + void Init() { MarkAllDirty(); @@ -52,6 +54,21 @@ void Shutdown() s_native_vertex_map.clear(); } +void UpdateVertexArrayPointers() +{ + // Some games such as Burnout 2 can put invalid addresses into + // the array base registers. (see issue 8591) + // But the vertex arrays with invalid addresses aren't actually enabled. + // Note: Only array bases 0 through 11 are used by the Vertex loaders. + // 12 through 15 are used for loading data into xfmem. + for (int i = 0; i < 12; i++) + { + // Only update the array base if the vertex description states we are going to use it. + if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) >= 0x2) + cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]); + } +} + namespace { struct entry @@ -138,7 +155,7 @@ static VertexLoaderBase* RefreshLoader(int vtx_attr_group, bool preprocess = fal // Lookup pointers for any vertex arrays. if (!preprocess) - ComputeCachedArrayBases(); + UpdateVertexArrayPointers(); return loader; } @@ -264,16 +281,3 @@ void FillCPMemoryArray(u32 *memory) memory[0xB0 + i] = g_main_cp_state.array_strides[i]; } } - -void ComputeCachedArrayBases() -{ - // Some games such as Burnout 2 can put invalid addresses into - // the array base registers. (see issue 8591) - // But the vertex arrays with invalid addresses aren't actually enabled. - for (int i = 0; i < 12; i++) - { - // Only update the array base if the vertex description states we are going to use it. - if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) >= 0x2) - cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]); - } -} diff --git a/Source/Core/VideoCommon/VertexLoaderManager.h b/Source/Core/VideoCommon/VertexLoaderManager.h index f745ec26a7..99336fc333 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.h +++ b/Source/Core/VideoCommon/VertexLoaderManager.h @@ -24,6 +24,9 @@ namespace VertexLoaderManager void AppendListToString(std::string *dest); NativeVertexFormat* GetCurrentVertexFormat(); + + // Resolved pointers to array bases. Used by vertex loaders. + extern u8 *cached_arraybases[12]; + void UpdateVertexArrayPointers(); } -void ComputeCachedArrayBases(); diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index 476566ab49..65d43b9688 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -7,6 +7,7 @@ #include "Common/Intrinsics.h" #include "Common/JitRegister.h" #include "Common/x64ABI.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexLoaderX64.h" using namespace Gen; @@ -58,7 +59,7 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute) } // TODO: Move cached_arraybases into CPState and use MDisp() relative to a constant register loaded with &g_main_cp_state. IMUL(32, scratch1, M(&g_main_cp_state.array_strides[array])); - MOV(64, R(scratch2), M(&cached_arraybases[array])); + MOV(64, R(scratch2), M(&VertexLoaderManager::cached_arraybases[array])); return MRegSum(scratch1, scratch2); } else diff --git a/Source/Core/VideoCommon/VertexLoader_Color.cpp b/Source/Core/VideoCommon/VertexLoader_Color.cpp index 65f34c7ba8..cf9ca88280 100644 --- a/Source/Core/VideoCommon/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Color.cpp @@ -6,6 +6,7 @@ #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_Color.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" @@ -100,7 +101,7 @@ template void Color_ReadIndex_16b_565(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]))); + u16 val = Common::swap16(*(const u16 *)(VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]))); _SetCol565(loader, val); } @@ -108,7 +109,7 @@ template void Color_ReadIndex_24b_888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]); + const u8 *iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]); _SetCol(loader, _Read24(iAddress)); } @@ -116,7 +117,7 @@ template void Color_ReadIndex_32b_888x(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]); + const u8 *iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]); _SetCol(loader, _Read24(iAddress)); } @@ -124,7 +125,7 @@ template void Color_ReadIndex_16b_4444(VertexLoader* loader) { auto const Index = DataRead(); - u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex])); + u16 val = *(const u16 *)(VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex])); _SetCol4444(loader, val); } @@ -132,7 +133,7 @@ template void Color_ReadIndex_24b_6666(VertexLoader* loader) { auto const Index = DataRead(); - const u8* pData = cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]) - 1; + const u8* pData = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]) - 1; u32 val = Common::swap32(pData); _SetCol6666(loader, val); } @@ -141,7 +142,7 @@ template void Color_ReadIndex_32b_8888(VertexLoader* loader) { auto const Index = DataRead(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]); + const u8 *iAddress = VertexLoaderManager::cached_arraybases[ARRAY_COLOR + loader->m_colIndex] + (Index * g_main_cp_state.array_strides[ARRAY_COLOR + loader->m_colIndex]); _SetCol(loader, _Read32(iAddress)); } diff --git a/Source/Core/VideoCommon/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/VertexLoader_Normal.cpp index 68f98dac4a..939b902a9d 100644 --- a/Source/Core/VideoCommon/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Normal.cpp @@ -8,6 +8,7 @@ #include "Common/CommonTypes.h" #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_Normal.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" @@ -71,7 +72,7 @@ __forceinline void Normal_Index_Offset() static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - auto const data = reinterpret_cast(cached_arraybases[ARRAY_NORMAL] + auto const data = reinterpret_cast(VertexLoaderManager::cached_arraybases[ARRAY_NORMAL] + (index * g_main_cp_state.array_strides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); ReadIndirect(data); } diff --git a/Source/Core/VideoCommon/VertexLoader_Position.cpp b/Source/Core/VideoCommon/VertexLoader_Position.cpp index f7cc43c7a4..ed6b4587b3 100644 --- a/Source/Core/VideoCommon/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/VertexLoader_Position.cpp @@ -7,6 +7,7 @@ #include "Common/CommonTypes.h" #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_Position.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" @@ -46,7 +47,7 @@ void LOADERDECL Pos_ReadIndex(VertexLoader* loader) auto const index = DataRead(); loader->m_vertexSkip = index == std::numeric_limits::max(); - auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); + auto const data = reinterpret_cast(VertexLoaderManager::cached_arraybases[ARRAY_POSITION] + (index * g_main_cp_state.array_strides[ARRAY_POSITION])); auto const scale = loader->m_posScale; DataReader dst(g_vertex_manager_write_ptr, nullptr); diff --git a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp index 441ec81dd8..7904a5ce88 100644 --- a/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/VertexLoader_TextCoord.cpp @@ -7,6 +7,7 @@ #include "Common/CommonTypes.h" #include "VideoCommon/VertexLoader.h" #include "VideoCommon/VertexLoader_TextCoord.h" +#include "VideoCommon/VertexLoaderManager.h" #include "VideoCommon/VertexManagerBase.h" #include "VideoCommon/VideoCommon.h" @@ -67,7 +68,7 @@ void LOADERDECL TexCoord_ReadIndex(VertexLoader* loader) static_assert(std::is_unsigned::value, "Only unsigned I is sane!"); auto const index = DataRead(); - auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + auto const data = reinterpret_cast(VertexLoaderManager::cached_arraybases[ARRAY_TEXCOORD0 + loader->m_tcIndex] + (index * g_main_cp_state.array_strides[ARRAY_TEXCOORD0 + loader->m_tcIndex])); auto const scale = loader->m_tcScale[loader->m_tcIndex]; DataReader dst(g_vertex_manager_write_ptr, nullptr); diff --git a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp index 8c4ea12964..84e1029e2e 100644 --- a/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp +++ b/Source/UnitTests/VideoCommon/VertexLoaderTest.cpp @@ -16,6 +16,7 @@ #include "VideoCommon/DataReader.h" #include "VideoCommon/OpcodeDecoding.h" #include "VideoCommon/VertexLoaderBase.h" +#include "VideoCommon/VertexLoaderManager.h" TEST(VertexLoaderUID, UniqueEnough) { @@ -151,7 +152,7 @@ TEST_P(VertexLoaderParamTest, PositionAll) Input(i); else Input(i); - cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); + VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); g_main_cp_state.array_strides[ARRAY_POSITION] = elements * elem_size; } CreateAndCheckSizes(input_size, elements * sizeof(float)); @@ -192,7 +193,7 @@ TEST_F(VertexLoaderTest, PositionIndex16FloatXY) m_vtx_attr.g0.PosFormat = FORMAT_FLOAT; CreateAndCheckSizes(sizeof(u16), 2 * sizeof(float)); Input(1); Input(0); - cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); + VertexLoaderManager::cached_arraybases[ARRAY_POSITION] = m_src.GetPointer(); g_main_cp_state.array_strides[ARRAY_POSITION] = sizeof(float); // ;) Input(1.f); Input(2.f); Input(3.f); RunVertices(2); @@ -297,9 +298,9 @@ TEST_F(VertexLoaderTest, LargeFloatVertexSpeed) CreateAndCheckSizes(33, 156); - for (int i = 0; i < 16; i++) + for (int i = 0; i < 12; i++) { - cached_arraybases[i] = m_src.GetPointer(); + VertexLoaderManager::cached_arraybases[i] = m_src.GetPointer(); g_main_cp_state.array_strides[i] = 129; } From 7df69829732745d98b843d37d68a65920d1d883b Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sat, 30 May 2015 03:58:27 +1200 Subject: [PATCH 3/3] Add a dirty flag for arraybases. Only loop through and call getPointers when something has actually changed. Worth about 2-4% speedup un SMG over the previous commit. --- Source/Core/VideoBackends/Software/CPMemLoader.cpp | 3 +++ Source/Core/VideoCommon/CPMemory.cpp | 3 +++ Source/Core/VideoCommon/CPMemory.h | 1 + Source/Core/VideoCommon/VertexLoaderManager.cpp | 9 +++++++++ 4 files changed, 16 insertions(+) diff --git a/Source/Core/VideoBackends/Software/CPMemLoader.cpp b/Source/Core/VideoBackends/Software/CPMemLoader.cpp index d1ebe6cc87..c33cd2dd0e 100644 --- a/Source/Core/VideoBackends/Software/CPMemLoader.cpp +++ b/Source/Core/VideoBackends/Software/CPMemLoader.cpp @@ -23,11 +23,13 @@ void SWLoadCPReg(u32 sub_cmd, u32 value) case 0x50: g_main_cp_state.vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits g_main_cp_state.vtx_desc.Hex |= value; + g_main_cp_state.bases_dirty = true; break; case 0x60: g_main_cp_state.vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits g_main_cp_state.vtx_desc.Hex |= (u64)value << 17; + g_main_cp_state.bases_dirty = true; break; case 0x70: @@ -48,6 +50,7 @@ void SWLoadCPReg(u32 sub_cmd, u32 value) // Pointers to vertex arrays in GC RAM case 0xA0: g_main_cp_state.array_bases[sub_cmd & 0xF] = value; + g_main_cp_state.bases_dirty = true; break; case 0xB0: diff --git a/Source/Core/VideoCommon/CPMemory.cpp b/Source/Core/VideoCommon/CPMemory.cpp index 55904f5684..5fc7bbe98d 100644 --- a/Source/Core/VideoCommon/CPMemory.cpp +++ b/Source/Core/VideoCommon/CPMemory.cpp @@ -22,7 +22,10 @@ void DoCPState(PointerWrap& p) p.DoArray(g_main_cp_state.vtx_attr, 8); p.DoMarker("CP Memory"); if (p.mode == PointerWrap::MODE_READ) + { CopyPreprocessCPStateFromMain(); + g_main_cp_state.bases_dirty = true; + } } void CopyPreprocessCPStateFromMain() diff --git a/Source/Core/VideoCommon/CPMemory.h b/Source/Core/VideoCommon/CPMemory.h index 47b1e169ab..491f818305 100644 --- a/Source/Core/VideoCommon/CPMemory.h +++ b/Source/Core/VideoCommon/CPMemory.h @@ -255,6 +255,7 @@ struct CPState final // Attributes that actually belong to VertexLoaderManager: BitSet32 attr_dirty; + bool bases_dirty; VertexLoaderBase* vertex_loaders[8]; }; diff --git a/Source/Core/VideoCommon/VertexLoaderManager.cpp b/Source/Core/VideoCommon/VertexLoaderManager.cpp index 4ebb07788b..dbb421991c 100644 --- a/Source/Core/VideoCommon/VertexLoaderManager.cpp +++ b/Source/Core/VideoCommon/VertexLoaderManager.cpp @@ -56,6 +56,10 @@ void Shutdown() void UpdateVertexArrayPointers() { + // Anything to update? + if (!g_main_cp_state.bases_dirty) + return; + // Some games such as Burnout 2 can put invalid addresses into // the array base registers. (see issue 8591) // But the vertex arrays with invalid addresses aren't actually enabled. @@ -67,6 +71,8 @@ void UpdateVertexArrayPointers() if (g_main_cp_state.vtx_desc.GetVertexArrayStatus(i) >= 0x2) cached_arraybases[i] = Memory::GetPointer(g_main_cp_state.array_bases[i]); } + + g_main_cp_state.bases_dirty = false; } namespace @@ -224,12 +230,14 @@ void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess) state->vtx_desc.Hex &= ~0x1FFFF; // keep the Upper bits state->vtx_desc.Hex |= value; state->attr_dirty = BitSet32::AllTrue(8); + state->bases_dirty = true; break; case 0x60: state->vtx_desc.Hex &= 0x1FFFF; // keep the lower 17Bits state->vtx_desc.Hex |= (u64)value << 17; state->attr_dirty = BitSet32::AllTrue(8); + state->bases_dirty = true; break; case 0x70: @@ -253,6 +261,7 @@ void LoadCPReg(u32 sub_cmd, u32 value, bool is_preprocess) // Pointers to vertex arrays in GC RAM case 0xA0: state->array_bases[sub_cmd & 0xF] = value; + state->bases_dirty = true; break; case 0xB0: