From a355d9868e855e0bbba766d6a5550bf88f1defe7 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Tue, 8 Sep 2015 03:05:47 +1200 Subject: [PATCH] FifoRecorder: Use Video Common to record efb2ram correctly. Texture updates have been moved into TextureCache, while TMEM updates were moved into bpmem. Code for handling efb2ram updates was added to TextureCache. There was a bug for preloaded RGBA8 textures: it only copied half the texture. The TODO was wrong too. --- Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp | 12 -- Source/Core/Core/FifoPlayer/FifoDataFile.cpp | 5 + Source/Core/Core/FifoPlayer/FifoDataFile.h | 2 + Source/Core/Core/FifoPlayer/FifoFileStruct.h | 2 +- Source/Core/Core/FifoPlayer/FifoPlayer.cpp | 3 +- .../Core/FifoPlayer/FifoRecordAnalyzer.cpp | 105 +----------------- .../Core/Core/FifoPlayer/FifoRecordAnalyzer.h | 6 +- Source/Core/Core/FifoPlayer/FifoRecorder.cpp | 9 +- Source/Core/Core/FifoPlayer/FifoRecorder.h | 5 +- Source/Core/VideoCommon/BPStructs.cpp | 29 +++-- Source/Core/VideoCommon/OpcodeDecoding.h | 2 - Source/Core/VideoCommon/TextureCacheBase.cpp | 38 ++++++- Source/Core/VideoCommon/VideoCommon.h | 3 + 13 files changed, 77 insertions(+), 144 deletions(-) diff --git a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp index f7f70d5b1d..5c8d956ab0 100644 --- a/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoAnalyzer.cpp @@ -68,18 +68,6 @@ void LoadBPReg(const BPCmd &bp, BPMemory &bpMem) bpMem.bpMask = 0xFFFFFF; } -void GetTlutLoadData(u32 &tlutAddr, u32 &memAddr, u32 &tlutXferCount, BPMemory &bpMem) -{ - tlutAddr = (bpMem.tmem_config.tlut_dest & 0x3FF) << 9; - tlutXferCount = (bpMem.tmem_config.tlut_dest & 0x1FFC00) >> 5; - - // TODO - figure out a cleaner way. 
- if (SConfig::GetInstance().bWii) - memAddr = bpMem.tmem_config.tlut_src << 5; - else - memAddr = (bpMem.tmem_config.tlut_src & 0xFFFFF) << 5; -} - void LoadCPReg(u32 subCmd, u32 value, CPMemory &cpMem) { switch (subCmd & 0xF0) diff --git a/Source/Core/Core/FifoPlayer/FifoDataFile.cpp b/Source/Core/Core/FifoPlayer/FifoDataFile.cpp index 7281018d3e..8bc8b79f9c 100644 --- a/Source/Core/Core/FifoPlayer/FifoDataFile.cpp +++ b/Source/Core/Core/FifoPlayer/FifoDataFile.cpp @@ -28,6 +28,11 @@ FifoDataFile::~FifoDataFile() } } +bool FifoDataFile::HasBrokenEFBCopies() const +{ + return version < 2; +} + void FifoDataFile::SetIsWii(bool isWii) { SetFlag(FLAG_IS_WII, isWii); diff --git a/Source/Core/Core/FifoPlayer/FifoDataFile.h b/Source/Core/Core/FifoPlayer/FifoDataFile.h index 607168f2dc..1bfde6a0a0 100644 --- a/Source/Core/Core/FifoPlayer/FifoDataFile.h +++ b/Source/Core/Core/FifoPlayer/FifoDataFile.h @@ -59,6 +59,7 @@ public: void SetIsWii(bool isWii); bool GetIsWii() const; + bool HasBrokenEFBCopies() const; u32 *GetBPMem() { return m_BPMem; } u32 *GetCPMem() { return m_CPMem; } @@ -93,6 +94,7 @@ private: u32 m_XFRegs[XF_REGS_SIZE]; u32 m_Flags; + u32 version = 0; /* Fifolog format version; zero-initialised so HasBrokenEFBCopies() never reads an indeterminate value. NOTE(review): nothing in this patch assigns it when a file is loaded - confirm the loader sets it from the header. */ std::vector m_Frames; }; diff --git a/Source/Core/Core/FifoPlayer/FifoFileStruct.h b/Source/Core/Core/FifoPlayer/FifoFileStruct.h index 9ca2ade332..4cf81aca72 100644 --- a/Source/Core/Core/FifoPlayer/FifoFileStruct.h +++ b/Source/Core/Core/FifoPlayer/FifoFileStruct.h @@ -12,7 +12,7 @@ namespace FifoFileStruct enum { FILE_ID = 0x0d01f1f0, - VERSION_NUMBER = 1, + VERSION_NUMBER = 2, MIN_LOADER_VERSION = 1, }; diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index 0d4696c161..3357fb3f25 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -63,8 +63,7 @@ bool FifoPlayer::Play() if (m_File->GetFrameCount() == 0) return false; - // Currently these is no such thing as a Fifolog without broken EFB copies. 
- IsPlayingBackFifologWithBrokenEFBCopies = true; + IsPlayingBackFifologWithBrokenEFBCopies = m_File->HasBrokenEFBCopies(); m_CurrentFrame = m_FrameRangeStart; diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp index cb8f82350e..67f878b209 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.cpp @@ -99,11 +99,6 @@ void FifoRecordAnalyzer::DecodeOpcode(u8 *data) u32 cmd2 = ReadFifo32(data); BPCmd bp = FifoAnalyzer::DecodeBPCmd(cmd2, *m_BpMem); - - if (bp.address == BPMEM_LOADTLUT1) - ProcessLoadTlut1(); - if (bp.address == BPMEM_PRELOAD_MODE) - ProcessPreloadTexture(); } break; @@ -113,7 +108,6 @@ void FifoRecordAnalyzer::DecodeOpcode(u8 *data) if (!m_DrawingObject) { m_DrawingObject = true; - ProcessTexMaps(); } ProcessVertexArrays(data, cmd & GX_VAT_MASK); @@ -125,26 +119,6 @@ void FifoRecordAnalyzer::DecodeOpcode(u8 *data) } } -void FifoRecordAnalyzer::ProcessLoadTlut1() -{ - u32 tlutXferCount; - u32 tlutMemAddr; - u32 memAddr; - - GetTlutLoadData(tlutMemAddr, memAddr, tlutXferCount, *m_BpMem); - - FifoRecorder::GetInstance().WriteMemory(memAddr, tlutXferCount, MemoryUpdate::TMEM); -} - -void FifoRecordAnalyzer::ProcessPreloadTexture() -{ - BPS_TmemConfig& tmem_cfg = m_BpMem->tmem_config; - //u32 tmem_addr = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE; - u32 size = tmem_cfg.preload_tile_info.count * TMEM_LINE_SIZE; // TODO: Should this be half size for RGBA8 preloads? 
- - FifoRecorder::GetInstance().WriteMemory(tmem_cfg.preload_addr << 5, size, MemoryUpdate::TMEM); -} - void FifoRecordAnalyzer::ProcessLoadIndexedXf(u32 val, int array) { int index = val >> 16; @@ -152,7 +126,7 @@ void FifoRecordAnalyzer::ProcessLoadIndexedXf(u32 val, int array) u32 address = m_CpMem.arrayBases[array] + m_CpMem.arrayStrides[array] * index; - FifoRecorder::GetInstance().WriteMemory(address, size * 4, MemoryUpdate::XF_DATA); + FifoRecorder::GetInstance().UseMemory(address, size * 4, MemoryUpdate::XF_DATA); } void FifoRecordAnalyzer::ProcessVertexArrays(u8 *data, u8 vtxAttrGroup) @@ -225,80 +199,5 @@ void FifoRecordAnalyzer::WriteVertexArray(int arrayIndex, u8 *vertexData, int ve u32 arrayStart = m_CpMem.arrayBases[arrayIndex]; u32 arraySize = m_CpMem.arrayStrides[arrayIndex] * (maxIndex + 1); - FifoRecorder::GetInstance().WriteMemory(arrayStart, arraySize, MemoryUpdate::VERTEX_STREAM); -} - -void FifoRecordAnalyzer::ProcessTexMaps() -{ - u32 writtenTexMaps = 0; - - // Texture maps used in TEV indirect stages - for (u32 i = 0; i < m_BpMem->genMode.numindstages; ++i) - { - u32 texMap = m_BpMem->tevindref.getTexMap(i); - - WriteTexMapMemory(texMap, writtenTexMaps); - } - - // Texture maps used in TEV direct stages - for (u32 i = 0; i <= m_BpMem->genMode.numtevstages; ++i) - { - int stageNum2 = i >> 1; - int stageOdd = i & 1; - TwoTevStageOrders &order = m_BpMem->tevorders[stageNum2]; - int texMap = order.getTexMap(stageOdd); - - if (order.getEnable(stageOdd)) - WriteTexMapMemory(texMap, writtenTexMaps); - } -} - -void FifoRecordAnalyzer::WriteTexMapMemory(int texMap, u32 &writtenTexMaps) -{ - // Avoid rechecking the same texture map - u32 texMapMask = 1 << texMap; - if (writtenTexMaps & texMapMask) - return; - - writtenTexMaps |= texMapMask; - - FourTexUnits& texUnit = m_BpMem->tex[(texMap >> 2) & 1]; - u8 subTexmap = texMap & 3; - - TexImage0& ti0 = texUnit.texImage0[subTexmap]; - - u32 width = ti0.width + 1; - u32 height = ti0.height + 1; - u32 
imageBase = texUnit.texImage3[subTexmap].image_base << 5; - - u32 fmtWidth = TexDecoder_GetBlockWidthInTexels(ti0.format) - 1; - u32 fmtHeight = TexDecoder_GetBlockHeightInTexels(ti0.format) - 1; - int fmtDepth = TexDecoder_GetTexelSizeInNibbles(ti0.format); - - // Round width and height up to the next block - width = (width + fmtWidth) & (~fmtWidth); - height = (height + fmtHeight) & (~fmtHeight); - - u32 textureSize = (width * height * fmtDepth) / 2; - - // TODO: mip maps - int mip = texUnit.texMode1[subTexmap].max_lod; - if ((texUnit.texMode0[subTexmap].min_filter & 3) == 0) - mip = 0; - - while (mip) - { - width >>= 1; - height >>= 1; - - width = std::max(width, fmtWidth); - height = std::max(height, fmtHeight); - u32 size = (width * height * fmtDepth) >> 1; - - textureSize += size; - - mip--; - } - - FifoRecorder::GetInstance().WriteMemory(imageBase, textureSize, MemoryUpdate::TEXTURE_MAP); + FifoRecorder::GetInstance().UseMemory(arrayStart, arraySize, MemoryUpdate::VERTEX_STREAM); } diff --git a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h index cc0268acdc..5ff8c8988e 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h +++ b/Source/Core/Core/FifoPlayer/FifoRecordAnalyzer.h @@ -19,20 +19,16 @@ public: void Initialize(u32 *bpMem, u32 *cpMem); // Assumes data contains all information for the command - // Calls FifoRecorder::WriteMemory + // Calls FifoRecorder::UseMemory void AnalyzeGPCommand(u8 *data); private: void DecodeOpcode(u8 *data); - void ProcessLoadTlut1(); - void ProcessPreloadTexture(); void ProcessLoadIndexedXf(u32 val, int array); void ProcessVertexArrays(u8 *data, u8 vtxAttrGroup); - void ProcessTexMaps(); void WriteVertexArray(int arrayIndex, u8 *vertexData, int vertexSize, int numVertices); - void WriteTexMapMemory(int texMap, u32 &writtenTexMaps); bool m_DrawingObject; diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp index 
eaf118012c..7c2db3c10b 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.cpp +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.cpp @@ -102,7 +102,7 @@ void FifoRecorder::WriteGPCommand(u8 *data, u32 size) m_SkipNextData = m_SkipFutureData; } -void FifoRecorder::WriteMemory(u32 address, u32 size, MemoryUpdate::Type type) +void FifoRecorder::UseMemory(u32 address, u32 size, MemoryUpdate::Type type, bool dynamicUpdate) { u8 *curData; u8 *newData; @@ -117,7 +117,7 @@ void FifoRecorder::WriteMemory(u32 address, u32 size, MemoryUpdate::Type type) newData = &Memory::m_pRAM[address & Memory::RAM_MASK]; } - if (memcmp(curData, newData, size) != 0) + if (!dynamicUpdate && memcmp(curData, newData, size) != 0) { // Update current memory memcpy(curData, newData, size); @@ -133,6 +133,11 @@ void FifoRecorder::WriteMemory(u32 address, u32 size, MemoryUpdate::Type type) m_CurrentFrame.memoryUpdates.push_back(memUpdate); } + else if (dynamicUpdate) + { + // Shadow the data so it won't be recorded as changed by a future UseMemory + memcpy(curData, newData, size); + } } void FifoRecorder::EndFrame(u32 fifoStart, u32 fifoEnd) diff --git a/Source/Core/Core/FifoPlayer/FifoRecorder.h b/Source/Core/Core/FifoPlayer/FifoRecorder.h index f39c92bf03..0e2d130ba7 100644 --- a/Source/Core/Core/FifoPlayer/FifoRecorder.h +++ b/Source/Core/Core/FifoPlayer/FifoRecorder.h @@ -27,7 +27,10 @@ public: // Must write one full GP command at a time void WriteGPCommand(u8 *data, u32 size); - void WriteMemory(u32 address, u32 size, MemoryUpdate::Type type); + // Track memory that has been used and write it to the fifolog if it has changed. + // If memory is updated by the video backend (dynamicUpdate == true) take special care to make sure the data + // isn't baked into the fifolog. 
+ void UseMemory(u32 address, u32 size, MemoryUpdate::Type type, bool dynamicUpdate = false); void EndFrame(u32 fifoStart, u32 fifoEnd); diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index e30aa2fcaa..4dc4643afb 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -8,6 +8,7 @@ #include "Common/Thread.h" #include "Core/ConfigManager.h" #include "Core/Core.h" +#include "Core/FifoPlayer/FifoRecorder.h" #include "Core/HW/Memmap.h" #include "VideoCommon/BoundingBox.h" @@ -281,6 +282,9 @@ static void BPWritten(const BPCmd& bp) Memory::CopyFromEmu(texMem + tlutTMemAddr, addr, tlutXferCount); + if (g_bRecordFifoData) + FifoRecorder::GetInstance().UseMemory(addr, tlutXferCount, MemoryUpdate::TMEM); + return; } case BPMEM_FOGRANGE: // Fog Settings Control @@ -455,15 +459,16 @@ static void BPWritten(const BPCmd& bp) BPS_TmemConfig& tmem_cfg = bpmem.tmem_config; u32 src_addr = tmem_cfg.preload_addr << 5; // TODO: Should we add mask here on GC? - u32 size = tmem_cfg.preload_tile_info.count * TMEM_LINE_SIZE; + u32 bytes_read = 0; u32 tmem_addr_even = tmem_cfg.preload_tmem_even * TMEM_LINE_SIZE; if (tmem_cfg.preload_tile_info.type != 3) { - if (tmem_addr_even + size > TMEM_SIZE) - size = TMEM_SIZE - tmem_addr_even; + bytes_read = tmem_cfg.preload_tile_info.count * TMEM_LINE_SIZE; + if (tmem_addr_even + bytes_read > TMEM_SIZE) + bytes_read = TMEM_SIZE - tmem_addr_even; - Memory::CopyFromEmu(texMem + tmem_addr_even, src_addr, size); + Memory::CopyFromEmu(texMem + tmem_addr_even, src_addr, bytes_read); } else // RGBA8 tiles (and CI14, but that might just be stupid libogc!) 
{ @@ -471,21 +476,23 @@ static void BPWritten(const BPCmd& bp) // AR and GB tiles are stored in separate TMEM banks => can't use a single memcpy for everything u32 tmem_addr_odd = tmem_cfg.preload_tmem_odd * TMEM_LINE_SIZE; + /* Accumulate into the bytes_read declared before the if/else: redeclaring it here shadows it, so UseMemory() below would be told 0 bytes for RGBA8 preloads. */ for (u32 i = 0; i < tmem_cfg.preload_tile_info.count; ++i) { - if (tmem_addr_even + TMEM_LINE_SIZE > TMEM_SIZE || - tmem_addr_odd + TMEM_LINE_SIZE > TMEM_SIZE) - return; + if (tmem_addr_even + TMEM_LINE_SIZE > TMEM_SIZE || tmem_addr_odd + TMEM_LINE_SIZE > TMEM_SIZE) + break; - // TODO: This isn't very optimised, does a whole lot of small memcpys - memcpy(texMem + tmem_addr_even, src_ptr, TMEM_LINE_SIZE); - memcpy(texMem + tmem_addr_odd, src_ptr + TMEM_LINE_SIZE, TMEM_LINE_SIZE); + memcpy(texMem + tmem_addr_even, src_ptr + bytes_read, TMEM_LINE_SIZE); + memcpy(texMem + tmem_addr_odd, src_ptr + bytes_read + TMEM_LINE_SIZE, TMEM_LINE_SIZE); tmem_addr_even += TMEM_LINE_SIZE; tmem_addr_odd += TMEM_LINE_SIZE; - src_ptr += TMEM_LINE_SIZE * 2; + bytes_read += TMEM_LINE_SIZE * 2; } } + + if (g_bRecordFifoData) + FifoRecorder::GetInstance().UseMemory(src_addr, bytes_read, MemoryUpdate::TMEM); } return; diff --git a/Source/Core/VideoCommon/OpcodeDecoding.h b/Source/Core/VideoCommon/OpcodeDecoding.h index ec1b2652e4..a79bf54be2 100644 --- a/Source/Core/VideoCommon/OpcodeDecoding.h +++ b/Source/Core/VideoCommon/OpcodeDecoding.h @@ -38,8 +38,6 @@ #define GX_DRAW_LINE_STRIP 0x6 // 0xB0 #define GX_DRAW_POINTS 0x7 // 0xB8 -extern bool g_bRecordFifoData; - void OpcodeDecoder_Init(); void OpcodeDecoder_Shutdown(); diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 694aa80c0a..e93e6955f4 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -11,6 +11,7 @@ #include "Core/ConfigManager.h" #include "Core/FifoPlayer/FifoPlayer.h" +#include "Core/FifoPlayer/FifoRecorder.h" #include "Core/HW/Memmap.h" #include 
"VideoCommon/Debugger.h" @@ -19,6 +20,7 @@ #include "VideoCommon/RenderBase.h" #include "VideoCommon/Statistics.h" #include "VideoCommon/TextureCacheBase.h" +#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoConfig.h" static const u64 TEXHASH_INVALID = 0; @@ -395,6 +397,25 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) full_format = texformat | (tlutfmt << 16); const u32 texture_size = TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat); + u32 additional_mips_size = 0; // not including level 0, which is texture_size + + // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain + // e.g. 64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we limit the mipmap count to 6 there + tex_levels = std::min(IntLog2(std::max(width, height)) + 1, tex_levels); + + for (u32 level = 1; level != tex_levels; ++level) + { + // We still need to calculate the original size of the mips + const u32 expanded_mip_width = ROUND_UP(CalculateLevelSize(width, level), bsw); + const u32 expanded_mip_height = ROUND_UP(CalculateLevelSize(height, level), bsh); + + additional_mips_size += TexDecoder_GetTextureSizeInBytes(expanded_mip_width, expanded_mip_height, texformat); + } + + // If we are recording a FifoLog, keep track of what memory we read. + // FifoRecorder does its own memory modification tracking independent of the texture hashing below. + if (g_bRecordFifoData && !from_tmem) + FifoRecorder::GetInstance().UseMemory(address, texture_size + additional_mips_size, MemoryUpdate::TEXTURE_MAP); const u8* src_data; if (from_tmem) @@ -415,10 +436,6 @@ TextureCache::TCacheEntryBase* TextureCache::Load(const u32 stage) full_hash = base_hash; } - // GPUs don't like when the specified mipmap count would require more than one 1x1-sized LOD in the mipmap chain - // e.g. 
64x64 with 7 LODs would have the mipmap chain 64x64,32x32,16x16,8x8,4x4,2x2,1x1,0x0, so we limit the mipmap count to 6 there - tex_levels = std::min(IntLog2(std::max(width, height)) + 1, tex_levels); - // Search the texture cache for textures by address // // Find all texture cache entries for the current texture address, and decide whether to use one of @@ -740,7 +757,7 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat // // For historical reasons, Dolphin doesn't actually implement "pure" EFB to RAM emulation, but only EFB to texture and hybrid EFB copies. - float colmat[28] = {0}; + float colmat[28] = { 0 }; float *const fConstAdd = colmat + 16; float *const ColorMask = colmat + 20; ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f; @@ -1058,6 +1075,17 @@ void TextureCache::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat count++), 0); } + if (g_bRecordFifoData) + { + // Mark the memory behind this efb copy as dynamically generated for the Fifo log + u32 address = dstAddr; + for (u32 i = 0; i < entry->NumBlocksY(); i++) + { + FifoRecorder::GetInstance().UseMemory(address, entry->CacheLinesPerRow() * 32, MemoryUpdate::TEXTURE_MAP, true); + address += entry->memory_stride; + } + } + textures_by_address.emplace((u64)dstAddr, entry); } diff --git a/Source/Core/VideoCommon/VideoCommon.h b/Source/Core/VideoCommon/VideoCommon.h index 4234bbb8be..ba339e5c4f 100644 --- a/Source/Core/VideoCommon/VideoCommon.h +++ b/Source/Core/VideoCommon/VideoCommon.h @@ -12,6 +12,9 @@ #include "Common/MathUtil.h" #include "VideoCommon/VideoBackendBase.h" +// Global flag to signal if FifoRecorder is active. +extern bool g_bRecordFifoData; + // These are accurate (disregarding AA modes). enum {