From ef0e401708dc534e8bfac52e5e43086b4255c22d Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 12 Sep 2021 18:12:51 +1200 Subject: [PATCH 1/2] BPMem: Abstract TexUnit Addressing into struct The addressing of the texture units is a bit non-obvious. This struct abstracts the complexity away. --- Source/Core/VideoCommon/BPMemory.h | 43 ++++++++++++++ Source/Core/VideoCommon/BPStructs.cpp | 80 +++++++++++++-------------- 2 files changed, 83 insertions(+), 40 deletions(-) diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index a3cb28fc39..6c5b77220b 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -1979,6 +1979,49 @@ struct BPS_TmemConfig u32 texinvalidate; }; +// The addressing of the texture units is a bit non-obvious. +// This struct abstracts the complexity away. +union TexUnitAddress +{ + enum class Register : u32 + { + SETMODE0 = 0, + SETMODE1 = 1, + SETIMAGE0 = 2, + SETIMAGE1 = 3, + SETIMAGE2 = 4, + SETIMAGE3 = 5, + SETTLUT = 6, + UNKNOWN = 7, + }; + + BitField<0, 2, u32> UnitIdLow; + BitField<2, 3, Register> Reg; + BitField<5, 1, u32> UnitIdHigh; + + BitField<0, 6, u32> FullAddress; + u32 hex; + + TexUnitAddress() : hex(0) {} + TexUnitAddress(u32 unit_id, Register reg = Register::SETMODE0) : hex(0) + { + UnitIdLow = unit_id & 3; + UnitIdHigh = unit_id >> 2; + Reg = reg; + } + + static TexUnitAddress FromBPAddress(u32 Address) + { + TexUnitAddress Val; + // Clear upper two bits (which should always be 0x80) + Val.FullAddress = Address & 0x3f; + return Val; + } + + u32 GetUnitID() const { return UnitIdLow | (UnitIdHigh << 2); } +}; +static_assert(sizeof(TexUnitAddress) == sizeof(u32)); + // All of BP memory struct BPCmd diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index fb6372f09c..110ae5e803 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -646,48 +646,48 @@ static void BPWritten(const 
BPCmd& bp) GeometryShaderManager::SetTexCoordChanged((bp.address - BPMEM_SU_SSIZE) >> 1); } return; - // ------------------------ - // BPMEM_TX_SETMODE0 - (Texture lookup and filtering mode) LOD/BIAS Clamp, MaxAnsio, LODBIAS, - // DiagLoad, Min Filter, Mag Filter, Wrap T, S - // BPMEM_TX_SETMODE1 - (LOD Stuff) - Max LOD, Min LOD - // ------------------------ - case BPMEM_TX_SETMODE0: // (0x90 for linear) - case BPMEM_TX_SETMODE0_4: - TextureCacheBase::InvalidateAllBindPoints(); - return; + } - case BPMEM_TX_SETMODE1: - case BPMEM_TX_SETMODE1_4: - TextureCacheBase::InvalidateAllBindPoints(); - return; - // -------------------------------------------- - // BPMEM_TX_SETIMAGE0 - Texture width, height, format - // BPMEM_TX_SETIMAGE1 - even LOD address in TMEM - Image Type, Cache Height, Cache Width, TMEM - // Offset - // BPMEM_TX_SETIMAGE2 - odd LOD address in TMEM - Cache Height, Cache Width, TMEM Offset - // BPMEM_TX_SETIMAGE3 - Address of Texture in main memory - // -------------------------------------------- - case BPMEM_TX_SETIMAGE0: - case BPMEM_TX_SETIMAGE0_4: - case BPMEM_TX_SETIMAGE1: - case BPMEM_TX_SETIMAGE1_4: - case BPMEM_TX_SETIMAGE2: - case BPMEM_TX_SETIMAGE2_4: - case BPMEM_TX_SETIMAGE3: - case BPMEM_TX_SETIMAGE3_4: - TextureCacheBase::InvalidateAllBindPoints(); - return; - // ------------------------------- - // Set a TLUT - // BPMEM_TX_SETTLUT - Format, TMEM Offset (offset of TLUT from start of TMEM high bank > > 5) - // ------------------------------- - case BPMEM_TX_SETTLUT: - case BPMEM_TX_SETTLUT_4: - TextureCacheBase::InvalidateAllBindPoints(); - return; + if ((bp.address & 0xc0) == 0x80) + { + auto tex_address = TexUnitAddress::FromBPAddress(bp.address); - default: - break; + switch (tex_address.Reg) + { + // ------------------------ + // BPMEM_TX_SETMODE0 - (Texture lookup and filtering mode) LOD/BIAS Clamp, MaxAniso, LODBIAS, + // DiagLoad, Min Filter, Mag Filter, Wrap T, S + // BPMEM_TX_SETMODE1 - (LOD Stuff) - Max LOD, Min LOD + //
------------------------ + case TexUnitAddress::Register::SETMODE0: + case TexUnitAddress::Register::SETMODE1: + TextureCacheBase::InvalidateAllBindPoints(); + return; + + // -------------------------------------------- + // BPMEM_TX_SETIMAGE0 - Texture width, height, format + // BPMEM_TX_SETIMAGE1 - even LOD address in TMEM - Image Type, Cache Height, Cache Width, + // TMEM Offset + // BPMEM_TX_SETIMAGE2 - odd LOD address in TMEM - Cache Height, Cache Width, TMEM Offset + // BPMEM_TX_SETIMAGE3 - Address of Texture in main memory + // -------------------------------------------- + case TexUnitAddress::Register::SETIMAGE0: + case TexUnitAddress::Register::SETIMAGE1: + case TexUnitAddress::Register::SETIMAGE2: + case TexUnitAddress::Register::SETIMAGE3: + TextureCacheBase::InvalidateAllBindPoints(); + return; + + // ------------------------------- + // Set a TLUT + // BPMEM_TX_SETTLUT - Format, TMEM Offset (offset of TLUT from start of TMEM high bank >> 5) + // ------------------------------- + case TexUnitAddress::Register::SETTLUT: + TextureCacheBase::InvalidateAllBindPoints(); + return; + case TexUnitAddress::Register::UNKNOWN: + break; // Not handled + } } switch (bp.address & 0xF0) From 9fa26624b02562fb8626c93c45acf2015fc06f40 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Sun, 10 Oct 2021 08:16:15 +1300 Subject: [PATCH 2/2] BPMemory: Refactor/consolidate TexUnit Addressing Currently the logic for addressing the individual TexUnits is splattered all across Dolphin's codebase. This commit attempts to consolidate it all into a single place and formalise it using our new TexUnitAddress struct.
--- .../Core/VideoBackends/Software/DebugUtil.cpp | 14 +- .../VideoBackends/Software/Rasterizer.cpp | 7 +- .../VideoBackends/Software/TextureSampler.cpp | 25 ++-- Source/Core/VideoCommon/BPMemory.h | 125 ++++++++++++++++-- Source/Core/VideoCommon/RenderState.cpp | 6 +- Source/Core/VideoCommon/TextureCacheBase.cpp | 3 +- Source/Core/VideoCommon/TextureInfo.cpp | 25 ++-- 7 files changed, 146 insertions(+), 59 deletions(-) diff --git a/Source/Core/VideoBackends/Software/DebugUtil.cpp b/Source/Core/VideoBackends/Software/DebugUtil.cpp index 397442f8e8..c580461172 100644 --- a/Source/Core/VideoBackends/Software/DebugUtil.cpp +++ b/Source/Core/VideoBackends/Software/DebugUtil.cpp @@ -52,13 +52,8 @@ void Shutdown() static void SaveTexture(const std::string& filename, u32 texmap, s32 mip) { - FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; - u8 subTexmap = texmap & 3; - - TexImage0& ti0 = texUnit.texImage0[subTexmap]; - - u32 width = ti0.width + 1; - u32 height = ti0.height + 1; + u32 width = bpmem.tex.GetUnit(texmap).texImage0.width + 1; + u32 height = bpmem.tex.GetUnit(texmap).texImage0.height + 1; auto data = std::make_unique(width * height * 4); @@ -80,10 +75,7 @@ void GetTextureRGBA(u8* dst, u32 texmap, s32 mip, u32 width, u32 height) static s32 GetMaxTextureLod(u32 texmap) { - FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; - u8 subTexmap = texmap & 3; - - u8 maxLod = texUnit.texMode1[subTexmap].max_lod; + u8 maxLod = bpmem.tex.GetUnit(texmap).texMode1.max_lod; u8 mip = maxLod >> 4; u8 fract = maxLod & 0xf; diff --git a/Source/Core/VideoBackends/Software/Rasterizer.cpp b/Source/Core/VideoBackends/Software/Rasterizer.cpp index de9925d596..df04ab5a31 100644 --- a/Source/Core/VideoBackends/Software/Rasterizer.cpp +++ b/Source/Core/VideoBackends/Software/Rasterizer.cpp @@ -163,13 +163,12 @@ static void InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, fl static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoord) { - 
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; - const u8 subTexmap = texmap & 3; + const auto& texUnit = bpmem.tex.GetUnit(texmap); // LOD calculation requires data from the texture mode for bias, etc. // it does not seem to use the actual texture size - const TexMode0& tm0 = texUnit.texMode0[subTexmap]; - const TexMode1& tm1 = texUnit.texMode1[subTexmap]; + const TexMode0& tm0 = texUnit.texMode0; + const TexMode1& tm1 = texUnit.texMode1; float sDelta, tDelta; if (tm0.diag_lod == LODType::Diagonal) diff --git a/Source/Core/VideoBackends/Software/TextureSampler.cpp b/Source/Core/VideoBackends/Software/TextureSampler.cpp index f54002bdb5..064e9df41c 100644 --- a/Source/Core/VideoBackends/Software/TextureSampler.cpp +++ b/Source/Core/VideoBackends/Software/TextureSampler.cpp @@ -74,8 +74,8 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample) bool mipLinear = false; #if (ALLOW_MIPMAP) - const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; - const TexMode0& tm0 = texUnit.texMode0[texmap & 3]; + const auto& texUnit = bpmem.tex.GetUnit(texmap); + const TexMode0& tm0 = texUnit.texMode0; const s32 lodFract = lod & 0xf; @@ -115,26 +115,25 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample) void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample) { - const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1]; - const u8 subTexmap = texmap & 3; + const auto& texUnit = bpmem.tex.GetUnit(texmap); - const TexMode0& tm0 = texUnit.texMode0[subTexmap]; - const TexImage0& ti0 = texUnit.texImage0[subTexmap]; - const TexTLUT& texTlut = texUnit.texTlut[subTexmap]; + const TexMode0& tm0 = texUnit.texMode0; + const TexImage0& ti0 = texUnit.texImage0; + const TexTLUT& texTlut = texUnit.texTlut; const TextureFormat texfmt = ti0.format; const TLUTFormat tlutfmt = texTlut.tlut_format; const u8* imageSrc; const u8* imageSrcOdd = nullptr; - if (texUnit.texImage1[subTexmap].cache_manually_managed) + if
(texUnit.texImage1.cache_manually_managed) { - imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE]; + imageSrc = &texMem[texUnit.texImage1.tmem_even * TMEM_LINE_SIZE]; if (texfmt == TextureFormat::RGBA8) - imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE]; + imageSrcOdd = &texMem[texUnit.texImage2.tmem_odd * TMEM_LINE_SIZE]; } else { - const u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5; + const u32 imageBase = texUnit.texImage3.image_base << 5; imageSrc = Memory::GetPointer(imageBase); } @@ -198,7 +197,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample) WrapCoord(&imageSPlus1, tm0.wrap_s, image_width_minus_1 + 1); WrapCoord(&imageTPlus1, tm0.wrap_t, image_height_minus_1 + 1); - if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed)) + if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed)) { TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut, tlutfmt); @@ -250,7 +249,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample) WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1); WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1); - if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed)) + if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed)) TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut, tlutfmt); else diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 6c5b77220b..21636224c0 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -951,18 +951,6 @@ struct fmt::formatter } }; -struct FourTexUnits -{ - TexMode0 texMode0[4]; - TexMode1 texMode1[4]; - TexImage0 texImage0[4]; - TexImage1 texImage1[4]; - TexImage2 texImage2[4]; - TexImage3 texImage3[4]; - 
TexTLUT texTlut[4]; - u32 unknown[4]; -}; - // Geometry/other structs enum class CullMode : u32 { @@ -1979,6 +1967,8 @@ struct BPS_TmemConfig u32 texinvalidate; }; +union AllTexUnits; + // The addressing of the texture units is a bit non-obvious. // This struct abstracts the complexity away. union TexUnitAddress @@ -2019,9 +2009,118 @@ union TexUnitAddress } u32 GetUnitID() const { return UnitIdLow | (UnitIdHigh << 2); } + +private: + friend AllTexUnits; + + size_t GetOffset() const { return FullAddress; } + size_t GetBPAddress() const { return FullAddress | 0x80; } + + static constexpr size_t ComputeOffset(u32 unit_id) + { + // FIXME: Would be nice to construct a TexUnitAddress and get its offset, + // but that doesn't seem to be possible in c++17 + + // So we manually re-implement the calculation + return (unit_id & 3) | ((unit_id & 4) << 3); + } }; static_assert(sizeof(TexUnitAddress) == sizeof(u32)); +// A view of the registers of a single TexUnit +struct TexUnit +{ + TexMode0 texMode0; + u32 : 32; // doing u32 : 96 is legal according to the standard, but msvc + u32 : 32; // doesn't like it. 
So we stack multiple lines of u32 : 32; + u32 : 32; + TexMode1 texMode1; + u32 : 32; + u32 : 32; + u32 : 32; + TexImage0 texImage0; + u32 : 32; + u32 : 32; + u32 : 32; + TexImage1 texImage1; + u32 : 32; + u32 : 32; + u32 : 32; + TexImage2 texImage2; + u32 : 32; + u32 : 32; + u32 : 32; + TexImage3 texImage3; + u32 : 32; + u32 : 32; + u32 : 32; + TexTLUT texTlut; + u32 : 32; + u32 : 32; + u32 : 32; + u32 unknown; +}; +static_assert(sizeof(TexUnit) == sizeof(u32) * 4 * 7 + sizeof(u32)); + +union AllTexUnits +{ + std::array AllRegisters; + + const TexUnit& GetUnit(u32 UnitId) const + { + auto address = TexUnitAddress(UnitId); + const u32* ptr = &AllRegisters[address.GetOffset()]; + return *reinterpret_cast(ptr); + } + +private: + // For debuggers since GetUnit can be optimised out in release builds + template + struct TexUnitPadding + { + static_assert(UnitId != 0, "Can't use 0 as sizeof(std::array) != 0"); + std::array pad; + }; + + TexUnit tex0; + struct + { + TexUnitPadding<1> pad1; + TexUnit tex1; + }; + struct + { + TexUnitPadding<2> pad2; + TexUnit tex2; + }; + struct + { + TexUnitPadding<3> pad3; + TexUnit tex3; + }; + struct + { + TexUnitPadding<4> pad4; + TexUnit tex4; + }; + struct + { + TexUnitPadding<5> pad5; + TexUnit tex5; + }; + struct + { + TexUnitPadding<6> pad6; + TexUnit tex6; + }; + struct + { + TexUnitPadding<7> pad7; + TexUnit tex7; + }; +}; +static_assert(sizeof(AllTexUnits) == 8 * 8 * sizeof(u32)); + // All of BP memory struct BPCmd @@ -2086,7 +2185,7 @@ struct BPMemory FieldMode fieldmode; // 68 u32 unknown10[7]; // 69-6F u32 unknown11[16]; // 70-7F - FourTexUnits tex[2]; // 80-bf + AllTexUnits tex; // 80-bf TevStageCombiner combiners[16]; // 0xC0-0xDF TevReg tevregs[4]; // 0xE0 FogRangeParams fogRange; // 0xE8 diff --git a/Source/Core/VideoCommon/RenderState.cpp b/Source/Core/VideoCommon/RenderState.cpp index 291aa9ce91..358c99974e 100644 --- a/Source/Core/VideoCommon/RenderState.cpp +++ b/Source/Core/VideoCommon/RenderState.cpp @@ -224,9 
+224,9 @@ BlendingState& BlendingState::operator=(const BlendingState& rhs) void SamplerState::Generate(const BPMemory& bp, u32 index) { - const FourTexUnits& tex = bpmem.tex[index / 4]; - const TexMode0& tm0 = tex.texMode0[index % 4]; - const TexMode1& tm1 = tex.texMode1[index % 4]; + const auto& tex = bp.tex.GetUnit(index); + const TexMode0& tm0 = tex.texMode0; + const TexMode1& tm1 = tex.texMode1; // GX can configure the mip filter to none. However, D3D and Vulkan can't express this in their // sampler states. Therefore, we set the min/max LOD to zero if this option is used. diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 766c017915..e113f15832 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -970,8 +970,7 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex, bool has_arbitrary_mips) { - const FourTexUnits& tex = bpmem.tex[index / 4]; - const TexMode0& tm0 = tex.texMode0[index % 4]; + const TexMode0& tm0 = bpmem.tex.GetUnit(index).texMode0; SamplerState state = {}; state.Generate(bpmem, index); diff --git a/Source/Core/VideoCommon/TextureInfo.cpp b/Source/Core/VideoCommon/TextureInfo.cpp index 631e91c6ee..6959b95534 100644 --- a/Source/Core/VideoCommon/TextureInfo.cpp +++ b/Source/Core/VideoCommon/TextureInfo.cpp @@ -14,30 +14,29 @@ TextureInfo TextureInfo::FromStage(u32 stage) { - const FourTexUnits& tex = bpmem.tex[stage >> 2]; - const u32 id = stage & 3; + const auto& tex = bpmem.tex.GetUnit(stage); - const auto texture_format = tex.texImage0[id].format; - const auto tlut_format = tex.texTlut[id].tlut_format; + const auto texture_format = tex.texImage0.format; + const auto tlut_format = tex.texTlut.tlut_format; - const auto width = tex.texImage0[id].width + 1; - const auto height = tex.texImage0[id].height + 1; + const auto width =
tex.texImage0.width + 1; + const auto height = tex.texImage0.height + 1; - const u32 address = (tex.texImage3[id].image_base /* & 0x1FFFFF*/) << 5; + const u32 address = (tex.texImage3.image_base /* & 0x1FFFFF*/) << 5; - const u32 tlutaddr = tex.texTlut[id].tmem_offset << 9; + const u32 tlutaddr = tex.texTlut.tmem_offset << 9; const u8* tlut_ptr = &texMem[tlutaddr]; std::optional mip_count; - const bool has_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0[id]); + const bool has_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0); if (has_mipmaps) { - mip_count = (tex.texMode1[id].max_lod + 0xf) / 0x10; + mip_count = (tex.texMode1.max_lod + 0xf) / 0x10; } - const bool from_tmem = tex.texImage1[id].cache_manually_managed != 0; - const u32 tmem_address_even = from_tmem ? tex.texImage1[id].tmem_even * TMEM_LINE_SIZE : 0; - const u32 tmem_address_odd = from_tmem ? tex.texImage2[id].tmem_odd * TMEM_LINE_SIZE : 0; + const bool from_tmem = tex.texImage1.cache_manually_managed != 0; + const u32 tmem_address_even = from_tmem ? tex.texImage1.tmem_even * TMEM_LINE_SIZE : 0; + const u32 tmem_address_odd = from_tmem ? tex.texImage2.tmem_odd * TMEM_LINE_SIZE : 0; if (from_tmem) {