BPMemory: Refactor/consolidate TexUnit Addressing

Currently, the logic for addressing the individual TexUnits is scattered all
across Dolphin's codebase. This commit attempts to consolidate it all into a
single place and formalise it using our new TexUnitAddress struct.
This commit is contained in:
Scott Mansell 2021-10-10 08:16:15 +13:00
parent ef0e401708
commit 9fa26624b0
7 changed files with 146 additions and 59 deletions

View File

@ -52,13 +52,8 @@ void Shutdown()
static void SaveTexture(const std::string& filename, u32 texmap, s32 mip)
{
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
u8 subTexmap = texmap & 3;
TexImage0& ti0 = texUnit.texImage0[subTexmap];
u32 width = ti0.width + 1;
u32 height = ti0.height + 1;
u32 width = bpmem.tex.GetUnit(texmap).texImage0.width + 1;
u32 height = bpmem.tex.GetUnit(texmap).texImage0.height + 1;
auto data = std::make_unique<u8[]>(width * height * 4);
@ -80,10 +75,7 @@ void GetTextureRGBA(u8* dst, u32 texmap, s32 mip, u32 width, u32 height)
static s32 GetMaxTextureLod(u32 texmap)
{
FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
u8 subTexmap = texmap & 3;
u8 maxLod = texUnit.texMode1[subTexmap].max_lod;
u8 maxLod = bpmem.tex.GetUnit(texmap).texMode1.max_lod;
u8 mip = maxLod >> 4;
u8 fract = maxLod & 0xf;

View File

@ -163,13 +163,12 @@ static void InitSlope(Slope* slope, float f1, float f2, float f3, float DX31, fl
static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoord)
{
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const u8 subTexmap = texmap & 3;
auto texUnit = bpmem.tex.GetUnit(texmap);
// LOD calculation requires data from the texture mode for bias, etc.
// it does not seem to use the actual texture size
const TexMode0& tm0 = texUnit.texMode0[subTexmap];
const TexMode1& tm1 = texUnit.texMode1[subTexmap];
const TexMode0& tm0 = texUnit.texMode0;
const TexMode1& tm1 = texUnit.texMode1;
float sDelta, tDelta;
if (tm0.diag_lod == LODType::Diagonal)

View File

@ -74,8 +74,8 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample)
bool mipLinear = false;
#if (ALLOW_MIPMAP)
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const TexMode0& tm0 = texUnit.texMode0[texmap & 3];
auto texUnit = bpmem.tex.GetUnit(texmap);
const TexMode0& tm0 = texUnit.texMode0;
const s32 lodFract = lod & 0xf;
@ -115,26 +115,25 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample)
void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
{
const FourTexUnits& texUnit = bpmem.tex[(texmap >> 2) & 1];
const u8 subTexmap = texmap & 3;
auto texUnit = bpmem.tex.GetUnit(texmap);
const TexMode0& tm0 = texUnit.texMode0[subTexmap];
const TexImage0& ti0 = texUnit.texImage0[subTexmap];
const TexTLUT& texTlut = texUnit.texTlut[subTexmap];
const TexMode0& tm0 = texUnit.texMode0;
const TexImage0& ti0 = texUnit.texImage0;
const TexTLUT& texTlut = texUnit.texTlut;
const TextureFormat texfmt = ti0.format;
const TLUTFormat tlutfmt = texTlut.tlut_format;
const u8* imageSrc;
const u8* imageSrcOdd = nullptr;
if (texUnit.texImage1[subTexmap].cache_manually_managed)
if (texUnit.texImage1.cache_manually_managed)
{
imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE];
imageSrc = &texMem[texUnit.texImage1.tmem_even * TMEM_LINE_SIZE];
if (texfmt == TextureFormat::RGBA8)
imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE];
imageSrcOdd = &texMem[texUnit.texImage2.tmem_odd * TMEM_LINE_SIZE];
}
else
{
const u32 imageBase = texUnit.texImage3[subTexmap].image_base << 5;
const u32 imageBase = texUnit.texImage3.image_base << 5;
imageSrc = Memory::GetPointer(imageBase);
}
@ -198,7 +197,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageSPlus1, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageTPlus1, tm0.wrap_t, image_height_minus_1 + 1);
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed))
{
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, image_width_minus_1, texfmt,
tlut, tlutfmt);
@ -250,7 +249,7 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageS, tm0.wrap_s, image_width_minus_1 + 1);
WrapCoord(&imageT, tm0.wrap_t, image_height_minus_1 + 1);
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].cache_manually_managed))
if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1.cache_manually_managed))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, image_width_minus_1, texfmt, tlut,
tlutfmt);
else

View File

@ -951,18 +951,6 @@ struct fmt::formatter<ZTex2>
}
};
// Registers for a group of four texture units, stored register-kind-major:
// each member is an array with one entry per unit in the group.
// Callers in this codebase select the group with (texmap >> 2) & 1 (or
// index / 4) and the entry within each array with texmap & 3 (or index % 4).
struct FourTexUnits
{
TexMode0 texMode0[4];
TexMode1 texMode1[4];
TexImage0 texImage0[4];
TexImage1 texImage1[4];
TexImage2 texImage2[4];
TexImage3 texImage3[4];
TexTLUT texTlut[4];
// NOTE(review): purpose of these four trailing words is not documented here.
u32 unknown[4];
};
// Geometry/other structs
enum class CullMode : u32
{
@ -1979,6 +1967,8 @@ struct BPS_TmemConfig
u32 texinvalidate;
};
union AllTexUnits;
// The addressing of the texture units is a bit non-obvious.
// This struct abstracts the complexity away.
union TexUnitAddress
@ -2019,9 +2009,118 @@ union TexUnitAddress
}
u32 GetUnitID() const { return UnitIdLow | (UnitIdHigh << 2); }
private:
friend AllTexUnits;
size_t GetOffset() const { return FullAddress; }
size_t GetBPAddress() const { return FullAddress | 0x80; }
static constexpr size_t ComputeOffset(u32 unit_id)
{
// FIXME: Would be nice to construct a TexUnitAddress and get its offset,
// but that doesn't seem to be possible in c++17
// So we manually re-implement the calculation
return (unit_id & 3) | ((unit_id & 4) << 3);
}
};
static_assert(sizeof(TexUnitAddress) == sizeof(u32));
// A view of the registers of a single TexUnit
//
// Every named register except the last is followed by three unnamed 32-bit
// bit-fields, so the named members are spaced four u32 slots apart. The
// static_assert following this struct pins the total size to 7 * 4 + 1 words.
// NOTE(review): the skipped slots presumably hold the same register of the
// neighbouring units (TexUnitAddress::ComputeOffset places units 0-3 at
// consecutive word offsets) - confirm against TexUnitAddress.
// AllTexUnits::GetUnit() hands out const references of this type that point
// directly into AllTexUnits::AllRegisters.
struct TexUnit
{
TexMode0 texMode0;
u32 : 32; // doing u32 : 96 is legal according to the standard, but msvc
u32 : 32; // doesn't like it. So we stack multiple lines of u32 : 32;
u32 : 32;
TexMode1 texMode1;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage0 texImage0;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage1 texImage1;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage2 texImage2;
u32 : 32;
u32 : 32;
u32 : 32;
TexImage3 texImage3;
u32 : 32;
u32 : 32;
u32 : 32;
TexTLUT texTlut;
u32 : 32;
u32 : 32;
u32 : 32;
// Last word of the view; deliberately not followed by padding (see the size
// static_assert after this struct).
u32 unknown;
};
static_assert(sizeof(TexUnit) == sizeof(u32) * 4 * 7 + sizeof(u32));
// Storage for the registers of all texture units, overlaid with per-unit views.
//
// AllRegisters is the raw array of 8 * 8 register words and GetUnit() is the
// accessor callers use. The private tex0..tex7 members are never read by the
// code in this union; they exist so a debugger can inspect a unit directly.
union AllTexUnits
{
std::array<u32, 8 * 8> AllRegisters;
// Returns a read-only view of the registers of texture unit UnitId.
// The reference points into AllRegisters; no copy is made here, but binding
// the result to plain `auto` at the call site copies the whole TexUnit.
// NOTE(review): UnitId is not range-checked in this function; valid ids are
// presumably 0-7 - confirm against the TexUnitAddress constructor.
const TexUnit& GetUnit(u32 UnitId) const
{
auto address = TexUnitAddress(UnitId);
// Word index of the unit's first register within AllRegisters.
const u32* ptr = &AllRegisters[address.GetOffset()];
// Reinterpret the register words starting at that index as a TexUnit view.
return *reinterpret_cast<const TexUnit*>(ptr);
}
private:
// For debuggers since GetUnit can be optimised out in release builds
// Padding of TexUnitAddress::ComputeOffset(UnitId) words, placed in front of
// a TexUnit member so that the member starts at that unit's word offset.
template <u32 UnitId>
struct TexUnitPadding
{
static_assert(UnitId != 0, "Can't use 0 as sizeof(std::array<u32, 0>) != 0");
std::array<u32, TexUnitAddress::ComputeOffset(UnitId)> pad;
};
// Unit 0 starts at offset 0, so it needs no padding in front of it.
TexUnit tex0;
// Units 1-7: each anonymous struct pairs the padding for that unit with its view.
struct
{
TexUnitPadding<1> pad1;
TexUnit tex1;
};
struct
{
TexUnitPadding<2> pad2;
TexUnit tex2;
};
struct
{
TexUnitPadding<3> pad3;
TexUnit tex3;
};
struct
{
TexUnitPadding<4> pad4;
TexUnit tex4;
};
struct
{
TexUnitPadding<5> pad5;
TexUnit tex5;
};
struct
{
TexUnitPadding<6> pad6;
TexUnit tex6;
};
struct
{
TexUnitPadding<7> pad7;
TexUnit tex7;
};
};
static_assert(sizeof(AllTexUnits) == 8 * 8 * sizeof(u32));
// All of BP memory
struct BPCmd
@ -2086,7 +2185,7 @@ struct BPMemory
FieldMode fieldmode; // 68
u32 unknown10[7]; // 69-6F
u32 unknown11[16]; // 70-7F
FourTexUnits tex[2]; // 80-bf
AllTexUnits tex; // 80-bf
TevStageCombiner combiners[16]; // 0xC0-0xDF
TevReg tevregs[4]; // 0xE0
FogRangeParams fogRange; // 0xE8

View File

@ -224,9 +224,9 @@ BlendingState& BlendingState::operator=(const BlendingState& rhs)
void SamplerState::Generate(const BPMemory& bp, u32 index)
{
const FourTexUnits& tex = bpmem.tex[index / 4];
const TexMode0& tm0 = tex.texMode0[index % 4];
const TexMode1& tm1 = tex.texMode1[index % 4];
auto tex = bp.tex.GetUnit(index);
const TexMode0& tm0 = tex.texMode0;
const TexMode1& tm1 = tex.texMode1;
// GX can configure the mip filter to none. However, D3D and Vulkan can't express this in their
// sampler states. Therefore, we set the min/max LOD to zero if this option is used.

View File

@ -970,8 +970,7 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns
static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
bool has_arbitrary_mips)
{
const FourTexUnits& tex = bpmem.tex[index / 4];
const TexMode0& tm0 = tex.texMode0[index % 4];
const TexMode0& tm0 = bpmem.tex.GetUnit(index).texMode0;
SamplerState state = {};
state.Generate(bpmem, index);

View File

@ -14,30 +14,29 @@
TextureInfo TextureInfo::FromStage(u32 stage)
{
const FourTexUnits& tex = bpmem.tex[stage >> 2];
const u32 id = stage & 3;
const auto tex = bpmem.tex.GetUnit(stage);
const auto texture_format = tex.texImage0[id].format;
const auto tlut_format = tex.texTlut[id].tlut_format;
const auto texture_format = tex.texImage0.format;
const auto tlut_format = tex.texTlut.tlut_format;
const auto width = tex.texImage0[id].width + 1;
const auto height = tex.texImage0[id].height + 1;
const auto width = tex.texImage0.width + 1;
const auto height = tex.texImage0.height + 1;
const u32 address = (tex.texImage3[id].image_base /* & 0x1FFFFF*/) << 5;
const u32 address = (tex.texImage3.image_base /* & 0x1FFFFF*/) << 5;
const u32 tlutaddr = tex.texTlut[id].tmem_offset << 9;
const u32 tlutaddr = tex.texTlut.tmem_offset << 9;
const u8* tlut_ptr = &texMem[tlutaddr];
std::optional<u32> mip_count;
const bool has_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0[id]);
const bool has_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0);
if (has_mipmaps)
{
mip_count = (tex.texMode1[id].max_lod + 0xf) / 0x10;
mip_count = (tex.texMode1.max_lod + 0xf) / 0x10;
}
const bool from_tmem = tex.texImage1[id].cache_manually_managed != 0;
const u32 tmem_address_even = from_tmem ? tex.texImage1[id].tmem_even * TMEM_LINE_SIZE : 0;
const u32 tmem_address_odd = from_tmem ? tex.texImage2[id].tmem_odd * TMEM_LINE_SIZE : 0;
const bool from_tmem = tex.texImage1.cache_manually_managed != 0;
const u32 tmem_address_even = from_tmem ? tex.texImage1.tmem_even * TMEM_LINE_SIZE : 0;
const u32 tmem_address_odd = from_tmem ? tex.texImage2.tmem_odd * TMEM_LINE_SIZE : 0;
if (from_tmem)
{