Handle address wrap-around in the texture cache
Fixes an out-of-bounds access in Mario 64; also slightly optimises paletted texture conversion.
This commit is contained in:
parent
b60f42b281
commit
58ab33210a
11
src/GPU.h
11
src/GPU.h
|
@ -499,6 +499,17 @@ public:
|
||||||
OAMDirty |= 1 << (addr / 1024);
|
OAMDirty |= 1 << (addr / 1024);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline T ReadVRAMFlat_Texture(u32 addr) const
|
||||||
|
{
|
||||||
|
return *(T*)&VRAMFlat_Texture[addr & 0x7FFFF];
|
||||||
|
}
|
||||||
|
template <typename T>
|
||||||
|
inline T ReadVRAMFlat_TexPal(u32 addr) const
|
||||||
|
{
|
||||||
|
return *(T*)&VRAMFlat_TexPal[addr & 0x1FFFF];
|
||||||
|
}
|
||||||
|
|
||||||
void SetPowerCnt(u32 val) noexcept;
|
void SetPowerCnt(u32 val) noexcept;
|
||||||
|
|
||||||
void StartFrame() noexcept;
|
void StartFrame() noexcept;
|
||||||
|
|
|
@ -193,10 +193,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
case 1: // A3I5
|
case 1: // A3I5
|
||||||
{
|
{
|
||||||
vramaddr += ((t * width) + s);
|
vramaddr += ((t * width) + s);
|
||||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
|
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
|
||||||
|
|
||||||
texpal <<= 4;
|
texpal <<= 4;
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x1F)<<1), gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + ((pixel&0x1F)<<1));
|
||||||
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
|
*alpha = ((pixel >> 3) & 0x1C) + (pixel >> 6);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -204,12 +204,12 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
case 2: // 4-color
|
case 2: // 4-color
|
||||||
{
|
{
|
||||||
vramaddr += (((t * width) + s) >> 2);
|
vramaddr += (((t * width) + s) >> 2);
|
||||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
|
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
|
||||||
pixel >>= ((s & 0x3) << 1);
|
pixel >>= ((s & 0x3) << 1);
|
||||||
pixel &= 0x3;
|
pixel &= 0x3;
|
||||||
|
|
||||||
texpal <<= 3;
|
texpal <<= 3;
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
|
||||||
*alpha = (pixel==0) ? alpha0 : 31;
|
*alpha = (pixel==0) ? alpha0 : 31;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -217,12 +217,12 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
case 3: // 16-color
|
case 3: // 16-color
|
||||||
{
|
{
|
||||||
vramaddr += (((t * width) + s) >> 1);
|
vramaddr += (((t * width) + s) >> 1);
|
||||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
|
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
|
||||||
if (s & 0x1) pixel >>= 4;
|
if (s & 0x1) pixel >>= 4;
|
||||||
else pixel &= 0xF;
|
else pixel &= 0xF;
|
||||||
|
|
||||||
texpal <<= 4;
|
texpal <<= 4;
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
|
||||||
*alpha = (pixel==0) ? alpha0 : 31;
|
*alpha = (pixel==0) ? alpha0 : 31;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -230,10 +230,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
case 4: // 256-color
|
case 4: // 256-color
|
||||||
{
|
{
|
||||||
vramaddr += ((t * width) + s);
|
vramaddr += ((t * width) + s);
|
||||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
|
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
|
||||||
|
|
||||||
texpal <<= 4;
|
texpal <<= 4;
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + (pixel<<1), gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + (pixel<<1));
|
||||||
*alpha = (pixel==0) ? alpha0 : 31;
|
*alpha = (pixel==0) ? alpha0 : 31;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -253,31 +253,31 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
val = 0;
|
val = 0;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
val = ReadVRAM_Texture<u8>(vramaddr, gpu);
|
val = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
|
||||||
val >>= (2 * (s & 0x3));
|
val >>= (2 * (s & 0x3));
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 palinfo = ReadVRAM_Texture<u16>(slot1addr, gpu);
|
u16 palinfo = gpu.ReadVRAMFlat_Texture<u16>(slot1addr);
|
||||||
u32 paloffset = (palinfo & 0x3FFF) << 2;
|
u32 paloffset = (palinfo & 0x3FFF) << 2;
|
||||||
texpal <<= 4;
|
texpal <<= 4;
|
||||||
|
|
||||||
switch (val & 0x3)
|
switch (val & 0x3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
|
||||||
*alpha = 31;
|
*alpha = 31;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 1:
|
case 1:
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);
|
||||||
*alpha = 31;
|
*alpha = 31;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
if ((palinfo >> 14) == 1)
|
if ((palinfo >> 14) == 1)
|
||||||
{
|
{
|
||||||
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
|
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
|
||||||
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
|
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);
|
||||||
|
|
||||||
u32 r0 = color0 & 0x001F;
|
u32 r0 = color0 & 0x001F;
|
||||||
u32 g0 = color0 & 0x03E0;
|
u32 g0 = color0 & 0x03E0;
|
||||||
|
@ -294,8 +294,8 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
}
|
}
|
||||||
else if ((palinfo >> 14) == 3)
|
else if ((palinfo >> 14) == 3)
|
||||||
{
|
{
|
||||||
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
|
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
|
||||||
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
|
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);
|
||||||
|
|
||||||
u32 r0 = color0 & 0x001F;
|
u32 r0 = color0 & 0x001F;
|
||||||
u32 g0 = color0 & 0x03E0;
|
u32 g0 = color0 & 0x03E0;
|
||||||
|
@ -311,20 +311,20 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
*color = r | g | b;
|
*color = r | g | b;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 4, gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 4);
|
||||||
*alpha = 31;
|
*alpha = 31;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
if ((palinfo >> 14) == 2)
|
if ((palinfo >> 14) == 2)
|
||||||
{
|
{
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + paloffset + 6, gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 6);
|
||||||
*alpha = 31;
|
*alpha = 31;
|
||||||
}
|
}
|
||||||
else if ((palinfo >> 14) == 3)
|
else if ((palinfo >> 14) == 3)
|
||||||
{
|
{
|
||||||
u16 color0 = ReadVRAM_TexPal<u16>(texpal + paloffset, gpu);
|
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset);
|
||||||
u16 color1 = ReadVRAM_TexPal<u16>(texpal + paloffset + 2, gpu);
|
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(texpal + paloffset + 2);
|
||||||
|
|
||||||
u32 r0 = color0 & 0x001F;
|
u32 r0 = color0 & 0x001F;
|
||||||
u32 g0 = color0 & 0x03E0;
|
u32 g0 = color0 & 0x03E0;
|
||||||
|
@ -353,10 +353,10 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
case 6: // A5I3
|
case 6: // A5I3
|
||||||
{
|
{
|
||||||
vramaddr += ((t * width) + s);
|
vramaddr += ((t * width) + s);
|
||||||
u8 pixel = ReadVRAM_Texture<u8>(vramaddr, gpu);
|
u8 pixel = gpu.ReadVRAMFlat_Texture<u8>(vramaddr);
|
||||||
|
|
||||||
texpal <<= 4;
|
texpal <<= 4;
|
||||||
*color = ReadVRAM_TexPal<u16>(texpal + ((pixel&0x7)<<1), gpu);
|
*color = gpu.ReadVRAMFlat_TexPal<u16>(texpal + ((pixel&0x7)<<1));
|
||||||
*alpha = (pixel >> 3);
|
*alpha = (pixel >> 3);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -364,7 +364,7 @@ void SoftRenderer::TextureLookup(const GPU& gpu, u32 texparam, u32 texpal, s16 s
|
||||||
case 7: // direct color
|
case 7: // direct color
|
||||||
{
|
{
|
||||||
vramaddr += (((t * width) + s) << 1);
|
vramaddr += (((t * width) + s) << 1);
|
||||||
*color = ReadVRAM_Texture<u16>(vramaddr, gpu);
|
*color = gpu.ReadVRAMFlat_Texture<u16>(vramaddr);
|
||||||
*alpha = (*color & 0x8000) ? 31 : 0;
|
*alpha = (*color & 0x8000) ? 31 : 0;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -1659,8 +1659,8 @@ void SoftRenderer::ClearBuffers(const GPU& gpu)
|
||||||
{
|
{
|
||||||
for (int x = 0; x < 256; x++)
|
for (int x = 0; x < 256; x++)
|
||||||
{
|
{
|
||||||
u16 val2 = ReadVRAM_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1), gpu);
|
u16 val2 = gpu.ReadVRAMFlat_Texture<u16>(0x40000 + (yoff << 9) + (xoff << 1));
|
||||||
u16 val3 = ReadVRAM_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1), gpu);
|
u16 val3 = gpu.ReadVRAMFlat_Texture<u16>(0x60000 + (yoff << 9) + (xoff << 1));
|
||||||
|
|
||||||
// TODO: confirm color conversion
|
// TODO: confirm color conversion
|
||||||
u32 r = (val2 << 1) & 0x3E; if (r) r++;
|
u32 r = (val2 << 1) & 0x3E; if (r) r++;
|
||||||
|
|
|
@ -430,16 +430,6 @@ private:
|
||||||
s32 ycoverage, ycov_incr;
|
s32 ycoverage, ycov_incr;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
inline T ReadVRAM_Texture(u32 addr, const GPU& gpu) const
|
|
||||||
{
|
|
||||||
return *(T*)&gpu.VRAMFlat_Texture[addr & 0x7FFFF];
|
|
||||||
}
|
|
||||||
template <typename T>
|
|
||||||
inline T ReadVRAM_TexPal(u32 addr, const GPU& gpu) const
|
|
||||||
{
|
|
||||||
return *(T*)&gpu.VRAMFlat_TexPal[addr & 0x1FFFF];
|
|
||||||
}
|
|
||||||
u32 AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept;
|
u32 AlphaBlend(const GPU3D& gpu3d, u32 srccolor, u32 dstcolor, u32 alpha) const noexcept;
|
||||||
|
|
||||||
struct RendererPolygon
|
struct RendererPolygon
|
||||||
|
|
|
@ -75,11 +75,11 @@ inline u32 ConvertRGB5ToRGB6(u16 val)
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int outputFmt>
|
template <int outputFmt>
|
||||||
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
|
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u32 addr, GPU& gpu)
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < width*height; i++)
|
for (u32 i = 0; i < width*height; i++)
|
||||||
{
|
{
|
||||||
u16 value = *(u16*)&texData[i * 2];
|
u16 value = gpu.ReadVRAMFlat_Texture<u16>(addr + i * 2);
|
||||||
|
|
||||||
switch (outputFmt)
|
switch (outputFmt)
|
||||||
{
|
{
|
||||||
|
@ -96,28 +96,28 @@ void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u8* texData);
|
template void ConvertBitmapTexture<outputFmt_RGB6A5>(u32 width, u32 height, u32* output, u32 addr, GPU& gpu);
|
||||||
|
|
||||||
template <int outputFmt>
|
template <int outputFmt>
|
||||||
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData)
|
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u32 addr, u32 addrAux, u32 palAddr, GPU& gpu)
|
||||||
{
|
{
|
||||||
// we process a whole block at the time
|
// we process a whole block at the time
|
||||||
for (int y = 0; y < height / 4; y++)
|
for (int y = 0; y < height / 4; y++)
|
||||||
{
|
{
|
||||||
for (int x = 0; x < width / 4; x++)
|
for (int x = 0; x < width / 4; x++)
|
||||||
{
|
{
|
||||||
u32 data = ((u32*)texData)[x + y * (width / 4)];
|
u32 data = gpu.ReadVRAMFlat_Texture<u32>(addr + (x + y * (width / 4))*4);
|
||||||
u16 auxData = ((u16*)texAuxData)[x + y * (width / 4)];
|
u16 auxData = gpu.ReadVRAMFlat_Texture<u16>(addrAux + (x + y * (width / 4))*2);
|
||||||
|
|
||||||
u32 paletteOffset = auxData & 0x3FFF;
|
u32 paletteOffset = palAddr + (auxData & 0x3FFF) * 4;
|
||||||
u16 color0 = palData[paletteOffset*2] | 0x8000;
|
u16 color0 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset) | 0x8000;
|
||||||
u16 color1 = palData[paletteOffset*2+1] | 0x8000;
|
u16 color1 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+2) | 0x8000;
|
||||||
u16 color2, color3;
|
u16 color2 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+4) | 0x8000;
|
||||||
|
u16 color3 = gpu.ReadVRAMFlat_TexPal<u16>(paletteOffset+6) | 0x8000;
|
||||||
|
|
||||||
switch ((auxData >> 14) & 0x3)
|
switch ((auxData >> 14) & 0x3)
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
color2 = palData[paletteOffset*2+2] | 0x8000;
|
|
||||||
color3 = 0;
|
color3 = 0;
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
|
@ -137,8 +137,6 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
|
||||||
color3 = 0;
|
color3 = 0;
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
color2 = palData[paletteOffset*2+2] | 0x8000;
|
|
||||||
color3 = palData[paletteOffset*2+3] | 0x8000;
|
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
{
|
{
|
||||||
|
@ -179,7 +177,8 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
{
|
{
|
||||||
u16 color = (packed >> 16 * (data >> 2 * (i + j * 4))) & 0xFFFF;
|
u32 colorIdx = 16 * ((data >> 2 * (i + j * 4)) & 0x3);
|
||||||
|
u16 color = (packed >> colorIdx) & 0xFFFF;
|
||||||
u32 res;
|
u32 res;
|
||||||
switch (outputFmt)
|
switch (outputFmt)
|
||||||
{
|
{
|
||||||
|
@ -197,20 +196,20 @@ void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u8*, u8*, u16*);
|
template void ConvertCompressedTexture<outputFmt_RGB6A5>(u32, u32, u32*, u32, u32, u32, GPU&);
|
||||||
|
|
||||||
template <int outputFmt, int X, int Y>
|
template <int outputFmt, int X, int Y>
|
||||||
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData)
|
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, GPU& gpu)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < height; y++)
|
for (int y = 0; y < height; y++)
|
||||||
{
|
{
|
||||||
for (int x = 0; x < width; x++)
|
for (int x = 0; x < width; x++)
|
||||||
{
|
{
|
||||||
u8 val = texData[x + y * width];
|
u8 val = gpu.ReadVRAMFlat_Texture<u8>(addr + x + y * width);
|
||||||
|
|
||||||
u32 idx = val & ((1 << Y) - 1);
|
u32 idx = val & ((1 << Y) - 1);
|
||||||
|
|
||||||
u16 color = palData[idx];
|
u16 color = gpu.ReadVRAMFlat_TexPal<u16>(palAddr + idx * 2);
|
||||||
u32 alpha = (val >> Y) & ((1 << X) - 1);
|
u32 alpha = (val >> Y) & ((1 << X) - 1);
|
||||||
if (X != 5)
|
if (X != 5)
|
||||||
alpha = alpha * 4 + alpha / 2;
|
alpha = alpha * 4 + alpha / 2;
|
||||||
|
@ -228,22 +227,24 @@ void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* pa
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u8*, u16*);
|
template void ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(u32, u32, u32*, u32, u32, GPU&);
|
||||||
template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u8*, u16*);
|
template void ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(u32, u32, u32*, u32, u32, GPU&);
|
||||||
|
|
||||||
template <int outputFmt, int colorBits>
|
template <int outputFmt, int colorBits>
|
||||||
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
|
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, bool color0Transparent, GPU& gpu)
|
||||||
{
|
{
|
||||||
for (int y = 0; y < height; y++)
|
for (int y = 0; y < height; y++)
|
||||||
{
|
{
|
||||||
for (int x = 0; x < width / (8 / colorBits); x++)
|
for (int x = 0; x < width / (16 / colorBits); x++)
|
||||||
{
|
{
|
||||||
u8 val = texData[x + y * (width / (8 / colorBits))];
|
// smallest possible row is 8 pixels with 2bpp => fits in u16
|
||||||
|
u16 val = gpu.ReadVRAMFlat_Texture<u16>(addr + 2 * (x + y * (width / (16 / colorBits))));
|
||||||
|
|
||||||
for (int i = 0; i < 8 / colorBits; i++)
|
for (int i = 0; i < 16 / colorBits; i++)
|
||||||
{
|
{
|
||||||
u32 index = (val >> (i * colorBits)) & ((1 << colorBits) - 1);
|
u32 index = val & ((1 << colorBits) - 1);
|
||||||
u16 color = palData[index];
|
val >>= colorBits;
|
||||||
|
u16 color = gpu.ReadVRAMFlat_TexPal<u16>(palAddr + index * 2);
|
||||||
|
|
||||||
bool transparent = color0Transparent && index == 0;
|
bool transparent = color0Transparent && index == 0;
|
||||||
u32 res;
|
u32 res;
|
||||||
|
@ -256,14 +257,14 @@ void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16*
|
||||||
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
|
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
|
||||||
| (transparent ? 0 : 0xFF000000); break;
|
| (transparent ? 0 : 0xFF000000); break;
|
||||||
}
|
}
|
||||||
output[x * (8 / colorBits) + y * width + i] = res;
|
output[x * (16 / colorBits) + y * width + i] = res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u8*, u16*, bool);
|
template void ConvertNColorsTexture<outputFmt_RGB6A5, 2>(u32, u32, u32*, u32, u32, bool, GPU&);
|
||||||
template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u8*, u16*, bool);
|
template void ConvertNColorsTexture<outputFmt_RGB6A5, 4>(u32, u32, u32*, u32, u32, bool, GPU&);
|
||||||
template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u8*, u16*, bool);
|
template void ConvertNColorsTexture<outputFmt_RGB6A5, 8>(u32, u32, u32*, u32, u32, bool, GPU&);
|
||||||
|
|
||||||
}
|
}
|
|
@ -32,13 +32,13 @@ enum
|
||||||
};
|
};
|
||||||
|
|
||||||
template <int outputFmt>
|
template <int outputFmt>
|
||||||
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u8* texData);
|
void ConvertBitmapTexture(u32 width, u32 height, u32* output, u32 addr, GPU& gpu);
|
||||||
template <int outputFmt>
|
template <int outputFmt>
|
||||||
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData);
|
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u32 addr, u32 addrAux, u32 palAddr, GPU& gpu);
|
||||||
template <int outputFmt, int X, int Y>
|
template <int outputFmt, int X, int Y>
|
||||||
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData);
|
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, GPU& gpu);
|
||||||
template <int outputFmt, int colorBits>
|
template <int outputFmt, int colorBits>
|
||||||
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent);
|
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u32 addr, u32 palAddr, bool color0Transparent, GPU& gpu);
|
||||||
|
|
||||||
template <typename TexLoaderT, typename TexHandleT>
|
template <typename TexLoaderT, typename TexHandleT>
|
||||||
class Texcache
|
class Texcache
|
||||||
|
@ -48,6 +48,50 @@ public:
|
||||||
: TexLoader(texloader) // probably better if this would be a move constructor???
|
: TexLoader(texloader) // probably better if this would be a move constructor???
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
// Hashes `size` bytes of `vram` beginning at offset `addr`. A range that runs
// past `vramSize` continues at offset 0 rather than reading beyond the buffer.
// The range is hashed in contiguous pieces with XXH64; the hash of each piece
// is passed as the seed for the next one (the first seed is 0).
// Assumes vramSize is a power of two (the wrap is done by masking with
// vramSize - 1) and that addr starts out below vramSize -- TODO confirm
// against callers.
u64 MaskedHash(u8* vram, u32 vramSize, u32 addr, u32 size)
{
    u64 digest = 0;

    for (u32 chunk = 0; size > 0; size -= chunk)
    {
        // a range crossing the end of VRAM is cut at the end; whatever is
        // left over is picked up by the next iteration, starting from 0
        chunk = (addr + size > vramSize) ? (vramSize - addr) : size;

        digest = XXH64(&vram[addr], chunk, digest);

        // after a cut piece addr lands exactly on vramSize and the mask
        // brings it back to 0
        addr = (addr + chunk) & (vramSize - 1);
        assert(size >= chunk);
    }

    return digest;
}
|
||||||
|
|
||||||
|
// Decides whether a cached VRAM range no longer matches what was hashed
// earlier.
//
// start, size  byte range to check; it may run past vramSize, in which case
//              it wraps around to the beginning
// oldHash      hash previously obtained from MaskedHash for the same range
// dirty        dirty bits, one bit per VRAMDirtyGranularity bytes, packed
//              64 bits to a u64 word
// vram         memory the range refers to
// vramSize     size of that memory in bytes
//
// Returns true only when at least one dirty bit overlaps the range AND the
// freshly computed hash differs from oldHash.
//
// Assumes vramSize / VRAMDirtyGranularity is a power of two and at least 64
// (the word index is wrapped by masking) -- TODO confirm.
bool CheckInvalid(u32 start, u32 size, u64 oldHash, u64* dirty, u8* vram, u32 vramSize)
{
    u32 startBit = start / VRAMDirtyGranularity;
    u32 bitsCount = ((start + size + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;

    u32 startEntry = startBit >> 6;
    u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;

    // j counts 64-bit words, so the wrap-around mask has to be derived from
    // the number of words (dirty bits / 64), not from the number of dirty
    // bits; masking with the bit count lets a wrapping range index past the
    // end of dirty[]
    const u32 entryMask = ((vramSize / VRAMDirtyGranularity) >> 6) - 1;

    for (u32 j = startEntry; j < startEntry + entriesCount; j++)
    {
        // GetRangedBitMask gets the unwrapped word index, only the array
        // access is wrapped
        if (GetRangedBitMask(j, startBit, bitsCount) & dirty[j & entryMask])
        {
            // the hash does not depend on j, so the first dirty word settles
            // the answer; no need to rehash for every further dirty word
            return MaskedHash(vram, vramSize, start, size) != oldHash;
        }
    }

    return false;
}
|
||||||
|
|
||||||
bool Update(GPU& gpu)
|
bool Update(GPU& gpu)
|
||||||
{
|
{
|
||||||
auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu);
|
auto textureDirty = gpu.VRAMDirty_Texture.DeriveState(gpu.VRAMMap_Texture, gpu);
|
||||||
|
@ -66,41 +110,22 @@ public:
|
||||||
{
|
{
|
||||||
for (u32 i = 0; i < 2; i++)
|
for (u32 i = 0; i < 2; i++)
|
||||||
{
|
{
|
||||||
u32 startBit = entry.TextureRAMStart[i] / VRAMDirtyGranularity;
|
if (CheckInvalid(entry.TextureRAMStart[i], entry.TextureRAMSize[i],
|
||||||
u32 bitsCount = ((entry.TextureRAMStart[i] + entry.TextureRAMSize[i] + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
|
entry.TextureHash[i],
|
||||||
|
textureDirty.Data,
|
||||||
u32 startEntry = startBit >> 6;
|
gpu.VRAMFlat_Texture, sizeof(gpu.VRAMFlat_Texture)))
|
||||||
u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
|
|
||||||
for (u32 j = startEntry; j < startEntry + entriesCount; j++)
|
|
||||||
{
|
|
||||||
if (GetRangedBitMask(j, startBit, bitsCount) & textureDirty.Data[j])
|
|
||||||
{
|
|
||||||
u64 newTexHash = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
|
|
||||||
|
|
||||||
if (newTexHash != entry.TextureHash[i])
|
|
||||||
goto invalidate;
|
goto invalidate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (texPalChanged && entry.TexPalSize > 0)
|
if (texPalChanged && entry.TexPalSize > 0)
|
||||||
{
|
{
|
||||||
u32 startBit = entry.TexPalStart / VRAMDirtyGranularity;
|
if (CheckInvalid(entry.TexPalStart, entry.TexPalSize,
|
||||||
u32 bitsCount = ((entry.TexPalStart + entry.TexPalSize + VRAMDirtyGranularity - 1) / VRAMDirtyGranularity) - startBit;
|
entry.TexPalHash,
|
||||||
|
texPalDirty.Data,
|
||||||
u32 startEntry = startBit >> 6;
|
gpu.VRAMFlat_TexPal, sizeof(gpu.VRAMFlat_TexPal)))
|
||||||
u64 entriesCount = ((startBit + bitsCount + 0x3F) >> 6) - startEntry;
|
|
||||||
for (u32 j = startEntry; j < startEntry + entriesCount; j++)
|
|
||||||
{
|
|
||||||
if (GetRangedBitMask(j, startBit, bitsCount) & texPalDirty.Data[j])
|
|
||||||
{
|
|
||||||
u64 newPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
|
|
||||||
if (newPalHash != entry.TexPalHash)
|
|
||||||
goto invalidate;
|
goto invalidate;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
it++;
|
it++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -163,17 +188,13 @@ public:
|
||||||
{
|
{
|
||||||
entry.TextureRAMSize[0] = width*height*2;
|
entry.TextureRAMSize[0] = width*height*2;
|
||||||
|
|
||||||
ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, &gpu.VRAMFlat_Texture[addr]);
|
ConvertBitmapTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, addr, gpu);
|
||||||
}
|
}
|
||||||
else if (fmt == 5)
|
else if (fmt == 5)
|
||||||
{
|
{
|
||||||
u8* texData = &gpu.VRAMFlat_Texture[addr];
|
|
||||||
u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1);
|
u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1);
|
||||||
if (addr >= 0x40000)
|
if (addr >= 0x40000)
|
||||||
slot1addr += 0x10000;
|
slot1addr += 0x10000;
|
||||||
u8* texAuxData = &gpu.VRAMFlat_Texture[slot1addr];
|
|
||||||
|
|
||||||
u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palBase*16);
|
|
||||||
|
|
||||||
entry.TextureRAMSize[0] = width*height/16*4;
|
entry.TextureRAMSize[0] = width*height/16*4;
|
||||||
entry.TextureRAMStart[1] = slot1addr;
|
entry.TextureRAMStart[1] = slot1addr;
|
||||||
|
@ -181,7 +202,7 @@ public:
|
||||||
entry.TexPalStart = palBase*16;
|
entry.TexPalStart = palBase*16;
|
||||||
entry.TexPalSize = 0x10000;
|
entry.TexPalSize = 0x10000;
|
||||||
|
|
||||||
ConvertCompressedTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, texData, texAuxData, palData);
|
ConvertCompressedTexture<outputFmt_RGB6A5>(width, height, DecodingBuffer, addr, slot1addr, entry.TexPalStart, gpu);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -204,30 +225,29 @@ public:
|
||||||
entry.TexPalStart = palAddr;
|
entry.TexPalStart = palAddr;
|
||||||
entry.TexPalSize = numPalEntries*2;
|
entry.TexPalSize = numPalEntries*2;
|
||||||
|
|
||||||
u8* texData = &gpu.VRAMFlat_Texture[addr];
|
|
||||||
u16* palData = (u16*)(gpu.VRAMFlat_TexPal + palAddr);
|
|
||||||
|
|
||||||
//assert(entry.TexPalStart+entry.TexPalSize <= 128*1024*1024);
|
//assert(entry.TexPalStart+entry.TexPalSize <= 128*1024*1024);
|
||||||
|
|
||||||
bool color0Transparent = texParam & (1 << 29);
|
bool color0Transparent = texParam & (1 << 29);
|
||||||
|
|
||||||
switch (fmt)
|
switch (fmt)
|
||||||
{
|
{
|
||||||
case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, DecodingBuffer, texData, palData); break;
|
case 1: ConvertAXIYTexture<outputFmt_RGB6A5, 3, 5>(width, height, DecodingBuffer, addr, palAddr, gpu); break;
|
||||||
case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, DecodingBuffer, texData, palData); break;
|
case 6: ConvertAXIYTexture<outputFmt_RGB6A5, 5, 3>(width, height, DecodingBuffer, addr, palAddr, gpu); break;
|
||||||
case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
|
case 2: ConvertNColorsTexture<outputFmt_RGB6A5, 2>(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break;
|
||||||
case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
|
case 3: ConvertNColorsTexture<outputFmt_RGB6A5, 4>(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break;
|
||||||
case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, DecodingBuffer, texData, palData, color0Transparent); break;
|
case 4: ConvertNColorsTexture<outputFmt_RGB6A5, 8>(width, height, DecodingBuffer, addr, palAddr, color0Transparent, gpu); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++)
|
for (int i = 0; i < 2; i++)
|
||||||
{
|
{
|
||||||
if (entry.TextureRAMSize[i])
|
if (entry.TextureRAMSize[i])
|
||||||
entry.TextureHash[i] = XXH3_64bits(&gpu.VRAMFlat_Texture[entry.TextureRAMStart[i]], entry.TextureRAMSize[i]);
|
entry.TextureHash[i] = MaskedHash(gpu.VRAMFlat_Texture, sizeof(gpu.VRAMFlat_Texture),
|
||||||
|
entry.TextureRAMStart[i], entry.TextureRAMSize[i]);
|
||||||
}
|
}
|
||||||
if (entry.TexPalSize)
|
if (entry.TexPalSize)
|
||||||
entry.TexPalHash = XXH3_64bits(&gpu.VRAMFlat_TexPal[entry.TexPalStart], entry.TexPalSize);
|
entry.TexPalHash = MaskedHash(gpu.VRAMFlat_TexPal, sizeof(gpu.VRAMFlat_TexPal),
|
||||||
|
entry.TexPalStart, entry.TexPalSize);
|
||||||
|
|
||||||
auto& texArrays = TexArrays[widthLog2][heightLog2];
|
auto& texArrays = TexArrays[widthLog2][heightLog2];
|
||||||
auto& freeTextures = FreeTextures[widthLog2][heightLog2];
|
auto& freeTextures = FreeTextures[widthLog2][heightLog2];
|
||||||
|
|
Loading…
Reference in New Issue