initial texcache implementation

only works with the sw renderer
This commit is contained in:
RSDuck 2020-03-20 17:49:37 +01:00
parent e8f4735c7f
commit 36075dae5a
13 changed files with 5985 additions and 13 deletions

View File

@ -15,6 +15,7 @@ add_library(core STATIC
GPU3D.cpp
GPU3D_OpenGL.cpp
GPU3D_Soft.cpp
GPU3D_TexCache.cpp
NDS.cpp
NDSCart.cpp
OpenGLSupport.cpp
@ -24,6 +25,9 @@ add_library(core STATIC
SPU.cpp
Wifi.cpp
WifiAP.cpp
xxhash/xxhash.c
stb/stb.cpp
)
if (WIN32)

View File

@ -389,6 +389,7 @@ void MapVRAM_AB(u32 bank, u8 cnt)
case 3: // texture
VRAMMap_Texture[oldofs] &= ~bankmask;
GPU3D::TexCache::InvalidateTexSlot(oldofs);
break;
}
}
@ -412,6 +413,7 @@ void MapVRAM_AB(u32 bank, u8 cnt)
case 3: // texture
VRAMMap_Texture[ofs] |= bankmask;
GPU3D::TexCache::InvalidateTexSlot(ofs);
break;
}
}
@ -449,6 +451,7 @@ void MapVRAM_CD(u32 bank, u8 cnt)
case 3: // texture
VRAMMap_Texture[oldofs] &= ~bankmask;
GPU3D::TexCache::InvalidateTexSlot(oldofs);
break;
case 4: // BBG/BOBJ
@ -484,6 +487,7 @@ void MapVRAM_CD(u32 bank, u8 cnt)
case 3: // texture
VRAMMap_Texture[ofs] |= bankmask;
GPU3D::TexCache::InvalidateTexSlot(ofs);
break;
case 4: // BBG/BOBJ
@ -527,6 +531,8 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 3: // texture palette
UNMAP_RANGE(TexPal, 0, 4);
for (int i = 0; i < 4; i++)
GPU3D::TexCache::InvalidatePalSlot(i);
break;
case 4: // ABG ext palette
@ -555,6 +561,8 @@ void MapVRAM_E(u32 bank, u8 cnt)
case 3: // texture palette
MAP_RANGE(TexPal, 0, 4);
for (int i = 0; i < 4; i++)
GPU3D::TexCache::InvalidatePalSlot(i);
break;
case 4: // ABG ext palette
@ -607,6 +615,7 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 3: // texture palette
VRAMMap_TexPal[(oldofs & 0x1) + ((oldofs & 0x2) << 1)] &= ~bankmask;
GPU3D::TexCache::InvalidatePalSlot((oldofs & 0x1) + ((oldofs & 0x2) << 1));
break;
case 4: // ABG ext palette
@ -652,6 +661,7 @@ void MapVRAM_FG(u32 bank, u8 cnt)
case 3: // texture palette
VRAMMap_TexPal[(ofs & 0x1) + ((ofs & 0x2) << 1)] |= bankmask;
GPU3D::TexCache::InvalidatePalSlot((ofs & 0x1) + ((ofs & 0x2) << 1));
break;
case 4: // ABG ext palette

View File

@ -47,6 +47,7 @@ extern u8 VRAM_H[ 32*1024];
extern u8 VRAM_I[ 16*1024];
extern u8* VRAM[9];
extern u32 VRAMMask[9];
extern u32 VRAMMap_LCDC;
extern u32 VRAMMap_ABG[0x20];

View File

@ -281,6 +281,8 @@ bool Init()
Renderer = -1;
// SetRenderer() will be called to set it up later
TexCache::Init();
return true;
}
@ -289,6 +291,8 @@ void DeInit()
if (Renderer == 0) SoftRenderer::DeInit();
else GLRenderer::DeInit();
TexCache::DeInit();
delete CmdFIFO;
delete CmdPIPE;
@ -386,6 +390,8 @@ void Reset()
FlushRequest = 0;
FlushAttributes = 0;
TexCache::Reset();
ResetRenderingState();
if (Renderer == 0) SoftRenderer::Reset();
else GLRenderer::Reset();

View File

@ -127,6 +127,39 @@ void Write8(u32 addr, u8 val);
void Write16(u32 addr, u16 val);
void Write32(u32 addr, u32 val);
namespace TexCache
{
typedef u64 ExternalTexHandle;
typedef u32* (*AllocTextureFunc)(ExternalTexHandle* handle, u32 width, u32 height);
typedef void (*FreeTextureFunc)(ExternalTexHandle handle, u32 width, u32 height);
typedef void (*FinaliseTextureFunc)(ExternalTexHandle handle, u32 width, u32 height);
enum
{
outputFmt_RGB6A5,
outputFmt_RGBA8,
outputFmt_BGRA8
};
void Init();
void DeInit();
void Reset();
void UpdateTextures();
void SaveTextures();
template <int format>
ExternalTexHandle GetTexture(u32 texParam, u32 palBase);
void InvalidateTexSlot(u32 base);
void InvalidatePalSlot(u32 base);
}
namespace SoftRenderer
{
@ -140,6 +173,10 @@ void VCount144();
void RenderFrame();
u32* GetLine(int line);
u32* AllocateTexture(TexCache::ExternalTexHandle* handle, u32 width, u32 height);
void FreeTexture(TexCache::ExternalTexHandle handle, u32 width, u32 height);
void FinaliseTexture(TexCache::ExternalTexHandle handle, u32 width, u32 height);
}
namespace GLRenderer

View File

@ -23,6 +23,7 @@
#include "Config.h"
#include "Platform.h"
#include <SDL2/SDL.h>
namespace GPU3D
{
@ -58,6 +59,83 @@ bool PrevIsShadowMask;
bool Enabled;
// texture cache implementation
/*
all texture (data) sizes are a power of two
we can exploit to have everything more compact and
have super fast alloc/dealloc times by grouping all the
textures with the same pixel count together
we use the count trailing zeroes function (aka log2)
to linearise the indices
*/
struct TextureAllocator
{
u32 Length = 0;
u32* Pixels;
u64 FreeEntries[8];
u32 FreeEntriesLeft = 0;
};
// all sizes below 8*8 (log2(64)=6) can be ignored
TextureAllocator TextureMem[14];
TextureAllocator& GetTextureAllocator(u32 width, u32 height)
{
return TextureMem[__builtin_ctz(width * height) - 6];
}
u32* AllocateTexture(TexCache::ExternalTexHandle* handle, u32 width, u32 height)
{
TextureAllocator& allocator = GetTextureAllocator(width, height);
if (allocator.FreeEntriesLeft == 0)
{
u32 newLength = (20 - (__builtin_ctz(width * height) - 6) + 4) + (allocator.Length * 2) / 3;
// while it's theoretically possible to hit this limit
// other things will probably break before it
if (newLength >= 64*8)
abort();
u32* newPixels = new u32[width * height * newLength];
if (allocator.Length)
{
memcpy(newPixels, allocator.Pixels, allocator.Length * width * height * 4);
delete[] allocator.Pixels;
}
allocator.Pixels = newPixels;
allocator.FreeEntriesLeft = newLength - allocator.Length;
allocator.Length = newLength;
}
for (int i = 0; i < (allocator.Length + 0x3F & ~0x3F) >> 6; i++)
{
if (allocator.FreeEntries[i] != 0xFFFFFFFFFFFFFFFF)
{
allocator.FreeEntriesLeft--;
u64 freeIdx = __builtin_ctzll(~allocator.FreeEntries[i]);
allocator.FreeEntries[i] |= 1ULL << freeIdx;
*handle = (i * 64 + freeIdx) * width * height;
return &allocator.Pixels[*handle];
}
}
// should never happen, hopefully...
abort();
}
void FreeTexture(TexCache::ExternalTexHandle handle, u32 width, u32 height)
{
TextureAllocator& allocator = GetTextureAllocator(width, height);
handle /= width * height;
allocator.FreeEntriesLeft++;
allocator.FreeEntries[handle >> 6] &= ~(1 << (handle & 0x3F));
}
// threading
void* RenderThread;
@ -129,6 +207,12 @@ void DeInit()
void Reset()
{
for (int i = 0; i < 14; i++)
{
memset(TextureMem[i].FreeEntries, 0, sizeof(TextureMem[i].FreeEntries));
TextureMem[i].FreeEntriesLeft = TextureMem[i].Length * 64;
}
memset(ColorBuffer, 0, BufferSize * 2 * 4);
memset(DepthBuffer, 0, BufferSize * 2 * 4);
memset(AttrBuffer, 0, BufferSize * 2 * 4);
@ -532,6 +616,8 @@ typedef struct
{
Polygon* PolyData;
u32* VertexData;
u32* TextureData;
Slope<0> SlopeL;
Slope<1> SlopeR;
s32 XL, XR;
@ -543,7 +629,7 @@ typedef struct
RendererPolygon PolygonList[2048];
void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha)
u32 TextureLookup(u32* texture, u32 texparam, u32 texpal, s16 s, s16 t)
{
u32 vramaddr = (texparam & 0xFFFF) << 3;
@ -589,6 +675,8 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
else if (t >= height) t = height-1;
}
return texture[s + t * width];
/*
u8 alpha0;
if (texparam & (1<<29)) alpha0 = 0;
else alpha0 = 31;
@ -766,7 +854,7 @@ void TextureLookup(u32 texparam, u32 texpal, s16 s, s16 t, u16* color, u8* alpha
*alpha = (*color & 0x8000) ? 31 : 0;
}
break;
}
}*/
}
// depth test is 'less or equal' instead of 'less than' under the following conditions:
@ -849,7 +937,7 @@ u32 AlphaBlend(u32 srccolor, u32 dstcolor, u32 alpha)
return srcR | (srcG << 8) | (srcB << 16) | (dstalpha << 24);
}
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t, u32* texture)
{
u8 r, g, b, a;
@ -883,14 +971,12 @@ u32 RenderPixel(Polygon* polygon, u8 vr, u8 vg, u8 vb, s16 s, s16 t)
if ((RenderDispCnt & (1<<0)) && (((polygon->TexParam >> 26) & 0x7) != 0))
{
u8 tr, tg, tb;
u32 tcolor = TextureLookup(texture, polygon->TexParam, polygon->TexPalette, s, t);
u16 tcolor; u8 talpha;
TextureLookup(polygon->TexParam, polygon->TexPalette, s, t, &tcolor, &talpha);
tr = (tcolor << 1) & 0x3E; if (tr) tr++;
tg = (tcolor >> 4) & 0x3E; if (tg) tg++;
tb = (tcolor >> 9) & 0x3E; if (tb) tb++;
u8 tr = tcolor & 0x3E; if (tr) tr++;
u8 tg = (tcolor >> 8) & 0x3E; if (tg) tg++;
u8 tb = (tcolor >> 16) & 0x3E; if (tb) tb++;
u8 talpha = tcolor >> 24;
if (blendmode & 0x1)
{
@ -1052,6 +1138,14 @@ void SetupPolygonRightEdge(RendererPolygon* rp, s32 y)
void SetupPolygon(RendererPolygon* rp, Polygon* polygon)
{
if (polygon->TexParam & 0x1C000000)
{
TexCache::ExternalTexHandle handle = TexCache::GetTexture<TexCache::outputFmt_RGB6A5>(polygon->TexParam, polygon->TexPalette);
u32 width = 8 << ((polygon->TexParam >> 20) & 0x7);
u32 height = 8 << ((polygon->TexParam >> 23) & 0x7);
rp->TextureData = &GetTextureAllocator(width, height).Pixels[handle];
}
u32 nverts = polygon->NumVertices;
u32 vtop = polygon->VTop, vbot = polygon->VBottom;
@ -1508,7 +1602,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
s16 s = interpX.Interpolate(sl, sr);
s16 t = interpX.Interpolate(tl, tr);
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t);
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t, rp->TextureData);
u8 alpha = color >> 24;
// alpha test
@ -1604,7 +1698,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
s16 s = interpX.Interpolate(sl, sr);
s16 t = interpX.Interpolate(tl, tr);
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t);
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t, rp->TextureData);
u8 alpha = color >> 24;
// alpha test
@ -1679,7 +1773,7 @@ void RenderPolygonScanline(RendererPolygon* rp, s32 y)
s16 s = interpX.Interpolate(sl, sr);
s16 t = interpX.Interpolate(tl, tr);
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t);
u32 color = RenderPixel(polygon, vr>>3, vg>>3, vb>>3, s, t, rp->TextureData);
u8 alpha = color >> 24;
// alpha test
@ -2048,12 +2142,18 @@ void ClearBuffers()
void RenderPolygons(bool threaded, Polygon** polygons, int npolys)
{
u64 ticksStart = SDL_GetPerformanceCounter();
TexCache::UpdateTextures();
int j = 0;
for (int i = 0; i < npolys; i++)
{
if (polygons[i]->Degenerate) continue;
SetupPolygon(&PolygonList[j++], polygons[i]);
}
u64 tickesEnd = SDL_GetPerformanceCounter();
printf("time %fms\n", (tickesEnd-ticksStart)/(float)SDL_GetPerformanceFrequency()*1000.f);
TexCache::SaveTextures();
RenderScanline(0, j);

516
src/GPU3D_TexCache.cpp Normal file
View File

@ -0,0 +1,516 @@
#include "GPU3D.h"
#include "NDS.h"
#include "GPU.h"
#include "FIFO.h"
#include "types.h"
#define XXH_STATIC_LINKING_ONLY
#include "xxhash/xxhash.h"
#include "stb/stb_image_write.h"
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <assert.h>
#include <string.h>
namespace GPU3D
{
namespace TexCache
{
inline u32 ConvertRGB5ToRGB8(u16 val)
{
return (((u32)val & 0x1F) << 3)
| (((u32)val & 0x3E0) << 6)
| (((u32)val & 0x7C00) << 9);
}
inline u32 ConvertRGB5ToBGR8(u16 val)
{
return (((u32)val & 0x1F) << 9)
| (((u32)val & 0x3E0) << 6)
| (((u32)val & 0x7C00) << 3);
}
inline u32 ConvertRGB5ToRGB6(u16 val)
{
return (((u32)val & 0x1F) << 1)
| (((u32)val & 0x3E0) << 4)
| (((u32)val & 0x7C00) << 7);
}
template <int outputFmt>
void ConvertCompressedTexture(u32 width, u32 height, u32* output, u8* texData, u8* texAuxData, u16* palData)
{
// we process a whole block at the time
for (int y = 0; y < height / 4; y++)
{
for (int x = 0; x < width / 4; x++)
{
u32 data = ((u32*)texData)[x + y * (width / 4)];
u16 auxData = ((u16*)texAuxData)[x + y * (width / 4)];
u32 paletteOffset = auxData & 0x3FFF;
u16 color0 = palData[paletteOffset*2] | 0x8000;
u16 color1 = palData[paletteOffset*2+1] | 0x8000;
u16 color2, color3;
switch ((auxData >> 14) & 0x3)
{
case 0:
color2 = palData[paletteOffset*2+2] | 0x8000;
color3 = 0;
break;
case 1:
{
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
u32 b0 = color0 & 0x7C00;
u32 r1 = color1 & 0x001F;
u32 g1 = color1 & 0x03E0;
u32 b1 = color1 & 0x7C00;
u32 r = (r0 + r1) >> 1;
u32 g = ((g0 + g1) >> 1) & 0x03E0;
u32 b = ((b0 + b1) >> 1) & 0x7C00;
color2 = r | g | b | 0x8000;
}
color3 = 0;
break;
case 2:
color2 = palData[paletteOffset*2+2] | 0x8000;
color3 = palData[paletteOffset*2+3] | 0x8000;
break;
case 3:
{
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
u32 b0 = color0 & 0x7C00;
u32 r1 = color1 & 0x001F;
u32 g1 = color1 & 0x03E0;
u32 b1 = color1 & 0x7C00;
u32 r = (r0*5 + r1*3) >> 3;
u32 g = ((g0*5 + g1*3) >> 3) & 0x03E0;
u32 b = ((b0*5 + b1*3) >> 3) & 0x7C00;
color2 = r | g | b | 0x8000;
}
{
u32 r0 = color0 & 0x001F;
u32 g0 = color0 & 0x03E0;
u32 b0 = color0 & 0x7C00;
u32 r1 = color1 & 0x001F;
u32 g1 = color1 & 0x03E0;
u32 b1 = color1 & 0x7C00;
u32 r = (r0*3 + r1*5) >> 3;
u32 g = ((g0*3 + g1*5) >> 3) & 0x03E0;
u32 b = ((b0*3 + b1*5) >> 3) & 0x7C00;
color3 = r | g | b | 0x8000;
}
break;
}
// in 2020 our default data types are big enough to be used as lookup tables...
u64 packed = color0 | ((u64)color1 << 16) | ((u64)color2 << 32) | ((u64)color3 << 48);
for (int j = 0; j < 4; j++)
{
for (int i = 0; i < 4; i++)
{
u16 color = (packed >> 16 * (data >> 2 * (i + j * 4))) & 0xFFFF;
u32 res;
switch (outputFmt)
{
case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color)
| ((color & 0x8000) ? 0x1F000000 : 0); break;
case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color)
| ((color & 0x8000) ? 0xFF000000 : 0); break;
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
| ((color & 0x8000) ? 0xFF000000 : 0); break;
}
output[x * 4 + i + (y * 4 + j) * width] = res;
}
}
}
}
}
template <int outputFmt>
void ConvertDirectColorTexture(u32 width, u32 height, u32* output, u8* texData)
{
u16* texData16 = (u16*)texData;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
u16 color = texData16[x + y * width];
u32 res;
switch (outputFmt)
{
case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color)
| ((color & 0x8000) ? 0x1F000000 : 0); break;
case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color)
| ((color & 0x8000) ? 0xFF000000 : 0); break;
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
| ((color & 0x8000) ? 0xFF000000 : 0); break;
}
output[x + y * width] = res;
}
}
}
template <int outputFmt, int X, int Y>
void ConvertAXIYTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
{
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
u8 val = texData[x + y * width];
u32 idx = val & ((1 << Y) - 1);
u16 color = palData[idx];
u32 alpha = (val >> X) & ((1 << X) - 1);
if (X != 5)
alpha = alpha * 4 + alpha / 2;
if (color0Transparent && idx == 0)
alpha = 0;
u32 res;
switch (outputFmt)
{
case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color) | alpha << 24; break;
// make sure full alpha == 255
case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color) | (alpha << 27 | (alpha & 0x1C) << 22); break;
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color) | (alpha << 27 | (alpha & 0x1C) << 22); break;
}
output[x + y * width] = res;
}
}
}
template <int outputFmt, int colorBits>
void ConvertNColorsTexture(u32 width, u32 height, u32* output, u8* texData, u16* palData, bool color0Transparent)
{
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width / (8 / colorBits); x++)
{
u8 val = texData[x + y * (width / (8 / colorBits))];
for (int i = 0; i < 8 / colorBits; i++)
{
u32 index = (val >> (i * colorBits)) & ((1 << colorBits) - 1);
u16 color = palData[index];
bool transparent = color0Transparent && index == 0;
u32 res;
switch (outputFmt)
{
case outputFmt_RGB6A5: res = ConvertRGB5ToRGB6(color)
| (transparent ? 0 : 0x1F000000); break;
case outputFmt_RGBA8: res = ConvertRGB5ToRGB8(color)
| (transparent ? 0 : 0xFF000000); break;
case outputFmt_BGRA8: res = ConvertRGB5ToBGR8(color)
| (transparent ? 0 : 0xFF000000); break;
}
output[x * (8 / colorBits) + y * width + i] = res;
}
}
}
}
u8 UnpackBuffer[256*1024];
struct Texture
{
// 1 bit ^= 1 KB
u64 PaletteMask[2];
u64 TextureMask[8];
ExternalTexHandle Handle;
};
u64 PaletteCacheStatus;
u8 PaletteCache[128*1024];
u64 PaletteDirty[2];
u64 TexturesDirty[8];
std::unordered_map<u64, Texture> TextureCache;
int converted = 0;
int pixelsConverted = 0;
bool updatePalette = false;
bool copyTexture = false;
bool textureUpdated = false;
void Init()
{
}
void DeInit()
{
}
void Reset()
{
PaletteCacheStatus = 0;
memset(PaletteDirty, 0, 8*2);
memset(TexturesDirty, 0, 8*8);
TextureCache.clear();
}
u8* GetTexturePtr(u32 addr, u32 size, u8** unpackBuffer)
{
u32 map = GPU::VRAMMap_Texture[addr >> 17];
if (((addr + size) & ~0x1FFFF) == (addr & ~0x1FFFF) && map && (map & (map - 1)) == 0)
{
// fast path; inside a single block of vram and no overlapping blocks mapped
// no copying necessary
return GPU::VRAM[__builtin_ctz(map)] + (addr & 0x1FFFF);
}
else
{
copyTexture = true;
u8* buffer = *unpackBuffer;
*unpackBuffer += size;
for (int i = 0; i < size; i += 8)
{
*(u64*)&buffer[i] = GPU::ReadVRAM_Texture<u64>(addr + i);
}
return buffer;
}
}
void EnsurePaletteCoherent(u64 mask)
{
if ((PaletteCacheStatus & mask) != mask)
{
u32 updateField = ~PaletteCacheStatus & mask;
PaletteCacheStatus |= mask;
while (updateField != 0)
{
updatePalette = true;
int idx = __builtin_ctz(updateField);
u32 map = GPU::VRAMMap_TexPal[idx >> 3];
if (map && (map & (map - 1)) == 0)
{
u32 bank = __builtin_ctz(map);
memcpy(
PaletteCache + idx * 0x800,
GPU::VRAM[bank] + ((idx * 0x800) & GPU::VRAMMask[bank]),
0x800);
}
else
for (int i = 0; i < 0x800; i += 8)
*(u64*)&PaletteCache[idx * 0x800 + i] = GPU::ReadVRAM_TexPal<u64>(idx * 0x800 + i);
updateField &= ~(1 << idx);
}
}
}
void UpdateTextures()
{
converted = 0;
pixelsConverted = 0;
updatePalette = false;
copyTexture = false;
textureUpdated = false;
bool paletteDirty = PaletteDirty[0] | PaletteDirty[1];
bool textureDirty = false;
for (int i = 0; i < 8; i++)
textureDirty |= TexturesDirty[i] != 0;
if (paletteDirty || textureDirty)
{
textureUpdated = true;
for (auto it = TextureCache.begin(); it != TextureCache.end(); )
{
u64 dirty = (it->second.PaletteMask[0] & PaletteDirty[0])
| (it->second.PaletteMask[1] | PaletteDirty[1]);
for (int i = 0; i < 8; i++)
dirty |= it->second.TextureMask[i] & TexturesDirty[i];
if (dirty)
{
u32 width = 8 << ((it->first >> 20) & 0x7);
u32 height = 8 << ((it->first >> 23) & 0x7);
if (GPU3D::Renderer == 0)
SoftRenderer::FreeTexture(it->second.Handle, width, height);
it = TextureCache.erase(it);
}
else
it++;
}
memset(PaletteDirty, 0, 8*2);
memset(TexturesDirty, 0, 8*8);
}
}
inline u64 MakePaletteMask(u32 addr, u32 size)
{
return ((1ULL << (((addr + size + 0x7FF & ~0x7FF) >> 11) - (addr >> 11))) - 1) << (addr >> 11);
}
inline void MakeDirtyMask(u64* out, u32 addr, u32 size)
{
u32 start = addr >> 10;
u32 count = (((addr + size + 0x3FF) & ~0x3FF) >> 10) - start;
u32 firstIdx = start >> 6;
u32 indicesCount = (((count + 0x3F) & ~0x3F) >> 6) - firstIdx;
out[firstIdx] = (1ULL << (63 - (start & 0x3F))) - 1 << (start & 0x3F);
out[firstIdx + indicesCount - 1] = (1ULL << (start & 0x3F)) - 1;
for (int i = firstIdx + 1; i < firstIdx + indicesCount - 1; i++)
out[i] |= 0xFFFFFFFFFFFFFFFF;
}
template <int format>
ExternalTexHandle GetTexture(u32 texParam, u32 palBase)
{
u32 fmt = (texParam >> 26) & 0x7;
u32 addr = (texParam & 0xFFFF) << 3;
u32 width = 8 << ((texParam >> 20) & 0x7);
u32 height = 8 << ((texParam >> 23) & 0x7);
if (fmt == 7)
palBase = 0;
u64 key = (u64)(texParam & 0x3FF0FFFF) | ((u64)palBase << 32);
auto lookup = TextureCache.emplace(std::make_pair(key, Texture()));
bool inserted = lookup.second;
Texture& texture = lookup.first->second;
if (inserted)
{
converted++;
pixelsConverted += width * height;
u8* unpackBuffer = UnpackBuffer;
u32* data = GPU3D::Renderer == 0
? SoftRenderer::AllocateTexture(&texture.Handle, width, height)
: NULL;
memset(texture.TextureMask, 0, 8*8);
memset(texture.PaletteMask, 0, 8*2);
if (fmt == 7)
{
u8* texData = GetTexturePtr(addr, width*height*2, &unpackBuffer);
MakeDirtyMask(texture.TextureMask, addr, width*height*2);
ConvertDirectColorTexture<format>(width, height, data, texData);
}
else if (fmt == 5)
{
u8* texData = GetTexturePtr(addr, width*height/16*4, &unpackBuffer);
u32 slot1addr = 0x20000 + ((addr & 0x1FFFC) >> 1);
if (addr >= 0x40000)
slot1addr += 0x10000;
u8* texAuxData = GetTexturePtr(slot1addr, width*height/16*2, &unpackBuffer);
MakeDirtyMask(texture.TextureMask, addr, width*height/16*4);
MakeDirtyMask(texture.TextureMask, slot1addr, width*height/16*2);
MakeDirtyMask(texture.PaletteMask, palBase*16, 0x10000);
u64 paletteMask = MakePaletteMask(palBase*16, 0x10000);
EnsurePaletteCoherent(MakePaletteMask(palBase*16, 0x10000));
u16* palData = (u16*)(PaletteCache + palBase*16);
ConvertCompressedTexture<format>(width, height, data, texData, texAuxData, palData);
}
else
{
u32 texSize, palAddr = palBase*16, palSize;
switch (fmt)
{
case 1: texSize = width*height; palSize = 32; break;
case 6: texSize = width*height; palSize = 8; break;
case 2: texSize = width*height/4; palSize = 4; palAddr >>= 1; break;
case 3: texSize = width*height/2; palSize = 16; break;
case 4: texSize = width*height; palSize = 256; break;
}
u8* texData = GetTexturePtr(addr, texSize, &unpackBuffer);
EnsurePaletteCoherent(MakePaletteMask(palAddr, palSize*2));
u16* palData = (u16*)(PaletteCache + palAddr);
MakeDirtyMask(texture.TextureMask, addr, texSize);
MakeDirtyMask(texture.PaletteMask, palAddr, palSize);
bool color0Transparent = texParam & (1 << 29);
switch (fmt)
{
case 1: ConvertAXIYTexture<format, 3, 5>(width, height, data, texData, palData, color0Transparent); break;
case 6: ConvertAXIYTexture<format, 5, 3>(width, height, data, texData, palData, color0Transparent); break;
case 2: ConvertNColorsTexture<format, 2>(width, height, data, texData, palData, color0Transparent); break;
case 3: ConvertNColorsTexture<format, 4>(width, height, data, texData, palData, color0Transparent); break;
case 4: ConvertNColorsTexture<format, 8>(width, height, data, texData, palData, color0Transparent); break;
}
}
if (GPU3D::Renderer == 1)
{}
}
return texture.Handle;
}
std::unordered_set<XXH64_hash_t> AlreadySavedTextures;
void SaveTextures()
{
/*for (auto texture : TextureCache)
{
u32 width = 8 << ((texture.first >> 20) & 0x7);
u32 height = 8 << ((texture.first >> 23) & 0x7);
u32* data = &TextureMem[__builtin_ctz(width*height)-6].Pixels[texture.second.Index];
XXH64_hash_t hash = XXH3_64bits(data, width*height*4);
if (AlreadySavedTextures.count(hash) == 0)
{
char filename[128];
sprintf(filename, "./textures/%016x.png", hash);
stbi_write_png(filename, width, height, 4, data, width*4);
AlreadySavedTextures.insert(hash);
}
}*/
//printf("%d %d textures converted %d pixels %d %d %d\n", converted, TextureCache.size(), pixelsConverted, updatePalette, copyTexture, textureUpdated);
}
void InvalidateTexSlot(u32 base)
{
TexturesDirty[(base << 1)] = 0xFFFFFFFFFFFFFFFF;
TexturesDirty[(base << 1) + 1] = 0xFFFFFFFFFFFFFFFF;
}
void InvalidatePalSlot(u32 base)
{
PaletteDirty[base >> 2] |= 0xFFFF << (base & 0x3) * 16;
PaletteCacheStatus &= ~(0xFF << base * 8);
}
}
}
template GPU3D::TexCache::ExternalTexHandle
GPU3D::TexCache::GetTexture<GPU3D::TexCache::outputFmt_RGB6A5>(u32, u32);
template GPU3D::TexCache::ExternalTexHandle
GPU3D::TexCache::GetTexture<GPU3D::TexCache::outputFmt_RGBA8>(u32, u32);
template GPU3D::TexCache::ExternalTexHandle
GPU3D::TexCache::GetTexture<GPU3D::TexCache::outputFmt_BGRA8>(u32, u32);

View File

@ -20,6 +20,7 @@
#include <time.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <SDL2/SDL.h>
#include "libui/ui.h"

2
src/stb/stb.cpp Normal file
View File

@ -0,0 +1,2 @@
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

1666
src/stb/stb_image_write.h Normal file

File diff suppressed because it is too large Load Diff

1754
src/xxhash/xxh3.h Normal file

File diff suppressed because it is too large Load Diff

42
src/xxhash/xxhash.c Normal file
View File

@ -0,0 +1,42 @@
/*
* xxHash - Fast Hash algorithm
* Copyright (C) 2012-2016, Yann Collet
*
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* You can contact the author at :
* - xxHash homepage: http://www.xxhash.com
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*/
/* xxhash.c instantiates functions defined in xxhash.h
*/
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
#define XXH_IMPLEMENTATION /* access definitions */
#include "xxhash.h"

1833
src/xxhash/xxhash.h Normal file

File diff suppressed because it is too large Load Diff