From 7449230af8f88e84f861bf17dd8b6690e0f93383 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 8 Jul 2020 18:17:15 +0200 Subject: [PATCH] handle some palette textures on the GPU get rid of PixerPacker templates no need for GLSL precision qualifier in GL4 --- core/rend/TexCache.cpp | 98 +++++++++---- core/rend/TexCache.h | 204 ++++++++++++++------------ core/rend/gl4/gl4.h | 12 +- core/rend/gl4/gldraw.cpp | 18 ++- core/rend/gl4/gles.cpp | 124 ++++++++++------ core/rend/gles/gldraw.cpp | 16 +- core/rend/gles/gles.cpp | 114 ++++++++++++-- core/rend/gles/gles.h | 12 +- core/rend/gles/gltex.cpp | 21 ++- core/rend/vulkan/drawer.cpp | 23 ++- core/rend/vulkan/drawer.h | 2 +- core/rend/vulkan/oit/oit_drawer.cpp | 15 +- core/rend/vulkan/oit/oit_drawer.h | 2 +- core/rend/vulkan/oit/oit_pipeline.cpp | 3 +- core/rend/vulkan/oit/oit_pipeline.h | 24 ++- core/rend/vulkan/oit/oit_renderer.cpp | 26 +++- core/rend/vulkan/oit/oit_shaders.cpp | 30 +++- core/rend/vulkan/oit/oit_shaders.h | 3 +- core/rend/vulkan/pipeline.cpp | 1 + core/rend/vulkan/pipeline.h | 18 ++- core/rend/vulkan/shaders.cpp | 23 ++- core/rend/vulkan/shaders.h | 4 +- core/rend/vulkan/vulkan_renderer.cpp | 26 +++- 23 files changed, 598 insertions(+), 221 deletions(-) diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index 84d0a310f..0b25543c1 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -21,6 +21,7 @@ u32 palette16_ram[1024]; u32 palette32_ram[1024]; u32 pal_hash_256[4]; u32 pal_hash_16[64]; +bool palette_updated; // Rough approximation of LoD bias from D adjust param, only used to increase LoD const std::array D_Adjust_LoD_Bias = { @@ -86,8 +87,8 @@ void palette_update() { if (!pal_needs_update) return; - - pal_needs_update=false; + pal_needs_update = false; + palette_updated = true; switch(PAL_RAM_CTRL&3) { @@ -313,18 +314,20 @@ struct PvrTexInfo TexConvFP32 *PL32; TexConvFP32 *TW32; TexConvFP32 *VQ32; + // Conversion to 8 bpp (palette) + TexConvFP8 *TW8; }; static const PvrTexInfo format[8] = -{ // name bpp Final format Planar Twiddled VQ Planar(32b) Twiddled(32b) VQ (32b) - {"1555", 16, TextureType::_5551, tex1555_PL, tex1555_TW, tex1555_VQ, tex1555_PL32, tex1555_TW32, tex1555_VQ32 }, //1555 - {"565", 16, TextureType::_565, tex565_PL, tex565_TW, tex565_VQ, tex565_PL32, tex565_TW32, tex565_VQ32 }, //565 - {"4444", 16, TextureType::_4444, tex4444_PL, tex4444_TW, tex4444_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32 }, //4444 - {"yuv", 16, TextureType::_8888, NULL, NULL, NULL, texYUV422_PL, texYUV422_TW, texYUV422_VQ }, //yuv - {"bumpmap", 16, TextureType::_4444, texBMP_PL, texBMP_TW, texBMP_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32 }, //bump map - {"pal4", 4, TextureType::_5551, 0, texPAL4_TW, texPAL4_VQ, NULL, texPAL4_TW32, texPAL4_VQ32 }, //pal4 - {"pal8", 8, TextureType::_5551, 0, texPAL8_TW, texPAL8_VQ, NULL, texPAL8_TW32, texPAL8_VQ32 }, //pal8 - {"ns/1555", 0}, // Not supported (1555) +{ // name bpp Final format Planar Twiddled VQ Planar(32b) Twiddled(32b) VQ (32b) Palette (8b) + {"1555", 16, TextureType::_5551, tex1555_PL, tex1555_TW, tex1555_VQ, tex1555_PL32, tex1555_TW32, tex1555_VQ32, nullptr }, //1555 + {"565", 16, TextureType::_565, tex565_PL, tex565_TW, tex565_VQ, tex565_PL32, tex565_TW32, tex565_VQ32, nullptr }, //565 + {"4444", 16, TextureType::_4444, tex4444_PL, tex4444_TW, tex4444_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32, nullptr }, //4444 + {"yuv", 16, TextureType::_8888, nullptr, nullptr, nullptr, texYUV422_PL, texYUV422_TW, texYUV422_VQ, nullptr }, //yuv + {"bumpmap", 16, TextureType::_4444, texBMP_PL, texBMP_TW, texBMP_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32, nullptr }, //bump map + {"pal4", 4, TextureType::_5551, nullptr, texPAL4_TW, texPAL4_VQ, nullptr, texPAL4_TW32, texPAL4_VQ32, texPAL4PT_TW }, //pal4 + {"pal8", 8, TextureType::_5551, nullptr, texPAL8_TW, texPAL8_VQ, nullptr, texPAL8_TW32, texPAL8_VQ32, texPAL8PT_TW }, //pal8 + {"ns/1555", 0}, // Not supported (1555) }; static const u32 VQMipPoint[11] = @@ -363,20 +366,20 @@ static const TextureType PAL_TYPE[4] = { void BaseTextureCacheData::PrintTextureName() { char str[512]; - sprintf(str, "Texture: %s ", GetPixelFormatName()); + sprintf(str, "Texture: %s", GetPixelFormatName()); if (tcw.VQ_Comp) strcat(str, " VQ"); - - if (tcw.ScanOrder==0) + else if (tcw.ScanOrder == 0) strcat(str, " TW"); - - if (tcw.MipMapped) - strcat(str, " MM"); - - if (tcw.StrideSel) + else if (tcw.StrideSel) strcat(str, " Stride"); + if (tcw.ScanOrder == 0 && tcw.MipMapped) + strcat(str, " MM"); + if (tsp.FilterMode != 0) + strcat(str, " Bilinear"); + sprintf(str + strlen(str), " %dx%d @ 0x%X", 8 << tsp.TexU, 8 << tsp.TexV, tcw.TexAddr << 3); std::string id = GetId(); sprintf(str + strlen(str), " id=%s", id.c_str()); @@ -385,9 +388,15 @@ void BaseTextureCacheData::PrintTextureName() //true if : dirty or paletted texture and hashes don't match bool BaseTextureCacheData::NeedsUpdate() { - bool rc = dirty - || (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect]) - || (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4]); + bool rc = dirty; + if (tex_type != TextureType::_8) + { + if (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect]) + rc = true; + else if (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4]) + rc = true; + } + return rc; } @@ -433,12 +442,22 @@ void BaseTextureCacheData::Create() else if (tex->bpp == 8) palette_index = (tcw.PalSelect >> 4) << 8; + texconv8 = nullptr; + if (tcw.ScanOrder && (tex->PL || tex->PL32)) { //Texture is stored 'planar' in memory, no deswizzle is needed //verify(tcw.VQ_Comp==0); if (tcw.VQ_Comp != 0) + { WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)"); + tcw.VQ_Comp = 0; + } + if (tcw.MipMapped != 0) + { + WARN_LOG(RENDERER, "Warning: planar texture with mipmaps (invalid)"); + tcw.MipMapped = 0; + } //Planar textures support stride selection, mostly used for non power of 2 textures (videos) int stride = 0; @@ -455,6 +474,8 @@ void BaseTextureCacheData::Create() } else { + tcw.ScanOrder = 0; + tcw.StrideSel = 0; // Quake 3 Arena uses one if (tcw.MipMapped) // Mipmapped texture must be square and TexV is ignored @@ -478,6 +499,7 @@ void BaseTextureCacheData::Create() texconv = tex->TW; texconv32 = tex->TW32; size = w * h * tex->bpp / 8; + texconv8 = tex->TW8; } } } @@ -502,9 +524,14 @@ void BaseTextureCacheData::Update() bool has_alpha = false; if (IsPaletted()) { - tex_type = PAL_TYPE[PAL_RAM_CTRL&3]; - if (tex_type != TextureType::_565) - has_alpha = true; + if (IsGpuHandledPaletted(tsp, tcw)) + tex_type = TextureType::_8; + else + { + tex_type = PAL_TYPE[PAL_RAM_CTRL&3]; + if (tex_type != TextureType::_565) + has_alpha = true; + } // Get the palette hash to check for future updates if (tcw.PixelFmt == PixelPal4) @@ -547,6 +574,7 @@ void BaseTextureCacheData::Update() PixelBuffer pb16; PixelBuffer pb32; + PixelBuffer pb8; // Figure out if we really need to use a 32-bit pixel buffer bool textureUpscaling = settings.rend.TextureUpscale > 1 @@ -623,6 +651,26 @@ void BaseTextureCacheData::Update() } temp_tex_buffer = pb32.data(); } + else if (texconv8 != NULL && tex_type == TextureType::_8) + { + if (mipmapped) + { + pb8.init(w, h, true); + for (u32 i = 0; i <= tsp.TexU + 3u; i++) + { + pb8.set_mipmap(i); + u32 vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8; + texconv8(&pb8, &vram[vram_addr], 1 << i, 1 << i); + } + pb8.set_mipmap(0); + } + else + { + pb8.init(w, h); + texconv8(&pb8, &vram[sa], stride, h); + } + temp_tex_buffer = pb8.data(); + } else if (texconv != NULL) { if (mipmapped) diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index 1bba17247..4575242cd 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -16,6 +16,7 @@ extern bool pal_needs_update,fog_needs_update; extern u32 pal_hash_256[4]; extern u32 pal_hash_16[64]; extern bool KillTex; +extern bool palette_updated; extern u32 detwiddle[2][11][1024]; @@ -151,47 +152,22 @@ void palette_update(); #define ARGB8888_32( word ) ( ((word >> 0) & 0xFF000000) | (((word >> 16) & 0xFF) << 0) | (((word >> 8) & 0xFF) << 8) | ((word & 0xFF) << 16) ) -template -__forceinline u32 YUV422(s32 Y,s32 Yu,s32 Yv) +inline static u32 YUV422(s32 Y,s32 Yu,s32 Yv) { Yu-=128; Yv-=128; - //s32 B = (76283*(Y - 16) + 132252*(Yu - 128))>>16; - //s32 G = (76283*(Y - 16) - 53281 *(Yv - 128) - 25624*(Yu - 128))>>16; - //s32 R = (76283*(Y - 16) + 104595*(Yv - 128))>>16; - s32 R = Y + Yv*11/8; // Y + (Yv-128) * (11/8) ? s32 G = Y - (Yu*11 + Yv*22)/32; // Y - (Yu-128) * (11/8) * 0.25 - (Yv-128) * (11/8) * 0.5 ? s32 B = Y + Yu*110/64; // Y + (Yu-128) * (11/8) * 1.25 ? - return PixelPacker::packRGB(clamp(0,255,R),clamp(0,255,G),clamp(0,255,B)); + return clamp(0, 255, R) | (clamp(0, 255, G) << 8) | (clamp(0, 255, B) << 16) | 0xFF000000; } #define twop(x,y,bcx,bcy) (detwiddle[0][bcy][x]+detwiddle[1][bcx][y]) -//pixel packers ! -struct pp_565 -{ - __forceinline static u32 packRGB(u8 R,u8 G,u8 B) - { - R>>=3; - G>>=2; - B>>=3; - return (R<<11) | (G<<5) | (B<<0); - } -}; - -struct pp_8888 -{ - __forceinline static u32 packRGB(u8 R,u8 G,u8 B) - { - return (R << 0) | (G << 8) | (B << 16) | 0xFF000000; - } -}; - //pixel convertors ! -#define pixelcvt_start_base(name,x,y,type) template \ +#define pixelcvt_start_base(name,x,y,type) \ struct name \ { \ static const u32 xpp=x;\ @@ -202,7 +178,7 @@ struct pp_8888 #define pixelcvt_start(name,x,y) pixelcvt_start_base(name, x, y, u16) #define pixelcvt32_start(name,x,y) pixelcvt_start_base(name, x, y, u32) -#define pixelcvt_size_start(name, x, y) template \ +#define pixelcvt_size_start(name, x, y) template \ struct name \ { \ static const u32 xpp=x;\ @@ -312,9 +288,9 @@ pixelcvt32_start(convYUV_PL,4,1) s32 Yv = (p_in[0]>>16) &255; //p_in[2] //0,0 - pb->prel(0,YUV422(Y0,Yu,Yv)); + pb->prel(0,YUV422(Y0,Yu,Yv)); //1,0 - pb->prel(1,YUV422(Y1,Yu,Yv)); + pb->prel(1,YUV422(Y1,Yu,Yv)); //next 4 bytes p_in+=1; @@ -325,9 +301,9 @@ pixelcvt32_start(convYUV_PL,4,1) Yv = (p_in[0]>>16) &255; //p_in[2] //0,0 - pb->prel(2,YUV422(Y0,Yu,Yv)); + pb->prel(2,YUV422(Y0,Yu,Yv)); //1,0 - pb->prel(3,YUV422(Y1,Yu,Yv)); + pb->prel(3,YUV422(Y1,Yu,Yv)); } pixelcvt_end; @@ -432,9 +408,9 @@ pixelcvt32_start(convYUV_TW,2,2) s32 Yv = (p_in[2]>>0) &255; //p_in[2] //0,0 - pb->prel(0,0,YUV422(Y0,Yu,Yv)); + pb->prel(0,0,YUV422(Y0,Yu,Yv)); //1,0 - pb->prel(1,0,YUV422(Y1,Yu,Yv)); + pb->prel(1,0,YUV422(Y1,Yu,Yv)); //next 4 bytes //p_in+=2; @@ -445,9 +421,9 @@ pixelcvt32_start(convYUV_TW,2,2) Yv = (p_in[3]>>0) &255; //p_in[2] //0,1 - pb->prel(0,1,YUV422(Y0,Yu,Yv)); + pb->prel(0,1,YUV422(Y0,Yu,Yv)); //1,1 - pb->prel(1,1,YUV422(Y1,Yu,Yv)); + pb->prel(1,1,YUV422(Y1,Yu,Yv)); } pixelcvt_end; @@ -479,6 +455,33 @@ pixelcvt_size_start(convPAL4_TW,4,4) } pixelcvt_end; +// Palette 4bpp -> 8bpp +pixelcvt_size_start(convPAL4PT_TW, 4, 4) +{ + u8* p_in = (u8 *)data; + + pb->prel(0, 0, p_in[0] & 0xF); + pb->prel(0, 1, (p_in[0] >> 4) & 0xF); p_in++; + pb->prel(1, 0, p_in[0] & 0xF); + pb->prel(1, 1, (p_in[0] >> 4) & 0xF); p_in++; + + pb->prel(0, 2, p_in[0] & 0xF); + pb->prel(0, 3, (p_in[0] >> 4) & 0xF); p_in++; + pb->prel(1, 2, p_in[0] & 0xF); + pb->prel(1, 3, (p_in[0] >> 4) & 0xF); p_in++; + + pb->prel(2, 0, p_in[0] & 0xF); + pb->prel(2, 1, (p_in[0] >> 4) & 0xF); p_in++; + pb->prel(3, 0, p_in[0] & 0xF); + pb->prel(3, 1, (p_in[0] >> 4) & 0xF); p_in++; + + pb->prel(2, 2, p_in[0] & 0xF); + pb->prel(2, 3, (p_in[0] >> 4) & 0xF); p_in++; + pb->prel(3, 2, p_in[0] & 0xF); + pb->prel(3, 3, (p_in[0] >> 4) & 0xF); p_in++; +} +pixelcvt_end; + pixelcvt_size_start(convPAL8_TW,2,4) { u8* p_in=(u8*)data; @@ -496,6 +499,24 @@ pixelcvt_size_start(convPAL8_TW,2,4) } pixelcvt_end; +// Palette 8bpp -> 8bpp (untwiddle only) +pixelcvt_size_start(convPAL8PT_TW, 2, 4) +{ + u8* p_in = (u8 *)data; + + pb->prel(0, 0, p_in[0]); p_in++; + pb->prel(0, 1, p_in[0]); p_in++; + pb->prel(1, 0, p_in[0]); p_in++; + pb->prel(1, 1, p_in[0]); p_in++; + + pb->prel(0, 2, p_in[0]); p_in++; + pb->prel(0, 3, p_in[0]); p_in++; + pb->prel(1, 2, p_in[0]); p_in++; + pb->prel(1, 3, p_in[0]); p_in++; +} +pixelcvt_end; + + //handler functions template void texture_PL(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) @@ -565,74 +586,51 @@ void texture_VQ(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) } } -//We ask the compiler to generate the templates here -//;) -//planar formats ! -template void texture_PL, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_PL, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_PL, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_PL, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); - -//twiddled formats ! -template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); - -template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); - -//VQ formats ! -template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_VQ, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); - //Planar -#define tex565_PL texture_PL, u16> -#define tex1555_PL texture_PL, u16> -#define tex4444_PL texture_PL, u16> -#define texYUV422_PL texture_PL, u32> +#define tex565_PL texture_PL +#define tex1555_PL texture_PL +#define tex4444_PL texture_PL +#define texYUV422_PL texture_PL #define texBMP_PL tex4444_PL -#define tex565_PL32 texture_PL, u32> -#define tex1555_PL32 texture_PL, u32> -#define tex4444_PL32 texture_PL, u32> +#define tex565_PL32 texture_PL +#define tex1555_PL32 texture_PL +#define tex4444_PL32 texture_PL //Twiddle -#define tex565_TW texture_TW, u16> -#define tex1555_TW texture_TW, u16> -#define tex4444_TW texture_TW, u16> -#define texYUV422_TW texture_TW, u32> +#define tex565_TW texture_TW +#define tex1555_TW texture_TW +#define tex4444_TW texture_TW +#define texYUV422_TW texture_TW #define texBMP_TW tex4444_TW -#define texPAL4_TW texture_TW, u16> -#define texPAL8_TW texture_TW, u16> -#define texPAL4_TW32 texture_TW, u32> -#define texPAL8_TW32 texture_TW, u32> +#define texPAL4_TW texture_TW, u16> +#define texPAL8_TW texture_TW, u16> +#define texPAL4_TW32 texture_TW, u32> +#define texPAL8_TW32 texture_TW, u32> +#define texPAL4PT_TW texture_TW, u8> +#define texPAL8PT_TW texture_TW, u8> -#define tex565_TW32 texture_TW, u32> -#define tex1555_TW32 texture_TW, u32> -#define tex4444_TW32 texture_TW, u32> +#define tex565_TW32 texture_TW +#define tex1555_TW32 texture_TW +#define tex4444_TW32 texture_TW //VQ -#define tex565_VQ texture_VQ, u16> -#define tex1555_VQ texture_VQ, u16> -#define tex4444_VQ texture_VQ, u16> -#define texYUV422_VQ texture_VQ, u32> +#define tex565_VQ texture_VQ +#define tex1555_VQ texture_VQ +#define tex4444_VQ texture_VQ +#define texYUV422_VQ texture_VQ #define texBMP_VQ tex4444_VQ // According to the documentation, a texture cannot be compressed and use // a palette at the same time. However the hardware displays them // just fine. -#define texPAL4_VQ texture_VQ, u16> -#define texPAL8_VQ texture_VQ, u16> +#define texPAL4_VQ texture_VQ, u16> +#define texPAL8_VQ texture_VQ, u16> -#define tex565_VQ32 texture_VQ, u32> -#define tex1555_VQ32 texture_VQ, u32> -#define tex4444_VQ32 texture_VQ, u32> -#define texPAL4_VQ32 texture_VQ, u32> -#define texPAL8_VQ32 texture_VQ, u32> +#define tex565_VQ32 texture_VQ +#define tex1555_VQ32 texture_VQ +#define tex4444_VQ32 texture_VQ +#define texPAL4_VQ32 texture_VQ, u32> +#define texPAL8_VQ32 texture_VQ, u32> bool VramLockedWriteOffset(size_t offset); void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha); @@ -640,6 +638,7 @@ void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool struct PvrTexInfo; template class PixelBuffer; typedef void TexConvFP(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +typedef void TexConvFP8(PixelBuffer* pb, u8* p_in, u32 Width, u32 Height); typedef void TexConvFP32(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); enum class TextureType { _565, _5551, _4444, _8888, _8 }; @@ -660,8 +659,9 @@ public: u32 size; //size, in bytes, in vram const PvrTexInfo* tex; - TexConvFP* texconv; - TexConvFP32* texconv32; + TexConvFP *texconv; + TexConvFP32 *texconv32; + TexConvFP8 *texconv8; u32 dirty; vram_block* lock_block; @@ -722,6 +722,18 @@ public: bool NeedsUpdate(); virtual bool Delete(); virtual ~BaseTextureCacheData() {} + static bool IsGpuHandledPaletted(TSP tsp, TCW tcw) + { + // Some palette textures are handled on the GPU + // This is currently limited to textures using nearest filtering and not mipmapped. + // Enabling texture upscaling or dumping also disables this mode. + return (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) + && settings.rend.TextureUpscale == 1 + && !settings.rend.DumpTextures + && tsp.FilterMode == 0 + && !tcw.MipMapped + && !tcw.VQ_Comp; + } }; template @@ -732,12 +744,13 @@ public: Texture *getTextureCacheData(TSP tsp, TCW tcw) { u64 key = tsp.full & TSPTextureCacheMask.full; - if (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) + if ((tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) + && !BaseTextureCacheData::IsGpuHandledPaletted(tsp, tcw)) // Paletted textures have a palette selection that must be part of the key // We also add the palette type to the key to avoid thrashing the cache // when the palette type is changed. If the palette type is changed back in the future, // this texture will stil be available. - key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6); + key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6) | ((tsp.FilterMode != 0) << 8); else key |= (u64)(tcw.full & TCWTextureCacheMask.full) << 32; @@ -817,6 +830,7 @@ static inline void MakeFogTexture(u8 *tex_data) tex_data[i + 128] = fog_table[i * 4 + 1]; } } + void dump_screenshot(u8 *buffer, u32 width, u32 height, bool alpha = false, u32 rowPitch = 0, bool invertY = true); extern const std::array D_Adjust_LoD_Bias; diff --git a/core/rend/gl4/gl4.h b/core/rend/gl4/gl4.h index d93b57ea1..228387178 100755 --- a/core/rend/gl4/gl4.h +++ b/core/rend/gl4/gl4.h @@ -25,6 +25,7 @@ struct gl4PipelineShader GLint trilinear_alpha; GLint fog_clamp_min, fog_clamp_max; GLint normal_matrix; + GLint palette_index; bool cp_AlphaTest; bool pp_InsideClipping; @@ -39,6 +40,7 @@ struct gl4PipelineShader bool pp_Gouraud; bool pp_BumpMap; bool fog_clamping; + bool palette; }; @@ -90,8 +92,8 @@ extern GLuint depth_fbo; \n\ layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ struct Pixel { \n\ - highp vec4 color; \n\ - highp float depth; \n\ + vec4 color; \n\ + float depth; \n\ uint seq_num; \n\ uint next; \n\ }; \n\ @@ -122,7 +124,7 @@ uint getNextPixelIndex() \n\ \n\ void setFragDepth(void) \n\ { \n\ - highp float w = 100000.0 * gl_FragCoord.w; \n\ + float w = 100000.0 * gl_FragCoord.w; \n\ gl_FragDepth = log2(1.0 + w) / 34.0; \n\ } \n\ struct PolyParam { \n\ @@ -255,6 +257,7 @@ extern struct gl4ShaderUniforms_t int width; int height; } base_clipping; + float palette_index; void setUniformArray(GLint location, int v0, int v1) { @@ -309,6 +312,9 @@ extern struct gl4ShaderUniforms_t if (s->normal_matrix != -1) glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]); + + if (s->palette_index != -1) + glUniform1f(s->palette_index, palette_index); } } gl4ShaderUniforms; diff --git a/core/rend/gl4/gldraw.cpp b/core/rend/gl4/gldraw.cpp index bd45a664c..431f04992 100644 --- a/core/rend/gl4/gldraw.cpp +++ b/core/rend/gl4/gldraw.cpp @@ -24,7 +24,8 @@ GLuint depthSaveTexId; static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, - u32 pp_FogCtrl, bool pp_TwoVolumes, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, Pass pass) + u32 pp_FogCtrl, bool pp_TwoVolumes, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, + bool palette, Pass pass) { u32 rv=0; @@ -40,6 +41,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin rv <<= 1; rv |= pp_Gouraud; rv <<= 1; rv |= pp_BumpMap; rv <<= 1; rv |= fog_clamping; + rv <<= 1; rv |= palette; rv <<= 2; rv |= (int)pass; gl4PipelineShader *shader = &gl4.shaders[rv]; @@ -57,6 +59,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin shader->pp_Gouraud = pp_Gouraud; shader->pp_BumpMap = pp_BumpMap; shader->fog_clamping = fog_clamping; + shader->palette = palette; shader->pass = pass; gl4CompilePipelineShader(shader); } @@ -99,6 +102,7 @@ static void SetGPState(const PolyParam* gp) int clip_rect[4] = {}; TileClipping clipmode = GetTileClip(gp->tileclip, ViewportMatrix, clip_rect); + bool palette = false; if (pass == Pass::Depth) { @@ -114,6 +118,7 @@ static void SetGPState(const PolyParam* gp) false, false, false, + false, pass); } else @@ -123,6 +128,7 @@ static void SetGPState(const PolyParam* gp) bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff); int fog_ctrl = settings.rend.Fog ? gp->tsp.FogCtrl : 2; + palette = BaseTextureCacheData::IsGpuHandledPaletted(gp->tsp, gp->tcw); CurrentShader = gl4GetProgram(Type == ListType_Punch_Through ? 1 : 0, clipmode == TileClipping::Inside, @@ -136,10 +142,19 @@ static void SetGPState(const PolyParam* gp) gp->pcw.Gouraud, gp->tcw.PixelFmt == PixelBumpMap, color_clamp, + palette, pass); } glcache.UseProgram(CurrentShader->program); + if (palette) + { + if (gp->tcw.PixelFmt == PixelPal4) + gl4ShaderUniforms.palette_index = float(gp->tcw.PalSelect << 4) / 1023.f; + else + gl4ShaderUniforms.palette_index = float((gp->tcw.PalSelect >> 4) << 8) / 1023.f; + } + gl4ShaderUniforms.tsp0 = gp->tsp; gl4ShaderUniforms.tsp1 = gp->tsp1; gl4ShaderUniforms.tcw0 = gp->tcw; @@ -671,6 +686,7 @@ static void gl4_draw_quad_texture(GLuint texture, float w, float h) false, false, false, + false, Pass::Color); glcache.UseProgram(CurrentShader->program); gl4ShaderUniforms.Set(CurrentShader); diff --git a/core/rend/gl4/gles.cpp b/core/rend/gl4/gles.cpp index db12ad55f..5d0feffa6 100644 --- a/core/rend/gl4/gles.cpp +++ b/core/rend/gl4/gles.cpp @@ -14,23 +14,23 @@ static const char* VertexShaderSource = R"(#version 140 #endif /* Vertex constants*/ -uniform highp vec4 scale; -uniform highp mat4 normal_matrix; +uniform vec4 scale; +uniform mat4 normal_matrix; /* Vertex input */ -in highp vec4 in_pos; -in lowp vec4 in_base; -in lowp vec4 in_offs; -in mediump vec2 in_uv; -in lowp vec4 in_base1; -in lowp vec4 in_offs1; -in mediump vec2 in_uv1; +in vec4 in_pos; +in vec4 in_base; +in vec4 in_offs; +in vec2 in_uv; +in vec4 in_base1; +in vec4 in_offs1; +in vec2 in_uv1; /* output */ -INTERPOLATION out lowp vec4 vtx_base; -INTERPOLATION out lowp vec4 vtx_offs; - out mediump vec2 vtx_uv; -INTERPOLATION out lowp vec4 vtx_base1; -INTERPOLATION out lowp vec4 vtx_offs1; - out mediump vec2 vtx_uv1; +INTERPOLATION out vec4 vtx_base; +INTERPOLATION out vec4 vtx_offs; + out vec2 vtx_uv; +INTERPOLATION out vec4 vtx_base1; +INTERPOLATION out vec4 vtx_offs1; + out vec2 vtx_uv1; void main() { vtx_base = in_base; @@ -62,6 +62,7 @@ R"( #define pp_Gouraud %d #define pp_BumpMap %d #define FogClamping %d +#define pp_Palette %d #define PASS %d #define PI 3.1415926 @@ -86,19 +87,21 @@ out vec4 FragColor; #endif /* Shader program params*/ -uniform lowp float cp_AlphaTestValue; -uniform lowp vec4 pp_ClipTest; -uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; -uniform highp float sp_FOG_DENSITY; -uniform highp float shade_scale_factor; +uniform float cp_AlphaTestValue; +uniform vec4 pp_ClipTest; +uniform vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; +uniform float sp_FOG_DENSITY; +uniform float shade_scale_factor; uniform sampler2D tex0, tex1; layout(binding = 5) uniform sampler2D fog_table; uniform int pp_Number; uniform usampler2D shadow_stencil; uniform sampler2D DepthTex; -uniform lowp float trilinear_alpha; -uniform lowp vec4 fog_clamp_min; -uniform lowp vec4 fog_clamp_max; +uniform float trilinear_alpha; +uniform vec4 fog_clamp_min; +uniform vec4 fog_clamp_max; +uniform sampler2D palette; +uniform float palette_index; uniform ivec2 blend_mode[2]; #if pp_TwoVolumes == 1 @@ -109,24 +112,24 @@ uniform int fog_control[2]; #endif /* Vertex input*/ -INTERPOLATION in lowp vec4 vtx_base; -INTERPOLATION in lowp vec4 vtx_offs; - in mediump vec2 vtx_uv; -INTERPOLATION in lowp vec4 vtx_base1; -INTERPOLATION in lowp vec4 vtx_offs1; - in mediump vec2 vtx_uv1; +INTERPOLATION in vec4 vtx_base; +INTERPOLATION in vec4 vtx_offs; + in vec2 vtx_uv; +INTERPOLATION in vec4 vtx_base1; +INTERPOLATION in vec4 vtx_offs1; + in vec2 vtx_uv1; -lowp float fog_mode2(highp float w) +float fog_mode2(float w) { - highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); - highp float exp = floor(log2(z)); - highp float m = z * 16.0 / pow(2.0, exp) - 16.0; + float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); + float exp = floor(log2(z)); + float m = z * 16.0 / pow(2.0, exp) - 16.0; float idx = floor(m) + exp * 16.0 + 0.5; vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); return fog_coef.r; } -highp vec4 fog_clamp(highp vec4 col) +vec4 fog_clamp(vec4 col) { #if FogClamping == 1 return clamp(col, fog_clamp_min, fog_clamp_max); @@ -135,13 +138,23 @@ highp vec4 fog_clamp(highp vec4 col) #endif } +#if pp_Palette == 1 + +vec4 palettePixel(sampler2D tex, vec2 coords) +{ + vec4 c = vec4(texture(tex, coords).r * 255.0 / 1023.0 + palette_index, 0.5, 0.0, 0.0); + return texture(palette, c.xy); +} + +#endif + void main() { setFragDepth(); #if PASS == PASS_OIT // Manual depth testing - highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r; + float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r; if (gl_FragDepth < frontDepth) discard; #endif @@ -153,9 +166,9 @@ void main() discard; #endif - highp vec4 color = vtx_base; - lowp vec4 offset = vtx_offs; - mediump vec2 uv = vtx_uv; + vec4 color = vtx_base; + vec4 offset = vtx_offs; + vec2 uv = vtx_uv; bool area1 = false; ivec2 cur_blend_mode = blend_mode[0]; @@ -190,14 +203,22 @@ void main() #endif #if pp_Texture==1 { - highp vec4 texcol; - if (area1) - texcol = texture(tex1, uv); - else - texcol = texture(tex0, uv); + vec4 texcol; + #if pp_Palette == 0 + if (area1) + texcol = texture(tex1, uv); + else + texcol = texture(tex0, uv); + #else + if (area1) + texcol = palettePixel(tex1, uv); + else + texcol = palettePixel(tex0, uv); + #endif + #if pp_BumpMap == 1 - highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; - highp float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; + float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; + float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; texcol.a = clamp(vtx_offs.a + vtx_offs.r * sin(s) + vtx_offs.g * cos(s) * cos(r - 2.0 * PI * vtx_offs.b), 0.0, 1.0); texcol.rgb = vec3(1.0, 1.0, 1.0); #else @@ -370,7 +391,8 @@ bool gl4CompilePipelineShader( gl4PipelineShader* s, const char *pixel_source /* sprintf(pshader, pixel_source, s->cp_AlphaTest, s->pp_InsideClipping, s->pp_UseAlpha, s->pp_Texture, s->pp_IgnoreTexA, s->pp_ShadInstr, s->pp_Offset, s->pp_FogCtrl, - s->pp_TwoVolumes, s->pp_Gouraud, s->pp_BumpMap, s->fog_clamping, (int)s->pass); + s->pp_TwoVolumes, s->pp_Gouraud, s->pp_BumpMap, s->fog_clamping, s->palette, + (int)s->pass); s->program = gl_CompileAndLink(vshader, pshader); @@ -437,6 +459,11 @@ bool gl4CompilePipelineShader( gl4PipelineShader* s, const char *pixel_source /* s->shading_instr = glGetUniformLocation(s->program, "shading_instr"); s->fog_control = glGetUniformLocation(s->program, "fog_control"); + gu = glGetUniformLocation(s->program, "palette"); + if (gu != -1) + glUniform1i(gu, 6); // GL_TEXTURE6 + s->palette_index = glGetUniformLocation(s->program, "palette_index"); + return glIsProgram(s->program)==GL_TRUE; } @@ -657,6 +684,11 @@ static bool RenderFrame() fog_needs_update = false; UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE5, GL_RED); } + if (palette_updated) + { + UpdatePaletteTexture(GL_TEXTURE6); + palette_updated = false; + } glcache.UseProgram(gl4.modvol_shader.program); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 64bad9a20..e0e6a523a 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -120,6 +120,7 @@ __forceinline int clip_rect[4] = {}; TileClipping clipmode = GetTileClip(gp->tileclip, ViewportMatrix, clip_rect); + bool palette = BaseTextureCacheData::IsGpuHandledPaletted(gp->tsp, gp->tcw); CurrentShader = GetProgram(Type == ListType_Punch_Through ? 1 : 0, clipmode == TileClipping::Inside, @@ -132,11 +133,20 @@ __forceinline gp->pcw.Gouraud, gp->tcw.PixelFmt == PixelBumpMap, color_clamp, - ShaderUniforms.trilinear_alpha != 1.f); + ShaderUniforms.trilinear_alpha != 1.f, + palette); glcache.UseProgram(CurrentShader->program); if (CurrentShader->trilinear_alpha != -1) glUniform1f(CurrentShader->trilinear_alpha, ShaderUniforms.trilinear_alpha); + if (palette) + { + if (gp->tcw.PixelFmt == PixelPal4) + ShaderUniforms.palette_index = float(gp->tcw.PalSelect << 4) / 1023.f; + else + ShaderUniforms.palette_index = float((gp->tcw.PalSelect >> 4) << 8) / 1023.f; + glUniform1f(CurrentShader->palette_index, ShaderUniforms.palette_index); + } if (clipmode == TileClipping::Inside) glUniform4f(CurrentShader->pp_ClipTest, clip_rect[0], clip_rect[1], clip_rect[0] + clip_rect[2], clip_rect[1] + clip_rect[3]); @@ -160,7 +170,7 @@ __forceinline SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); //set texture filter mode - if (gp->tsp.FilterMode == 0) + if (gp->tsp.FilterMode == 0 || palette) { //disable filtering, mipmaps glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -643,7 +653,7 @@ static void DrawQuad(GLuint texId, float x, float y, float w, float h, float u0, ShaderUniforms.trilinear_alpha = 1.0; - PipelineShader *shader = GetProgram(0, false, 1, 0, 1, 0, 0, 2, false, false, false, false); + PipelineShader *shader = GetProgram(0, false, 1, 0, 1, 0, 0, 2, false, false, false, false, false); glcache.UseProgram(shader->program); glActiveTexture(GL_TEXTURE0); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 9c6f66011..cb1a0a5f6 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -106,6 +106,8 @@ R"(%s #define pp_BumpMap %d #define FogClamping %d #define pp_TriLinear %d +#define pp_Palette %d + #define PI 3.1415926 #define GLES2 0 @@ -152,6 +154,9 @@ uniform sampler2D tex,fog_table; uniform lowp float trilinear_alpha; uniform lowp vec4 fog_clamp_min; uniform lowp vec4 fog_clamp_max; +uniform sampler2D palette; +uniform mediump float palette_index; + /* Vertex input*/ INTERPOLATION in lowp vec4 vtx_base; INTERPOLATION in lowp vec4 vtx_offs; @@ -183,6 +188,16 @@ highp vec4 fog_clamp(lowp vec4 col) #endif } +#if pp_Palette == 1 + +lowp vec4 palettePixel(highp vec2 coords) +{ + highp vec4 c = vec4(texture(tex, coords).FOG_CHANNEL * 255.0 / 1023.0 + palette_index, 0.5, 0.0, 0.0); + return texture(palette, c.xy); +} + +#endif + void main() { // Clip inside the box @@ -201,7 +216,11 @@ void main() #endif #if pp_Texture==1 { - lowp vec4 texcol=texture(tex, vtx_uv); + #if pp_Palette == 0 + lowp vec4 texcol = texture(tex, vtx_uv); + #else + lowp vec4 texcol = palettePixel(vtx_uv); + #endif #if pp_BumpMap == 1 highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; @@ -387,6 +406,7 @@ gl_ctx gl; int screen_width; int screen_height; GLuint fogTextureId; +GLuint paletteTextureId; glm::mat4 ViewportMatrix; @@ -446,6 +466,8 @@ static void gles_term() fbTextureId = 0; glcache.DeleteTextures(1, &fogTextureId); fogTextureId = 0; + glcache.DeleteTextures(1, &paletteTextureId); + paletteTextureId = 0; gl_free_osd_resources(); free_output_framebuffer(); @@ -471,7 +493,7 @@ void findGLVersion() gl.glsl_version_header = ""; gl.index_type = GL_UNSIGNED_SHORT; } - gl.fog_image_format = GL_ALPHA; + gl.single_channel_format = GL_ALPHA; const char *extensions = (const char *)glGetString(GL_EXTENSIONS); if (strstr(extensions, "GL_OES_packed_depth_stencil") != NULL) gl.GL_OES_packed_depth_stencil_supported = true; @@ -490,13 +512,13 @@ void findGLVersion() #else gl.glsl_version_header = "#version 130"; #endif - gl.fog_image_format = GL_RED; + gl.single_channel_format = GL_RED; } else { gl.gl_version = "GL2"; gl.glsl_version_header = "#version 120"; - gl.fog_image_format = GL_ALPHA; + gl.single_channel_format = GL_ALPHA; } } GLint ranges[2]; @@ -595,7 +617,8 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, - u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear) + u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear, + bool palette) { u32 rv=0; @@ -611,6 +634,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, rv<<=1; rv|=pp_BumpMap; rv<<=1; rv|=fog_clamping; rv<<=1; rv|=trilinear; + rv<<=1; rv|=palette; PipelineShader *shader = &gl.shaders[rv]; if (shader->program == 0) @@ -627,6 +651,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, shader->pp_BumpMap = pp_BumpMap; shader->fog_clamping = fog_clamping; shader->trilinear = trilinear; + shader->palette = palette; CompilePipelineShader(shader); } @@ -645,7 +670,7 @@ bool CompilePipelineShader( PipelineShader* s) rc = sprintf(pshader,PixelPipelineShader, gl.glsl_version_header, gl.gl_version, s->cp_AlphaTest,s->pp_InsideClipping,s->pp_UseAlpha, s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_Gouraud, s->pp_BumpMap, - s->fog_clamping, s->trilinear); + s->fog_clamping, s->trilinear, s->palette); verify(rc + 1 <= (int)sizeof(pshader)); s->program=gl_CompileAndLink(vshader, pshader); @@ -682,6 +707,12 @@ bool CompilePipelineShader( PipelineShader* s) gu = glGetUniformLocation(s->program, "fog_table"); if (gu != -1) glUniform1i(gu, 1); + // And texture 2 as palette + gu = glGetUniformLocation(s->program, "palette"); + if (gu != -1) + glUniform1i(gu, 2); + s->palette_index = glGetUniformLocation(s->program, "palette_index"); + s->trilinear_alpha = glGetUniformLocation(s->program, "trilinear_alpha"); if (s->fog_clamping) @@ -830,6 +861,31 @@ bool gl_create_resources(); //setup +static void gl_DebugOutput(GLenum source, + GLenum type, + GLuint id, + GLenum severity, + GLsizei length, + const GLchar *message, + const void *userParam) +{ + if (id == 131185) + return; + switch (severity) + { + default: + case GL_DEBUG_SEVERITY_NOTIFICATION: + case GL_DEBUG_SEVERITY_LOW: + DEBUG_LOG(RENDERER, "opengl:[%d] %s", id, message); + break; + case GL_DEBUG_SEVERITY_MEDIUM: + INFO_LOG(RENDERER, "opengl:[%d] %s", id, message); + break; + case GL_DEBUG_SEVERITY_HIGH: + WARN_LOG(RENDERER, "opengl:[%d] %s", id, message); + break; + } +} bool gles_init() { @@ -838,10 +894,16 @@ bool gles_init() if (!gl_create_resources()) return false; - // glEnable(GL_DEBUG_OUTPUT); - // glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); - // glDebugMessageCallback(gl_DebugOutput, NULL); - // glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); +#if 0 + glEnable(GL_DEBUG_OUTPUT); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); +#ifdef GLES + glDebugMessageCallback((RGLGENGLDEBUGPROC)gl_DebugOutput, NULL); +#else + glDebugMessageCallback(gl_DebugOutput, NULL); +#endif + glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); +#endif //clean up the buffer glcache.ClearColor(0.f, 0.f, 0.f, 0.f); @@ -892,6 +954,28 @@ void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format glActiveTexture(GL_TEXTURE0); } +void UpdatePaletteTexture(GLenum texture_slot) +{ + glActiveTexture(texture_slot); + if (paletteTextureId == 0) + { + paletteTextureId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, paletteTextureId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + else + glcache.BindTexture(GL_TEXTURE_2D, paletteTextureId); + + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1024, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, palette32_ram); + glCheck(); + + glActiveTexture(GL_TEXTURE0); +} + void OSD_DRAW(bool clear_screen) { #ifdef __ANDROID__ @@ -1060,12 +1144,18 @@ bool RenderFrame() if (fog_needs_update && settings.rend.Fog) { fog_needs_update = false; - UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE1, gl.fog_image_format); + UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE1, gl.single_channel_format); + } + if (palette_updated) + { + UpdatePaletteTexture(GL_TEXTURE2); + palette_updated = false; } glcache.UseProgram(gl.modvol_shader.program); - glUniform4fv( gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); + if (gl.modvol_shader.depth_scale != -1) + glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); glUniformMatrix4fv(gl.modvol_shader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]); ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 48d59887d..e88461466 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -38,6 +38,7 @@ struct PipelineShader GLint trilinear_alpha; GLint fog_clamp_min, fog_clamp_max; GLint normal_matrix; + GLint palette_index; // bool cp_AlphaTest; @@ -52,6 +53,7 @@ struct PipelineShader bool pp_BumpMap; bool fog_clamping; bool trilinear; + bool palette; }; @@ -107,7 +109,7 @@ struct gl_ctx int gl_major; int gl_minor; bool is_gles; - GLuint fog_image_format; + GLuint single_channel_format; GLenum index_type; bool GL_OES_packed_depth_stencil_supported; bool GL_OES_depth24_supported; @@ -132,6 +134,7 @@ void gl_load_osd_resources(); void gl_free_osd_resources(); bool ProcessFrame(TA_context* ctx); void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format); +void UpdatePaletteTexture(GLenum texture_slot); void findGLVersion(); void GetFramebufferScaling(float& scale_x, float& scale_y, float& scissoring_scale_x, float& scissoring_scale_y); void GetFramebufferSize(float& dc_width, float& dc_height); @@ -157,7 +160,8 @@ void HideOSD(); void OSD_DRAW(bool clear_screen); PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, - u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear); + u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear, + bool palette); GLuint gl_CompileShader(const char* shader, GLuint type); GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader); @@ -181,6 +185,7 @@ extern struct ShaderUniforms_t int width; int height; } base_clipping; + float palette_index; void Set(const PipelineShader* s) { @@ -206,6 +211,9 @@ extern struct ShaderUniforms_t if (s->normal_matrix != -1) glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]); + + if (s->palette_index != -1) + glUniform1f(s->palette_index, palette_index); } } ShaderUniforms; diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index c4082157b..d44fb8f3c 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -12,8 +12,9 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b { //upload to OpenGL ! glcache.BindTexture(GL_TEXTURE_2D, texID); - GLuint comps = GL_RGBA; + GLuint comps = tex_type == TextureType::_8 ? gl.single_channel_format : GL_RGBA; GLuint gltype; + u32 bytes_per_pixel = 2; switch (tex_type) { case TextureType::_5551: @@ -27,6 +28,11 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b gltype = GL_UNSIGNED_SHORT_4_4_4_4; break; case TextureType::_8888: + bytes_per_pixel = 4; + gltype = GL_UNSIGNED_BYTE; + break; + case TextureType::_8: + bytes_per_pixel = 1; gltype = GL_UNSIGNED_BYTE; break; default: @@ -63,10 +69,13 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b case TextureType::_8888: internalFormat = GL_RGBA8; break; + case TextureType::_8: + internalFormat = comps; + break; default: - die("Unsupported texture format"); - internalFormat = 0; - break; + die("Unsupported texture format"); + internalFormat = 0; + break; } if (Updates == 1) { @@ -76,7 +85,7 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b for (int i = 0; i < mipmapLevels; i++) { glTexSubImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, 0, 0, 1 << i, 1 << i, comps, gltype, temp_tex_buffer); - temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2); + temp_tex_buffer += (1 << (2 * i)) * bytes_per_pixel; } } else @@ -87,7 +96,7 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b for (int i = 0; i < mipmapLevels; i++) { glTexImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, comps, 1 << i, 1 << i, 0, comps, gltype, temp_tex_buffer); - temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2); + temp_tex_buffer += (1 << (2 * i)) * bytes_per_pixel; } } } diff --git a/core/rend/vulkan/drawer.cpp b/core/rend/vulkan/drawer.cpp index af574ddca..a8b69d9ae 100644 --- a/core/rend/vulkan/drawer.cpp +++ b/core/rend/vulkan/drawer.cpp @@ -157,15 +157,25 @@ void Drawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sor // Trilinear pass A trilinearAlpha = 1.0 - trilinearAlpha; } - - if (tileClip == TileClipping::Inside || trilinearAlpha != 1.f) + bool palette = BaseTextureCacheData::IsGpuHandledPaletted(poly.tsp, poly.tcw); + float palette_index = 0.f; + if (palette) { - std::array pushConstants = { + if (poly.tcw.PixelFmt == PixelPal4) + palette_index = float(poly.tcw.PalSelect << 4) / 1023.f; + else + palette_index = float((poly.tcw.PalSelect >> 4) << 8) / 1023.f; + } + + if (tileClip == TileClipping::Inside || trilinearAlpha != 1.f || palette) + { + std::array pushConstants = { (float)scissorRect.offset.x, (float)scissorRect.offset.y, (float)scissorRect.offset.x + (float)scissorRect.extent.width, (float)scissorRect.offset.y + (float)scissorRect.extent.height, - trilinearAlpha + trilinearAlpha, + palette_index }; cmdBuffer.pushConstants(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants); } @@ -304,7 +314,7 @@ void Drawer::UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]); } -bool Drawer::Draw(const Texture *fogTexture) +bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) { VertexShaderUniforms vtxUniforms; vtxUniforms.normal_matrix = matrices.GetNormalMatrix(); @@ -322,7 +332,8 @@ bool Drawer::Draw(const Texture *fogTexture) UploadMainBuffer(vtxUniforms, fragUniforms); // Update per-frame descriptor set and bind it - GetCurrentDescSet().UpdateUniforms(GetMainBuffer(0)->buffer.get(), offsets.vertexUniformOffset, offsets.fragmentUniformOffset, fogTexture->GetImageView()); + GetCurrentDescSet().UpdateUniforms(GetMainBuffer(0)->buffer.get(), offsets.vertexUniformOffset, offsets.fragmentUniformOffset, + fogTexture->GetImageView(), paletteTexture->GetImageView()); GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer); // Reset per-poly descriptor set pool GetCurrentDescSet().Reset(); diff --git a/core/rend/vulkan/drawer.h b/core/rend/vulkan/drawer.h index dabc9e4ee..45d1787ab 100644 --- a/core/rend/vulkan/drawer.h +++ b/core/rend/vulkan/drawer.h @@ -103,7 +103,7 @@ class Drawer : public BaseDrawer { public: virtual ~Drawer() = default; - bool Draw(const Texture *fogTexture); + bool Draw(const Texture *fogTexture, const Texture *paletteTexture); virtual void EndRenderPass() = 0; protected: diff --git a/core/rend/vulkan/oit/oit_drawer.cpp b/core/rend/vulkan/oit/oit_drawer.cpp index 8fffdf1cf..ecece3674 100644 --- a/core/rend/vulkan/oit/oit_drawer.cpp +++ b/core/rend/vulkan/oit/oit_drawer.cpp @@ -44,6 +44,16 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool bool twoVolumes = poly.tsp1.full != (u32)-1 || poly.tcw1.full != (u32)-1; + bool palette = BaseTextureCacheData::IsGpuHandledPaletted(poly.tsp, poly.tcw); + float palette_index = 0.f; + if (palette) + { + if (poly.tcw.PixelFmt == PixelPal4) + palette_index = float(poly.tcw.PalSelect << 4) / 1023.f; + else + palette_index = float((poly.tcw.PalSelect >> 4) << 8) / 1023.f; + } + OITDescriptorSets::PushConstants pushConstants = { { (float)scissorRect.offset.x, @@ -54,6 +64,7 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool { poly.tsp.SrcInstr, poly.tsp.DstInstr, 0, 0 }, trilinearAlpha, listType == ListType_Translucent ? (int)(&poly - pvrrc.global_param_tr.head()) : 0, + palette_index, }; if (twoVolumes) { @@ -236,7 +247,7 @@ void OITDrawer::UploadMainBuffer(const OITDescriptorSets::VertexShaderUniforms& buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]); } -bool OITDrawer::Draw(const Texture *fogTexture) +bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture) { vk::CommandBuffer cmdBuffer = NewFrame(); @@ -269,7 +280,7 @@ bool OITDrawer::Draw(const Texture *fogTexture) GetCurrentDescSet().UpdateUniforms(mainBuffer, offsets.vertexUniformOffset, offsets.fragmentUniformOffset, fogTexture->GetImageView(), offsets.polyParamsOffset, offsets.polyParamsSize, depthAttachment->GetStencilView(), - depthAttachment->GetImageView()); + depthAttachment->GetImageView(), paletteTexture->GetImageView()); GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer); GetCurrentDescSet().UpdateColorInputDescSet(0, colorAttachments[0]->GetImageView()); GetCurrentDescSet().UpdateColorInputDescSet(1, colorAttachments[1]->GetImageView()); diff --git a/core/rend/vulkan/oit/oit_drawer.h b/core/rend/vulkan/oit/oit_drawer.h index 44fd146fc..0bce86595 100644 --- a/core/rend/vulkan/oit/oit_drawer.h +++ b/core/rend/vulkan/oit/oit_drawer.h @@ -39,7 +39,7 @@ class OITDrawer : public BaseDrawer { public: virtual ~OITDrawer() = default; - bool Draw(const Texture *fogTexture); + bool Draw(const Texture *fogTexture, const Texture *paletteTexture); virtual vk::CommandBuffer NewFrame() = 0; virtual void EndFrame() = 0; diff --git a/core/rend/vulkan/oit/oit_pipeline.cpp b/core/rend/vulkan/oit/oit_pipeline.cpp index 7195d7901..7622c6009 100644 --- a/core/rend/vulkan/oit/oit_pipeline.cpp +++ b/core/rend/vulkan/oit/oit_pipeline.cpp @@ -156,7 +156,8 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP //params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through; params.useAlpha = pp.tsp.UseAlpha; params.pass = pass; - params.twoVolume = pp.tsp1.full != -1 || pp.tcw1.full != -1; + params.twoVolume = pp.tsp1.full != (u32)-1 || pp.tcw1.full != (u32)-1; + params.palette = BaseTextureCacheData::IsGpuHandledPaletted(pp.tsp, pp.tcw); vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); vk::PipelineShaderStageCreateInfo stages[] = { diff --git a/core/rend/vulkan/oit/oit_pipeline.h b/core/rend/vulkan/oit/oit_pipeline.h index 4dcdd48cc..552493ae6 100644 --- a/core/rend/vulkan/oit/oit_pipeline.h +++ b/core/rend/vulkan/oit/oit_pipeline.h @@ -55,7 +55,8 @@ public: glm::ivec4 blend_mode0; // Only using 2 elements but easier for std140 float trilinearAlpha; int pp_Number; - int _pad[2]; + float palette_index; + int _pad; // two volume mode glm::ivec4 blend_mode1; // Only using 2 elements but easier for std140 @@ -80,7 +81,8 @@ public: } // FIXME way too many params void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, - u32 polyParamsOffset, u32 polyParamsSize, vk::ImageView stencilImageView, vk::ImageView depthImageView) + u32 polyParamsOffset, u32 polyParamsSize, vk::ImageView stencilImageView, vk::ImageView depthImageView, + vk::ImageView paletteImageView) { if (!perFrameDescSet) { @@ -105,6 +107,17 @@ public: imageInfo = { fogSampler, fogImageView, vk::ImageLayout::eShaderReadOnlyOptimal }; writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 2, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr)); } + if (paletteImageView) + { + TSP palTsp = {}; + palTsp.FilterMode = 0; + palTsp.ClampU = 1; + palTsp.ClampV = 1; + vk::Sampler palSampler = samplerManager->GetSampler(palTsp); + static vk::DescriptorImageInfo imageInfo; + imageInfo = { palSampler, paletteImageView, vk::ImageLayout::eShaderReadOnlyOptimal }; + writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 6, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr)); + } if (polyParamsSize > 0) { static vk::DescriptorBufferInfo polyParamsBufferInfo; @@ -150,7 +163,7 @@ public: std::vector writeDescriptorSets; writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perPolyDescSets.back(), 0, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo0, nullptr, nullptr)); - if (textureId1 != -1) + if (textureId1 != (u64)-1) { Texture *texture1 = reinterpret_cast(textureId1); vk::DescriptorImageInfo imageInfo1(samplerManager->GetSampler(tsp1), texture1->GetReadOnlyImageView(), vk::ImageLayout::eShaderReadOnlyOptimal); @@ -223,6 +236,7 @@ public: { 3, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // Tr poly params { 4, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // stencil input attachment { 5, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // depth input attachment + { 6, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// palette texture }; perFrameLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perFrameBindings), perFrameBindings)); @@ -310,7 +324,7 @@ private: u32 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3) | (((pp->tileclip >> 28) == 3) << 4); hash |= ((listType >> 1) << 5); - if (pp->tcw1.full != -1 || pp->tsp1.full != -1) + if (pp->tcw1.full != (u32)-1 || pp->tsp1.full != (u32)-1) { // Two-volume mode hash |= (1 << 31) | (pp->tsp.ColorClamp << 11); @@ -322,7 +336,7 @@ private: | (pp->tsp.SrcInstr << 14) | (pp->tsp.DstInstr << 17); } hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | ((autosort ? 6 : pp->isp.DepthMode) << 23); - hash |= (u32)pass << 26; + hash |= ((u32)BaseTextureCacheData::IsGpuHandledPaletted(pp->tsp, pp->tcw) << 26) | ((u32)pass << 27); return hash; } diff --git a/core/rend/vulkan/oit/oit_renderer.cpp b/core/rend/vulkan/oit/oit_renderer.cpp index 8cf194002..d875f24ca 100644 --- a/core/rend/vulkan/oit/oit_renderer.cpp +++ b/core/rend/vulkan/oit/oit_renderer.cpp @@ -159,7 +159,10 @@ public: textureCache.CollectCleanup(); if (result) + { CheckFogTexture(); + CheckPaletteTexture(); + } else texCommandPool.EndFrame(); @@ -218,7 +221,7 @@ public: else drawer = &screenDrawer; - drawer->Draw(fogTexture.get()); + drawer->Draw(fogTexture.get(), paletteTexture.get()); drawer->EndFrame(); @@ -283,9 +286,30 @@ private: fogTexture->SetCommandBuffer(nullptr); } + void CheckPaletteTexture() + { + if (!paletteTexture) + { + paletteTexture = std::unique_ptr(new Texture()); + paletteTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice()); + paletteTexture->SetDevice(GetContext()->GetDevice()); + paletteTexture->tex_type = TextureType::_8888; + palette_updated = true; + } + if (!palette_updated) + return; + palette_updated = false; + + paletteTexture->SetCommandBuffer(texCommandPool.Allocate()); + + paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false); + + paletteTexture->SetCommandBuffer(nullptr); + } OITBuffers oitBuffers; std::unique_ptr fogTexture; + std::unique_ptr paletteTexture; CommandPool texCommandPool; SamplerManager samplerManager; diff --git a/core/rend/vulkan/oit/oit_shaders.cpp b/core/rend/vulkan/oit/oit_shaders.cpp index 706feba49..656381e03 100644 --- a/core/rend/vulkan/oit/oit_shaders.cpp +++ b/core/rend/vulkan/oit/oit_shaders.cpp @@ -234,6 +234,7 @@ static const char OITFragmentShaderSource[] = R"( #define pp_Gouraud %d #define pp_BumpMap %d #define ColorClamping %d +#define pp_Palette %d #define PASS %d #define PI 3.1415926 @@ -264,6 +265,7 @@ layout (push_constant) uniform pushBlock ivec4 blend_mode0; float trilinearAlpha; int pp_Number; + float palette_index; // two volume mode ivec4 blend_mode1; @@ -283,6 +285,9 @@ layout (set = 1, binding = 0) uniform sampler2D tex0; layout (set = 1, binding = 1) uniform sampler2D tex1; #endif #endif +#if pp_Palette == 1 +layout (set = 0, binding = 6) uniform sampler2D palette; +#endif #if PASS == PASS_COLOR layout (input_attachment_index = 0, set = 0, binding = 4) uniform usubpassInput shadow_stencil; @@ -323,6 +328,16 @@ vec4 colorClamp(vec4 col) #endif } +#if pp_Palette == 1 + +vec4 palettePixel(sampler2D tex, vec2 coords) +{ + vec4 c = vec4(texture(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0); + return texture(palette, c.xy); +} + +#endif + void main() { setFragDepth(); @@ -381,10 +396,18 @@ void main() highp vec4 texcol; #if pp_TwoVolumes == 1 if (area1) - texcol = texture(tex1, uv); + #if pp_Palette == 0 + texcol = texture(tex1, uv); + #else + texcol = palettePixel(tex1, uv); + #endif else #endif - texcol = texture(tex0, uv); + #if pp_Palette == 0 + texcol = texture(tex0, uv); + #else + texcol = palettePixel(tex0, uv); + #endif #if pp_BumpMap == 1 highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; highp float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; @@ -775,7 +798,8 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const FragmentShaderParam strcpy(buf, OITShaderHeader); sprintf(buf + strlen(buf), OITFragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha, (int)params.texture, (int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog, - (int)params.twoVolume, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, (int)params.pass); + (int)params.twoVolume, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, (int)params.palette, + (int)params.pass); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf); } diff --git a/core/rend/vulkan/oit/oit_shaders.h b/core/rend/vulkan/oit/oit_shaders.h index 3d7e9dca6..b26716cda 100644 --- a/core/rend/vulkan/oit/oit_shaders.h +++ b/core/rend/vulkan/oit/oit_shaders.h @@ -49,6 +49,7 @@ public: bool bumpmap; bool clamping; bool twoVolume; + bool palette; Pass pass; u32 hash() @@ -57,7 +58,7 @@ public: | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)twoVolume << 13) - | ((int)pass << 14); + | ((u32)palette << 14) | ((int)pass << 15); } }; diff --git a/core/rend/vulkan/pipeline.cpp b/core/rend/vulkan/pipeline.cpp index 1547b2889..ae81d7e88 100644 --- a/core/rend/vulkan/pipeline.cpp +++ b/core/rend/vulkan/pipeline.cpp @@ -303,6 +303,7 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol params.texture = pp.pcw.Texture; params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through && pp.tcw.MipMapped == 1; params.useAlpha = pp.tsp.UseAlpha; + params.palette = BaseTextureCacheData::IsGpuHandledPaletted(pp.tsp, pp.tcw); vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); vk::PipelineShaderStageCreateInfo stages[] = { diff --git a/core/rend/vulkan/pipeline.h b/core/rend/vulkan/pipeline.h index 17aef7a30..7f3924ed8 100644 --- a/core/rend/vulkan/pipeline.h +++ b/core/rend/vulkan/pipeline.h @@ -36,7 +36,7 @@ public: this->perPolyLayout = perPolyLayout; } - void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView) + void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, vk::ImageView paletteImageView) { if (!perFrameDescSet) { @@ -61,6 +61,17 @@ public: imageInfo = { fogSampler, fogImageView, vk::ImageLayout::eShaderReadOnlyOptimal }; writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 2, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr)); } + if (paletteImageView) + { + TSP palTsp = {}; + palTsp.FilterMode = 0; + palTsp.ClampU = 1; + palTsp.ClampV = 1; + vk::Sampler palSampler = samplerManager->GetSampler(palTsp); + static vk::DescriptorImageInfo imageInfo; + imageInfo = { palSampler, paletteImageView, vk::ImageLayout::eShaderReadOnlyOptimal }; + writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 3, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr)); + } GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); } @@ -137,6 +148,7 @@ public: { 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // vertex uniforms { 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // fragment uniforms { 2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// fog texture + { 3, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// palette texture }; vk::DescriptorSetLayoutBinding perPolyBindings[] = { { 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture @@ -146,7 +158,7 @@ public: perPolyLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perPolyBindings), perPolyBindings)); vk::DescriptorSetLayout layouts[] = { *perFrameLayout, *perPolyLayout }; - vk::PushConstantRange pushConstant(vk::ShaderStageFlagBits::eFragment, 0, 20); + vk::PushConstantRange pushConstant(vk::ShaderStageFlagBits::eFragment, 0, 24); pipelineLayout = GetContext()->GetDevice().createPipelineLayoutUnique( vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), ARRAY_SIZE(layouts), layouts, 1, &pushConstant)); } @@ -204,7 +216,7 @@ private: | (pp->tsp.ColorClamp << 11) | ((settings.rend.Fog ? pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14) | (pp->tsp.DstInstr << 17); hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23); - hash |= (u32)sortTriangles << 26; + hash |= ((u32)sortTriangles << 26) | ((u32)BaseTextureCacheData::IsGpuHandledPaletted(pp->tsp, pp->tcw) << 27); return hash; } diff --git a/core/rend/vulkan/shaders.cpp b/core/rend/vulkan/shaders.cpp index 12a5c5b27..497f62753 100644 --- a/core/rend/vulkan/shaders.cpp +++ b/core/rend/vulkan/shaders.cpp @@ -73,6 +73,7 @@ static const char FragmentShaderSource[] = R"(#version 450 #define pp_BumpMap %d #define ColorClamping %d #define pp_TriLinear %d +#define pp_Palette %d #define PI 3.1415926 layout (location = 0) out vec4 FragColor; @@ -98,11 +99,15 @@ layout (push_constant) uniform pushBlock { vec4 clipTest; float trilinearAlpha; + float palette_index; } pushConstants; #if pp_Texture == 1 layout (set = 1, binding = 0) uniform sampler2D tex; #endif +#if pp_Palette == 1 +layout (set = 0, binding = 3) uniform sampler2D palette; +#endif // Vertex input layout (location = 0) INTERPOLATION in lowp vec4 vtx_base; @@ -132,6 +137,16 @@ vec4 colorClamp(vec4 col) #endif } +#if pp_Palette == 1 + +vec4 palettePixel(sampler2D tex, vec2 coords) +{ + vec4 c = vec4(texture(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0); + return texture(palette, c.xy); +} + +#endif + void main() { // Clip inside the box @@ -150,7 +165,11 @@ void main() #endif #if pp_Texture == 1 { - vec4 texcol = texture(tex, vtx_uv); + #if pp_Palette == 0 + vec4 texcol = texture(tex, vtx_uv); + #else + vec4 texcol = palettePixel(tex, vtx_uv); + #endif #if pp_BumpMap == 1 float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; @@ -338,7 +357,7 @@ vk::UniqueShaderModule ShaderManager::compileShader(const FragmentShaderParams& sprintf(buf, FragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha, (int)params.texture, (int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog, (int)params.gouraud, - (int)params.bumpmap, (int)params.clamping, (int)params.trilinear); + (int)params.bumpmap, (int)params.clamping, (int)params.trilinear, (int)params.palette); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf); } diff --git a/core/rend/vulkan/shaders.h b/core/rend/vulkan/shaders.h index 93f62d381..ddb7b78f0 100644 --- a/core/rend/vulkan/shaders.h +++ b/core/rend/vulkan/shaders.h @@ -47,13 +47,15 @@ struct FragmentShaderParams bool bumpmap; bool clamping; bool trilinear; + bool palette; u32 hash() { return ((u32)alphaTest) | ((u32)insideClipTest << 1) | ((u32)useAlpha << 2) | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) - | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13); + | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13) + | ((u32)palette << 14); } }; diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index 25d01c5cc..d022f49d4 100644 --- a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -145,7 +145,10 @@ public: textureCache.CollectCleanup(); if (result) + { CheckFogTexture(); + CheckPaletteTexture(); + } else texCommandPool.EndFrame(); @@ -205,7 +208,7 @@ public: else drawer = &screenDrawer; - drawer->Draw(fogTexture.get()); + drawer->Draw(fogTexture.get(), paletteTexture.get()); drawer->EndRenderPass(); @@ -270,8 +273,29 @@ private: fogTexture->SetCommandBuffer(nullptr); } + void CheckPaletteTexture() + { + if (!paletteTexture) + { + paletteTexture = std::unique_ptr(new Texture()); + paletteTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice()); + paletteTexture->SetDevice(GetContext()->GetDevice()); + paletteTexture->tex_type = TextureType::_8888; + palette_updated = true; + } + if (!palette_updated) + return; + palette_updated = false; + + paletteTexture->SetCommandBuffer(texCommandPool.Allocate()); + + paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false); + + paletteTexture->SetCommandBuffer(nullptr); + } std::unique_ptr fogTexture; + std::unique_ptr paletteTexture; CommandPool texCommandPool; SamplerManager samplerManager;