handle some palette textures on the GPU

get rid of PixelPacker templates
no need for GLSL precision qualifier in GL4
This commit is contained in:
Flyinghead 2020-07-08 18:17:15 +02:00
parent fbe9cc3936
commit 7449230af8
23 changed files with 598 additions and 221 deletions

View File

@ -21,6 +21,7 @@ u32 palette16_ram[1024];
u32 palette32_ram[1024]; u32 palette32_ram[1024];
u32 pal_hash_256[4]; u32 pal_hash_256[4];
u32 pal_hash_16[64]; u32 pal_hash_16[64];
bool palette_updated;
// Rough approximation of LoD bias from D adjust param, only used to increase LoD // Rough approximation of LoD bias from D adjust param, only used to increase LoD
const std::array<f32, 16> D_Adjust_LoD_Bias = { const std::array<f32, 16> D_Adjust_LoD_Bias = {
@ -86,8 +87,8 @@ void palette_update()
{ {
if (!pal_needs_update) if (!pal_needs_update)
return; return;
pal_needs_update = false;
pal_needs_update=false; palette_updated = true;
switch(PAL_RAM_CTRL&3) switch(PAL_RAM_CTRL&3)
{ {
@ -313,18 +314,20 @@ struct PvrTexInfo
TexConvFP32 *PL32; TexConvFP32 *PL32;
TexConvFP32 *TW32; TexConvFP32 *TW32;
TexConvFP32 *VQ32; TexConvFP32 *VQ32;
// Conversion to 8 bpp (palette)
TexConvFP8 *TW8;
}; };
static const PvrTexInfo format[8] = static const PvrTexInfo format[8] =
{ // name bpp Final format Planar Twiddled VQ Planar(32b) Twiddled(32b) VQ (32b) { // name bpp Final format Planar Twiddled VQ Planar(32b) Twiddled(32b) VQ (32b) Palette (8b)
{"1555", 16, TextureType::_5551, tex1555_PL, tex1555_TW, tex1555_VQ, tex1555_PL32, tex1555_TW32, tex1555_VQ32 }, //1555 {"1555", 16, TextureType::_5551, tex1555_PL, tex1555_TW, tex1555_VQ, tex1555_PL32, tex1555_TW32, tex1555_VQ32, nullptr }, //1555
{"565", 16, TextureType::_565, tex565_PL, tex565_TW, tex565_VQ, tex565_PL32, tex565_TW32, tex565_VQ32 }, //565 {"565", 16, TextureType::_565, tex565_PL, tex565_TW, tex565_VQ, tex565_PL32, tex565_TW32, tex565_VQ32, nullptr }, //565
{"4444", 16, TextureType::_4444, tex4444_PL, tex4444_TW, tex4444_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32 }, //4444 {"4444", 16, TextureType::_4444, tex4444_PL, tex4444_TW, tex4444_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32, nullptr }, //4444
{"yuv", 16, TextureType::_8888, NULL, NULL, NULL, texYUV422_PL, texYUV422_TW, texYUV422_VQ }, //yuv {"yuv", 16, TextureType::_8888, nullptr, nullptr, nullptr, texYUV422_PL, texYUV422_TW, texYUV422_VQ, nullptr }, //yuv
{"bumpmap", 16, TextureType::_4444, texBMP_PL, texBMP_TW, texBMP_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32 }, //bump map {"bumpmap", 16, TextureType::_4444, texBMP_PL, texBMP_TW, texBMP_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32, nullptr }, //bump map
{"pal4", 4, TextureType::_5551, 0, texPAL4_TW, texPAL4_VQ, NULL, texPAL4_TW32, texPAL4_VQ32 }, //pal4 {"pal4", 4, TextureType::_5551, nullptr, texPAL4_TW, texPAL4_VQ, nullptr, texPAL4_TW32, texPAL4_VQ32, texPAL4PT_TW }, //pal4
{"pal8", 8, TextureType::_5551, 0, texPAL8_TW, texPAL8_VQ, NULL, texPAL8_TW32, texPAL8_VQ32 }, //pal8 {"pal8", 8, TextureType::_5551, nullptr, texPAL8_TW, texPAL8_VQ, nullptr, texPAL8_TW32, texPAL8_VQ32, texPAL8PT_TW }, //pal8
{"ns/1555", 0}, // Not supported (1555) {"ns/1555", 0}, // Not supported (1555)
}; };
static const u32 VQMipPoint[11] = static const u32 VQMipPoint[11] =
@ -363,20 +366,20 @@ static const TextureType PAL_TYPE[4] = {
void BaseTextureCacheData::PrintTextureName() void BaseTextureCacheData::PrintTextureName()
{ {
char str[512]; char str[512];
sprintf(str, "Texture: %s ", GetPixelFormatName()); sprintf(str, "Texture: %s", GetPixelFormatName());
if (tcw.VQ_Comp) if (tcw.VQ_Comp)
strcat(str, " VQ"); strcat(str, " VQ");
else if (tcw.ScanOrder == 0)
if (tcw.ScanOrder==0)
strcat(str, " TW"); strcat(str, " TW");
else if (tcw.StrideSel)
if (tcw.MipMapped)
strcat(str, " MM");
if (tcw.StrideSel)
strcat(str, " Stride"); strcat(str, " Stride");
if (tcw.ScanOrder == 0 && tcw.MipMapped)
strcat(str, " MM");
if (tsp.FilterMode != 0)
strcat(str, " Bilinear");
sprintf(str + strlen(str), " %dx%d @ 0x%X", 8 << tsp.TexU, 8 << tsp.TexV, tcw.TexAddr << 3); sprintf(str + strlen(str), " %dx%d @ 0x%X", 8 << tsp.TexU, 8 << tsp.TexV, tcw.TexAddr << 3);
std::string id = GetId(); std::string id = GetId();
sprintf(str + strlen(str), " id=%s", id.c_str()); sprintf(str + strlen(str), " id=%s", id.c_str());
@ -385,9 +388,15 @@ void BaseTextureCacheData::PrintTextureName()
//true if : dirty or paletted texture and hashes don't match //true if : dirty or paletted texture and hashes don't match
bool BaseTextureCacheData::NeedsUpdate() { bool BaseTextureCacheData::NeedsUpdate() {
bool rc = dirty bool rc = dirty;
|| (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect]) if (tex_type != TextureType::_8)
|| (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4]); {
if (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect])
rc = true;
else if (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4])
rc = true;
}
return rc; return rc;
} }
@ -433,12 +442,22 @@ void BaseTextureCacheData::Create()
else if (tex->bpp == 8) else if (tex->bpp == 8)
palette_index = (tcw.PalSelect >> 4) << 8; palette_index = (tcw.PalSelect >> 4) << 8;
texconv8 = nullptr;
if (tcw.ScanOrder && (tex->PL || tex->PL32)) if (tcw.ScanOrder && (tex->PL || tex->PL32))
{ {
//Texture is stored 'planar' in memory, no deswizzle is needed //Texture is stored 'planar' in memory, no deswizzle is needed
//verify(tcw.VQ_Comp==0); //verify(tcw.VQ_Comp==0);
if (tcw.VQ_Comp != 0) if (tcw.VQ_Comp != 0)
{
WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)"); WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)");
tcw.VQ_Comp = 0;
}
if (tcw.MipMapped != 0)
{
WARN_LOG(RENDERER, "Warning: planar texture with mipmaps (invalid)");
tcw.MipMapped = 0;
}
//Planar textures support stride selection, mostly used for non power of 2 textures (videos) //Planar textures support stride selection, mostly used for non power of 2 textures (videos)
int stride = 0; int stride = 0;
@ -455,6 +474,8 @@ void BaseTextureCacheData::Create()
} }
else else
{ {
tcw.ScanOrder = 0;
tcw.StrideSel = 0;
// Quake 3 Arena uses one // Quake 3 Arena uses one
if (tcw.MipMapped) if (tcw.MipMapped)
// Mipmapped texture must be square and TexV is ignored // Mipmapped texture must be square and TexV is ignored
@ -478,6 +499,7 @@ void BaseTextureCacheData::Create()
texconv = tex->TW; texconv = tex->TW;
texconv32 = tex->TW32; texconv32 = tex->TW32;
size = w * h * tex->bpp / 8; size = w * h * tex->bpp / 8;
texconv8 = tex->TW8;
} }
} }
} }
@ -502,9 +524,14 @@ void BaseTextureCacheData::Update()
bool has_alpha = false; bool has_alpha = false;
if (IsPaletted()) if (IsPaletted())
{ {
tex_type = PAL_TYPE[PAL_RAM_CTRL&3]; if (IsGpuHandledPaletted(tsp, tcw))
if (tex_type != TextureType::_565) tex_type = TextureType::_8;
has_alpha = true; else
{
tex_type = PAL_TYPE[PAL_RAM_CTRL&3];
if (tex_type != TextureType::_565)
has_alpha = true;
}
// Get the palette hash to check for future updates // Get the palette hash to check for future updates
if (tcw.PixelFmt == PixelPal4) if (tcw.PixelFmt == PixelPal4)
@ -547,6 +574,7 @@ void BaseTextureCacheData::Update()
PixelBuffer<u16> pb16; PixelBuffer<u16> pb16;
PixelBuffer<u32> pb32; PixelBuffer<u32> pb32;
PixelBuffer<u8> pb8;
// Figure out if we really need to use a 32-bit pixel buffer // Figure out if we really need to use a 32-bit pixel buffer
bool textureUpscaling = settings.rend.TextureUpscale > 1 bool textureUpscaling = settings.rend.TextureUpscale > 1
@ -623,6 +651,26 @@ void BaseTextureCacheData::Update()
} }
temp_tex_buffer = pb32.data(); temp_tex_buffer = pb32.data();
} }
else if (texconv8 != NULL && tex_type == TextureType::_8)
{
if (mipmapped)
{
pb8.init(w, h, true);
for (u32 i = 0; i <= tsp.TexU + 3u; i++)
{
pb8.set_mipmap(i);
u32 vram_addr = sa_tex + OtherMipPoint[i] * tex->bpp / 8;
texconv8(&pb8, &vram[vram_addr], 1 << i, 1 << i);
}
pb8.set_mipmap(0);
}
else
{
pb8.init(w, h);
texconv8(&pb8, &vram[sa], stride, h);
}
temp_tex_buffer = pb8.data();
}
else if (texconv != NULL) else if (texconv != NULL)
{ {
if (mipmapped) if (mipmapped)

View File

@ -16,6 +16,7 @@ extern bool pal_needs_update,fog_needs_update;
extern u32 pal_hash_256[4]; extern u32 pal_hash_256[4];
extern u32 pal_hash_16[64]; extern u32 pal_hash_16[64];
extern bool KillTex; extern bool KillTex;
extern bool palette_updated;
extern u32 detwiddle[2][11][1024]; extern u32 detwiddle[2][11][1024];
@ -151,47 +152,22 @@ void palette_update();
#define ARGB8888_32( word ) ( ((word >> 0) & 0xFF000000) | (((word >> 16) & 0xFF) << 0) | (((word >> 8) & 0xFF) << 8) | ((word & 0xFF) << 16) ) #define ARGB8888_32( word ) ( ((word >> 0) & 0xFF000000) | (((word >> 16) & 0xFF) << 0) | (((word >> 8) & 0xFF) << 8) | ((word & 0xFF) << 16) )
template<class PixelPacker> inline static u32 YUV422(s32 Y,s32 Yu,s32 Yv)
__forceinline u32 YUV422(s32 Y,s32 Yu,s32 Yv)
{ {
Yu-=128; Yu-=128;
Yv-=128; Yv-=128;
//s32 B = (76283*(Y - 16) + 132252*(Yu - 128))>>16;
//s32 G = (76283*(Y - 16) - 53281 *(Yv - 128) - 25624*(Yu - 128))>>16;
//s32 R = (76283*(Y - 16) + 104595*(Yv - 128))>>16;
s32 R = Y + Yv*11/8; // Y + (Yv-128) * (11/8) ? s32 R = Y + Yv*11/8; // Y + (Yv-128) * (11/8) ?
s32 G = Y - (Yu*11 + Yv*22)/32; // Y - (Yu-128) * (11/8) * 0.25 - (Yv-128) * (11/8) * 0.5 ? s32 G = Y - (Yu*11 + Yv*22)/32; // Y - (Yu-128) * (11/8) * 0.25 - (Yv-128) * (11/8) * 0.5 ?
s32 B = Y + Yu*110/64; // Y + (Yu-128) * (11/8) * 1.25 ? s32 B = Y + Yu*110/64; // Y + (Yu-128) * (11/8) * 1.25 ?
return PixelPacker::packRGB(clamp(0,255,R),clamp(0,255,G),clamp(0,255,B)); return clamp(0, 255, R) | (clamp(0, 255, G) << 8) | (clamp(0, 255, B) << 16) | 0xFF000000;
} }
#define twop(x,y,bcx,bcy) (detwiddle[0][bcy][x]+detwiddle[1][bcx][y]) #define twop(x,y,bcx,bcy) (detwiddle[0][bcy][x]+detwiddle[1][bcx][y])
//pixel packers !
struct pp_565
{
__forceinline static u32 packRGB(u8 R,u8 G,u8 B)
{
R>>=3;
G>>=2;
B>>=3;
return (R<<11) | (G<<5) | (B<<0);
}
};
struct pp_8888
{
__forceinline static u32 packRGB(u8 R,u8 G,u8 B)
{
return (R << 0) | (G << 8) | (B << 16) | 0xFF000000;
}
};
//pixel convertors ! //pixel convertors !
#define pixelcvt_start_base(name,x,y,type) template<class PixelPacker> \ #define pixelcvt_start_base(name,x,y,type) \
struct name \ struct name \
{ \ { \
static const u32 xpp=x;\ static const u32 xpp=x;\
@ -202,7 +178,7 @@ struct pp_8888
#define pixelcvt_start(name,x,y) pixelcvt_start_base(name, x, y, u16) #define pixelcvt_start(name,x,y) pixelcvt_start_base(name, x, y, u16)
#define pixelcvt32_start(name,x,y) pixelcvt_start_base(name, x, y, u32) #define pixelcvt32_start(name,x,y) pixelcvt_start_base(name, x, y, u32)
#define pixelcvt_size_start(name, x, y) template<class PixelPacker, class pixel_size> \ #define pixelcvt_size_start(name, x, y) template<class pixel_size> \
struct name \ struct name \
{ \ { \
static const u32 xpp=x;\ static const u32 xpp=x;\
@ -312,9 +288,9 @@ pixelcvt32_start(convYUV_PL,4,1)
s32 Yv = (p_in[0]>>16) &255; //p_in[2] s32 Yv = (p_in[0]>>16) &255; //p_in[2]
//0,0 //0,0
pb->prel(0,YUV422<PixelPacker>(Y0,Yu,Yv)); pb->prel(0,YUV422(Y0,Yu,Yv));
//1,0 //1,0
pb->prel(1,YUV422<PixelPacker>(Y1,Yu,Yv)); pb->prel(1,YUV422(Y1,Yu,Yv));
//next 4 bytes //next 4 bytes
p_in+=1; p_in+=1;
@ -325,9 +301,9 @@ pixelcvt32_start(convYUV_PL,4,1)
Yv = (p_in[0]>>16) &255; //p_in[2] Yv = (p_in[0]>>16) &255; //p_in[2]
//0,0 //0,0
pb->prel(2,YUV422<PixelPacker>(Y0,Yu,Yv)); pb->prel(2,YUV422(Y0,Yu,Yv));
//1,0 //1,0
pb->prel(3,YUV422<PixelPacker>(Y1,Yu,Yv)); pb->prel(3,YUV422(Y1,Yu,Yv));
} }
pixelcvt_end; pixelcvt_end;
@ -432,9 +408,9 @@ pixelcvt32_start(convYUV_TW,2,2)
s32 Yv = (p_in[2]>>0) &255; //p_in[2] s32 Yv = (p_in[2]>>0) &255; //p_in[2]
//0,0 //0,0
pb->prel(0,0,YUV422<PixelPacker>(Y0,Yu,Yv)); pb->prel(0,0,YUV422(Y0,Yu,Yv));
//1,0 //1,0
pb->prel(1,0,YUV422<PixelPacker>(Y1,Yu,Yv)); pb->prel(1,0,YUV422(Y1,Yu,Yv));
//next 4 bytes //next 4 bytes
//p_in+=2; //p_in+=2;
@ -445,9 +421,9 @@ pixelcvt32_start(convYUV_TW,2,2)
Yv = (p_in[3]>>0) &255; //p_in[2] Yv = (p_in[3]>>0) &255; //p_in[2]
//0,1 //0,1
pb->prel(0,1,YUV422<PixelPacker>(Y0,Yu,Yv)); pb->prel(0,1,YUV422(Y0,Yu,Yv));
//1,1 //1,1
pb->prel(1,1,YUV422<PixelPacker>(Y1,Yu,Yv)); pb->prel(1,1,YUV422(Y1,Yu,Yv));
} }
pixelcvt_end; pixelcvt_end;
@ -479,6 +455,33 @@ pixelcvt_size_start(convPAL4_TW,4,4)
} }
pixelcvt_end; pixelcvt_end;
// Palette 4bpp -> 8bpp
// Expands one 4x4 block of packed 4-bit palette indices into one index per
// pixel. No palette lookup is done here: the raw index (0-15) is what gets
// written to the pixel buffer.
pixelcvt_size_start(convPAL4PT_TW, 4, 4)
{
	const u8 *src = (const u8 *)data;

	// Eight source bytes, two pixels each. For byte i, bits 0 and 2 of i form
	// the first prel() coordinate and bit 1 selects the second (0 or 2). The
	// low nibble is written at that position, the high nibble one step further
	// along the second coordinate.
	for (u32 i = 0; i < 8; i++)
	{
		const u8 packed = src[i];
		const u32 x = (i & 1) | ((i & 4) >> 1);
		const u32 y = i & 2;
		pb->prel(x, y, packed & 0xF);
		pb->prel(x, y + 1, (packed >> 4) & 0xF);
	}
}
pixelcvt_end;
pixelcvt_size_start(convPAL8_TW,2,4) pixelcvt_size_start(convPAL8_TW,2,4)
{ {
u8* p_in=(u8*)data; u8* p_in=(u8*)data;
@ -496,6 +499,24 @@ pixelcvt_size_start(convPAL8_TW,2,4)
} }
pixelcvt_end; pixelcvt_end;
// Palette 8bpp -> 8bpp (untwiddle only)
// Copies the eight palette indices of one 2x4 block straight into the pixel
// buffer; the bytes are only reordered, never translated through a palette.
pixelcvt_size_start(convPAL8PT_TW, 2, 4)
{
	const u8 *src = (const u8 *)data;

	// For byte i, bit 1 of i is the first prel() coordinate, while bits 0 and 2
	// together form the second one (0..3).
	for (u32 i = 0; i < 8; i++)
	{
		const u32 x = (i >> 1) & 1;
		const u32 y = (i & 1) | ((i & 4) >> 1);
		pb->prel(x, y, src[i]);
	}
}
pixelcvt_end;
//handler functions //handler functions
template<class PixelConvertor, class pixel_type> template<class PixelConvertor, class pixel_type>
void texture_PL(PixelBuffer<pixel_type>* pb,u8* p_in,u32 Width,u32 Height) void texture_PL(PixelBuffer<pixel_type>* pb,u8* p_in,u32 Width,u32 Height)
@ -565,74 +586,51 @@ void texture_VQ(PixelBuffer<pixel_type>* pb,u8* p_in,u32 Width,u32 Height)
} }
} }
//We ask the compiler to generate the templates here
//;)
//planar formats !
template void texture_PL<conv565_PL<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_PL<conv1555_PL<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_PL<conv4444_PL<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_PL<convYUV_PL<pp_8888>, u32>(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);
//twiddled formats !
template void texture_TW<conv565_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_TW<conv1555_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_TW<conv4444_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_TW<convYUV_TW<pp_8888>, u32>(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_TW<convPAL4_TW<pp_565, u16>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_TW<convPAL8_TW<pp_565, u16>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_TW<convPAL4_TW<pp_8888, u32>, u32>(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_TW<convPAL8_TW<pp_8888, u32>, u32>(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);
//VQ formats !
template void texture_VQ<conv565_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_VQ<conv1555_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_VQ<conv4444_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
template void texture_VQ<convYUV_TW<pp_8888>, u32>(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);
//Planar //Planar
#define tex565_PL texture_PL<conv565_PL<pp_565>, u16> #define tex565_PL texture_PL<conv565_PL, u16>
#define tex1555_PL texture_PL<conv1555_PL<pp_565>, u16> #define tex1555_PL texture_PL<conv1555_PL, u16>
#define tex4444_PL texture_PL<conv4444_PL<pp_565>, u16> #define tex4444_PL texture_PL<conv4444_PL, u16>
#define texYUV422_PL texture_PL<convYUV_PL<pp_8888>, u32> #define texYUV422_PL texture_PL<convYUV_PL, u32>
#define texBMP_PL tex4444_PL #define texBMP_PL tex4444_PL
#define tex565_PL32 texture_PL<conv565_PL32<pp_8888>, u32> #define tex565_PL32 texture_PL<conv565_PL32, u32>
#define tex1555_PL32 texture_PL<conv1555_PL32<pp_8888>, u32> #define tex1555_PL32 texture_PL<conv1555_PL32, u32>
#define tex4444_PL32 texture_PL<conv4444_PL32<pp_8888>, u32> #define tex4444_PL32 texture_PL<conv4444_PL32, u32>
//Twiddle //Twiddle
#define tex565_TW texture_TW<conv565_TW<pp_565>, u16> #define tex565_TW texture_TW<conv565_TW, u16>
#define tex1555_TW texture_TW<conv1555_TW<pp_565>, u16> #define tex1555_TW texture_TW<conv1555_TW, u16>
#define tex4444_TW texture_TW<conv4444_TW<pp_565>, u16> #define tex4444_TW texture_TW<conv4444_TW, u16>
#define texYUV422_TW texture_TW<convYUV_TW<pp_8888>, u32> #define texYUV422_TW texture_TW<convYUV_TW, u32>
#define texBMP_TW tex4444_TW #define texBMP_TW tex4444_TW
#define texPAL4_TW texture_TW<convPAL4_TW<pp_565, u16>, u16> #define texPAL4_TW texture_TW<convPAL4_TW<u16>, u16>
#define texPAL8_TW texture_TW<convPAL8_TW<pp_565, u16>, u16> #define texPAL8_TW texture_TW<convPAL8_TW<u16>, u16>
#define texPAL4_TW32 texture_TW<convPAL4_TW<pp_8888, u32>, u32> #define texPAL4_TW32 texture_TW<convPAL4_TW<u32>, u32>
#define texPAL8_TW32 texture_TW<convPAL8_TW<pp_8888, u32>, u32> #define texPAL8_TW32 texture_TW<convPAL8_TW<u32>, u32>
#define texPAL4PT_TW texture_TW<convPAL4PT_TW<u8>, u8>
#define texPAL8PT_TW texture_TW<convPAL8PT_TW<u8>, u8>
#define tex565_TW32 texture_TW<conv565_TW32<pp_8888>, u32> #define tex565_TW32 texture_TW<conv565_TW32, u32>
#define tex1555_TW32 texture_TW<conv1555_TW32<pp_8888>, u32> #define tex1555_TW32 texture_TW<conv1555_TW32, u32>
#define tex4444_TW32 texture_TW<conv4444_TW32<pp_8888>, u32> #define tex4444_TW32 texture_TW<conv4444_TW32, u32>
//VQ //VQ
#define tex565_VQ texture_VQ<conv565_TW<pp_565>, u16> #define tex565_VQ texture_VQ<conv565_TW, u16>
#define tex1555_VQ texture_VQ<conv1555_TW<pp_565>, u16> #define tex1555_VQ texture_VQ<conv1555_TW, u16>
#define tex4444_VQ texture_VQ<conv4444_TW<pp_565>, u16> #define tex4444_VQ texture_VQ<conv4444_TW, u16>
#define texYUV422_VQ texture_VQ<convYUV_TW<pp_8888>, u32> #define texYUV422_VQ texture_VQ<convYUV_TW, u32>
#define texBMP_VQ tex4444_VQ #define texBMP_VQ tex4444_VQ
// According to the documentation, a texture cannot be compressed and use // According to the documentation, a texture cannot be compressed and use
// a palette at the same time. However the hardware displays them // a palette at the same time. However the hardware displays them
// just fine. // just fine.
#define texPAL4_VQ texture_VQ<convPAL4_TW<pp_565, u16>, u16> #define texPAL4_VQ texture_VQ<convPAL4_TW<u16>, u16>
#define texPAL8_VQ texture_VQ<convPAL8_TW<pp_565, u16>, u16> #define texPAL8_VQ texture_VQ<convPAL8_TW<u16>, u16>
#define tex565_VQ32 texture_VQ<conv565_TW32<pp_8888>, u32> #define tex565_VQ32 texture_VQ<conv565_TW32, u32>
#define tex1555_VQ32 texture_VQ<conv1555_TW32<pp_8888>, u32> #define tex1555_VQ32 texture_VQ<conv1555_TW32, u32>
#define tex4444_VQ32 texture_VQ<conv4444_TW32<pp_8888>, u32> #define tex4444_VQ32 texture_VQ<conv4444_TW32, u32>
#define texPAL4_VQ32 texture_VQ<convPAL4_TW<pp_8888, u32>, u32> #define texPAL4_VQ32 texture_VQ<convPAL4_TW<u32>, u32>
#define texPAL8_VQ32 texture_VQ<convPAL8_TW<pp_8888, u32>, u32> #define texPAL8_VQ32 texture_VQ<convPAL8_TW<u32>, u32>
bool VramLockedWriteOffset(size_t offset); bool VramLockedWriteOffset(size_t offset);
void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha); void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha);
@ -640,6 +638,7 @@ void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool
struct PvrTexInfo; struct PvrTexInfo;
template <class pixel_type> class PixelBuffer; template <class pixel_type> class PixelBuffer;
typedef void TexConvFP(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height); typedef void TexConvFP(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
typedef void TexConvFP8(PixelBuffer<u8>* pb, u8* p_in, u32 Width, u32 Height);
typedef void TexConvFP32(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height); typedef void TexConvFP32(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);
enum class TextureType { _565, _5551, _4444, _8888, _8 }; enum class TextureType { _565, _5551, _4444, _8888, _8 };
@ -660,8 +659,9 @@ public:
u32 size; //size, in bytes, in vram u32 size; //size, in bytes, in vram
const PvrTexInfo* tex; const PvrTexInfo* tex;
TexConvFP* texconv; TexConvFP *texconv;
TexConvFP32* texconv32; TexConvFP32 *texconv32;
TexConvFP8 *texconv8;
u32 dirty; u32 dirty;
vram_block* lock_block; vram_block* lock_block;
@ -722,6 +722,18 @@ public:
bool NeedsUpdate(); bool NeedsUpdate();
virtual bool Delete(); virtual bool Delete();
virtual ~BaseTextureCacheData() {} virtual ~BaseTextureCacheData() {}
// Tells whether a texture described by tsp/tcw is a paletted texture whose
// palette is applied on the GPU rather than during texture conversion.
static bool IsGpuHandledPaletted(TSP tsp, TCW tcw)
{
	// Only the 4-bit and 8-bit palette pixel formats are candidates.
	if (tcw.PixelFmt != PixelPal4 && tcw.PixelFmt != PixelPal8)
		return false;

	// Enabling texture upscaling or texture dumping disables this mode.
	if (settings.rend.TextureUpscale != 1 || settings.rend.DumpTextures)
		return false;

	// Currently limited to nearest filtering, with neither mipmaps nor VQ compression.
	return tsp.FilterMode == 0 && !tcw.MipMapped && !tcw.VQ_Comp;
}
}; };
template<typename Texture> template<typename Texture>
@ -732,12 +744,13 @@ public:
Texture *getTextureCacheData(TSP tsp, TCW tcw) Texture *getTextureCacheData(TSP tsp, TCW tcw)
{ {
u64 key = tsp.full & TSPTextureCacheMask.full; u64 key = tsp.full & TSPTextureCacheMask.full;
if (tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8) if ((tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8)
&& !BaseTextureCacheData::IsGpuHandledPaletted(tsp, tcw))
// Paletted textures have a palette selection that must be part of the key // Paletted textures have a palette selection that must be part of the key
// We also add the palette type to the key to avoid thrashing the cache // We also add the palette type to the key to avoid thrashing the cache
// when the palette type is changed. If the palette type is changed back in the future, // when the palette type is changed. If the palette type is changed back in the future,
// this texture will still be available. // this texture will still be available.
key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6); key |= ((u64)tcw.full << 32) | ((PAL_RAM_CTRL & 3) << 6) | ((tsp.FilterMode != 0) << 8);
else else
key |= (u64)(tcw.full & TCWTextureCacheMask.full) << 32; key |= (u64)(tcw.full & TCWTextureCacheMask.full) << 32;
@ -817,6 +830,7 @@ static inline void MakeFogTexture(u8 *tex_data)
tex_data[i + 128] = fog_table[i * 4 + 1]; tex_data[i + 128] = fog_table[i * 4 + 1];
} }
} }
void dump_screenshot(u8 *buffer, u32 width, u32 height, bool alpha = false, u32 rowPitch = 0, bool invertY = true); void dump_screenshot(u8 *buffer, u32 width, u32 height, bool alpha = false, u32 rowPitch = 0, bool invertY = true);
extern const std::array<f32, 16> D_Adjust_LoD_Bias; extern const std::array<f32, 16> D_Adjust_LoD_Bias;

View File

@ -25,6 +25,7 @@ struct gl4PipelineShader
GLint trilinear_alpha; GLint trilinear_alpha;
GLint fog_clamp_min, fog_clamp_max; GLint fog_clamp_min, fog_clamp_max;
GLint normal_matrix; GLint normal_matrix;
GLint palette_index;
bool cp_AlphaTest; bool cp_AlphaTest;
bool pp_InsideClipping; bool pp_InsideClipping;
@ -39,6 +40,7 @@ struct gl4PipelineShader
bool pp_Gouraud; bool pp_Gouraud;
bool pp_BumpMap; bool pp_BumpMap;
bool fog_clamping; bool fog_clamping;
bool palette;
}; };
@ -90,8 +92,8 @@ extern GLuint depth_fbo;
\n\ \n\
layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\
struct Pixel { \n\ struct Pixel { \n\
highp vec4 color; \n\ vec4 color; \n\
highp float depth; \n\ float depth; \n\
uint seq_num; \n\ uint seq_num; \n\
uint next; \n\ uint next; \n\
}; \n\ }; \n\
@ -122,7 +124,7 @@ uint getNextPixelIndex() \n\
\n\ \n\
void setFragDepth(void) \n\ void setFragDepth(void) \n\
{ \n\ { \n\
highp float w = 100000.0 * gl_FragCoord.w; \n\ float w = 100000.0 * gl_FragCoord.w; \n\
gl_FragDepth = log2(1.0 + w) / 34.0; \n\ gl_FragDepth = log2(1.0 + w) / 34.0; \n\
} \n\ } \n\
struct PolyParam { \n\ struct PolyParam { \n\
@ -255,6 +257,7 @@ extern struct gl4ShaderUniforms_t
int width; int width;
int height; int height;
} base_clipping; } base_clipping;
float palette_index;
void setUniformArray(GLint location, int v0, int v1) void setUniformArray(GLint location, int v0, int v1)
{ {
@ -309,6 +312,9 @@ extern struct gl4ShaderUniforms_t
if (s->normal_matrix != -1) if (s->normal_matrix != -1)
glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]); glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]);
if (s->palette_index != -1)
glUniform1f(s->palette_index, palette_index);
} }
} gl4ShaderUniforms; } gl4ShaderUniforms;

View File

@ -24,7 +24,8 @@ GLuint depthSaveTexId;
static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset,
u32 pp_FogCtrl, bool pp_TwoVolumes, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, Pass pass) u32 pp_FogCtrl, bool pp_TwoVolumes, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping,
bool palette, Pass pass)
{ {
u32 rv=0; u32 rv=0;
@ -40,6 +41,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin
rv <<= 1; rv |= pp_Gouraud; rv <<= 1; rv |= pp_Gouraud;
rv <<= 1; rv |= pp_BumpMap; rv <<= 1; rv |= pp_BumpMap;
rv <<= 1; rv |= fog_clamping; rv <<= 1; rv |= fog_clamping;
rv <<= 1; rv |= palette;
rv <<= 2; rv |= (int)pass; rv <<= 2; rv |= (int)pass;
gl4PipelineShader *shader = &gl4.shaders[rv]; gl4PipelineShader *shader = &gl4.shaders[rv];
@ -57,6 +59,7 @@ static gl4PipelineShader *gl4GetProgram(bool cp_AlphaTest, bool pp_InsideClippin
shader->pp_Gouraud = pp_Gouraud; shader->pp_Gouraud = pp_Gouraud;
shader->pp_BumpMap = pp_BumpMap; shader->pp_BumpMap = pp_BumpMap;
shader->fog_clamping = fog_clamping; shader->fog_clamping = fog_clamping;
shader->palette = palette;
shader->pass = pass; shader->pass = pass;
gl4CompilePipelineShader(shader); gl4CompilePipelineShader(shader);
} }
@ -99,6 +102,7 @@ static void SetGPState(const PolyParam* gp)
int clip_rect[4] = {}; int clip_rect[4] = {};
TileClipping clipmode = GetTileClip(gp->tileclip, ViewportMatrix, clip_rect); TileClipping clipmode = GetTileClip(gp->tileclip, ViewportMatrix, clip_rect);
bool palette = false;
if (pass == Pass::Depth) if (pass == Pass::Depth)
{ {
@ -114,6 +118,7 @@ static void SetGPState(const PolyParam* gp)
false, false,
false, false,
false, false,
false,
pass); pass);
} }
else else
@ -123,6 +128,7 @@ static void SetGPState(const PolyParam* gp)
bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff); bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff);
int fog_ctrl = settings.rend.Fog ? gp->tsp.FogCtrl : 2; int fog_ctrl = settings.rend.Fog ? gp->tsp.FogCtrl : 2;
palette = BaseTextureCacheData::IsGpuHandledPaletted(gp->tsp, gp->tcw);
CurrentShader = gl4GetProgram(Type == ListType_Punch_Through ? 1 : 0, CurrentShader = gl4GetProgram(Type == ListType_Punch_Through ? 1 : 0,
clipmode == TileClipping::Inside, clipmode == TileClipping::Inside,
@ -136,10 +142,19 @@ static void SetGPState(const PolyParam* gp)
gp->pcw.Gouraud, gp->pcw.Gouraud,
gp->tcw.PixelFmt == PixelBumpMap, gp->tcw.PixelFmt == PixelBumpMap,
color_clamp, color_clamp,
palette,
pass); pass);
} }
glcache.UseProgram(CurrentShader->program); glcache.UseProgram(CurrentShader->program);
if (palette)
{
if (gp->tcw.PixelFmt == PixelPal4)
gl4ShaderUniforms.palette_index = float(gp->tcw.PalSelect << 4) / 1023.f;
else
gl4ShaderUniforms.palette_index = float((gp->tcw.PalSelect >> 4) << 8) / 1023.f;
}
gl4ShaderUniforms.tsp0 = gp->tsp; gl4ShaderUniforms.tsp0 = gp->tsp;
gl4ShaderUniforms.tsp1 = gp->tsp1; gl4ShaderUniforms.tsp1 = gp->tsp1;
gl4ShaderUniforms.tcw0 = gp->tcw; gl4ShaderUniforms.tcw0 = gp->tcw;
@ -671,6 +686,7 @@ static void gl4_draw_quad_texture(GLuint texture, float w, float h)
false, false,
false, false,
false, false,
false,
Pass::Color); Pass::Color);
glcache.UseProgram(CurrentShader->program); glcache.UseProgram(CurrentShader->program);
gl4ShaderUniforms.Set(CurrentShader); gl4ShaderUniforms.Set(CurrentShader);

View File

@ -14,23 +14,23 @@ static const char* VertexShaderSource = R"(#version 140
#endif #endif
/* Vertex constants*/ /* Vertex constants*/
uniform highp vec4 scale; uniform vec4 scale;
uniform highp mat4 normal_matrix; uniform mat4 normal_matrix;
/* Vertex input */ /* Vertex input */
in highp vec4 in_pos; in vec4 in_pos;
in lowp vec4 in_base; in vec4 in_base;
in lowp vec4 in_offs; in vec4 in_offs;
in mediump vec2 in_uv; in vec2 in_uv;
in lowp vec4 in_base1; in vec4 in_base1;
in lowp vec4 in_offs1; in vec4 in_offs1;
in mediump vec2 in_uv1; in vec2 in_uv1;
/* output */ /* output */
INTERPOLATION out lowp vec4 vtx_base; INTERPOLATION out vec4 vtx_base;
INTERPOLATION out lowp vec4 vtx_offs; INTERPOLATION out vec4 vtx_offs;
out mediump vec2 vtx_uv; out vec2 vtx_uv;
INTERPOLATION out lowp vec4 vtx_base1; INTERPOLATION out vec4 vtx_base1;
INTERPOLATION out lowp vec4 vtx_offs1; INTERPOLATION out vec4 vtx_offs1;
out mediump vec2 vtx_uv1; out vec2 vtx_uv1;
void main() void main()
{ {
vtx_base = in_base; vtx_base = in_base;
@ -62,6 +62,7 @@ R"(
#define pp_Gouraud %d #define pp_Gouraud %d
#define pp_BumpMap %d #define pp_BumpMap %d
#define FogClamping %d #define FogClamping %d
#define pp_Palette %d
#define PASS %d #define PASS %d
#define PI 3.1415926 #define PI 3.1415926
@ -86,19 +87,21 @@ out vec4 FragColor;
#endif #endif
/* Shader program params*/ /* Shader program params*/
uniform lowp float cp_AlphaTestValue; uniform float cp_AlphaTestValue;
uniform lowp vec4 pp_ClipTest; uniform vec4 pp_ClipTest;
uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; uniform vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT;
uniform highp float sp_FOG_DENSITY; uniform float sp_FOG_DENSITY;
uniform highp float shade_scale_factor; uniform float shade_scale_factor;
uniform sampler2D tex0, tex1; uniform sampler2D tex0, tex1;
layout(binding = 5) uniform sampler2D fog_table; layout(binding = 5) uniform sampler2D fog_table;
uniform int pp_Number; uniform int pp_Number;
uniform usampler2D shadow_stencil; uniform usampler2D shadow_stencil;
uniform sampler2D DepthTex; uniform sampler2D DepthTex;
uniform lowp float trilinear_alpha; uniform float trilinear_alpha;
uniform lowp vec4 fog_clamp_min; uniform vec4 fog_clamp_min;
uniform lowp vec4 fog_clamp_max; uniform vec4 fog_clamp_max;
uniform sampler2D palette;
uniform float palette_index;
uniform ivec2 blend_mode[2]; uniform ivec2 blend_mode[2];
#if pp_TwoVolumes == 1 #if pp_TwoVolumes == 1
@ -109,24 +112,24 @@ uniform int fog_control[2];
#endif #endif
/* Vertex input*/ /* Vertex input*/
INTERPOLATION in lowp vec4 vtx_base; INTERPOLATION in vec4 vtx_base;
INTERPOLATION in lowp vec4 vtx_offs; INTERPOLATION in vec4 vtx_offs;
in mediump vec2 vtx_uv; in vec2 vtx_uv;
INTERPOLATION in lowp vec4 vtx_base1; INTERPOLATION in vec4 vtx_base1;
INTERPOLATION in lowp vec4 vtx_offs1; INTERPOLATION in vec4 vtx_offs1;
in mediump vec2 vtx_uv1; in vec2 vtx_uv1;
lowp float fog_mode2(highp float w) float fog_mode2(float w)
{ {
highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999);
highp float exp = floor(log2(z)); float exp = floor(log2(z));
highp float m = z * 16.0 / pow(2.0, exp) - 16.0; float m = z * 16.0 / pow(2.0, exp) - 16.0;
float idx = floor(m) + exp * 16.0 + 0.5; float idx = floor(m) + exp * 16.0 + 0.5;
vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0));
return fog_coef.r; return fog_coef.r;
} }
highp vec4 fog_clamp(highp vec4 col) vec4 fog_clamp(vec4 col)
{ {
#if FogClamping == 1 #if FogClamping == 1
return clamp(col, fog_clamp_min, fog_clamp_max); return clamp(col, fog_clamp_min, fog_clamp_max);
@ -135,13 +138,23 @@ highp vec4 fog_clamp(highp vec4 col)
#endif #endif
} }
#if pp_Palette == 1
// Resolve a paletted texel. The red channel of tex carries the colour index
// (a normalized byte), which is offset by the palette_index uniform and used
// as the horizontal coordinate into the palette texture.
vec4 palettePixel(sampler2D tex, vec2 coords)
{
	// Normalized byte back to 0..255, then rescaled by 1/1023 and shifted by the palette bank offset
	float paletteCoord = texture(tex, coords).r * 255.0 / 1023.0 + palette_index;
	// The vertical coordinate is fixed at 0.5
	return texture(palette, vec2(paletteCoord, 0.5));
}
#endif
void main() void main()
{ {
setFragDepth(); setFragDepth();
#if PASS == PASS_OIT #if PASS == PASS_OIT
// Manual depth testing // Manual depth testing
highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r; float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r;
if (gl_FragDepth < frontDepth) if (gl_FragDepth < frontDepth)
discard; discard;
#endif #endif
@ -153,9 +166,9 @@ void main()
discard; discard;
#endif #endif
highp vec4 color = vtx_base; vec4 color = vtx_base;
lowp vec4 offset = vtx_offs; vec4 offset = vtx_offs;
mediump vec2 uv = vtx_uv; vec2 uv = vtx_uv;
bool area1 = false; bool area1 = false;
ivec2 cur_blend_mode = blend_mode[0]; ivec2 cur_blend_mode = blend_mode[0];
@ -190,14 +203,22 @@ void main()
#endif #endif
#if pp_Texture==1 #if pp_Texture==1
{ {
highp vec4 texcol; vec4 texcol;
if (area1) #if pp_Palette == 0
texcol = texture(tex1, uv); if (area1)
else texcol = texture(tex1, uv);
texcol = texture(tex0, uv); else
texcol = texture(tex0, uv);
#else
if (area1)
texcol = palettePixel(tex1, uv);
else
texcol = palettePixel(tex0, uv);
#endif
#if pp_BumpMap == 1 #if pp_BumpMap == 1
highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0;
highp float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0;
texcol.a = clamp(vtx_offs.a + vtx_offs.r * sin(s) + vtx_offs.g * cos(s) * cos(r - 2.0 * PI * vtx_offs.b), 0.0, 1.0); texcol.a = clamp(vtx_offs.a + vtx_offs.r * sin(s) + vtx_offs.g * cos(s) * cos(r - 2.0 * PI * vtx_offs.b), 0.0, 1.0);
texcol.rgb = vec3(1.0, 1.0, 1.0); texcol.rgb = vec3(1.0, 1.0, 1.0);
#else #else
@ -370,7 +391,8 @@ bool gl4CompilePipelineShader( gl4PipelineShader* s, const char *pixel_source /*
sprintf(pshader, pixel_source, sprintf(pshader, pixel_source,
s->cp_AlphaTest, s->pp_InsideClipping, s->pp_UseAlpha, s->cp_AlphaTest, s->pp_InsideClipping, s->pp_UseAlpha,
s->pp_Texture, s->pp_IgnoreTexA, s->pp_ShadInstr, s->pp_Offset, s->pp_FogCtrl, s->pp_Texture, s->pp_IgnoreTexA, s->pp_ShadInstr, s->pp_Offset, s->pp_FogCtrl,
s->pp_TwoVolumes, s->pp_Gouraud, s->pp_BumpMap, s->fog_clamping, (int)s->pass); s->pp_TwoVolumes, s->pp_Gouraud, s->pp_BumpMap, s->fog_clamping, s->palette,
(int)s->pass);
s->program = gl_CompileAndLink(vshader, pshader); s->program = gl_CompileAndLink(vshader, pshader);
@ -437,6 +459,11 @@ bool gl4CompilePipelineShader( gl4PipelineShader* s, const char *pixel_source /*
s->shading_instr = glGetUniformLocation(s->program, "shading_instr"); s->shading_instr = glGetUniformLocation(s->program, "shading_instr");
s->fog_control = glGetUniformLocation(s->program, "fog_control"); s->fog_control = glGetUniformLocation(s->program, "fog_control");
gu = glGetUniformLocation(s->program, "palette");
if (gu != -1)
glUniform1i(gu, 6); // GL_TEXTURE6
s->palette_index = glGetUniformLocation(s->program, "palette_index");
return glIsProgram(s->program)==GL_TRUE; return glIsProgram(s->program)==GL_TRUE;
} }
@ -657,6 +684,11 @@ static bool RenderFrame()
fog_needs_update = false; fog_needs_update = false;
UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE5, GL_RED); UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE5, GL_RED);
} }
if (palette_updated)
{
UpdatePaletteTexture(GL_TEXTURE6);
palette_updated = false;
}
glcache.UseProgram(gl4.modvol_shader.program); glcache.UseProgram(gl4.modvol_shader.program);

View File

@ -120,6 +120,7 @@ __forceinline
int clip_rect[4] = {}; int clip_rect[4] = {};
TileClipping clipmode = GetTileClip(gp->tileclip, ViewportMatrix, clip_rect); TileClipping clipmode = GetTileClip(gp->tileclip, ViewportMatrix, clip_rect);
bool palette = BaseTextureCacheData::IsGpuHandledPaletted(gp->tsp, gp->tcw);
CurrentShader = GetProgram(Type == ListType_Punch_Through ? 1 : 0, CurrentShader = GetProgram(Type == ListType_Punch_Through ? 1 : 0,
clipmode == TileClipping::Inside, clipmode == TileClipping::Inside,
@ -132,11 +133,20 @@ __forceinline
gp->pcw.Gouraud, gp->pcw.Gouraud,
gp->tcw.PixelFmt == PixelBumpMap, gp->tcw.PixelFmt == PixelBumpMap,
color_clamp, color_clamp,
ShaderUniforms.trilinear_alpha != 1.f); ShaderUniforms.trilinear_alpha != 1.f,
palette);
glcache.UseProgram(CurrentShader->program); glcache.UseProgram(CurrentShader->program);
if (CurrentShader->trilinear_alpha != -1) if (CurrentShader->trilinear_alpha != -1)
glUniform1f(CurrentShader->trilinear_alpha, ShaderUniforms.trilinear_alpha); glUniform1f(CurrentShader->trilinear_alpha, ShaderUniforms.trilinear_alpha);
if (palette)
{
if (gp->tcw.PixelFmt == PixelPal4)
ShaderUniforms.palette_index = float(gp->tcw.PalSelect << 4) / 1023.f;
else
ShaderUniforms.palette_index = float((gp->tcw.PalSelect >> 4) << 8) / 1023.f;
glUniform1f(CurrentShader->palette_index, ShaderUniforms.palette_index);
}
if (clipmode == TileClipping::Inside) if (clipmode == TileClipping::Inside)
glUniform4f(CurrentShader->pp_ClipTest, clip_rect[0], clip_rect[1], clip_rect[0] + clip_rect[2], clip_rect[1] + clip_rect[3]); glUniform4f(CurrentShader->pp_ClipTest, clip_rect[0], clip_rect[1], clip_rect[0] + clip_rect[2], clip_rect[1] + clip_rect[3]);
@ -160,7 +170,7 @@ __forceinline
SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV);
//set texture filter mode //set texture filter mode
if (gp->tsp.FilterMode == 0) if (gp->tsp.FilterMode == 0 || palette)
{ {
//disable filtering, mipmaps //disable filtering, mipmaps
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
@ -643,7 +653,7 @@ static void DrawQuad(GLuint texId, float x, float y, float w, float h, float u0,
ShaderUniforms.trilinear_alpha = 1.0; ShaderUniforms.trilinear_alpha = 1.0;
PipelineShader *shader = GetProgram(0, false, 1, 0, 1, 0, 0, 2, false, false, false, false); PipelineShader *shader = GetProgram(0, false, 1, 0, 1, 0, 0, 2, false, false, false, false, false);
glcache.UseProgram(shader->program); glcache.UseProgram(shader->program);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);

View File

@ -106,6 +106,8 @@ R"(%s
#define pp_BumpMap %d #define pp_BumpMap %d
#define FogClamping %d #define FogClamping %d
#define pp_TriLinear %d #define pp_TriLinear %d
#define pp_Palette %d
#define PI 3.1415926 #define PI 3.1415926
#define GLES2 0 #define GLES2 0
@ -152,6 +154,9 @@ uniform sampler2D tex,fog_table;
uniform lowp float trilinear_alpha; uniform lowp float trilinear_alpha;
uniform lowp vec4 fog_clamp_min; uniform lowp vec4 fog_clamp_min;
uniform lowp vec4 fog_clamp_max; uniform lowp vec4 fog_clamp_max;
uniform sampler2D palette;
uniform mediump float palette_index;
/* Vertex input*/ /* Vertex input*/
INTERPOLATION in lowp vec4 vtx_base; INTERPOLATION in lowp vec4 vtx_base;
INTERPOLATION in lowp vec4 vtx_offs; INTERPOLATION in lowp vec4 vtx_offs;
@ -183,6 +188,16 @@ highp vec4 fog_clamp(lowp vec4 col)
#endif #endif
} }
#if pp_Palette == 1
// Resolve a paletted texel. The FOG_CHANNEL component of tex carries the colour
// index (a normalized byte), which is offset by the palette_index uniform and
// used as the horizontal coordinate into the palette texture.
lowp vec4 palettePixel(highp vec2 coords)
{
	// Promote the sampled value to highp before scaling: the sample is lowp and
	// a lowp intermediate is only guaranteed a range of about -2..2, so
	// multiplying it by 255.0 directly could overflow on GLES hardware.
	highp float color_idx = texture(tex, coords).FOG_CHANNEL;
	// Normalized byte back to 0..255, rescaled by 1/1023, shifted by the palette bank offset.
	// The vertical coordinate is fixed at 0.5.
	highp vec2 c = vec2(color_idx * 255.0 / 1023.0 + palette_index, 0.5);
	return texture(palette, c);
}
#endif
void main() void main()
{ {
// Clip inside the box // Clip inside the box
@ -201,7 +216,11 @@ void main()
#endif #endif
#if pp_Texture==1 #if pp_Texture==1
{ {
lowp vec4 texcol=texture(tex, vtx_uv); #if pp_Palette == 0
lowp vec4 texcol = texture(tex, vtx_uv);
#else
lowp vec4 texcol = palettePixel(vtx_uv);
#endif
#if pp_BumpMap == 1 #if pp_BumpMap == 1
highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0;
@ -387,6 +406,7 @@ gl_ctx gl;
int screen_width; int screen_width;
int screen_height; int screen_height;
GLuint fogTextureId; GLuint fogTextureId;
GLuint paletteTextureId;
glm::mat4 ViewportMatrix; glm::mat4 ViewportMatrix;
@ -446,6 +466,8 @@ static void gles_term()
fbTextureId = 0; fbTextureId = 0;
glcache.DeleteTextures(1, &fogTextureId); glcache.DeleteTextures(1, &fogTextureId);
fogTextureId = 0; fogTextureId = 0;
glcache.DeleteTextures(1, &paletteTextureId);
paletteTextureId = 0;
gl_free_osd_resources(); gl_free_osd_resources();
free_output_framebuffer(); free_output_framebuffer();
@ -471,7 +493,7 @@ void findGLVersion()
gl.glsl_version_header = ""; gl.glsl_version_header = "";
gl.index_type = GL_UNSIGNED_SHORT; gl.index_type = GL_UNSIGNED_SHORT;
} }
gl.fog_image_format = GL_ALPHA; gl.single_channel_format = GL_ALPHA;
const char *extensions = (const char *)glGetString(GL_EXTENSIONS); const char *extensions = (const char *)glGetString(GL_EXTENSIONS);
if (strstr(extensions, "GL_OES_packed_depth_stencil") != NULL) if (strstr(extensions, "GL_OES_packed_depth_stencil") != NULL)
gl.GL_OES_packed_depth_stencil_supported = true; gl.GL_OES_packed_depth_stencil_supported = true;
@ -490,13 +512,13 @@ void findGLVersion()
#else #else
gl.glsl_version_header = "#version 130"; gl.glsl_version_header = "#version 130";
#endif #endif
gl.fog_image_format = GL_RED; gl.single_channel_format = GL_RED;
} }
else else
{ {
gl.gl_version = "GL2"; gl.gl_version = "GL2";
gl.glsl_version_header = "#version 120"; gl.glsl_version_header = "#version 120";
gl.fog_image_format = GL_ALPHA; gl.single_channel_format = GL_ALPHA;
} }
} }
GLint ranges[2]; GLint ranges[2];
@ -595,7 +617,8 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader)
PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset,
u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear) u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear,
bool palette)
{ {
u32 rv=0; u32 rv=0;
@ -611,6 +634,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
rv<<=1; rv|=pp_BumpMap; rv<<=1; rv|=pp_BumpMap;
rv<<=1; rv|=fog_clamping; rv<<=1; rv|=fog_clamping;
rv<<=1; rv|=trilinear; rv<<=1; rv|=trilinear;
rv<<=1; rv|=palette;
PipelineShader *shader = &gl.shaders[rv]; PipelineShader *shader = &gl.shaders[rv];
if (shader->program == 0) if (shader->program == 0)
@ -627,6 +651,7 @@ PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
shader->pp_BumpMap = pp_BumpMap; shader->pp_BumpMap = pp_BumpMap;
shader->fog_clamping = fog_clamping; shader->fog_clamping = fog_clamping;
shader->trilinear = trilinear; shader->trilinear = trilinear;
shader->palette = palette;
CompilePipelineShader(shader); CompilePipelineShader(shader);
} }
@ -645,7 +670,7 @@ bool CompilePipelineShader( PipelineShader* s)
rc = sprintf(pshader,PixelPipelineShader, gl.glsl_version_header, gl.gl_version, rc = sprintf(pshader,PixelPipelineShader, gl.glsl_version_header, gl.gl_version,
s->cp_AlphaTest,s->pp_InsideClipping,s->pp_UseAlpha, s->cp_AlphaTest,s->pp_InsideClipping,s->pp_UseAlpha,
s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_Gouraud, s->pp_BumpMap, s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_Gouraud, s->pp_BumpMap,
s->fog_clamping, s->trilinear); s->fog_clamping, s->trilinear, s->palette);
verify(rc + 1 <= (int)sizeof(pshader)); verify(rc + 1 <= (int)sizeof(pshader));
s->program=gl_CompileAndLink(vshader, pshader); s->program=gl_CompileAndLink(vshader, pshader);
@ -682,6 +707,12 @@ bool CompilePipelineShader( PipelineShader* s)
gu = glGetUniformLocation(s->program, "fog_table"); gu = glGetUniformLocation(s->program, "fog_table");
if (gu != -1) if (gu != -1)
glUniform1i(gu, 1); glUniform1i(gu, 1);
// And texture 2 as palette
gu = glGetUniformLocation(s->program, "palette");
if (gu != -1)
glUniform1i(gu, 2);
s->palette_index = glGetUniformLocation(s->program, "palette_index");
s->trilinear_alpha = glGetUniformLocation(s->program, "trilinear_alpha"); s->trilinear_alpha = glGetUniformLocation(s->program, "trilinear_alpha");
if (s->fog_clamping) if (s->fog_clamping)
@ -830,6 +861,31 @@ bool gl_create_resources();
//setup //setup
// Debug-message callback intended for glDebugMessageCallback.
// Forwards each driver message to the emulator log, picking the log level
// from the GL severity:
//   HIGH                        -> WARN_LOG
//   MEDIUM                      -> INFO_LOG
//   LOW, NOTIFICATION, unknown  -> DEBUG_LOG
// source, type, length and userParam belong to the callback signature and are
// not used here.
static void gl_DebugOutput(GLenum source,
		GLenum type,
		GLuint id,
		GLenum severity,
		GLsizei length,
		const GLchar *message,
		const void *userParam)
{
	// NOTE(review): 131185 is presumably a noisy, driver-specific informational
	// message id that is filtered on purpose - confirm which driver emits it.
	if (id == 131185)
		return;

	// id is a GLuint, so it is printed with %u rather than %d.
	switch (severity)
	{
	case GL_DEBUG_SEVERITY_HIGH:
		WARN_LOG(RENDERER, "opengl:[%u] %s", id, message);
		break;
	case GL_DEBUG_SEVERITY_MEDIUM:
		INFO_LOG(RENDERER, "opengl:[%u] %s", id, message);
		break;
	case GL_DEBUG_SEVERITY_LOW:
	case GL_DEBUG_SEVERITY_NOTIFICATION:
	default:
		// Any unrecognised severity is logged at the lowest level
		DEBUG_LOG(RENDERER, "opengl:[%u] %s", id, message);
		break;
	}
}
bool gles_init() bool gles_init()
{ {
@ -838,10 +894,16 @@ bool gles_init()
if (!gl_create_resources()) if (!gl_create_resources())
return false; return false;
// glEnable(GL_DEBUG_OUTPUT); #if 0
// glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glEnable(GL_DEBUG_OUTPUT);
// glDebugMessageCallback(gl_DebugOutput, NULL); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
// glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); #ifdef GLES
glDebugMessageCallback((RGLGENGLDEBUGPROC)gl_DebugOutput, NULL);
#else
glDebugMessageCallback(gl_DebugOutput, NULL);
#endif
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE);
#endif
//clean up the buffer //clean up the buffer
glcache.ClearColor(0.f, 0.f, 0.f, 0.f); glcache.ClearColor(0.f, 0.f, 0.f, 0.f);
@ -892,6 +954,28 @@ void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
} }
// Uploads palette32_ram as a 1024x1 RGBA texture bound on the given texture
// unit. The GL texture object is created lazily on the first call; texture
// unit 0 is made active again before returning.
void UpdatePaletteTexture(GLenum texture_slot)
{
	glActiveTexture(texture_slot);

	const bool firstUse = paletteTextureId == 0;
	if (firstUse)
		paletteTextureId = glcache.GenTexture();
	glcache.BindTexture(GL_TEXTURE_2D, paletteTextureId);
	if (firstUse)
	{
		// Sampler state only needs to be set once per texture object:
		// nearest filtering and clamped addressing
		glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
	}

	glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
	// One row of 1024 RGBA8 entries read from palette32_ram
	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1024, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, palette32_ram);
	glCheck();

	glActiveTexture(GL_TEXTURE0);
}
void OSD_DRAW(bool clear_screen) void OSD_DRAW(bool clear_screen)
{ {
#ifdef __ANDROID__ #ifdef __ANDROID__
@ -1060,12 +1144,18 @@ bool RenderFrame()
if (fog_needs_update && settings.rend.Fog) if (fog_needs_update && settings.rend.Fog)
{ {
fog_needs_update = false; fog_needs_update = false;
UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE1, gl.fog_image_format); UpdateFogTexture((u8 *)FOG_TABLE, GL_TEXTURE1, gl.single_channel_format);
}
if (palette_updated)
{
UpdatePaletteTexture(GL_TEXTURE2);
palette_updated = false;
} }
glcache.UseProgram(gl.modvol_shader.program); glcache.UseProgram(gl.modvol_shader.program);
glUniform4fv( gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); if (gl.modvol_shader.depth_scale != -1)
glUniform4fv(gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs);
glUniformMatrix4fv(gl.modvol_shader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]); glUniformMatrix4fv(gl.modvol_shader.normal_matrix, 1, GL_FALSE, &ShaderUniforms.normal_mat[0][0]);
ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f;

View File

@ -38,6 +38,7 @@ struct PipelineShader
GLint trilinear_alpha; GLint trilinear_alpha;
GLint fog_clamp_min, fog_clamp_max; GLint fog_clamp_min, fog_clamp_max;
GLint normal_matrix; GLint normal_matrix;
GLint palette_index;
// //
bool cp_AlphaTest; bool cp_AlphaTest;
@ -52,6 +53,7 @@ struct PipelineShader
bool pp_BumpMap; bool pp_BumpMap;
bool fog_clamping; bool fog_clamping;
bool trilinear; bool trilinear;
bool palette;
}; };
@ -107,7 +109,7 @@ struct gl_ctx
int gl_major; int gl_major;
int gl_minor; int gl_minor;
bool is_gles; bool is_gles;
GLuint fog_image_format; GLuint single_channel_format;
GLenum index_type; GLenum index_type;
bool GL_OES_packed_depth_stencil_supported; bool GL_OES_packed_depth_stencil_supported;
bool GL_OES_depth24_supported; bool GL_OES_depth24_supported;
@ -132,6 +134,7 @@ void gl_load_osd_resources();
void gl_free_osd_resources(); void gl_free_osd_resources();
bool ProcessFrame(TA_context* ctx); bool ProcessFrame(TA_context* ctx);
void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format); void UpdateFogTexture(u8 *fog_table, GLenum texture_slot, GLint fog_image_format);
void UpdatePaletteTexture(GLenum texture_slot);
void findGLVersion(); void findGLVersion();
void GetFramebufferScaling(float& scale_x, float& scale_y, float& scissoring_scale_x, float& scissoring_scale_y); void GetFramebufferScaling(float& scale_x, float& scale_y, float& scissoring_scale_x, float& scissoring_scale_y);
void GetFramebufferSize(float& dc_width, float& dc_height); void GetFramebufferSize(float& dc_width, float& dc_height);
@ -157,7 +160,8 @@ void HideOSD();
void OSD_DRAW(bool clear_screen); void OSD_DRAW(bool clear_screen);
PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping, PipelineShader *GetProgram(bool cp_AlphaTest, bool pp_InsideClipping,
bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset, bool pp_Texture, bool pp_UseAlpha, bool pp_IgnoreTexA, u32 pp_ShadInstr, bool pp_Offset,
u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear); u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, bool trilinear,
bool palette);
GLuint gl_CompileShader(const char* shader, GLuint type); GLuint gl_CompileShader(const char* shader, GLuint type);
GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader); GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader);
@ -181,6 +185,7 @@ extern struct ShaderUniforms_t
int width; int width;
int height; int height;
} base_clipping; } base_clipping;
float palette_index;
void Set(const PipelineShader* s) void Set(const PipelineShader* s)
{ {
@ -206,6 +211,9 @@ extern struct ShaderUniforms_t
if (s->normal_matrix != -1) if (s->normal_matrix != -1)
glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]); glUniformMatrix4fv(s->normal_matrix, 1, GL_FALSE, &normal_mat[0][0]);
if (s->palette_index != -1)
glUniform1f(s->palette_index, palette_index);
} }
} ShaderUniforms; } ShaderUniforms;

View File

@ -12,8 +12,9 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b
{ {
//upload to OpenGL ! //upload to OpenGL !
glcache.BindTexture(GL_TEXTURE_2D, texID); glcache.BindTexture(GL_TEXTURE_2D, texID);
GLuint comps = GL_RGBA; GLuint comps = tex_type == TextureType::_8 ? gl.single_channel_format : GL_RGBA;
GLuint gltype; GLuint gltype;
u32 bytes_per_pixel = 2;
switch (tex_type) switch (tex_type)
{ {
case TextureType::_5551: case TextureType::_5551:
@ -27,6 +28,11 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b
gltype = GL_UNSIGNED_SHORT_4_4_4_4; gltype = GL_UNSIGNED_SHORT_4_4_4_4;
break; break;
case TextureType::_8888: case TextureType::_8888:
bytes_per_pixel = 4;
gltype = GL_UNSIGNED_BYTE;
break;
case TextureType::_8:
bytes_per_pixel = 1;
gltype = GL_UNSIGNED_BYTE; gltype = GL_UNSIGNED_BYTE;
break; break;
default: default:
@ -63,10 +69,13 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b
case TextureType::_8888: case TextureType::_8888:
internalFormat = GL_RGBA8; internalFormat = GL_RGBA8;
break; break;
case TextureType::_8:
internalFormat = comps;
break;
default: default:
die("Unsupported texture format"); die("Unsupported texture format");
internalFormat = 0; internalFormat = 0;
break; break;
} }
if (Updates == 1) if (Updates == 1)
{ {
@ -76,7 +85,7 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b
for (int i = 0; i < mipmapLevels; i++) for (int i = 0; i < mipmapLevels; i++)
{ {
glTexSubImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, 0, 0, 1 << i, 1 << i, comps, gltype, temp_tex_buffer); glTexSubImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, 0, 0, 1 << i, 1 << i, comps, gltype, temp_tex_buffer);
temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2); temp_tex_buffer += (1 << (2 * i)) * bytes_per_pixel;
} }
} }
else else
@ -87,7 +96,7 @@ void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer, b
for (int i = 0; i < mipmapLevels; i++) for (int i = 0; i < mipmapLevels; i++)
{ {
glTexImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, comps, 1 << i, 1 << i, 0, comps, gltype, temp_tex_buffer); glTexImage2D(GL_TEXTURE_2D, mipmapLevels - i - 1, comps, 1 << i, 1 << i, 0, comps, gltype, temp_tex_buffer);
temp_tex_buffer += (1 << (2 * i)) * (tex_type == TextureType::_8888 ? 4 : 2); temp_tex_buffer += (1 << (2 * i)) * bytes_per_pixel;
} }
} }
} }

View File

@ -157,15 +157,25 @@ void Drawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool sor
// Trilinear pass A // Trilinear pass A
trilinearAlpha = 1.0 - trilinearAlpha; trilinearAlpha = 1.0 - trilinearAlpha;
} }
bool palette = BaseTextureCacheData::IsGpuHandledPaletted(poly.tsp, poly.tcw);
if (tileClip == TileClipping::Inside || trilinearAlpha != 1.f) float palette_index = 0.f;
if (palette)
{ {
std::array<float, 5> pushConstants = { if (poly.tcw.PixelFmt == PixelPal4)
palette_index = float(poly.tcw.PalSelect << 4) / 1023.f;
else
palette_index = float((poly.tcw.PalSelect >> 4) << 8) / 1023.f;
}
if (tileClip == TileClipping::Inside || trilinearAlpha != 1.f || palette)
{
std::array<float, 6> pushConstants = {
(float)scissorRect.offset.x, (float)scissorRect.offset.x,
(float)scissorRect.offset.y, (float)scissorRect.offset.y,
(float)scissorRect.offset.x + (float)scissorRect.extent.width, (float)scissorRect.offset.x + (float)scissorRect.extent.width,
(float)scissorRect.offset.y + (float)scissorRect.extent.height, (float)scissorRect.offset.y + (float)scissorRect.extent.height,
trilinearAlpha trilinearAlpha,
palette_index
}; };
cmdBuffer.pushConstants<float>(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants); cmdBuffer.pushConstants<float>(pipelineManager->GetPipelineLayout(), vk::ShaderStageFlagBits::eFragment, 0, pushConstants);
} }
@ -304,7 +314,7 @@ void Drawer::UploadMainBuffer(const VertexShaderUniforms& vertexUniforms, const
buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]); buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]);
} }
bool Drawer::Draw(const Texture *fogTexture) bool Drawer::Draw(const Texture *fogTexture, const Texture *paletteTexture)
{ {
VertexShaderUniforms vtxUniforms; VertexShaderUniforms vtxUniforms;
vtxUniforms.normal_matrix = matrices.GetNormalMatrix(); vtxUniforms.normal_matrix = matrices.GetNormalMatrix();
@ -322,7 +332,8 @@ bool Drawer::Draw(const Texture *fogTexture)
UploadMainBuffer(vtxUniforms, fragUniforms); UploadMainBuffer(vtxUniforms, fragUniforms);
// Update per-frame descriptor set and bind it // Update per-frame descriptor set and bind it
GetCurrentDescSet().UpdateUniforms(GetMainBuffer(0)->buffer.get(), offsets.vertexUniformOffset, offsets.fragmentUniformOffset, fogTexture->GetImageView()); GetCurrentDescSet().UpdateUniforms(GetMainBuffer(0)->buffer.get(), offsets.vertexUniformOffset, offsets.fragmentUniformOffset,
fogTexture->GetImageView(), paletteTexture->GetImageView());
GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer); GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer);
// Reset per-poly descriptor set pool // Reset per-poly descriptor set pool
GetCurrentDescSet().Reset(); GetCurrentDescSet().Reset();

View File

@ -103,7 +103,7 @@ class Drawer : public BaseDrawer
{ {
public: public:
virtual ~Drawer() = default; virtual ~Drawer() = default;
bool Draw(const Texture *fogTexture); bool Draw(const Texture *fogTexture, const Texture *paletteTexture);
virtual void EndRenderPass() = 0; virtual void EndRenderPass() = 0;
protected: protected:

View File

@ -44,6 +44,16 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool
bool twoVolumes = poly.tsp1.full != (u32)-1 || poly.tcw1.full != (u32)-1; bool twoVolumes = poly.tsp1.full != (u32)-1 || poly.tcw1.full != (u32)-1;
bool palette = BaseTextureCacheData::IsGpuHandledPaletted(poly.tsp, poly.tcw);
float palette_index = 0.f;
if (palette)
{
if (poly.tcw.PixelFmt == PixelPal4)
palette_index = float(poly.tcw.PalSelect << 4) / 1023.f;
else
palette_index = float((poly.tcw.PalSelect >> 4) << 8) / 1023.f;
}
OITDescriptorSets::PushConstants pushConstants = { OITDescriptorSets::PushConstants pushConstants = {
{ {
(float)scissorRect.offset.x, (float)scissorRect.offset.x,
@ -54,6 +64,7 @@ void OITDrawer::DrawPoly(const vk::CommandBuffer& cmdBuffer, u32 listType, bool
{ poly.tsp.SrcInstr, poly.tsp.DstInstr, 0, 0 }, { poly.tsp.SrcInstr, poly.tsp.DstInstr, 0, 0 },
trilinearAlpha, trilinearAlpha,
listType == ListType_Translucent ? (int)(&poly - pvrrc.global_param_tr.head()) : 0, listType == ListType_Translucent ? (int)(&poly - pvrrc.global_param_tr.head()) : 0,
palette_index,
}; };
if (twoVolumes) if (twoVolumes)
{ {
@ -236,7 +247,7 @@ void OITDrawer::UploadMainBuffer(const OITDescriptorSets::VertexShaderUniforms&
buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]); buffer->upload(chunks.size(), &chunkSizes[0], &chunks[0]);
} }
bool OITDrawer::Draw(const Texture *fogTexture) bool OITDrawer::Draw(const Texture *fogTexture, const Texture *paletteTexture)
{ {
vk::CommandBuffer cmdBuffer = NewFrame(); vk::CommandBuffer cmdBuffer = NewFrame();
@ -269,7 +280,7 @@ bool OITDrawer::Draw(const Texture *fogTexture)
GetCurrentDescSet().UpdateUniforms(mainBuffer, offsets.vertexUniformOffset, offsets.fragmentUniformOffset, GetCurrentDescSet().UpdateUniforms(mainBuffer, offsets.vertexUniformOffset, offsets.fragmentUniformOffset,
fogTexture->GetImageView(), offsets.polyParamsOffset, fogTexture->GetImageView(), offsets.polyParamsOffset,
offsets.polyParamsSize, depthAttachment->GetStencilView(), offsets.polyParamsSize, depthAttachment->GetStencilView(),
depthAttachment->GetImageView()); depthAttachment->GetImageView(), paletteTexture->GetImageView());
GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer); GetCurrentDescSet().BindPerFrameDescriptorSets(cmdBuffer);
GetCurrentDescSet().UpdateColorInputDescSet(0, colorAttachments[0]->GetImageView()); GetCurrentDescSet().UpdateColorInputDescSet(0, colorAttachments[0]->GetImageView());
GetCurrentDescSet().UpdateColorInputDescSet(1, colorAttachments[1]->GetImageView()); GetCurrentDescSet().UpdateColorInputDescSet(1, colorAttachments[1]->GetImageView());

View File

@ -39,7 +39,7 @@ class OITDrawer : public BaseDrawer
{ {
public: public:
virtual ~OITDrawer() = default; virtual ~OITDrawer() = default;
bool Draw(const Texture *fogTexture); bool Draw(const Texture *fogTexture, const Texture *paletteTexture);
virtual vk::CommandBuffer NewFrame() = 0; virtual vk::CommandBuffer NewFrame() = 0;
virtual void EndFrame() = 0; virtual void EndFrame() = 0;

View File

@ -156,7 +156,8 @@ void OITPipelineManager::CreatePipeline(u32 listType, bool autosort, const PolyP
//params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through; //params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through;
params.useAlpha = pp.tsp.UseAlpha; params.useAlpha = pp.tsp.UseAlpha;
params.pass = pass; params.pass = pass;
params.twoVolume = pp.tsp1.full != -1 || pp.tcw1.full != -1; params.twoVolume = pp.tsp1.full != (u32)-1 || pp.tcw1.full != (u32)-1;
params.palette = BaseTextureCacheData::IsGpuHandledPaletted(pp.tsp, pp.tcw);
vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params);
vk::PipelineShaderStageCreateInfo stages[] = { vk::PipelineShaderStageCreateInfo stages[] = {

View File

@ -55,7 +55,8 @@ public:
glm::ivec4 blend_mode0; // Only using 2 elements but easier for std140 glm::ivec4 blend_mode0; // Only using 2 elements but easier for std140
float trilinearAlpha; float trilinearAlpha;
int pp_Number; int pp_Number;
int _pad[2]; float palette_index;
int _pad;
// two volume mode // two volume mode
glm::ivec4 blend_mode1; // Only using 2 elements but easier for std140 glm::ivec4 blend_mode1; // Only using 2 elements but easier for std140
@ -80,7 +81,8 @@ public:
} }
// FIXME way too many params // FIXME way too many params
void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView,
u32 polyParamsOffset, u32 polyParamsSize, vk::ImageView stencilImageView, vk::ImageView depthImageView) u32 polyParamsOffset, u32 polyParamsSize, vk::ImageView stencilImageView, vk::ImageView depthImageView,
vk::ImageView paletteImageView)
{ {
if (!perFrameDescSet) if (!perFrameDescSet)
{ {
@ -105,6 +107,17 @@ public:
imageInfo = { fogSampler, fogImageView, vk::ImageLayout::eShaderReadOnlyOptimal }; imageInfo = { fogSampler, fogImageView, vk::ImageLayout::eShaderReadOnlyOptimal };
writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 2, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr)); writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 2, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr));
} }
if (paletteImageView)
{
TSP palTsp = {};
palTsp.FilterMode = 0;
palTsp.ClampU = 1;
palTsp.ClampV = 1;
vk::Sampler palSampler = samplerManager->GetSampler(palTsp);
static vk::DescriptorImageInfo imageInfo;
imageInfo = { palSampler, paletteImageView, vk::ImageLayout::eShaderReadOnlyOptimal };
writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 6, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr));
}
if (polyParamsSize > 0) if (polyParamsSize > 0)
{ {
static vk::DescriptorBufferInfo polyParamsBufferInfo; static vk::DescriptorBufferInfo polyParamsBufferInfo;
@ -150,7 +163,7 @@ public:
std::vector<vk::WriteDescriptorSet> writeDescriptorSets; std::vector<vk::WriteDescriptorSet> writeDescriptorSets;
writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perPolyDescSets.back(), 0, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo0, nullptr, nullptr)); writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perPolyDescSets.back(), 0, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo0, nullptr, nullptr));
if (textureId1 != -1) if (textureId1 != (u64)-1)
{ {
Texture *texture1 = reinterpret_cast<Texture *>(textureId1); Texture *texture1 = reinterpret_cast<Texture *>(textureId1);
vk::DescriptorImageInfo imageInfo1(samplerManager->GetSampler(tsp1), texture1->GetReadOnlyImageView(), vk::ImageLayout::eShaderReadOnlyOptimal); vk::DescriptorImageInfo imageInfo1(samplerManager->GetSampler(tsp1), texture1->GetReadOnlyImageView(), vk::ImageLayout::eShaderReadOnlyOptimal);
@ -223,6 +236,7 @@ public:
{ 3, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // Tr poly params { 3, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // Tr poly params
{ 4, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // stencil input attachment { 4, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // stencil input attachment
{ 5, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // depth input attachment { 5, vk::DescriptorType::eInputAttachment, 1, vk::ShaderStageFlagBits::eFragment }, // depth input attachment
{ 6, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// palette texture
}; };
perFrameLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( perFrameLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique(
vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perFrameBindings), perFrameBindings)); vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perFrameBindings), perFrameBindings));
@ -310,7 +324,7 @@ private:
u32 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3) u32 hash = pp->pcw.Gouraud | (pp->pcw.Offset << 1) | (pp->pcw.Texture << 2) | (pp->pcw.Shadow << 3)
| (((pp->tileclip >> 28) == 3) << 4); | (((pp->tileclip >> 28) == 3) << 4);
hash |= ((listType >> 1) << 5); hash |= ((listType >> 1) << 5);
if (pp->tcw1.full != -1 || pp->tsp1.full != -1) if (pp->tcw1.full != (u32)-1 || pp->tsp1.full != (u32)-1)
{ {
// Two-volume mode // Two-volume mode
hash |= (1 << 31) | (pp->tsp.ColorClamp << 11); hash |= (1 << 31) | (pp->tsp.ColorClamp << 11);
@ -322,7 +336,7 @@ private:
| (pp->tsp.SrcInstr << 14) | (pp->tsp.DstInstr << 17); | (pp->tsp.SrcInstr << 14) | (pp->tsp.DstInstr << 17);
} }
hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | ((autosort ? 6 : pp->isp.DepthMode) << 23); hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | ((autosort ? 6 : pp->isp.DepthMode) << 23);
hash |= (u32)pass << 26; hash |= ((u32)BaseTextureCacheData::IsGpuHandledPaletted(pp->tsp, pp->tcw) << 26) | ((u32)pass << 27);
return hash; return hash;
} }

View File

@ -159,7 +159,10 @@ public:
textureCache.CollectCleanup(); textureCache.CollectCleanup();
if (result) if (result)
{
CheckFogTexture(); CheckFogTexture();
CheckPaletteTexture();
}
else else
texCommandPool.EndFrame(); texCommandPool.EndFrame();
@ -218,7 +221,7 @@ public:
else else
drawer = &screenDrawer; drawer = &screenDrawer;
drawer->Draw(fogTexture.get()); drawer->Draw(fogTexture.get(), paletteTexture.get());
drawer->EndFrame(); drawer->EndFrame();
@ -283,9 +286,30 @@ private:
fogTexture->SetCommandBuffer(nullptr); fogTexture->SetCommandBuffer(nullptr);
} }
void CheckPaletteTexture()
{
if (!paletteTexture)
{
paletteTexture = std::unique_ptr<Texture>(new Texture());
paletteTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice());
paletteTexture->SetDevice(GetContext()->GetDevice());
paletteTexture->tex_type = TextureType::_8888;
palette_updated = true;
}
if (!palette_updated)
return;
palette_updated = false;
paletteTexture->SetCommandBuffer(texCommandPool.Allocate());
paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false);
paletteTexture->SetCommandBuffer(nullptr);
}
OITBuffers oitBuffers; OITBuffers oitBuffers;
std::unique_ptr<Texture> fogTexture; std::unique_ptr<Texture> fogTexture;
std::unique_ptr<Texture> paletteTexture;
CommandPool texCommandPool; CommandPool texCommandPool;
SamplerManager samplerManager; SamplerManager samplerManager;

View File

@ -234,6 +234,7 @@ static const char OITFragmentShaderSource[] = R"(
#define pp_Gouraud %d #define pp_Gouraud %d
#define pp_BumpMap %d #define pp_BumpMap %d
#define ColorClamping %d #define ColorClamping %d
#define pp_Palette %d
#define PASS %d #define PASS %d
#define PI 3.1415926 #define PI 3.1415926
@ -264,6 +265,7 @@ layout (push_constant) uniform pushBlock
ivec4 blend_mode0; ivec4 blend_mode0;
float trilinearAlpha; float trilinearAlpha;
int pp_Number; int pp_Number;
float palette_index;
// two volume mode // two volume mode
ivec4 blend_mode1; ivec4 blend_mode1;
@ -283,6 +285,9 @@ layout (set = 1, binding = 0) uniform sampler2D tex0;
layout (set = 1, binding = 1) uniform sampler2D tex1; layout (set = 1, binding = 1) uniform sampler2D tex1;
#endif #endif
#endif #endif
#if pp_Palette == 1
layout (set = 0, binding = 6) uniform sampler2D palette;
#endif
#if PASS == PASS_COLOR #if PASS == PASS_COLOR
layout (input_attachment_index = 0, set = 0, binding = 4) uniform usubpassInput shadow_stencil; layout (input_attachment_index = 0, set = 0, binding = 4) uniform usubpassInput shadow_stencil;
@ -323,6 +328,16 @@ vec4 colorClamp(vec4 col)
#endif #endif
} }
#if pp_Palette == 1
vec4 palettePixel(sampler2D tex, vec2 coords)
{
vec4 c = vec4(texture(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0);
return texture(palette, c.xy);
}
#endif
void main() void main()
{ {
setFragDepth(); setFragDepth();
@ -381,10 +396,18 @@ void main()
highp vec4 texcol; highp vec4 texcol;
#if pp_TwoVolumes == 1 #if pp_TwoVolumes == 1
if (area1) if (area1)
texcol = texture(tex1, uv); #if pp_Palette == 0
texcol = texture(tex1, uv);
#else
texcol = palettePixel(tex1, uv);
#endif
else else
#endif #endif
texcol = texture(tex0, uv); #if pp_Palette == 0
texcol = texture(tex0, uv);
#else
texcol = palettePixel(tex0, uv);
#endif
#if pp_BumpMap == 1 #if pp_BumpMap == 1
highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0;
highp float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; highp float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0;
@ -775,7 +798,8 @@ vk::UniqueShaderModule OITShaderManager::compileShader(const FragmentShaderParam
strcpy(buf, OITShaderHeader); strcpy(buf, OITShaderHeader);
sprintf(buf + strlen(buf), OITFragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha, sprintf(buf + strlen(buf), OITFragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha,
(int)params.texture, (int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog, (int)params.texture, (int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog,
(int)params.twoVolume, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, (int)params.pass); (int)params.twoVolume, (int)params.gouraud, (int)params.bumpmap, (int)params.clamping, (int)params.palette,
(int)params.pass);
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf);
} }

View File

@ -49,6 +49,7 @@ public:
bool bumpmap; bool bumpmap;
bool clamping; bool clamping;
bool twoVolume; bool twoVolume;
bool palette;
Pass pass; Pass pass;
u32 hash() u32 hash()
@ -57,7 +58,7 @@ public:
| ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5)
| ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10)
| ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)twoVolume << 13) | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)twoVolume << 13)
| ((int)pass << 14); | ((u32)palette << 14) | ((int)pass << 15);
} }
}; };

View File

@ -303,6 +303,7 @@ void PipelineManager::CreatePipeline(u32 listType, bool sortTriangles, const Pol
params.texture = pp.pcw.Texture; params.texture = pp.pcw.Texture;
params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through && pp.tcw.MipMapped == 1; params.trilinear = pp.pcw.Texture && pp.tsp.FilterMode > 1 && listType != ListType_Punch_Through && pp.tcw.MipMapped == 1;
params.useAlpha = pp.tsp.UseAlpha; params.useAlpha = pp.tsp.UseAlpha;
params.palette = BaseTextureCacheData::IsGpuHandledPaletted(pp.tsp, pp.tcw);
vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params); vk::ShaderModule fragment_module = shaderManager->GetFragmentShader(params);
vk::PipelineShaderStageCreateInfo stages[] = { vk::PipelineShaderStageCreateInfo stages[] = {

View File

@ -36,7 +36,7 @@ public:
this->perPolyLayout = perPolyLayout; this->perPolyLayout = perPolyLayout;
} }
void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView) void UpdateUniforms(vk::Buffer buffer, u32 vertexUniformOffset, u32 fragmentUniformOffset, vk::ImageView fogImageView, vk::ImageView paletteImageView)
{ {
if (!perFrameDescSet) if (!perFrameDescSet)
{ {
@ -61,6 +61,17 @@ public:
imageInfo = { fogSampler, fogImageView, vk::ImageLayout::eShaderReadOnlyOptimal }; imageInfo = { fogSampler, fogImageView, vk::ImageLayout::eShaderReadOnlyOptimal };
writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 2, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr)); writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 2, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr));
} }
if (paletteImageView)
{
TSP palTsp = {};
palTsp.FilterMode = 0;
palTsp.ClampU = 1;
palTsp.ClampV = 1;
vk::Sampler palSampler = samplerManager->GetSampler(palTsp);
static vk::DescriptorImageInfo imageInfo;
imageInfo = { palSampler, paletteImageView, vk::ImageLayout::eShaderReadOnlyOptimal };
writeDescriptorSets.push_back(vk::WriteDescriptorSet(*perFrameDescSet, 3, 0, 1, vk::DescriptorType::eCombinedImageSampler, &imageInfo, nullptr, nullptr));
}
GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr); GetContext()->GetDevice().updateDescriptorSets(writeDescriptorSets, nullptr);
} }
@ -137,6 +148,7 @@ public:
{ 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // vertex uniforms { 0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eVertex }, // vertex uniforms
{ 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // fragment uniforms { 1, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eFragment }, // fragment uniforms
{ 2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// fog texture { 2, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// fog texture
{ 3, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// palette texture
}; };
vk::DescriptorSetLayoutBinding perPolyBindings[] = { vk::DescriptorSetLayoutBinding perPolyBindings[] = {
{ 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture { 0, vk::DescriptorType::eCombinedImageSampler, 1, vk::ShaderStageFlagBits::eFragment },// texture
@ -146,7 +158,7 @@ public:
perPolyLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique( perPolyLayout = GetContext()->GetDevice().createDescriptorSetLayoutUnique(
vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perPolyBindings), perPolyBindings)); vk::DescriptorSetLayoutCreateInfo(vk::DescriptorSetLayoutCreateFlags(), ARRAY_SIZE(perPolyBindings), perPolyBindings));
vk::DescriptorSetLayout layouts[] = { *perFrameLayout, *perPolyLayout }; vk::DescriptorSetLayout layouts[] = { *perFrameLayout, *perPolyLayout };
vk::PushConstantRange pushConstant(vk::ShaderStageFlagBits::eFragment, 0, 20); vk::PushConstantRange pushConstant(vk::ShaderStageFlagBits::eFragment, 0, 24);
pipelineLayout = GetContext()->GetDevice().createPipelineLayoutUnique( pipelineLayout = GetContext()->GetDevice().createPipelineLayoutUnique(
vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), ARRAY_SIZE(layouts), layouts, 1, &pushConstant)); vk::PipelineLayoutCreateInfo(vk::PipelineLayoutCreateFlags(), ARRAY_SIZE(layouts), layouts, 1, &pushConstant));
} }
@ -204,7 +216,7 @@ private:
| (pp->tsp.ColorClamp << 11) | ((settings.rend.Fog ? pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14) | (pp->tsp.ColorClamp << 11) | ((settings.rend.Fog ? pp->tsp.FogCtrl : 2) << 12) | (pp->tsp.SrcInstr << 14)
| (pp->tsp.DstInstr << 17); | (pp->tsp.DstInstr << 17);
hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23); hash |= (pp->isp.ZWriteDis << 20) | (pp->isp.CullMode << 21) | (pp->isp.DepthMode << 23);
hash |= (u32)sortTriangles << 26; hash |= ((u32)sortTriangles << 26) | ((u32)BaseTextureCacheData::IsGpuHandledPaletted(pp->tsp, pp->tcw) << 27);
return hash; return hash;
} }

View File

@ -73,6 +73,7 @@ static const char FragmentShaderSource[] = R"(#version 450
#define pp_BumpMap %d #define pp_BumpMap %d
#define ColorClamping %d #define ColorClamping %d
#define pp_TriLinear %d #define pp_TriLinear %d
#define pp_Palette %d
#define PI 3.1415926 #define PI 3.1415926
layout (location = 0) out vec4 FragColor; layout (location = 0) out vec4 FragColor;
@ -98,11 +99,15 @@ layout (push_constant) uniform pushBlock
{ {
vec4 clipTest; vec4 clipTest;
float trilinearAlpha; float trilinearAlpha;
float palette_index;
} pushConstants; } pushConstants;
#if pp_Texture == 1 #if pp_Texture == 1
layout (set = 1, binding = 0) uniform sampler2D tex; layout (set = 1, binding = 0) uniform sampler2D tex;
#endif #endif
#if pp_Palette == 1
layout (set = 0, binding = 3) uniform sampler2D palette;
#endif
// Vertex input // Vertex input
layout (location = 0) INTERPOLATION in lowp vec4 vtx_base; layout (location = 0) INTERPOLATION in lowp vec4 vtx_base;
@ -132,6 +137,16 @@ vec4 colorClamp(vec4 col)
#endif #endif
} }
#if pp_Palette == 1
vec4 palettePixel(sampler2D tex, vec2 coords)
{
vec4 c = vec4(texture(tex, coords).r * 255.0 / 1023.0 + pushConstants.palette_index, 0.5, 0.0, 0.0);
return texture(palette, c.xy);
}
#endif
void main() void main()
{ {
// Clip inside the box // Clip inside the box
@ -150,7 +165,11 @@ void main()
#endif #endif
#if pp_Texture == 1 #if pp_Texture == 1
{ {
vec4 texcol = texture(tex, vtx_uv); #if pp_Palette == 0
vec4 texcol = texture(tex, vtx_uv);
#else
vec4 texcol = palettePixel(tex, vtx_uv);
#endif
#if pp_BumpMap == 1 #if pp_BumpMap == 1
float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0;
@ -338,7 +357,7 @@ vk::UniqueShaderModule ShaderManager::compileShader(const FragmentShaderParams&
sprintf(buf, FragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha, (int)params.texture, sprintf(buf, FragmentShaderSource, (int)params.alphaTest, (int)params.insideClipTest, (int)params.useAlpha, (int)params.texture,
(int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog, (int)params.gouraud, (int)params.ignoreTexAlpha, params.shaderInstr, (int)params.offset, params.fog, (int)params.gouraud,
(int)params.bumpmap, (int)params.clamping, (int)params.trilinear); (int)params.bumpmap, (int)params.clamping, (int)params.trilinear, (int)params.palette);
return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf); return ShaderCompiler::Compile(vk::ShaderStageFlagBits::eFragment, buf);
} }

View File

@ -47,13 +47,15 @@ struct FragmentShaderParams
bool bumpmap; bool bumpmap;
bool clamping; bool clamping;
bool trilinear; bool trilinear;
bool palette;
u32 hash() u32 hash()
{ {
return ((u32)alphaTest) | ((u32)insideClipTest << 1) | ((u32)useAlpha << 2) return ((u32)alphaTest) | ((u32)insideClipTest << 1) | ((u32)useAlpha << 2)
| ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5) | ((u32)texture << 3) | ((u32)ignoreTexAlpha << 4) | (shaderInstr << 5)
| ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10) | ((u32)offset << 7) | ((u32)fog << 8) | ((u32)gouraud << 10)
| ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13); | ((u32)bumpmap << 11) | ((u32)clamping << 12) | ((u32)trilinear << 13)
| ((u32)palette << 14);
} }
}; };

View File

@ -145,7 +145,10 @@ public:
textureCache.CollectCleanup(); textureCache.CollectCleanup();
if (result) if (result)
{
CheckFogTexture(); CheckFogTexture();
CheckPaletteTexture();
}
else else
texCommandPool.EndFrame(); texCommandPool.EndFrame();
@ -205,7 +208,7 @@ public:
else else
drawer = &screenDrawer; drawer = &screenDrawer;
drawer->Draw(fogTexture.get()); drawer->Draw(fogTexture.get(), paletteTexture.get());
drawer->EndRenderPass(); drawer->EndRenderPass();
@ -270,8 +273,29 @@ private:
fogTexture->SetCommandBuffer(nullptr); fogTexture->SetCommandBuffer(nullptr);
} }
void CheckPaletteTexture()
{
if (!paletteTexture)
{
paletteTexture = std::unique_ptr<Texture>(new Texture());
paletteTexture->SetPhysicalDevice(GetContext()->GetPhysicalDevice());
paletteTexture->SetDevice(GetContext()->GetDevice());
paletteTexture->tex_type = TextureType::_8888;
palette_updated = true;
}
if (!palette_updated)
return;
palette_updated = false;
paletteTexture->SetCommandBuffer(texCommandPool.Allocate());
paletteTexture->UploadToGPU(1024, 1, (u8 *)palette32_ram, false);
paletteTexture->SetCommandBuffer(nullptr);
}
std::unique_ptr<Texture> fogTexture; std::unique_ptr<Texture> fogTexture;
std::unique_ptr<Texture> paletteTexture;
CommandPool texCommandPool; CommandPool texCommandPool;
SamplerManager samplerManager; SamplerManager samplerManager;