From 18b2d8273fadcea78e6a8ea1eda1f68137087d9e Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 9 Jul 2018 14:59:53 +0200 Subject: [PATCH] Improve paletted and YUV texture quality. Paletted textures using a 32-bit palette and YUV textures are now converted to 8888 format, which results in a lossless conversion. Fixes background texture quality in Alone in the Dark. --- core/rend/TexCache.h | 186 ++++++++++++++++++++++----------------- core/rend/gles/gltex.cpp | 92 ++++++++++++------- 2 files changed, 166 insertions(+), 112 deletions(-) diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index 17eee0e55..af8d1bf2f 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -13,25 +13,26 @@ extern u32 _pal_rev_16[64]; extern u32 detwiddle[2][8][1024]; //Pixel buffer class (realy helpfull ;) ) +template struct PixelBuffer { - u16* p_buffer_start; - u16* p_current_line; - u16* p_current_pixel; + pixel_type* p_buffer_start; + pixel_type* p_current_line; + pixel_type* p_current_pixel; u32 pixels_per_line; void init(void* data,u32 ppl_bytes) { - p_buffer_start=p_current_line=p_current_pixel=(u16*)data; - pixels_per_line=ppl_bytes/sizeof(u16); + p_buffer_start=p_current_line=p_current_pixel=(pixel_type*)data; + pixels_per_line=ppl_bytes/sizeof(pixel_type); } - __forceinline void prel(u32 x,u16 value) + __forceinline void prel(u32 x,pixel_type value) { p_current_pixel[x]=value; } - __forceinline void prel(u32 x,u32 y,u16 value) + __forceinline void prel(u32 x,u32 y,pixel_type value) { p_current_pixel[y*pixels_per_line+x]=value; } @@ -57,19 +58,17 @@ void palette_update(); #define clamp(minv,maxv,x) min(maxv,max(minv,x)) +// Unpack to 16-bit word + #define ARGB1555( word ) ( ((word>>15)&1) | (((word>>10) & 0x1F)<<11) | (((word>>5) & 0x1F)<<6) | (((word>>0) & 0x1F)<<1) ) -// ARGB8888(unpack_1_to_8[(word>>15)&1],unpack_5_to_8[(word>>10) & 0x1F], -//unpack_5_to_8[(word>>5) & 0x1F],unpack_5_to_8[word&0x1F]) #define ARGB565( word ) ( (((word>>0)&0x1F)<<0) | (((word>>5)&0x3F)<<5) | (((word>>11)&0x1F)<<11) ) -//ARGB8888(0xFF,unpack_5_to_8[(word>>11) & 0x1F], unpack_6_to_8[(word>>5) & 0x3F],unpack_5_to_8[word&0x1F]) -//( 0xFF000000 | unpack_5_to_8[(word>>11) & 0x1F] | unpack_5_to_8[(word>>5) & 0x3F]<<8 | unpack_5_to_8[word&0x1F]<<16 ) - #define ARGB4444( word ) ( (((word>>0)&0xF)<<4) | (((word>>4)&0xF)<<8) | (((word>>8)&0xF)<<12) | (((word>>12)&0xF)<<0) ) -//ARGB8888( (word&0xF000)>>(12-4),(word&0xF00)>>(8-4),(word&0xF0)>>(4-4),(word&0xF)<<4 ) -#define ARGB8888( word ) ( (((word>>4)&0xF)<<4) | (((word>>12)&0xF)<<8) | (((word>>20)&0xF)<<12) | (((word>>28)&0xF)<<0) ) +// Unpack to 32-bit word + +#define ARGB8888( word ) ( ((word >> 24) & 0xFF) | (((word >> 16) & 0xFF) << 24) | (((word >> 8) & 0xFF) << 16) | ((word & 0xFF) << 8) ) template __forceinline u32 YUV422(s32 Y,s32 Yu,s32 Yv) @@ -102,13 +101,32 @@ struct pp_565 } }; +struct pp_8888 +{ + __forceinline static u32 packRGB(u8 R,u8 G,u8 B) + { + return (R << 24) | (G << 16) | (B << 8) | 0xFF; + } +}; + //pixel convertors ! -#define pixelcvt_start(name,x,y) template \ +#define pixelcvt_start_base(name,x,y,type) template \ + struct name \ + { \ + static const u32 xpp=x;\ + static const u32 ypp=y; \ + __forceinline static void Convert(PixelBuffer* pb,u8* data) \ + { + +#define pixelcvt_start(name,x,y) pixelcvt_start_base(name, x, y, u16) +#define pixelcvt32_start(name,x,y) pixelcvt_start_base(name, x, y, u32) + +#define pixelcvt_size_start(name, x, y) template \ struct name \ { \ static const u32 xpp=x;\ static const u32 ypp=y; \ - __forceinline static void Convert(PixelBuffer* pb,u8* data) \ + __forceinline static void Convert(PixelBuffer* pb,u8* data) \ { #define pixelcvt_end } } @@ -153,7 +171,17 @@ pixelcvt_next(conv4444_PL,4,1) //3,0 pb->prel(3,ARGB4444(p_in[3])); } -pixelcvt_next(convYUV_PL,4,1) +pixelcvt_next(convBMP_PL,4,1) +{ + u16* p_in=(u16*)data; + pb->prel(0,ARGB4444(p_in[0])); + pb->prel(1,ARGB4444(p_in[1])); + pb->prel(2,ARGB4444(p_in[2])); + pb->prel(3,ARGB4444(p_in[3])); +} +pixelcvt_end; + +pixelcvt32_start(convYUV_PL,4,1) { //convert 4x1 4444 to 4x1 8888 u32* p_in=(u32*)data; @@ -182,15 +210,8 @@ pixelcvt_next(convYUV_PL,4,1) //1,0 pb->prel(3,YUV422(Y1,Yu,Yv)); } -pixelcvt_next(convBMP_PL,4,1) -{ - u16* p_in=(u16*)data; - pb->prel(0,ARGB4444(p_in[0])); - pb->prel(1,ARGB4444(p_in[1])); - pb->prel(2,ARGB4444(p_in[2])); - pb->prel(3,ARGB4444(p_in[3])); -} pixelcvt_end; + //twiddled pixelcvt_start(conv565_TW,2,2) { @@ -231,7 +252,17 @@ pixelcvt_next(conv4444_TW,2,2) //1,1 pb->prel(1,1,ARGB4444(p_in[3])); } -pixelcvt_next(convYUV_TW,2,2) +pixelcvt_next(convBMP_TW,2,2) +{ + u16* p_in=(u16*)data; + pb->prel(0,0,ARGB4444(p_in[0])); + pb->prel(0,1,ARGB4444(p_in[1])); + pb->prel(1,0,ARGB4444(p_in[2])); + pb->prel(1,1,ARGB4444(p_in[3])); +} +pixelcvt_end; + +pixelcvt32_start(convYUV_TW,2,2) { //convert 4x1 4444 to 4x1 8888 u16* p_in=(u16*)data; @@ -260,17 +291,9 @@ pixelcvt_next(convYUV_TW,2,2) //1,1 pb->prel(1,1,YUV422(Y1,Yu,Yv)); } -pixelcvt_next(convBMP_TW,2,2) -{ - u16* p_in=(u16*)data; - pb->prel(0,0,ARGB4444(p_in[0])); - pb->prel(0,1,ARGB4444(p_in[1])); - pb->prel(1,0,ARGB4444(p_in[2])); - pb->prel(1,1,ARGB4444(p_in[3])); -} pixelcvt_end; -pixelcvt_start(convPAL4_TW,4,4) +pixelcvt_size_start(convPAL4_TW,2,4) { u8* p_in=(u8*)data; u32* pal=&palette_ram[palette_index]; @@ -295,7 +318,9 @@ pixelcvt_start(convPAL4_TW,4,4) pb->prel(3,2,pal[p_in[0]&0xF]); pb->prel(3,3,pal[(p_in[0]>>4)&0xF]);p_in++; } -pixelcvt_next(convPAL8_TW,2,4) +pixelcvt_end; + +pixelcvt_size_start(convPAL8_TW,2,4) { u8* p_in=(u8*)data; u32* pal=&palette_ram[palette_index]; @@ -311,9 +336,10 @@ pixelcvt_next(convPAL8_TW,2,4) pb->prel(1,3,pal[p_in[0]]);p_in++; } pixelcvt_end; + //handler functions -template -void texture_PL(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) +template +void texture_PL(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) { pb->amove(0,0); @@ -334,8 +360,8 @@ void texture_PL(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) } } -template -void texture_TW(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) +template +void texture_TW(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) { pb->amove(0,0); @@ -360,8 +386,8 @@ void texture_TW(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) } } -template -void texture_VQ(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) +template +void texture_VQ(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) { p_in+=256*4*2; pb->amove(0,0); @@ -389,55 +415,55 @@ void texture_VQ(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height) //We ask the compiler to generate the templates here //;) //planar formats ! -template void texture_PL >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_PL >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_PL >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_PL >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_PL >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_PL, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_PL, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_PL, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_PL, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_PL, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); //twiddled formats ! -template void texture_TW >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_TW >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_TW, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); //VQ formats ! -template void texture_VQ >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_VQ >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_VQ >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_VQ >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -template void texture_VQ >(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_VQ, u32>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); //Planar -#define tex565_PL texture_PL > -#define tex1555_PL texture_PL > -#define tex4444_PL texture_PL > -#define texYUV422_PL texture_PL > -#define texBMP_PL texture_PL > +#define tex565_PL texture_PL, u16> +#define tex1555_PL texture_PL, u16> +#define tex4444_PL texture_PL, u16> +#define texYUV422_PL texture_PL, u32> +#define texBMP_PL texture_PL, u16> //Twiddle -#define tex565_TW texture_TW > -#define tex1555_TW texture_TW > -#define tex4444_TW texture_TW > -#define texYUV422_TW texture_TW > -#define texBMP_TW texture_TW > -#define texPAL4_TW texture_TW > -#define texPAL8_TW texture_TW > +#define tex565_TW texture_TW, u16> +#define tex1555_TW texture_TW, u16> +#define tex4444_TW texture_TW, u16> +#define texYUV422_TW texture_TW, u32> +#define texBMP_TW texture_TW, u16> +#define texPAL4_TW texture_TW, u16> +#define texPAL8_TW texture_TW, u16> +#define texPAL4_TW32 texture_TW, u32> +#define texPAL8_TW32 texture_TW, u32> //VQ -#define tex565_VQ texture_VQ > -#define tex1555_VQ texture_VQ > -#define tex4444_VQ texture_VQ > -#define texYUV422_VQ texture_VQ > -#define texBMP_VQ texture_VQ > - -void texture_PAL4(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -void texture_PAL8(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); - +#define tex565_VQ texture_VQ, u16> +#define tex1555_VQ texture_VQ, u16> +#define tex4444_VQ texture_VQ, u16> +#define texYUV422_VQ texture_VQ, u32> +#define texBMP_VQ texture_VQ, u16> #define Is_64_Bit(addr) ((addr &0x1000000)==0) diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index b369d803e..4819240d4 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -12,8 +12,8 @@ The mapping is done with tcw:tsp -> GL texture. That includes stuff like filtering/ texture repeat To save space native formats are used for 1555/565/4444 (only bit shuffling is done) -YUV is converted to 565 (some loss of quality on that) -PALs are decoded to their unpaletted format, 8888 is downcasted to 4444 +YUV is converted to 8888 +PALs are decoded to their unpaletted format (5551/565/4444/8888 depending on palette type) Mipmaps not supported for now @@ -26,31 +26,37 @@ Compression #include #endif -u16 temp_tex_buffer[1024*1024]; +u16 temp_tex_buffer[4 * 1024 * 1024]; // Maximum texture size: RGBA_8888 x 1024 x 1024 extern u32 decoded_colors[3][65536]; -typedef void TexConvFP(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +typedef void TexConvFP(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +typedef void TexConvFP32(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); struct PvrTexInfo { const char* name; - int bpp; //4/8 for pal. 16 for uv, argb + int bpp; //4/8 for pal. 16 for yuv, rgb, argb GLuint type; + // Conversion to 16 bpp TexConvFP *PL; TexConvFP *TW; TexConvFP *VQ; + // Conversion to 32 bpp + TexConvFP32 *PL32; + TexConvFP32 *TW32; + TexConvFP32 *VQ32; }; PvrTexInfo format[8]= -{ - {"1555", 16,GL_UNSIGNED_SHORT_5_5_5_1, &tex1555_PL,&tex1555_TW,&tex1555_VQ}, //1555 - {"565", 16,GL_UNSIGNED_SHORT_5_6_5, &tex565_PL,&tex565_TW,&tex565_VQ}, //565 - {"4444", 16,GL_UNSIGNED_SHORT_4_4_4_4, &tex4444_PL,&tex4444_TW,&tex4444_VQ}, //4444 - {"yuv", 16,GL_UNSIGNED_SHORT_5_6_5, &texYUV422_PL,&texYUV422_TW,&texYUV422_VQ}, //yuv - {"UNSUPPORTED BUMP MAPPED POLY", 16,GL_UNSIGNED_SHORT_4_4_4_4,&texBMP_PL,&texBMP_TW,&texBMP_VQ}, //bump_ns - {"pal4", 4,0,0,texPAL4_TW,0}, //pal4 - {"pla8", 8,0,0,texPAL8_TW,0}, //pal8 - {"ns/1555", 0}, //ns, 1555 +{ // name bpp GL format Planar Twiddled VQ Planar(32b) Twiddled(32b) VQ (32b) + {"1555", 16, GL_UNSIGNED_SHORT_5_5_5_1, &tex1555_PL, &tex1555_TW, &tex1555_VQ, NULL }, //1555 + {"565", 16, GL_UNSIGNED_SHORT_5_6_5, &tex565_PL, &tex565_TW, &tex565_VQ, NULL }, //565 + {"4444", 16, GL_UNSIGNED_SHORT_4_4_4_4, &tex4444_PL, &tex4444_TW, &tex4444_VQ, NULL }, //4444 + {"yuv", 16, GL_UNSIGNED_INT_8_8_8_8, NULL, NULL, NULL, &texYUV422_PL, &texYUV422_TW, &texYUV422_VQ }, //yuv + {"bumpmap", 16, GL_UNSIGNED_SHORT_4_4_4_4, &texBMP_PL, &texBMP_TW, &texBMP_VQ, NULL}, //bump map + {"pal4", 4, 0, 0, &texPAL4_TW, 0, NULL, &texPAL4_TW32, NULL }, //pal4 + {"pal8", 8, 0, 0, &texPAL8_TW, 0, NULL, &texPAL8_TW32, NULL }, //pal8 + {"ns/1555", 0}, //ns, 1555 }; const u32 MipPoint[8] = @@ -66,7 +72,7 @@ const u32 MipPoint[8] = }; const GLuint PAL_TYPE[4]= -{GL_UNSIGNED_SHORT_5_5_5_1,GL_UNSIGNED_SHORT_5_6_5,GL_UNSIGNED_SHORT_4_4_4_4,GL_UNSIGNED_SHORT_4_4_4_4}; +{GL_UNSIGNED_SHORT_5_5_5_1,GL_UNSIGNED_SHORT_5_6_5,GL_UNSIGNED_SHORT_4_4_4_4, GL_UNSIGNED_INT_8_8_8_8}; static void dumpRtTexture(u32 name, u32 w, u32 h) { char sname[256]; @@ -130,6 +136,7 @@ struct TextureCacheData PvrTexInfo* tex; TexConvFP* texconv; + TexConvFP32* texconv32; u32 dirty; vram_block* lock_block; @@ -160,7 +167,7 @@ struct TextureCacheData printf(" Stride"); printf(" %dx%d @ 0x%X",8<PL) + if (tcw.ScanOrder && (tex->PL || tex->PL32)) { //Texture is stored 'planar' in memory, no deswizzle is needed - verify(tcw.VQ_Comp==0); + //verify(tcw.VQ_Comp==0); + if (tcw.VQ_Comp != 0) + printf("Warning: planar texture with VQ set (invalid)\n"); + //Planar textures support stride selection, mostly used for non power of 2 textures (videos) int stride=w; if (tcw.StrideSel) stride=(TEXT_CONTROL&31)*32; //Call the format specific conversion code - texconv=tex->PL; + texconv = tex->PL; + texconv32 = tex->PL32; //calculate the size, in bytes, for the locking size=stride*h*tex->bpp/8; } @@ -242,19 +253,21 @@ struct TextureCacheData if (tcw.VQ_Comp) { - verify(tex->VQ!=0); + verify(tex->VQ != NULL || tex->VQ32 != NULL); indirect_color_ptr=sa; if (tcw.MipMapped) sa+=MipPoint[tsp.TexU]; - texconv=tex->VQ; + texconv = tex->VQ; + texconv32 = tex->VQ32; size=w*h/8; } else { - verify(tex->TW!=0) + verify(tex->TW != NULL || tex->TW32 != NULL) if (tcw.MipMapped) sa+=MipPoint[tsp.TexU]*tex->bpp/2; - texconv=tex->TW; + texconv = tex->TW; + texconv32 = tex->TW32; size=w*h*tex->bpp/8; } } @@ -263,7 +276,8 @@ struct TextureCacheData printf("Unhandled texture %d\n",tcw.PixelFmt); size=w*h*2; memset(temp_tex_buffer,0xFFFFFFFF,size); - texconv=0; + texconv = NULL; + texconv32 = NULL; } } @@ -285,28 +299,42 @@ struct TextureCacheData vq_codebook=(u8*)&vram[indirect_color_ptr]; //might be used if VQ tex //texture conversion work - PixelBuffer pbt; - pbt.p_buffer_start=pbt.p_current_line=temp_tex_buffer; - pbt.pixels_per_line=w; - u32 stride=w; - if (tcw.StrideSel && tcw.ScanOrder && tex->PL) + if (tcw.StrideSel && tcw.ScanOrder && (tex->PL || tex->PL32)) stride=(TEXT_CONTROL&31)*32; //I think this needs +1 ? - if(texconv!=0) + // For paletted formats, we have the choice of conversion type (16 or 32). + // Use the one that fits the palette entry size. + if (texconv32 != NULL && (pal_table_rev == NULL || textype == GL_UNSIGNED_INT_8_8_8_8)) { + PixelBuffer pbt; + pbt.p_buffer_start = pbt.p_current_line = (u32*)temp_tex_buffer; + pbt.pixels_per_line = w; + + texconv32(&pbt, (u8*)&vram[sa], stride, h); + } + else if (texconv != NULL) + { + PixelBuffer pbt; + pbt.p_buffer_start=pbt.p_current_line=temp_tex_buffer; + pbt.pixels_per_line=w; + texconv(&pbt,(u8*)&vram[sa],stride,h); } else { //fill it in with a temp color printf("UNHANDLED TEXTURE\n"); - memset(temp_tex_buffer,0xF88F8F7F,w*h*2); + memset(temp_tex_buffer, 0x80, w * h * 2); } //PrintTextureName(); - + if (sa_tex > VRAM_SIZE || size == 0 || sa + size > VRAM_SIZE) + { + printf("Warning: invalid texture. Address %08X %08X size %d\n", sa_tex, sa, size); + return; + } //lock the texture to detect changes in it lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this);