Better texture cache: avoid duplicate. Attempt to put the RTT texture in
the cache to avoid copy to/from vram.
This commit is contained in:
parent
683cbbba3e
commit
43c125c208
|
@ -212,29 +212,42 @@ __forceinline
|
|||
glStencilFunc(GL_ALWAYS,stencil,stencil);
|
||||
}
|
||||
|
||||
bool texture_changed = false;
|
||||
|
||||
if (gp->texid != cache.texture)
|
||||
{
|
||||
cache.texture=gp->texid;
|
||||
if (gp->texid != -1) {
|
||||
//verify(glIsTexture(gp->texid));
|
||||
glBindTexture(GL_TEXTURE_2D, gp->texid);
|
||||
texture_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (gp->tsp.full!=cache.tsp.full)
|
||||
if (gp->tsp.full != cache.tsp.full || texture_changed)
|
||||
{
|
||||
cache.tsp=gp->tsp;
|
||||
|
||||
if (Type==ListType_Translucent)
|
||||
{
|
||||
glBlendFunc(SrcBlendGL[gp->tsp.SrcInstr],DstBlendGL[gp->tsp.DstInstr]);
|
||||
}
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (gp->tsp.ClampU ? GL_CLAMP_TO_EDGE : (gp->tsp.FlipU ? GL_MIRRORED_REPEAT : GL_REPEAT))) ;
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (gp->tsp.ClampV ? GL_CLAMP_TO_EDGE : (gp->tsp.FlipV ? GL_MIRRORED_REPEAT : GL_REPEAT))) ;
|
||||
|
||||
#ifdef WEIRD_SLOWNESS
|
||||
//SGX seems to be super slow with discard enabled blended pixels
|
||||
//can't cache this -- due to opengl shader api
|
||||
bool clip_alpha_on_zero=gp->tsp.SrcInstr==4 && (gp->tsp.DstInstr==1 || gp->tsp.DstInstr==5);
|
||||
glUniform1f(CurrentShader->cp_AlphaTestValue,clip_alpha_on_zero?(1/255.f):(-2.f));
|
||||
#endif
|
||||
//set texture filter mode
|
||||
if (gp->tsp.FilterMode == 0)
|
||||
{
|
||||
//disable filtering, mipmaps
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
}
|
||||
else
|
||||
{
|
||||
//bilinear filtering
|
||||
//PowerVR supports also trilinear via two passes, but we ignore that for now
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (gp->tcw.MipMapped && settings.rend.UseMipmaps) ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#include "rend/TexCache.h"
|
||||
#include "hw/pvr/pvr_mem.h"
|
||||
#include "hw/mem/_vmem.h"
|
||||
#include "deps/libpng/png.h"
|
||||
|
||||
/*
|
||||
Textures
|
||||
|
@ -67,6 +68,47 @@ const u32 MipPoint[8] =
|
|||
const GLuint PAL_TYPE[4]=
|
||||
{GL_UNSIGNED_SHORT_5_5_5_1,GL_UNSIGNED_SHORT_5_6_5,GL_UNSIGNED_SHORT_4_4_4_4,GL_UNSIGNED_SHORT_4_4_4_4};
|
||||
|
||||
static void dumpRtTexture(u32 name, u32 w, u32 h) {
|
||||
char sname[256];
|
||||
sprintf(sname, "texdump/%x-%d.png", name, FrameCount);
|
||||
FILE *fp = fopen(sname, "wb");
|
||||
if (fp == NULL)
|
||||
return;
|
||||
|
||||
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
||||
|
||||
png_bytepp rows = (png_bytepp)malloc(h * sizeof(png_bytep));
|
||||
for (int y = 0; y < h; y++) {
|
||||
rows[y] = (png_bytep)malloc(w * 4); // 32-bit per pixel
|
||||
glReadPixels(0, y, w, 1, GL_RGBA, GL_UNSIGNED_BYTE, rows[y]);
|
||||
}
|
||||
|
||||
png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
|
||||
png_infop info_ptr = png_create_info_struct(png_ptr);
|
||||
|
||||
png_init_io(png_ptr, fp);
|
||||
|
||||
|
||||
/* write header */
|
||||
png_set_IHDR(png_ptr, info_ptr, w, h,
|
||||
8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE,
|
||||
PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
|
||||
|
||||
png_write_info(png_ptr, info_ptr);
|
||||
|
||||
|
||||
/* write bytes */
|
||||
png_write_image(png_ptr, rows);
|
||||
|
||||
/* end write */
|
||||
png_write_end(png_ptr, NULL);
|
||||
fclose(fp);
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
free(rows[y]);
|
||||
free(rows);
|
||||
}
|
||||
|
||||
|
||||
//Texture Cache :)
|
||||
struct TextureCacheData
|
||||
|
@ -369,7 +411,7 @@ struct TextureCacheData
|
|||
map<u64,TextureCacheData> TexCache;
|
||||
typedef map<u64,TextureCacheData>::iterator TexCacheIter;
|
||||
|
||||
//TexCacheList<TextureCacheData> TexCache;
|
||||
TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw);
|
||||
|
||||
struct FBT
|
||||
{
|
||||
|
@ -396,6 +438,9 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
|
|||
int fbh2 = 2;
|
||||
while (fbh2 < fbh)
|
||||
fbh2 *= 2;
|
||||
int fbw2 = 2;
|
||||
while (fbw2 < fbw)
|
||||
fbw2 *= 2;
|
||||
|
||||
// Get the currently bound frame buffer object. On most platforms this just gives 0.
|
||||
//glGetIntegerv(GL_FRAMEBUFFER_BINDING, &m_i32OriginalFbo);
|
||||
|
@ -411,20 +456,20 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
|
|||
*/
|
||||
|
||||
#ifdef GLES
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, fbw, fbh2);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, fbw2, fbh2);
|
||||
#else
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, fbw, fbh2);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, fbw2, fbh2);
|
||||
#endif
|
||||
|
||||
glGenRenderbuffers(1, &rv.stencilb);
|
||||
glBindRenderbuffer(GL_RENDERBUFFER, rv.stencilb);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_STENCIL_INDEX8, fbw, fbh2);
|
||||
glRenderbufferStorage(GL_RENDERBUFFER, GL_STENCIL_INDEX8, fbw2, fbh2);
|
||||
|
||||
// Create a texture for rendering to
|
||||
glGenTextures(1, &rv.tex);
|
||||
glBindTexture(GL_TEXTURE_2D, rv.tex);
|
||||
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, channels, fbw, fbh2, 0, channels, fmt, 0);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, channels, fbw2, fbh2, 0, channels, fmt, 0);
|
||||
|
||||
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
|
@ -487,12 +532,17 @@ void ReadRTTBuffer() {
|
|||
glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &color_fmt);
|
||||
glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &color_type);
|
||||
|
||||
if (FB_W_CTRL.fb_packmode == 1 && stride == w * 2 && color_fmt == GL_RGB && color_type == GL_UNSIGNED_SHORT_5_6_5) {
|
||||
const u8 fb_packmode = FB_W_CTRL.fb_packmode;
|
||||
|
||||
if (fb_packmode == 1 && stride == w * 2 && color_fmt == GL_RGB && color_type == GL_UNSIGNED_SHORT_5_6_5) {
|
||||
// Can be read directly into vram
|
||||
glReadPixels(0, 0, w, h, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
const u16 kval_bit = (FB_W_CTRL.fb_kval & 0x80) << 8;
|
||||
const u8 fb_alpha_threshold = FB_W_CTRL.fb_alpha_threshold;
|
||||
|
||||
u32 lines = h;
|
||||
while (lines > 0) {
|
||||
u8 *p = (u8 *)temp_tex_buffer;
|
||||
|
@ -501,10 +551,10 @@ void ReadRTTBuffer() {
|
|||
|
||||
for (u32 l = 0; l < chunk_lines; l++) {
|
||||
for (u32 c = 0; c < w; c++) {
|
||||
switch(FB_W_CTRL.fb_packmode)
|
||||
switch(fb_packmode)
|
||||
{
|
||||
case 0: //0x0 0555 KRGB 16 bit (default) Bit 15 is the value of fb_kval[7].
|
||||
*dst++ = (((p[0] >> 3) & 0x1F) << 10) | (((p[1] >> 3) & 0x1F) << 5) | ((p[2] >> 3) & 0x1F) | ((FB_W_CTRL.fb_kval & 0x80) << 8);
|
||||
*dst++ = (((p[0] >> 3) & 0x1F) << 10) | (((p[1] >> 3) & 0x1F) << 5) | ((p[2] >> 3) & 0x1F) | kval_bit;
|
||||
break;
|
||||
case 1: //0x1 565 RGB 16 bit
|
||||
*dst++ = (((p[0] >> 3) & 0x1F) << 11) | (((p[1] >> 2) & 0x3F) << 5) | ((p[2] >> 3) & 0x1F);
|
||||
|
@ -513,7 +563,7 @@ void ReadRTTBuffer() {
|
|||
*dst++ = (((p[0] >> 4) & 0xF) << 8) | (((p[1] >> 4) & 0xF) << 4) | ((p[2] >> 4) & 0xF) | (((p[3] >> 4) & 0xF) << 12);
|
||||
break;
|
||||
case 3://0x3 1555 ARGB 16 bit The alpha value is determined by comparison with the value of fb_alpha_threshold.
|
||||
*dst++ = (((p[0] >> 3) & 0x1F) << 10) | (((p[1] >> 3) & 0x1F) << 5) | ((p[2] >> 3) & 0x1F) | (p[3] >= FB_W_CTRL.fb_alpha_threshold ? 0x8000 : 0);
|
||||
*dst++ = (((p[0] >> 3) & 0x1F) << 10) | (((p[1] >> 3) & 0x1F) << 5) | ((p[2] >> 3) & 0x1F) | (p[3] >= fb_alpha_threshold ? 0x8000 : 0);
|
||||
break;
|
||||
}
|
||||
p += 4;
|
||||
|
@ -537,42 +587,104 @@ void ReadRTTBuffer() {
|
|||
}
|
||||
}
|
||||
|
||||
//dumpRtTexture(fb_rtt.TexAddr, w, h);
|
||||
|
||||
if (w > 1024 || h > 1024) {
|
||||
glDeleteTextures(1, &fb_rtt.tex);
|
||||
}
|
||||
else
|
||||
{
|
||||
TCW tcw = { { TexAddr : fb_rtt.TexAddr, Reserved : 0, StrideSel : 0, ScanOrder : 1 } };
|
||||
switch (FB_W_CTRL.fb_packmode) {
|
||||
case 0:
|
||||
case 3:
|
||||
tcw.PixelFmt = 0;
|
||||
break;
|
||||
case 1:
|
||||
tcw.PixelFmt = 1;
|
||||
break;
|
||||
case 2:
|
||||
tcw.PixelFmt = 2;
|
||||
break;
|
||||
}
|
||||
TSP tsp = { 0 };
|
||||
for (tsp.TexU = 0; tsp.TexU <= 7 && (8 << tsp.TexU) < w; tsp.TexU++);
|
||||
for (tsp.TexV = 0; tsp.TexV <= 7 && (8 << tsp.TexV) < h; tsp.TexV++);
|
||||
|
||||
TextureCacheData *texture_data = getTextureCacheData(tsp, tcw);
|
||||
if (texture_data->texID != 0)
|
||||
glDeleteTextures(1, &texture_data->texID);
|
||||
else {
|
||||
texture_data->Create(false);
|
||||
texture_data->lock_block = libCore_vramlock_Lock(texture_data->sa_tex, texture_data->sa + texture_data->size - 1, texture_data);
|
||||
}
|
||||
texture_data->texID = fb_rtt.tex;
|
||||
texture_data->dirty = 0;
|
||||
}
|
||||
fb_rtt.tex = 0;
|
||||
|
||||
if (fb_rtt.fbo) { glDeleteFramebuffers(1,&fb_rtt.fbo); fb_rtt.fbo = 0; }
|
||||
if (fb_rtt.tex) { glDeleteTextures(1,&fb_rtt.tex); fb_rtt.tex = 0; }
|
||||
if (fb_rtt.depthb) { glDeleteRenderbuffers(1,&fb_rtt.depthb); fb_rtt.depthb = 0; }
|
||||
if (fb_rtt.stencilb) { glDeleteRenderbuffers(1,&fb_rtt.stencilb); fb_rtt.stencilb = 0; }
|
||||
|
||||
}
|
||||
|
||||
GLuint gl_GetTexture(TSP tsp, TCW tcw)
|
||||
{
|
||||
//lookup texture
|
||||
static int TexCacheLookups;
|
||||
static int TexCacheHits;
|
||||
static float LastTexCacheStats;
|
||||
|
||||
// Only use TexU and TexV from TSP in the cache key
|
||||
const TSP TSPTextureCacheMask = { { TexV : 7, TexU : 7 } };
|
||||
const TCW TCWTextureCacheMask = { { TexAddr : 0x1FFFFF, Reserved : 0, StrideSel : 0, ScanOrder : 0, PixelFmt : 7, VQ_Comp : 1, MipMapped : 1 } };
|
||||
|
||||
TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw) {
|
||||
u64 key = ((u64)(tcw.full & TCWTextureCacheMask.full) << 32) | (tsp.full & TSPTextureCacheMask.full);
|
||||
|
||||
TexCacheIter tx = TexCache.find(key);
|
||||
|
||||
TextureCacheData* tf;
|
||||
//= TexCache.Find(tcw.full,tsp.full);
|
||||
u64 key=((u64)tcw.full<<32) | tsp.full;
|
||||
|
||||
TexCacheIter tx=TexCache.find(key);
|
||||
|
||||
if (tx!=TexCache.end())
|
||||
if (tx != TexCache.end())
|
||||
{
|
||||
tf=&tx->second;
|
||||
tf = &tx->second;
|
||||
}
|
||||
else //create if not existing
|
||||
{
|
||||
TextureCacheData tfc={0};
|
||||
TexCache[key]=tfc;
|
||||
TexCache[key] = tfc;
|
||||
|
||||
tx=TexCache.find(key);
|
||||
tf=&tx->second;
|
||||
|
||||
tf->tsp=tsp;
|
||||
tf->tcw=tcw;
|
||||
tf->Create(true);
|
||||
tf->tsp = tsp;
|
||||
tf->tcw = tcw;
|
||||
}
|
||||
|
||||
return tf;
|
||||
}
|
||||
|
||||
GLuint gl_GetTexture(TSP tsp, TCW tcw)
|
||||
{
|
||||
TexCacheLookups++;
|
||||
|
||||
//lookup texture
|
||||
TextureCacheData* tf = getTextureCacheData(tsp, tcw);
|
||||
|
||||
if (tf->texID == 0)
|
||||
tf->Create(true);
|
||||
|
||||
//update if needed
|
||||
if (tf->NeedsUpdate())
|
||||
tf->Update();
|
||||
else
|
||||
TexCacheHits++;
|
||||
|
||||
// if (os_GetSeconds() - LastTexCacheStats >= 2.0)
|
||||
// {
|
||||
// LastTexCacheStats = os_GetSeconds();
|
||||
// printf("Texture cache efficiency: %.2f%% cache size %ld\n", (float)TexCacheHits / TexCacheLookups * 100, TexCache.size());
|
||||
// TexCacheLookups = 0;
|
||||
// TexCacheHits = 0;
|
||||
// }
|
||||
|
||||
//update state for opts/stuff
|
||||
tf->Lookups++;
|
||||
|
@ -588,8 +700,7 @@ text_info raw_GetTexture(TSP tsp, TCW tcw)
|
|||
|
||||
//lookup texture
|
||||
TextureCacheData* tf;
|
||||
//= TexCache.Find(tcw.full,tsp.full);
|
||||
u64 key = ((u64)tcw.full << 32) | tsp.full;
|
||||
u64 key = ((u64)(tcw.full & TCWTextureCacheMask.full) << 32) | (tsp.full & TSPTextureCacheMask.full);
|
||||
|
||||
TexCacheIter tx = TexCache.find(key);
|
||||
|
||||
|
|
Loading…
Reference in New Issue