diff --git a/core/rend/gles/CustomTexture.cpp b/core/rend/CustomTexture.cpp similarity index 92% rename from core/rend/gles/CustomTexture.cpp rename to core/rend/CustomTexture.cpp index 99fe82ac7..450497d6c 100644 --- a/core/rend/gles/CustomTexture.cpp +++ b/core/rend/CustomTexture.cpp @@ -27,11 +27,14 @@ #include #include "reios/reios.h" +// TODO Move this out of gles.cpp +u8* loadPNGData(const string& subpath, int &width, int &height); + void CustomTexture::LoaderThread() { while (initialized) { - TextureCacheData *texture; + BaseTextureCacheData *texture; do { texture = NULL; @@ -142,7 +145,7 @@ u8* CustomTexture::LoadCustomTexture(u32 hash, int& width, int& height) return image_data; } -void CustomTexture::LoadCustomTextureAsync(TextureCacheData *texture_data) +void CustomTexture::LoadCustomTextureAsync(BaseTextureCacheData *texture_data) { if (!Init()) return; @@ -154,7 +157,7 @@ void CustomTexture::LoadCustomTextureAsync(TextureCacheData *texture_data) wakeup_thread.Set(); } -void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *temp_tex_buffer) +void CustomTexture::DumpTexture(u32 hash, int w, int h, TextureType textype, void *temp_tex_buffer) { std::string base_dump_dir = get_writable_data_path(DATA_PATH "texdump/"); if (!file_exists(base_dump_dir)) @@ -185,7 +188,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te u8 *dst = (u8 *)rows[h - y - 1]; switch (textype) { - case GL_UNSIGNED_SHORT_4_4_4_4: + case TextureType::_4444: for (int x = 0; x < w; x++) { *dst++ = ((*src >> 12) & 0xF) << 4; @@ -195,7 +198,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te src++; } break; - case GL_UNSIGNED_SHORT_5_6_5: + case TextureType::_565: for (int x = 0; x < w; x++) { *dst++ = ((*src >> 11) & 0x1F) << 3; @@ -205,7 +208,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te src++; } break; - case GL_UNSIGNED_SHORT_5_5_5_1: + case TextureType::_5551: for (int x = 0; x < w; x++) { *dst++ = ((*src >> 11) & 0x1F) << 3; @@ -215,7 +218,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te src++; } break; - case GL_UNSIGNED_BYTE: + case TextureType::_8888: for (int x = 0; x < w; x++) { *(u32 *)dst = *(u32 *)src; @@ -224,7 +227,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te } break; default: - WARN_LOG(RENDERER, "dumpTexture: unsupported picture format %x", textype); + WARN_LOG(RENDERER, "dumpTexture: unsupported picture format %x", (u32)textype); fclose(fp); free(rows[0]); free(rows); diff --git a/core/rend/gles/CustomTexture.h b/core/rend/CustomTexture.h similarity index 80% rename from core/rend/gles/CustomTexture.h rename to core/rend/CustomTexture.h index fd8a00db1..1550995d0 100644 --- a/core/rend/gles/CustomTexture.h +++ b/core/rend/CustomTexture.h @@ -16,13 +16,11 @@ You should have received a copy of the GNU General Public License along with reicast. If not, see . */ - -#ifndef CORE_REND_GLES_CUSTOMTEXTURE_H_ -#define CORE_REND_GLES_CUSTOMTEXTURE_H_ +#pragma once #include #include -#include "gles.h" +#include "TexCache.h" class CustomTexture { public: @@ -34,8 +32,8 @@ public: {} ~CustomTexture() { Terminate(); } u8* LoadCustomTexture(u32 hash, int& width, int& height); - void LoadCustomTextureAsync(TextureCacheData *texture_data); - void DumpTexture(u32 hash, int w, int h, GLuint textype, void *temp_tex_buffer); + void LoadCustomTextureAsync(BaseTextureCacheData *texture_data); + void DumpTexture(u32 hash, int w, int h, TextureType textype, void *temp_tex_buffer); private: bool Init(); @@ -53,8 +51,6 @@ private: cThread loader_thread; #endif cResetEvent wakeup_thread; - std::vector work_queue; + std::vector work_queue; cMutex work_queue_mutex; }; - -#endif /* CORE_REND_GLES_CUSTOMTEXTURE_H_ */ diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index 9b874bb3c..26704bf3e 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -4,11 +4,13 @@ #include "TexCache.h" #include "hw/pvr/pvr_regs.h" +#include "hw/pvr/Renderer_if.h" #include "hw/mem/_vmem.h" #include "hw/mem/vmem32.h" #include "hw/sh4/modules/mmu.h" #include "deps/xbrz/xbrz.h" #include +#include "CustomTexture.h" u8* vq_codebook; u32 palette_index; @@ -390,3 +392,338 @@ void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool xbrz::scale(factor, source, dest, width, height, has_alpha ? xbrz::ColorFormat::ARGB : xbrz::ColorFormat::RGB, xbrz_cfg); #endif } + +struct PvrTexInfo +{ + const char* name; + int bpp; //4/8 for pal. 16 for yuv, rgb, argb + TextureType type; + // Conversion to 16 bpp + TexConvFP *PL; + TexConvFP *TW; + TexConvFP *VQ; + // Conversion to 32 bpp + TexConvFP32 *PL32; + TexConvFP32 *TW32; + TexConvFP32 *VQ32; +}; + +static const PvrTexInfo format[8] = +{ // name bpp Final format Planar Twiddled VQ Planar(32b) Twiddled(32b) VQ (32b) + {"1555", 16, TextureType::_5551, tex1555_PL, tex1555_TW, tex1555_VQ, tex1555_PL32, tex1555_TW32, tex1555_VQ32 }, //1555 + {"565", 16, TextureType::_565, tex565_PL, tex565_TW, tex565_VQ, tex565_PL32, tex565_TW32, tex565_VQ32 }, //565 + {"4444", 16, TextureType::_4444, tex4444_PL, tex4444_TW, tex4444_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32 }, //4444 + {"yuv", 16, TextureType::_8888, NULL, NULL, NULL, texYUV422_PL, texYUV422_TW, texYUV422_VQ }, //yuv + {"bumpmap", 16, TextureType::_4444, texBMP_PL, texBMP_TW, texBMP_VQ, NULL}, //bump map + {"pal4", 4, TextureType::_5551, 0, texPAL4_TW, texPAL4_VQ, NULL, texPAL4_TW32, texPAL4_VQ32 }, //pal4 + {"pal8", 8, TextureType::_5551, 0, texPAL8_TW, texPAL8_VQ, NULL, texPAL8_TW32, texPAL8_VQ32 }, //pal8 + {"ns/1555", 0}, // Not supported (1555) +}; + +static const u32 MipPoint[8] = +{ + 0x00006,//8 + 0x00016,//16 + 0x00056,//32 + 0x00156,//64 + 0x00556,//128 + 0x01556,//256 + 0x05556,//512 + 0x15556//1024 +}; + +static const TextureType PAL_TYPE[4] = { + TextureType::_5551, TextureType::_565, TextureType::_4444, TextureType::_8888 +}; + +static CustomTexture custom_texture; + +void BaseTextureCacheData::PrintTextureName() +{ + char str[512]; + sprintf(str, "Texture: %s ", GetPixelFormatName()); + + if (tcw.VQ_Comp) + strcat(str, " VQ"); + + if (tcw.ScanOrder==0) + strcat(str, " TW"); + + if (tcw.MipMapped) + strcat(str, " MM"); + + if (tcw.StrideSel) + strcat(str, " Stride"); + + sprintf(str + strlen(str), " %dx%d @ 0x%X", 8 << tsp.TexU, 8 << tsp.TexV, tcw.TexAddr << 3); + std::string id = GetId(); + sprintf(str + strlen(str), " id=%s", id.c_str()); + DEBUG_LOG(RENDERER, "%s", str); +} + +//true if : dirty or paletted texture and hashes don't match +bool BaseTextureCacheData::NeedsUpdate() { + bool rc = dirty + || (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect]) + || (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4]); + return rc; +} + +bool BaseTextureCacheData::Delete() +{ + if (custom_load_in_progress > 0) + return false; + + if (lock_block) + libCore_vramlock_Unlock_block(lock_block); + lock_block=0; + + delete[] custom_image_data; + + return true; +} + +void BaseTextureCacheData::Create() +{ + //Reset state info .. + Lookups=0; + Updates=0; + dirty=FrameCount; + lock_block = nullptr; + + //decode info from tsp/tcw into the texture struct + tex=&format[tcw.PixelFmt == PixelReserved ? Pixel1555 : tcw.PixelFmt]; //texture format table entry + + sa_tex = (tcw.TexAddr<<3) & VRAM_MASK; //texture start address + sa = sa_tex; //data texture start address (modified for MIPs, as needed) + w=8<bpp == 4) + palette_index = tcw.PalSelect << 4; + else if (tex->bpp == 8) + palette_index = (tcw.PalSelect >> 4) << 8; + + //VQ table (if VQ tex) + if (tcw.VQ_Comp) + vq_codebook = sa; + + //Convert a pvr texture into OpenGL + switch (tcw.PixelFmt) + { + + case Pixel1555: //0 1555 value: 1 bit; RGB values: 5 bits each + case PixelReserved: //7 Reserved Regarded as 1555 + case Pixel565: //1 565 R value: 5 bits; G value: 6 bits; B value: 5 bits + case Pixel4444: //2 4444 value: 4 bits; RGB values: 4 bits each + case PixelYUV: //3 YUV422 32 bits per 2 pixels; YUYV values: 8 bits each + case PixelBumpMap: //4 Bump Map 16 bits/pixel; S value: 8 bits; R value: 8 bits + case PixelPal4: //5 4 BPP Palette Palette texture with 4 bits/pixel + case PixelPal8: //6 8 BPP Palette Palette texture with 8 bits/pixel + if (tcw.ScanOrder && (tex->PL || tex->PL32)) + { + //Texture is stored 'planar' in memory, no deswizzle is needed + //verify(tcw.VQ_Comp==0); + if (tcw.VQ_Comp != 0) + WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)"); + + //Planar textures support stride selection, mostly used for non power of 2 textures (videos) + int stride = w; + if (tcw.StrideSel) + { + stride = std::max((TEXT_CONTROL & 31) * 32, w); + } + //Call the format specific conversion code + texconv = tex->PL; + texconv32 = tex->PL32; + //calculate the size, in bytes, for the locking + size=stride*h*tex->bpp/8; + } + else + { + // Quake 3 Arena uses one. Not sure if valid but no need to crash + //verify(w==h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN* + + if (tcw.VQ_Comp) + { + verify(tex->VQ != NULL || tex->VQ32 != NULL); + vq_codebook = sa; + if (tcw.MipMapped) + sa+=MipPoint[tsp.TexU]; + texconv = tex->VQ; + texconv32 = tex->VQ32; + size=w*h/8; + } + else + { + verify(tex->TW != NULL || tex->TW32 != NULL); + if (tcw.MipMapped) + sa+=MipPoint[tsp.TexU]*tex->bpp/2; + texconv = tex->TW; + texconv32 = tex->TW32; + size=w*h*tex->bpp/8; + } + } + break; + default: + WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt); + size=w*h*2; + texconv = NULL; + texconv32 = NULL; + } +} + +void BaseTextureCacheData::ComputeHash() +{ + texture_hash = XXH32(&vram[sa], size, 7); + if (IsPaletted()) + texture_hash ^= palette_hash; + old_texture_hash = texture_hash; + texture_hash ^= tcw.full; +} + +void BaseTextureCacheData::Update() +{ + //texture state tracking stuff + Updates++; + dirty=0; + + tex_type = tex->type; + + bool has_alpha = false; + if (IsPaletted()) + { + tex_type = PAL_TYPE[PAL_RAM_CTRL&3]; + if (tex_type == TextureType::_8888) + has_alpha = true; + + // Get the palette hash to check for future updates + if (tcw.PixelFmt == PixelPal4) + palette_hash = pal_hash_16[tcw.PalSelect]; + else + palette_hash = pal_hash_256[tcw.PalSelect >> 4]; + } + + ::palette_index = this->palette_index; // might be used if pal. tex + ::vq_codebook = &vram[vq_codebook]; // might be used if VQ tex + + //texture conversion work + u32 stride=w; + + if (tcw.StrideSel && tcw.ScanOrder && (tex->PL || tex->PL32)) + stride = std::max(w, (TEXT_CONTROL & 31) * 32); + + PrintTextureName(); + u32 original_h = h; + if (sa_tex > VRAM_SIZE || size == 0 || sa + size > VRAM_SIZE) + { + if (sa + size > VRAM_SIZE) + { + // Shenmue Space Harrier mini-arcade loads a texture that goes beyond the end of VRAM + // but only uses the top portion of it + h = (VRAM_SIZE - sa) * 8 / stride / tex->bpp; + size = stride * h * tex->bpp/8; + } + else + { + WARN_LOG(RENDERER, "Warning: invalid texture. Address %08X %08X size %d", sa_tex, sa, size); + return; + } + } + if (settings.rend.CustomTextures) + custom_texture.LoadCustomTextureAsync(this); + + void *temp_tex_buffer = NULL; + u32 upscaled_w = w; + u32 upscaled_h = h; + + PixelBuffer pb16; + PixelBuffer pb32; + + // Figure out if we really need to use a 32-bit pixel buffer + bool need_32bit_buffer = true; + if ((settings.rend.TextureUpscale <= 1 + || w * h > settings.rend.MaxFilteredTextureSize + * settings.rend.MaxFilteredTextureSize // Don't process textures that are too big + || tcw.PixelFmt == PixelYUV) // Don't process YUV textures + && (!IsPaletted() || tex_type != TextureType::_8888) + && texconv != NULL) + need_32bit_buffer = false; + // TODO avoid upscaling/depost. textures that change too often + + if (texconv32 != NULL && need_32bit_buffer) + { + // Force the texture type since that's the only 32-bit one we know + tex_type = TextureType::_8888; + + pb32.init(w, h); + + texconv32(&pb32, (u8*)&vram[sa], stride, h); + +#ifdef DEPOSTERIZE + { + // Deposterization + PixelBuffer tmp_buf; + tmp_buf.init(w, h); + + DePosterize(pb32.data(), tmp_buf.data(), w, h); + pb32.steal_data(tmp_buf); + } +#endif + + // xBRZ scaling + if (settings.rend.TextureUpscale > 1) + { + PixelBuffer tmp_buf; + tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale); + + if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444) + // Alpha channel formats. Palettes with alpha are already handled + has_alpha = true; + UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha); + pb32.steal_data(tmp_buf); + upscaled_w *= settings.rend.TextureUpscale; + upscaled_h *= settings.rend.TextureUpscale; + } + temp_tex_buffer = pb32.data(); + } + else if (texconv != NULL) + { + pb16.init(w, h); + + texconv(&pb16,(u8*)&vram[sa],stride,h); + temp_tex_buffer = pb16.data(); + } + else + { + //fill it in with a temp color + WARN_LOG(RENDERER, "UNHANDLED TEXTURE"); + pb16.init(w, h); + memset(pb16.data(), 0x80, w * h * 2); + temp_tex_buffer = pb16.data(); + } + // Restore the original texture height if it was constrained to VRAM limits above + h = original_h; + + //lock the texture to detect changes in it + lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this); + + UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer); + if (settings.rend.DumpTextures) + { + ComputeHash(); + custom_texture.DumpTexture(texture_hash, upscaled_w, upscaled_h, tex_type, temp_tex_buffer); + } +} + +void BaseTextureCacheData::CheckCustomTexture() +{ + if (custom_load_in_progress == 0 && custom_image_data != NULL) + { + tex_type = TextureType::_8888; + UploadToGPU(custom_width, custom_height, custom_image_data); + delete [] custom_image_data; + custom_image_data = NULL; + } +} diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index b126b2c3d..eceb807ac 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -1,5 +1,7 @@ #pragma once +#include #include "oslib/oslib.h" +#include "hw/pvr/ta_structs.h" extern u8* vq_codebook; extern u32 palette_index; @@ -620,5 +622,81 @@ template void texture_VQ, u16>(PixelBuffer* pb,u8* p_in, #define texPAL4_VQ32 texture_VQ, u32> #define texPAL8_VQ32 texture_VQ, u32> +bool VramLockedWriteOffset(size_t offset); void DePosterize(u32* source, u32* dest, int width, int height); void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha); + +struct PvrTexInfo; +template class PixelBuffer; +typedef void TexConvFP(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +typedef void TexConvFP32(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); +enum class TextureType { _565, _5551, _4444, _8888 }; + +struct BaseTextureCacheData +{ + TSP tsp; //dreamcast texture parameters + TCW tcw; + + // Decoded/filtered texture format + TextureType tex_type; + + u32 Lookups; + + u32 sa; //pixel data start address in vram (might be offset for mipmaps/etc) + u32 sa_tex; //texture data start address in vram + u32 w,h; //width & height of the texture + u32 size; //size, in bytes, in vram + + const PvrTexInfo* tex; + TexConvFP* texconv; + TexConvFP32* texconv32; + + u32 dirty; + vram_block* lock_block; + + u32 Updates; + + u32 palette_index; + //used for palette updates + u32 palette_hash; // Palette hash at time of last update + u32 vq_codebook; // VQ quantizers table for compressed textures + u32 texture_hash; // xxhash of texture data, used for custom textures + u32 old_texture_hash; // legacy hash + u8* volatile custom_image_data; // loaded custom image data + volatile u32 custom_width; + volatile u32 custom_height; + std::atomic_int custom_load_in_progress; + + void PrintTextureName(); + virtual std::string GetId() = 0; + + bool IsPaletted() + { + return tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8; + } + + const char* GetPixelFormatName() + { + switch (tcw.PixelFmt) + { + case Pixel1555: return "1555"; + case Pixel565: return "565"; + case Pixel4444: return "4444"; + case PixelYUV: return "yuv"; + case PixelBumpMap: return "bumpmap"; + case PixelPal4: return "pal4"; + case PixelPal8: return "pal8"; + default: return "unknown"; + } + } + + void Create(); + void ComputeHash(); + void Update(); + virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) = 0; + void CheckCustomTexture(); + //true if : dirty or paletted texture and hashes don't match + bool NeedsUpdate(); + virtual bool Delete(); + virtual ~BaseTextureCacheData() {} +}; diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 046207802..0ba627480 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -1,7 +1,7 @@ #include "glcache.h" #include "rend/rend.h" +#include "rend/sorter.h" -#include /* Drawing and related state management @@ -299,454 +299,12 @@ void DrawList(const List& gply, int first, int count) } } -bool operator<(const PolyParam &left, const PolyParam &right) +static vector pidx_sort; + +static void SortTriangles(int first, int count) { -/* put any condition you want to sort on here */ - return left.zvZcount<2) - { - pp->zvZ=0; - } - else - { - u32* idx = idx_base + pp->first; - - Vertex* vtx=vtx_base+idx[0]; - Vertex* vtx_end=vtx_base + idx[pp->count-1]+1; - - u32 zv=0xFFFFFFFF; - while(vtx!=vtx_end) - { - zv=min(zv,(u32&)vtx->z); - vtx++; - } - - pp->zvZ=(f32&)zv; - } - pp++; - } - - std::stable_sort(pvrrc.global_param_tr.head() + first, pvrrc.global_param_tr.head() + first + count); -} - -Vertex* vtx_sort_base; - - -struct IndexTrig -{ - u32 id[3]; - u16 pid; - f32 z; -}; - - -struct SortTrigDrawParam -{ - PolyParam* ppid; - u32 first; - u32 count; -}; - -float min3(float v0,float v1,float v2) -{ - return min(min(v0,v1),v2); -} - -float max3(float v0,float v1,float v2) -{ - return max(max(v0,v1),v2); -} - - -float minZ(Vertex* v, u32* mod) -{ - return min(min(v[mod[0]].z,v[mod[1]].z),v[mod[2]].z); -} - -bool operator<(const IndexTrig &left, const IndexTrig &right) -{ - return left.zx-b->x; - float yd=a->y-b->y; - - return xd*xd+yd*yd; -} - -//was good idea, but not really working .. -bool Intersect(Vertex* a, Vertex* b) -{ - float a1=area_x2(a); - float a2=area_x2(b); - - float d = distance_apprx(a,b); - - return (a1+a1)>d; -} - -//root for quick-union -u16 rid(vector& v, u16 id) -{ - while(id!=v[id]) id=v[id]; - return id; -} - -struct TrigBounds -{ - float xs,xe; - float ys,ye; - float zs,ze; -}; - -//find 3d bounding box for triangle -TrigBounds bound(Vertex* v) -{ - TrigBounds rv = { min(min(v[0].x,v[1].x),v[2].x), max(max(v[0].x,v[1].x),v[2].x), - min(min(v[0].y,v[1].y),v[2].y), max(max(v[0].y,v[1].y),v[2].y), - min(min(v[0].z,v[1].z),v[2].z), max(max(v[0].z,v[1].z),v[2].z), - }; - - return rv; -} - -//bounding box 2d intersection -bool Intersect(TrigBounds& a, TrigBounds& b) -{ - return ( !(a.xeb.xe) && !(a.yeb.ye) /*&& !(a.zeb.ze)*/ ); -} - - -bool operator<(const IndexTrig &left, const IndexTrig &right) -{ - /* - TrigBounds l=bound(vtx_sort_base+left.id); - TrigBounds r=bound(vtx_sort_base+right.id); - - if (!Intersect(l,r)) - { - return true; - } - else - { - return (l.zs + l.ze) < (r.zs + r.ze); - }*/ - - return minZ(&vtx_sort_base[left.id])pcw.full&PCW_DRAW_MASK)==(pp1->pcw.full&PCW_DRAW_MASK) && pp0->isp.full==pp1->isp.full && pp0->tcw.full==pp1->tcw.full && pp0->tsp.full==pp1->tsp.full && pp0->tileclip==pp1->tileclip; -} - -static vector pidx_sort; - -void fill_id(u32* d, Vertex* v0, Vertex* v1, Vertex* v2, Vertex* vb) -{ - d[0]=v0-vb; - d[1]=v1-vb; - d[2]=v2-vb; -} - -void GenSorted(int first, int count) -{ - u32 tess_gen=0; - - pidx_sort.clear(); - - if (pvrrc.verts.used() == 0 || count <= 1) - return; - - Vertex* vtx_base=pvrrc.verts.head(); - u32* idx_base = pvrrc.idx.head(); - - PolyParam* pp_base = &pvrrc.global_param_tr.head()[first]; - PolyParam* pp = pp_base; - PolyParam* pp_end = pp + count; - - Vertex* vtx_arr=vtx_base+idx_base[pp->first]; - vtx_sort_base=vtx_base; - - static u32 vtx_cnt; - - int vtx_count=idx_base[pp_end[-1].first+pp_end[-1].count-1]-idx_base[pp->first]; - if (vtx_count>vtx_cnt) - vtx_cnt=vtx_count; - -#if PRINT_SORT_STATS - printf("TVTX: %d || %d\n",vtx_cnt,vtx_count); -#endif - - if (vtx_count<=0) - return; - - //make lists of all triangles, with their pid and vid - static vector lst; - - lst.resize(vtx_count*4); - - - int pfsti=0; - - while(pp!=pp_end) - { - u32 ppid=(pp-pp_base); - - if (pp->count>2) - { - u32* idx = idx_base + pp->first; - - Vertex* vtx=vtx_base+idx[0]; - Vertex* vtx_end=vtx_base + idx[pp->count-1]-1; - u32 flip=0; - while(vtx!=vtx_end) - { - Vertex* v0, * v1, * v2, * v3, * v4, * v5; - - if (flip) - { - v0=&vtx[1]; - v1=&vtx[0]; - v2=&vtx[2]; - } - else - { - v0=&vtx[0]; - v1=&vtx[1]; - v2=&vtx[2]; - } -#if 0 - if (settings.pvr.subdivide_transp) - { - u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32; - u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32; - - if (tess_x==1) tess_x=0; - if (tess_y==1) tess_y=0; - - //bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2; - - if (tess_x + tess_y) - { - v3=pvrrc.verts.Append(3); - v4=v3+1; - v5=v4+1; - - //xyz - for (int i=0;i<3;i++) - { - ((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - ((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f; - ((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - } - - //*TODO* Make it perspective correct - - //uv - for (int i=0;i<2;i++) - { - ((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - ((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f; - ((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - } - - //color - for (int i=0;i<4;i++) - { - v3->col[i]=v0->col[i]/2+v2->col[i]/2; - v4->col[i]=v0->col[i]/2+v1->col[i]/2; - v5->col[i]=v1->col[i]/2+v2->col[i]/2; - } - - fill_id(lst[pfsti].id,v0,v3,v4,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v2,v3,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v3,v4,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v5,v4,v1,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - tess_gen+=3; - } - else - { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - } - } - else -#endif - { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - } - - flip ^= 1; - - vtx++; - } - } - pp++; - } - - u32 aused=pfsti; - - lst.resize(aused); - - //sort them -#if 1 - std::stable_sort(lst.begin(),lst.end()); - - //Merge pids/draw cmds if two different pids are actually equal - if (true) - { - for (u32 k=1;klst[k].pid) - { - //MOVE UP - for (int j=k;j>0 && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j--) - { - swap(lst[j],lst[j-1]); - } - } - else - { - //move down - for (int j=k+1;j vidx_sort; - - vidx_sort.resize(aused*3); - - int idx=-1; - - for (u32 i=0; icount=stdp.first-last->first; - } - - pidx_sort.push_back(stdp); - idx=pid; - } - } - - SortTrigDrawParam* stdp=&pidx_sort[pidx_sort.size()-1]; - stdp->count=aused*3-stdp->first; - -#if PRINT_SORT_STATS - printf("Reassembled into %d from %d\n",pidx_sort.size(),pp_end-pp_base); -#endif + vector vidx_sort; + GenSorted(first, count, pidx_sort, vidx_sort); //Upload to GPU if needed if (pidx_sort.size()) @@ -767,8 +325,6 @@ void GenSorted(int first, int count) else glBufferData(GL_ELEMENT_ARRAY_BUFFER, vidx_sort.size() * sizeof(u32), &vidx_sort[0], GL_STREAM_DRAW); glCheck(); - - if (tess_gen) DEBUG_LOG(RENDERER, "Generated %.2fK Triangles !", tess_gen / 1000.0); } } @@ -1099,7 +655,7 @@ void DrawStrips() { if (!settings.rend.PerStripSorting) { - GenSorted(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); + SortTriangles(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); DrawSorted(render_pass < pvrrc.render_passes.used() - 1); } else diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index a993c0696..233dca95c 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -2,6 +2,7 @@ #include #include #include "rend/rend.h" +#include "rend/TexCache.h" #if (defined(GLES) && HOST_OS != OS_DARWIN && !defined(USE_SDL)) || defined(__ANDROID__) #define USE_EGL @@ -168,7 +169,6 @@ text_info raw_GetTexture(TSP tsp, TCW tcw); void killtex(); void CollectCleanup(); void DoCleanup(); -void SortPParams(int first, int count); void SetCull(u32 CullMode); s32 SetTileClip(u32 val, GLint uniform); void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc); @@ -238,63 +238,13 @@ extern struct ShaderUniforms_t } ShaderUniforms; -struct PvrTexInfo; -template class PixelBuffer; -typedef void TexConvFP(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); -typedef void TexConvFP32(PixelBuffer* pb,u8* p_in,u32 Width,u32 Height); - -struct TextureCacheData +struct TextureCacheData : BaseTextureCacheData { - TSP tsp; //dreamcast texture parameters - TCW tcw; - GLuint texID; //gl texture u16* pData; - int tex_type; - - u32 Lookups; - - //decoded texture info - u32 sa; //pixel data start address in vram (might be offset for mipmaps/etc) - u32 sa_tex; //texture data start address in vram - u32 w,h; //width & height of the texture - u32 size; //size, in bytes, in vram - - const PvrTexInfo* tex; - TexConvFP* texconv; - TexConvFP32* texconv32; - - u32 dirty; - vram_block* lock_block; - - u32 Updates; - - u32 palette_index; - //used for palette updates - u32 palette_hash; // Palette hash at time of last update - u32 vq_codebook; // VQ quantizers table for compressed textures - u32 texture_hash; // xxhash of texture data, used for custom textures - u32 old_texture_hash; // legacy hash - u8* volatile custom_image_data; // loaded custom image data - volatile u32 custom_width; - volatile u32 custom_height; - std::atomic_int custom_load_in_progress; - - void PrintTextureName(); - - bool IsPaletted() - { - return tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8; - } - - void Create(bool isGL); - void ComputeHash(); - void Update(); - void UploadToGPU(GLuint textype, int width, int height, u8 *temp_tex_buffer); - void CheckCustomTexture(); - //true if : dirty or paletted texture and hashes don't match - bool NeedsUpdate(); - bool Delete(); + virtual std::string GetId() override { return std::to_string(texID); } + virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) override; + virtual bool Delete() override; }; extern const u32 Zfunction[8]; diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index fd873fe3c..55a6ca8d3 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -3,8 +3,6 @@ #include "rend/TexCache.h" #include "hw/pvr/pvr_mem.h" #include "hw/mem/_vmem.h" -#include -#include "CustomTexture.h" #include @@ -32,50 +30,6 @@ Compression extern u32 decoded_colors[3][65536]; -struct PvrTexInfo -{ - const char* name; - int bpp; //4/8 for pal. 16 for yuv, rgb, argb - GLuint type; - // Conversion to 16 bpp - TexConvFP *PL; - TexConvFP *TW; - TexConvFP *VQ; - // Conversion to 32 bpp - TexConvFP32 *PL32; - TexConvFP32 *TW32; - TexConvFP32 *VQ32; -}; - -static const PvrTexInfo format[8] = -{ // name bpp GL format Planar Twiddled VQ Planar(32b) Twiddled(32b) VQ (32b) - {"1555", 16, GL_UNSIGNED_SHORT_5_5_5_1, tex1555_PL, tex1555_TW, tex1555_VQ, tex1555_PL32, tex1555_TW32, tex1555_VQ32 }, //1555 - {"565", 16, GL_UNSIGNED_SHORT_5_6_5, tex565_PL, tex565_TW, tex565_VQ, tex565_PL32, tex565_TW32, tex565_VQ32 }, //565 - {"4444", 16, GL_UNSIGNED_SHORT_4_4_4_4, tex4444_PL, tex4444_TW, tex4444_VQ, tex4444_PL32, tex4444_TW32, tex4444_VQ32 }, //4444 - {"yuv", 16, GL_UNSIGNED_BYTE, NULL, NULL, NULL, texYUV422_PL, texYUV422_TW, texYUV422_VQ }, //yuv - {"bumpmap", 16, GL_UNSIGNED_SHORT_4_4_4_4, texBMP_PL, texBMP_TW, texBMP_VQ, NULL}, //bump map - {"pal4", 4, 0, 0, texPAL4_TW, texPAL4_VQ, NULL, texPAL4_TW32, texPAL4_VQ32 }, //pal4 - {"pal8", 8, 0, 0, texPAL8_TW, texPAL8_VQ, NULL, texPAL8_TW32, texPAL8_VQ32 }, //pal8 - {"ns/1555", 0}, // Not supported (1555) -}; - -static const u32 MipPoint[8] = -{ - 0x00006,//8 - 0x00016,//16 - 0x00056,//32 - 0x00156,//64 - 0x00556,//128 - 0x01556,//256 - 0x05556,//512 - 0x15556//1024 -}; - -static const GLuint PAL_TYPE[4]= -{GL_UNSIGNED_SHORT_5_5_5_1,GL_UNSIGNED_SHORT_5_6_5,GL_UNSIGNED_SHORT_4_4_4_4, GL_UNSIGNED_BYTE}; - -static CustomTexture custom_texture; - static void dumpRtTexture(u32 name, u32 w, u32 h) { char sname[256]; sprintf(sname, "texdump/%x-%d.png", name, FrameCount); @@ -117,283 +71,44 @@ static void dumpRtTexture(u32 name, u32 w, u32 h) { free(rows); } -//Texture Cache :) -void TextureCacheData::PrintTextureName() +void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer) { - char str[512]; - sprintf(str, "Texture: %s ", tex ? tex->name : "?format?"); - - if (tcw.VQ_Comp) - strcat(str, " VQ"); - - if (tcw.ScanOrder==0) - strcat(str, " TW"); - - if (tcw.MipMapped) - strcat(str, " MM"); - - if (tcw.StrideSel) - strcat(str, " Stride"); - - sprintf(str + strlen(str), " %dx%d @ 0x%X", 8 << tsp.TexU, 8 << tsp.TexV, tcw.TexAddr << 3); - sprintf(str + strlen(str), " id=%d", texID); - DEBUG_LOG(RENDERER, "%s", str); -} - -//Create GL texture from tsp/tcw -void TextureCacheData::Create(bool isGL) -{ - //ask GL for texture ID - if (isGL) { - texID = glcache.GenTexture(); - } - else { - texID = 0; - } - - pData = 0; - tex_type = 0; - - //Reset state info .. - Lookups=0; - Updates=0; - dirty=FrameCount; - lock_block=0; - - //decode info from tsp/tcw into the texture struct - tex=&format[tcw.PixelFmt == PixelReserved ? Pixel1555 : tcw.PixelFmt]; //texture format table entry - - sa_tex = (tcw.TexAddr<<3) & VRAM_MASK; //texture start address - sa = sa_tex; //data texture start address (modified for MIPs, as needed) - w=8<bpp == 4) - palette_index = tcw.PalSelect << 4; - else if (tex->bpp == 8) - palette_index = (tcw.PalSelect >> 4) << 8; - - //VQ table (if VQ tex) - if (tcw.VQ_Comp) - vq_codebook = sa; - - //Convert a pvr texture into OpenGL - switch (tcw.PixelFmt) + if (texID != 0) { - - case Pixel1555: //0 1555 value: 1 bit; RGB values: 5 bits each - case PixelReserved: //7 Reserved Regarded as 1555 - case Pixel565: //1 565 R value: 5 bits; G value: 6 bits; B value: 5 bits - case Pixel4444: //2 4444 value: 4 bits; RGB values: 4 bits each - case PixelYUV: //3 YUV422 32 bits per 2 pixels; YUYV values: 8 bits each - case PixelBumpMap: //4 Bump Map 16 bits/pixel; S value: 8 bits; R value: 8 bits - case PixelPal4: //5 4 BPP Palette Palette texture with 4 bits/pixel - case PixelPal8: //6 8 BPP Palette Palette texture with 8 bits/pixel - if (tcw.ScanOrder && (tex->PL || tex->PL32)) - { - //Texture is stored 'planar' in memory, no deswizzle is needed - //verify(tcw.VQ_Comp==0); - if (tcw.VQ_Comp != 0) - WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)"); - - //Planar textures support stride selection, mostly used for non power of 2 textures (videos) - int stride=w; - if (tcw.StrideSel) - stride=(TEXT_CONTROL&31)*32; - //Call the format specific conversion code - texconv = tex->PL; - texconv32 = tex->PL32; - //calculate the size, in bytes, for the locking - size=stride*h*tex->bpp/8; - } - else - { - // Quake 3 Arena uses one. Not sure if valid but no need to crash - //verify(w==h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN* - - if (tcw.VQ_Comp) - { - verify(tex->VQ != NULL || tex->VQ32 != NULL); - vq_codebook = sa; - if (tcw.MipMapped) - sa+=MipPoint[tsp.TexU]; - texconv = tex->VQ; - texconv32 = tex->VQ32; - size=w*h/8; - } - else - { - verify(tex->TW != NULL || tex->TW32 != NULL); - if (tcw.MipMapped) - sa+=MipPoint[tsp.TexU]*tex->bpp/2; - texconv = tex->TW; - texconv32 = tex->TW32; - size=w*h*tex->bpp/8; - } - } - break; - default: - WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt); - size=w*h*2; - texconv = NULL; - texconv32 = NULL; - } -} - -void TextureCacheData::ComputeHash() -{ - texture_hash = XXH32(&vram[sa], size, 7); - if (IsPaletted()) - texture_hash ^= palette_hash; - old_texture_hash = texture_hash; - texture_hash ^= tcw.full; -} - -void TextureCacheData::Update() -{ - //texture state tracking stuff - Updates++; - dirty=0; - - GLuint textype=tex->type; - - bool has_alpha = false; - if (IsPaletted()) - { - textype=PAL_TYPE[PAL_RAM_CTRL&3]; - if (textype == GL_UNSIGNED_BYTE) - has_alpha = true; - - // Get the palette hash to check for future updates - if (tcw.PixelFmt == PixelPal4) - palette_hash = pal_hash_16[tcw.PalSelect]; - else - palette_hash = pal_hash_256[tcw.PalSelect >> 4]; - } - - ::palette_index = this->palette_index; // might be used if pal. tex - ::vq_codebook = &vram[vq_codebook]; // might be used if VQ tex - - //texture conversion work - u32 stride=w; - - if (tcw.StrideSel && tcw.ScanOrder && (tex->PL || tex->PL32)) - stride=(TEXT_CONTROL&31)*32; //I think this needs +1 ? - - PrintTextureName(); - u32 original_h = h; - if (sa_tex > VRAM_SIZE || size == 0 || sa + size > VRAM_SIZE) - { - if (sa + size > VRAM_SIZE) - { - // Shenmue Space Harrier mini-arcade loads a texture that goes beyond the end of VRAM - // but only uses the top portion of it - h = (VRAM_SIZE - sa) * 8 / stride / tex->bpp; - size = stride * h * tex->bpp/8; - } - else - { - WARN_LOG(RENDERER, "Warning: invalid texture. Address %08X %08X size %d", sa_tex, sa, size); - return; - } - } - if (settings.rend.CustomTextures) - custom_texture.LoadCustomTextureAsync(this); - - void *temp_tex_buffer = NULL; - u32 upscaled_w = w; - u32 upscaled_h = h; - - PixelBuffer pb16; - PixelBuffer pb32; - - // Figure out if we really need to use a 32-bit pixel buffer - bool need_32bit_buffer = true; - if ((settings.rend.TextureUpscale <= 1 - || w * h > settings.rend.MaxFilteredTextureSize - * settings.rend.MaxFilteredTextureSize // Don't process textures that are too big - || tcw.PixelFmt == PixelYUV) // Don't process YUV textures - && (!IsPaletted() || textype != GL_UNSIGNED_BYTE) - && texconv != NULL) - need_32bit_buffer = false; - // TODO avoid upscaling/depost. textures that change too often - - if (texconv32 != NULL && need_32bit_buffer) - { - // Force the texture type since that's the only 32-bit one we know - textype = GL_UNSIGNED_BYTE; - - pb32.init(w, h); - - texconv32(&pb32, (u8*)&vram[sa], stride, h); - -#ifdef DEPOSTERIZE - { - // Deposterization - PixelBuffer tmp_buf; - tmp_buf.init(w, h); - - DePosterize(pb32.data(), tmp_buf.data(), w, h); - pb32.steal_data(tmp_buf); - } -#endif - - // xBRZ scaling - if (settings.rend.TextureUpscale > 1) - { - PixelBuffer tmp_buf; - tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale); - - if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444) - // Alpha channel formats. Palettes with alpha are already handled - has_alpha = true; - UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha); - pb32.steal_data(tmp_buf); - upscaled_w *= settings.rend.TextureUpscale; - upscaled_h *= settings.rend.TextureUpscale; - } - temp_tex_buffer = pb32.data(); - } - else if (texconv != NULL) - { - pb16.init(w, h); - - texconv(&pb16,(u8*)&vram[sa],stride,h); - temp_tex_buffer = pb16.data(); - } - else - { - //fill it in with a temp color - WARN_LOG(RENDERER, "UNHANDLED TEXTURE"); - pb16.init(w, h); - memset(pb16.data(), 0x80, w * h * 2); - temp_tex_buffer = pb16.data(); - } - // Restore the original texture height if it was constrained to VRAM limits above - h = original_h; - - //lock the texture to detect changes in it - lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this); - - if (texID) { //upload to OpenGL ! - UploadToGPU(textype, upscaled_w, upscaled_h, (u8*)temp_tex_buffer); - if (settings.rend.DumpTextures) + glcache.BindTexture(GL_TEXTURE_2D, texID); + GLuint comps = GL_RGBA; + GLuint gltype; + switch (tex_type) { - ComputeHash(); - custom_texture.DumpTexture(texture_hash, upscaled_w, upscaled_h, textype, temp_tex_buffer); + case TextureType::_5551: + gltype = GL_UNSIGNED_SHORT_5_5_5_1; + break; + case TextureType::_565: + gltype = GL_UNSIGNED_SHORT_5_6_5; + comps = GL_RGB; + break; + case TextureType::_4444: + gltype = GL_UNSIGNED_SHORT_4_4_4_4; + break; + case TextureType::_8888: + gltype = GL_UNSIGNED_BYTE; + break; } + glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer); + if (tcw.MipMapped && settings.rend.UseMipmaps) + glGenerateMipmap(GL_TEXTURE_2D); } else { #if FEAT_HAS_SOFTREND - if (textype == GL_UNSIGNED_SHORT_5_6_5) + /* + if (tex_type == TextureType::_565) tex_type = 0; - else if (textype == GL_UNSIGNED_SHORT_5_5_5_1) + else if (tex_type == TextureType::_5551) tex_type = 1; - else if (textype == GL_UNSIGNED_SHORT_4_4_4_4) + else if (tex_type == TextureType::_4444) tex_type = 2; - + */ u16 *tex_data = (u16 *)temp_tex_buffer; if (pData) { _mm_free(pData); @@ -415,40 +130,12 @@ void TextureCacheData::Update() #endif } } - -void TextureCacheData::UploadToGPU(GLuint textype, int width, int height, u8 *temp_tex_buffer) -{ - //upload to OpenGL ! - glcache.BindTexture(GL_TEXTURE_2D, texID); - GLuint comps=textype == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA; - glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, textype, temp_tex_buffer); - if (tcw.MipMapped && settings.rend.UseMipmaps) - glGenerateMipmap(GL_TEXTURE_2D); -} - -void TextureCacheData::CheckCustomTexture() -{ - if (custom_load_in_progress == 0 && custom_image_data != NULL) - { - UploadToGPU(GL_UNSIGNED_BYTE, custom_width, custom_height, custom_image_data); - delete [] custom_image_data; - custom_image_data = NULL; - } -} - -//true if : dirty or paletted texture and hashes don't match -bool TextureCacheData::NeedsUpdate() { - bool rc = dirty - || (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect]) - || (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4]); - return rc; -} bool TextureCacheData::Delete() { - if (custom_load_in_progress > 0) + if (!BaseTextureCacheData::Delete()) return false; - + if (pData) { #if FEAT_HAS_SOFTREND _mm_free(pData); @@ -461,17 +148,12 @@ bool TextureCacheData::Delete() if (texID) { glcache.DeleteTextures(1, &texID); } - if (lock_block) - libCore_vramlock_Unlock_block(lock_block); - lock_block=0; - - delete[] custom_image_data; return true; } -static map TexCache; -typedef map::iterator TexCacheIter; +static std::unordered_map TexCache; +typedef std::unordered_map::iterator TexCacheIter; static TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw); @@ -574,19 +256,13 @@ void ReadRTTBuffer() { { u32 tex_addr = gl.rtt.TexAddr << 3; - // Manually mark textures as dirty and remove all vram locks before calling glReadPixels + // Remove all vram locks before calling glReadPixels // (deadlock on rpi) - for (TexCacheIter i = TexCache.begin(); i != TexCache.end(); i++) - { - if (i->second.sa_tex <= tex_addr + size - 1 && i->second.sa + i->second.size - 1 >= tex_addr) { - i->second.dirty = FrameCount; - if (i->second.lock_block != NULL) { - libCore_vramlock_Unlock_block(i->second.lock_block); - i->second.lock_block = NULL; - } - } - } - _vmem_unprotect_vram(0, VRAM_SIZE); + u32 page_tex_addr = tex_addr & PAGE_MASK; + u32 page_size = size + tex_addr - page_tex_addr; + page_size = ((page_size - 1) / PAGE_SIZE + 1) * PAGE_SIZE; + for (u32 page = page_tex_addr; page < page_tex_addr + page_size; page += PAGE_SIZE) + VramLockedWriteOffset(page); glPixelStorei(GL_PACK_ALIGNMENT, 1); u16 *dst = (u16 *)&vram[tex_addr]; @@ -641,13 +317,6 @@ void ReadRTTBuffer() { dst += (stride - w * 2) / 2; } } - - // Restore VRAM locks - for (TexCacheIter i = TexCache.begin(); i != TexCache.end(); i++) - { - if (i->second.lock_block != NULL) - _vmem_protect_vram(i->second.sa_tex, i->second.sa + i->second.size - i->second.sa_tex); - } } else { @@ -683,7 +352,7 @@ void ReadRTTBuffer() { if (texture_data->texID != 0) glcache.DeleteTextures(1, &texture_data->texID); else - texture_data->Create(false); + texture_data->Create(); texture_data->texID = gl.rtt.tex; texture_data->dirty = 0; if (texture_data->lock_block == NULL) @@ -745,7 +414,10 @@ GLuint gl_GetTexture(TSP tsp, TCW tcw) TextureCacheData* tf = getTextureCacheData(tsp, tcw); if (tf->texID == 0) - tf->Create(true); + { + tf->Create(); + tf->texID = glcache.GenTexture(); + } //update if needed if (tf->NeedsUpdate()) @@ -792,7 +464,7 @@ text_info raw_GetTexture(TSP tsp, TCW tcw) tf->tsp = tsp; tf->tcw = tcw; - tf->Create(false); + tf->Create(); } //update if needed @@ -806,7 +478,7 @@ text_info raw_GetTexture(TSP tsp, TCW tcw) rv.height = tf->h; rv.width = tf->w; rv.pdata = tf->pData; - rv.textype = tf->tex_type; + rv.textype = (u32)tf->tex_type; return rv; @@ -817,21 +489,20 @@ void CollectCleanup() { u32 TargetFrame = max((u32)120,FrameCount) - 120; - for (TexCacheIter i=TexCache.begin();i!=TexCache.end();i++) + for (const auto& pair : TexCache) { - if ( i->second.dirty && i->second.dirty < TargetFrame) { - list.push_back(i->first); - } + if (pair.second.dirty && pair.second.dirty < TargetFrame) + list.push_back(pair.first); if (list.size() > 5) break; } - for (size_t i=0; isecond.Delete(); - } + for (auto& pair : TexCache) + pair.second.Delete(); TexCache.clear(); KillTex = false; diff --git a/core/rend/sorter.cpp b/core/rend/sorter.cpp new file mode 100644 index 000000000..65a1281e2 --- /dev/null +++ b/core/rend/sorter.cpp @@ -0,0 +1,455 @@ +/* + This file is part of reicast. + + reicast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + reicast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with reicast. If not, see . + */ +#include +#include "sorter.h" + +struct IndexTrig +{ + u32 id[3]; + u16 pid; + f32 z; +}; + +float min3(float v0,float v1,float v2) +{ + return min(min(v0,v1),v2); +} + +float max3(float v0,float v1,float v2) +{ + return max(max(v0,v1),v2); +} + +float minZ(Vertex* v, u32* mod) +{ + return min(min(v[mod[0]].z,v[mod[1]].z),v[mod[2]].z); +} + +bool operator<(const IndexTrig &left, const IndexTrig &right) +{ + return left.zcount<2) + { + pp->zvZ=0; + } + else + { + u32* idx = idx_base + pp->first; + + Vertex* vtx=vtx_base+idx[0]; + Vertex* vtx_end=vtx_base + idx[pp->count-1]+1; + + u32 zv=0xFFFFFFFF; + while(vtx!=vtx_end) + { + zv=min(zv,(u32&)vtx->z); + vtx++; + } + + pp->zvZ=(f32&)zv; + } + pp++; + } + + std::stable_sort(pvrrc.global_param_tr.head() + first, pvrrc.global_param_tr.head() + first + count); +} + +static Vertex* vtx_sort_base; + +#if 0 +/* + + Per triangle sorting experiments + +*/ + +//approximate the triangle area +float area_x2(Vertex* v) +{ + return 2/3*fabs( (v[0].x-v[2].x)*(v[1].y-v[0].y) - (v[0].x-v[1].x)*(v[2].y-v[0].y)) ; +} + +//approximate the distance ^2 +float distance_apprx(Vertex* a, Vertex* b) +{ + float xd=a->x-b->x; + float yd=a->y-b->y; + + return xd*xd+yd*yd; +} + +//was good idea, but not really working .. +bool Intersect(Vertex* a, Vertex* b) +{ + float a1=area_x2(a); + float a2=area_x2(b); + + float d = distance_apprx(a,b); + + return (a1+a1)>d; +} + +//root for quick-union +u16 rid(vector& v, u16 id) +{ + while(id!=v[id]) id=v[id]; + return id; +} + +struct TrigBounds +{ + float xs,xe; + float ys,ye; + float zs,ze; +}; + +//find 3d bounding box for triangle +TrigBounds bound(Vertex* v) +{ + TrigBounds rv = { min(min(v[0].x,v[1].x),v[2].x), max(max(v[0].x,v[1].x),v[2].x), + min(min(v[0].y,v[1].y),v[2].y), max(max(v[0].y,v[1].y),v[2].y), + min(min(v[0].z,v[1].z),v[2].z), max(max(v[0].z,v[1].z),v[2].z), + }; + + return rv; +} + +//bounding box 2d intersection +bool Intersect(TrigBounds& a, TrigBounds& b) +{ + return ( !(a.xeb.xe) && !(a.yeb.ye) /*&& !(a.zeb.ze)*/ ); +} + + +bool operator<(const IndexTrig &left, const IndexTrig &right) +{ + /* + TrigBounds l=bound(vtx_sort_base+left.id); + TrigBounds r=bound(vtx_sort_base+right.id); + + if (!Intersect(l,r)) + { + return true; + } + else + { + return (l.zs + l.ze) < (r.zs + r.ze); + }*/ + + return minZ(&vtx_sort_base[left.id])pcw.full&PCW_DRAW_MASK)==(pp1->pcw.full&PCW_DRAW_MASK) && pp0->isp.full==pp1->isp.full && pp0->tcw.full==pp1->tcw.full && pp0->tsp.full==pp1->tsp.full && pp0->tileclip==pp1->tileclip; +} + +void fill_id(u32* d, Vertex* v0, Vertex* v1, Vertex* v2, Vertex* vb) +{ + d[0]=v0-vb; + d[1]=v1-vb; + d[2]=v2-vb; +} + +void GenSorted(int first, int count, vector& pidx_sort, vector& vidx_sort) +{ + u32 tess_gen=0; + + pidx_sort.clear(); + + if (pvrrc.verts.used() == 0 || count <= 1) + return; + + Vertex* vtx_base=pvrrc.verts.head(); + u32* idx_base = pvrrc.idx.head(); + + PolyParam* pp_base = &pvrrc.global_param_tr.head()[first]; + PolyParam* pp = pp_base; + PolyParam* pp_end = pp + count; + + Vertex* vtx_arr=vtx_base+idx_base[pp->first]; + vtx_sort_base=vtx_base; + + static u32 vtx_cnt; + + int vtx_count=idx_base[pp_end[-1].first+pp_end[-1].count-1]-idx_base[pp->first]; + if (vtx_count>vtx_cnt) + vtx_cnt=vtx_count; + +#if PRINT_SORT_STATS + printf("TVTX: %d || %d\n",vtx_cnt,vtx_count); +#endif + + if (vtx_count<=0) + return; + + //make lists of all triangles, with their pid and vid + static vector lst; + + lst.resize(vtx_count*4); + + + int pfsti=0; + + while(pp!=pp_end) + { + u32 ppid=(pp-pp_base); + + if (pp->count>2) + { + u32* idx = idx_base + pp->first; + + Vertex* vtx=vtx_base+idx[0]; + Vertex* vtx_end=vtx_base + idx[pp->count-1]-1; + u32 flip=0; + while(vtx!=vtx_end) + { + Vertex* v0, * v1, * v2, * v3, * v4, * v5; + + if (flip) + { + v0=&vtx[1]; + v1=&vtx[0]; + v2=&vtx[2]; + } + else + { + v0=&vtx[0]; + v1=&vtx[1]; + v2=&vtx[2]; + } +#if 0 + if (settings.pvr.subdivide_transp) + { + u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32; + u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32; + + if (tess_x==1) tess_x=0; + if (tess_y==1) tess_y=0; + + //bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2; + + if (tess_x + tess_y) + { + v3=pvrrc.verts.Append(3); + v4=v3+1; + v5=v4+1; + + //xyz + for (int i=0;i<3;i++) + { + ((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; + ((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f; + ((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; + } + + //*TODO* Make it perspective correct + + //uv + for (int i=0;i<2;i++) + { + ((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; + ((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f; + ((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; + } + + //color + for (int i=0;i<4;i++) + { + v3->col[i]=v0->col[i]/2+v2->col[i]/2; + v4->col[i]=v0->col[i]/2+v1->col[i]/2; + v5->col[i]=v1->col[i]/2+v2->col[i]/2; + } + + fill_id(lst[pfsti].id,v0,v3,v4,vtx_base); + lst[pfsti].pid= ppid ; + lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); + pfsti++; + + fill_id(lst[pfsti].id,v2,v3,v5,vtx_base); + lst[pfsti].pid= ppid ; + lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); + pfsti++; + + fill_id(lst[pfsti].id,v3,v4,v5,vtx_base); + lst[pfsti].pid= ppid ; + lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); + pfsti++; + + fill_id(lst[pfsti].id,v5,v4,v1,vtx_base); + lst[pfsti].pid= ppid ; + lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); + pfsti++; + + tess_gen+=3; + } + else + { + fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); + lst[pfsti].pid= ppid ; + lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); + pfsti++; + } + } + else +#endif + { + fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); + lst[pfsti].pid= ppid ; + lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); + pfsti++; + } + + flip ^= 1; + + vtx++; + } + } + pp++; + } + + u32 aused=pfsti; + + lst.resize(aused); + + //sort them +#if 1 + std::stable_sort(lst.begin(),lst.end()); + + //Merge pids/draw cmds if two different pids are actually equal + if (true) + { + for (u32 k=1;klst[k].pid) + { + //MOVE UP + for (int j=k;j>0 && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j--) + { + swap(lst[j],lst[j-1]); + } + } + else + { + //move down + for (int j=k+1;jcount=stdp.first-last->first; + } + + pidx_sort.push_back(stdp); + idx=pid; + } + } + + SortTrigDrawParam* stdp=&pidx_sort[pidx_sort.size()-1]; + stdp->count=aused*3-stdp->first; + +#if PRINT_SORT_STATS + printf("Reassembled into %d from %d\n",pidx_sort.size(),pp_end-pp_base); +#endif + + if (tess_gen) DEBUG_LOG(RENDERER, "Generated %.2fK Triangles !", tess_gen / 1000.0); +} + diff --git a/core/rend/sorter.h b/core/rend/sorter.h new file mode 100644 index 000000000..ec78aa974 --- /dev/null +++ b/core/rend/sorter.h @@ -0,0 +1,32 @@ +/* + This file is part of reicast. + + reicast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + reicast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with reicast. If not, see . + */ +#pragma once +#include "types.h" +#include "hw/pvr/Renderer_if.h" + +//Sort based on min-z of each strip +void SortPParams(int first, int count); + +struct SortTrigDrawParam +{ + PolyParam* ppid; + u32 first; + u32 count; +}; + +// Sort based on min-z of each triangle +void GenSorted(int first, int count, vector& sorted_pp, vector& sorted_idx);