rend: refactor texture cache and poly/triangle sorters out of gles

2019-10-04 12:22:18 +02:00 · 2019-10-04 12:22:18 +02:00 · 24b973f763
parent 8eac844825
commit 24b973f763
9 changed files with 982 additions and 906 deletions
--- a/core/rend/gles/CustomTexture.cpp
+++ b/core/rend/gles/CustomTexture.cpp
@ -27,11 +27,14 @@
 #include <png.h>
 #include "reios/reios.h"

+// TODO Move this out of gles.cpp
+u8* loadPNGData(const string& subpath, int &width, int &height);
+
 void CustomTexture::LoaderThread()
 {
 	while (initialized)
 	{
-		TextureCacheData *texture;
+		BaseTextureCacheData *texture;
 		
 		do {
 			texture = NULL;
@ -142,7 +145,7 @@ u8* CustomTexture::LoadCustomTexture(u32 hash, int& width, int& height)
 	return image_data;
 }

-void CustomTexture::LoadCustomTextureAsync(TextureCacheData *texture_data)
+void CustomTexture::LoadCustomTextureAsync(BaseTextureCacheData *texture_data)
 {
 	if (!Init())
 		return;
@ -154,7 +157,7 @@ void CustomTexture::LoadCustomTextureAsync(TextureCacheData *texture_data)
 	wakeup_thread.Set();
 }

-void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *temp_tex_buffer)
+void CustomTexture::DumpTexture(u32 hash, int w, int h, TextureType textype, void *temp_tex_buffer)
 {
 	std::string base_dump_dir = get_writable_data_path(DATA_PATH "texdump/");
 	if (!file_exists(base_dump_dir))
@ -185,7 +188,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te
 		u8 *dst = (u8 *)rows[h - y - 1];
 		switch (textype)
 		{
-		case GL_UNSIGNED_SHORT_4_4_4_4:
+		case TextureType::_4444:
 			for (int x = 0; x < w; x++)
 			{
 				*dst++ = ((*src >> 12) & 0xF) << 4;
@ -195,7 +198,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te
 				src++;
 			}
 			break;
-		case GL_UNSIGNED_SHORT_5_6_5:
+		case TextureType::_565:
 			for (int x = 0; x < w; x++)
 			{
 				*dst++ = ((*src >> 11) & 0x1F) << 3;
@ -205,7 +208,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te
 				src++;
 			}
 			break;
-		case GL_UNSIGNED_SHORT_5_5_5_1:
+		case TextureType::_5551:
 			for (int x = 0; x < w; x++)
 			{
 				*dst++ = ((*src >> 11) & 0x1F) << 3;
@ -215,7 +218,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te
 				src++;
 			}
 			break;
-		case GL_UNSIGNED_BYTE:
+		case TextureType::_8888:
 			for (int x = 0; x < w; x++)
 			{
 				*(u32 *)dst = *(u32 *)src;
@ -224,7 +227,7 @@ void CustomTexture::DumpTexture(u32 hash, int w, int h, GLuint textype, void *te
 			}
 			break;
 		default:
-			WARN_LOG(RENDERER, "dumpTexture: unsupported picture format %x", textype);
+			WARN_LOG(RENDERER, "dumpTexture: unsupported picture format %x", (u32)textype);
 			fclose(fp);
 			free(rows[0]);
 			free(rows);
--- a/core/rend/gles/CustomTexture.h
+++ b/core/rend/gles/CustomTexture.h
@ -16,13 +16,11 @@
 	 You should have received a copy of the GNU General Public License
 	 along with reicast.  If not, see <https://www.gnu.org/licenses/>.
 */
-
-#ifndef CORE_REND_GLES_CUSTOMTEXTURE_H_
-#define CORE_REND_GLES_CUSTOMTEXTURE_H_
+#pragma once

 #include <string>
 #include <set>
-#include "gles.h"
+#include "TexCache.h"

 class CustomTexture {
 public:
@ -34,8 +32,8 @@ public:
 		{}
 	~CustomTexture() { Terminate(); }
 	u8* LoadCustomTexture(u32 hash, int& width, int& height);
-	void LoadCustomTextureAsync(TextureCacheData *texture_data);
-	void DumpTexture(u32 hash, int w, int h, GLuint textype, void *temp_tex_buffer);
+	void LoadCustomTextureAsync(BaseTextureCacheData *texture_data);
+	void DumpTexture(u32 hash, int w, int h, TextureType textype, void *temp_tex_buffer);

 private:
 	bool Init();
@ -53,8 +51,6 @@ private:
 	cThread loader_thread;
 #endif
 	cResetEvent wakeup_thread;
-	std::vector<TextureCacheData *> work_queue;
+	std::vector<BaseTextureCacheData *> work_queue;
 	cMutex work_queue_mutex;
 };
-
-#endif /* CORE_REND_GLES_CUSTOMTEXTURE_H_ */
--- a/core/rend/TexCache.cpp
+++ b/core/rend/TexCache.cpp
@ -4,11 +4,13 @@

 #include "TexCache.h"
 #include "hw/pvr/pvr_regs.h"
+#include "hw/pvr/Renderer_if.h"
 #include "hw/mem/_vmem.h"
 #include "hw/mem/vmem32.h"
 #include "hw/sh4/modules/mmu.h"
 #include "deps/xbrz/xbrz.h"
 #include <xxhash.h>
+#include "CustomTexture.h"

 u8* vq_codebook;
 u32 palette_index;
@ -390,3 +392,338 @@ void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool
 	xbrz::scale(factor, source, dest, width, height, has_alpha ? xbrz::ColorFormat::ARGB : xbrz::ColorFormat::RGB, xbrz_cfg);
 #endif
 }
+
+struct PvrTexInfo	// One row of the pixel format table: name, VRAM bit depth, decoded type and converter functions
+{
+	const char* name;	// short format name, e.g. "1555" or "pal8"
+	int bpp;        // bits per pixel in VRAM: 4/8 for pal. 16 for yuv, rgb, argb
+	TextureType type;	// default decoded texture type; Update() overrides it for paletted textures and for the 32-bit path
+	// Conversion to 16 bpp (planar, twiddled and VQ layouts); a null entry means no 16-bit converter for that layout
+	TexConvFP *PL;
+	TexConvFP *TW;
+	TexConvFP *VQ;
+	// Conversion to 32 bpp (planar, twiddled and VQ layouts); a null entry means no 32-bit converter for that layout
+	TexConvFP32 *PL32;
+	TexConvFP32 *TW32;
+	TexConvFP32 *VQ32;
+};
+
+static const PvrTexInfo format[8] =	// Indexed by tcw.PixelFmt; Create() maps PixelReserved to the 1555 row instead of the last one
+{	// name     bpp Final format			   Planar		Twiddled	 VQ				Planar(32b)    Twiddled(32b)  VQ (32b)
+	{"1555", 	16,	TextureType::_5551,        tex1555_PL,	tex1555_TW,  tex1555_VQ,	tex1555_PL32,  tex1555_TW32,  tex1555_VQ32 },	//1555
+	{"565", 	16, TextureType::_565,         tex565_PL,	tex565_TW,   tex565_VQ, 	tex565_PL32,   tex565_TW32,   tex565_VQ32 },	//565
+	{"4444", 	16, TextureType::_4444,        tex4444_PL,	tex4444_TW,  tex4444_VQ, 	tex4444_PL32,  tex4444_TW32,  tex4444_VQ32 },	//4444
+	{"yuv", 	16, TextureType::_8888,        NULL, 		NULL, 		 NULL,			texYUV422_PL,  texYUV422_TW,  texYUV422_VQ },	//yuv (32-bit converters only)
+	{"bumpmap", 16, TextureType::_4444,        texBMP_PL,	texBMP_TW,	 texBMP_VQ, 	NULL},											//bump map (16-bit converters only)
+	{"pal4", 	4,	TextureType::_5551,		   0,			texPAL4_TW,  texPAL4_VQ, 	NULL, 		   texPAL4_TW32,  texPAL4_VQ32 },	//pal4 (no planar converters)
+	{"pal8", 	8,	TextureType::_5551,		   0,			texPAL8_TW,  texPAL8_VQ, 	NULL, 		   texPAL8_TW32,  texPAL8_VQ32 },	//pal8 (no planar converters)
+	{"ns/1555", 0},																														// Not supported (1555); remaining members are zero-initialized
+};
+
+static const u32 MipPoint[8] =	// Offsets indexed by tsp.TexU; Create() adds them to sa for mipmapped textures (as-is for VQ, scaled by bpp/2 otherwise)
+{
+	0x00006,//8
+	0x00016,//16
+	0x00056,//32
+	0x00156,//64
+	0x00556,//128
+	0x01556,//256
+	0x05556,//512
+	0x15556//1024
+};
+
+static const TextureType PAL_TYPE[4] = {	// Decoded type of paletted textures, selected in Update() by the low two bits of PAL_RAM_CTRL
+	TextureType::_5551, TextureType::_565, TextureType::_4444, TextureType::_8888
+};
+
+static CustomTexture custom_texture;	// File-local instance used by Update() to request custom textures and to dump decoded textures
+
+void BaseTextureCacheData::PrintTextureName()	// Builds a one-line description of this texture and emits it with DEBUG_LOG
+{
+	char str[512];
+	sprintf(str, "Texture: %s ", GetPixelFormatName());
+	// Append a short tag for each TCW flag that applies (" TW" is added when ScanOrder is 0)
+	if (tcw.VQ_Comp)
+		strcat(str, " VQ");
+
+	if (tcw.ScanOrder==0)
+		strcat(str, " TW");
+
+	if (tcw.MipMapped)
+		strcat(str, " MM");
+
+	if (tcw.StrideSel)
+		strcat(str, " Stride");
+
+	sprintf(str + strlen(str), " %dx%d @ 0x%X", 8 << tsp.TexU, 8 << tsp.TexV, tcw.TexAddr << 3);	// dimensions and address are taken straight from tsp/tcw
+	std::string id = GetId();	// GetId() is pure virtual: the id text is supplied by the derived class
+	sprintf(str + strlen(str), " id=%s", id.c_str());
+	DEBUG_LOG(RENDERER, "%s", str);
+}
+
+// Returns true if the texture is dirty, or if it is paletted and the palette hash saved by the last Update() differs from the current one
+bool BaseTextureCacheData::NeedsUpdate() {
+	bool rc = dirty
+			|| (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect])	// pal4: hash table indexed by PalSelect
+			|| (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4]);	// pal8: hash table indexed by PalSelect >> 4
+	return rc;
+}
+
+bool BaseTextureCacheData::Delete()	// Releases the VRAM lock and any custom image data; returns false without releasing anything while a custom load is in progress
+{
+	if (custom_load_in_progress > 0)
+		return false;	// refuse to delete while custom_load_in_progress is non-zero
+
+	if (lock_block)
+		libCore_vramlock_Unlock_block(lock_block);
+	lock_block=0;
+
+	delete[] custom_image_data;	// NOTE(review): the pointer is not reset here, unlike in CheckCustomTexture() — confirm Delete() cannot run twice on the same entry
+
+	return true;
+}
+
+void BaseTextureCacheData::Create()	// Resets the entry's state and derives address, dimensions, converters and VRAM size from tsp/tcw
+{
+	// Reset the usage counters, mark the texture dirty (dirty is set to FrameCount) and clear the lock handle
+	Lookups=0;
+	Updates=0;
+	dirty=FrameCount;
+	lock_block = nullptr;
+
+	//decode info from tsp/tcw into the texture struct
+	tex=&format[tcw.PixelFmt == PixelReserved ? Pixel1555 : tcw.PixelFmt];	// texture format table entry (the reserved format uses the 1555 row)
+
+	sa_tex = (tcw.TexAddr<<3) & VRAM_MASK;	//texture start address
+	sa = sa_tex;							//data texture start address (modified for MIPs, as needed)
+	w=8<<tsp.TexU;                   //tex width
+	h=8<<tsp.TexV;                   //tex height
+
+	// Paletted textures: derive the palette index from PalSelect (4 bpp: PalSelect << 4, 8 bpp: (PalSelect >> 4) << 8)
+	if (tex->bpp == 4)
+		palette_index = tcw.PalSelect << 4;
+	else if (tex->bpp == 8)
+		palette_index = (tcw.PalSelect >> 4) << 8;
+
+	// VQ textures: the codebook offset is the texture start address
+	if (tcw.VQ_Comp)
+		vq_codebook = sa;
+
+	// Select the conversion functions and compute the VRAM footprint for this pixel format
+	switch (tcw.PixelFmt)
+	{
+
+	case Pixel1555: 	//0     1555 value: 1 bit; RGB values: 5 bits each
+	case PixelReserved: //7     Reserved        Regarded as 1555
+	case Pixel565: 		//1     565      R value: 5 bits; G value: 6 bits; B value: 5 bits
+	case Pixel4444: 	//2     4444 value: 4 bits; RGB values: 4 bits each
+	case PixelYUV:		//3     YUV422 32 bits per 2 pixels; YUYV values: 8 bits each
+	case PixelBumpMap:	//4		Bump Map 	16 bits/pixel; S value: 8 bits; R value: 8 bits
+	case PixelPal4:		//5     4 BPP Palette   Palette texture with 4 bits/pixel
+	case PixelPal8:		//6     8 BPP Palette   Palette texture with 8 bits/pixel
+		if (tcw.ScanOrder && (tex->PL || tex->PL32))	// planar path is taken only when the format has a planar converter
+		{
+			//Texture is stored 'planar' in memory, no deswizzle is needed
+			//verify(tcw.VQ_Comp==0);
+			if (tcw.VQ_Comp != 0)
+				WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)");
+
+			//Planar textures support stride selection, mostly used for non power of 2 textures (videos)
+			int stride = w;
+			if (tcw.StrideSel)
+			{
+				stride = std::max((TEXT_CONTROL & 31) * 32, w);	// stride from TEXT_CONTROL, never smaller than the texture width
+			}
+			//Call the format specific conversion code
+			texconv = tex->PL;
+			texconv32 = tex->PL32;
+			//calculate the size, in bytes, for the locking
+			size=stride*h*tex->bpp/8;
+		}
+		else
+		{
+			// Quake 3 Arena uses one. Not sure if valid but no need to crash
+			//verify(w==h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN*
+
+			if (tcw.VQ_Comp)
+			{
+				verify(tex->VQ != NULL || tex->VQ32 != NULL);
+				vq_codebook = sa;	// codebook stays at the texture start; sa may move past the mip levels below
+				if (tcw.MipMapped)
+					sa+=MipPoint[tsp.TexU];
+				texconv = tex->VQ;
+				texconv32 = tex->VQ32;
+				size=w*h/8;
+			}
+			else
+			{
+				verify(tex->TW != NULL || tex->TW32 != NULL);
+				if (tcw.MipMapped)
+					sa+=MipPoint[tsp.TexU]*tex->bpp/2;	// mip offset scaled by the format's bit depth
+				texconv = tex->TW;
+				texconv32 = tex->TW32;
+				size=w*h*tex->bpp/8;
+			}
+		}
+		break;
+	default:
+		WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt);
+		size=w*h*2;
+		texconv = NULL;	// with both converters null, Update() fills the texture with a placeholder value
+		texconv32 = NULL;
+	}
+}
+
+void BaseTextureCacheData::ComputeHash()	// Computes texture_hash and old_texture_hash from the texture's bytes in VRAM
+{
+	texture_hash = XXH32(&vram[sa], size, 7);	// 32-bit xxHash over size bytes starting at sa, seed 7
+	if (IsPaletted())
+		texture_hash ^= palette_hash;	// paletted textures also mix in the palette hash saved by Update()
+	old_texture_hash = texture_hash;	// legacy hash: the value before tcw is mixed in
+	texture_hash ^= tcw.full;
+}
+
+void BaseTextureCacheData::Update()	// Converts the texture from VRAM (optionally upscaling it), locks its VRAM range and uploads it through UploadToGPU()
+{
+	// Texture state tracking: count the update and clear the dirty flag
+	Updates++;
+	dirty=0;
+
+	tex_type = tex->type;	// start from the format's default decoded type
+
+	bool has_alpha = false;
+	if (IsPaletted())
+	{
+		tex_type = PAL_TYPE[PAL_RAM_CTRL&3];	// paletted textures take their type from the palette RAM control register
+		if (tex_type == TextureType::_8888)
+			has_alpha = true;
+
+		// Get the palette hash to check for future updates
+		if (tcw.PixelFmt == PixelPal4)
+			palette_hash = pal_hash_16[tcw.PalSelect];
+		else
+			palette_hash = pal_hash_256[tcw.PalSelect >> 4];
+	}
+
+	::palette_index = this->palette_index; // might be used if pal. tex
+	::vq_codebook = &vram[vq_codebook];    // might be used if VQ tex
+
+	// Texture conversion work: the row stride defaults to the texture width
+	u32 stride=w;
+
+	if (tcw.StrideSel && tcw.ScanOrder && (tex->PL || tex->PL32))	// same stride rule as the planar path in Create()
+		stride = std::max(w, (TEXT_CONTROL & 31) * 32);
+
+	PrintTextureName();
+	u32 original_h = h;
+	if (sa_tex > VRAM_SIZE || size == 0 || sa + size > VRAM_SIZE)	// texture is empty or does not fit in VRAM
+	{
+		if (sa + size > VRAM_SIZE)
+		{
+			// Shenmue Space Harrier mini-arcade loads a texture that goes beyond the end of VRAM
+			// but only uses the top portion of it
+			h = (VRAM_SIZE - sa) * 8 / stride / tex->bpp;	// number of whole rows that fit between sa and the end of VRAM
+			size = stride * h * tex->bpp/8;
+		}
+		else
+		{
+			WARN_LOG(RENDERER, "Warning: invalid texture. Address %08X %08X size %d", sa_tex, sa, size);
+			return;	// nothing is converted, locked or uploaded
+		}
+	}
+	if (settings.rend.CustomTextures)
+		custom_texture.LoadCustomTextureAsync(this);	// hand this entry to the custom texture loader
+
+	void *temp_tex_buffer = NULL;
+	u32 upscaled_w = w;
+	u32 upscaled_h = h;
+
+	PixelBuffer<u16> pb16;
+	PixelBuffer<u32> pb32;
+
+	// Figure out if we really need to use a 32-bit pixel buffer
+	bool need_32bit_buffer = true;
+	if ((settings.rend.TextureUpscale <= 1
+			|| w * h > settings.rend.MaxFilteredTextureSize
+				* settings.rend.MaxFilteredTextureSize		// Don't process textures that are too big
+			|| tcw.PixelFmt == PixelYUV)					// Don't process YUV textures
+		&& (!IsPaletted() || tex_type != TextureType::_8888)
+		&& texconv != NULL)
+		need_32bit_buffer = false;
+	// TODO avoid upscaling/deposterizing textures that change too often
+
+	if (texconv32 != NULL && need_32bit_buffer)
+	{
+		// Force the texture type since that's the only 32-bit one we know
+		tex_type = TextureType::_8888;
+
+		pb32.init(w, h);
+
+		texconv32(&pb32, (u8*)&vram[sa], stride, h);
+
+#ifdef DEPOSTERIZE
+		{
+			// Deposterization
+			PixelBuffer<u32> tmp_buf;
+			tmp_buf.init(w, h);
+
+			DePosterize(pb32.data(), tmp_buf.data(), w, h);
+			pb32.steal_data(tmp_buf);
+		}
+#endif
+
+		// xBRZ scaling
+		if (settings.rend.TextureUpscale > 1)
+		{
+			PixelBuffer<u32> tmp_buf;
+			tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale);
+
+			if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444)
+				// Alpha channel formats. Palettes with alpha are already handled
+				has_alpha = true;
+			UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha);
+			pb32.steal_data(tmp_buf);
+			upscaled_w *= settings.rend.TextureUpscale;
+			upscaled_h *= settings.rend.TextureUpscale;
+		}
+		temp_tex_buffer = pb32.data();
+	}
+	else if (texconv != NULL)
+	{
+		pb16.init(w, h);
+
+		texconv(&pb16,(u8*)&vram[sa],stride,h);
+		temp_tex_buffer = pb16.data();
+	}
+	else
+	{
+		// No converter available: fill the buffer with a placeholder value (0x80 in every byte)
+		WARN_LOG(RENDERER, "UNHANDLED TEXTURE");
+		pb16.init(w, h);
+		memset(pb16.data(), 0x80, w * h * 2);
+		temp_tex_buffer = pb16.data();
+	}
+	// Restore the original texture height if it was constrained to VRAM limits above
+	h = original_h;	// NOTE(review): size keeps its clamped value — confirm this is intended for the lock range below
+
+	// Lock the texture's VRAM range (sa_tex .. sa+size-1) to detect changes in it
+	lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this);
+
+	UploadToGPU(upscaled_w, upscaled_h, (u8*)temp_tex_buffer);	// pure virtual: the upload is implemented by the derived class
+	if (settings.rend.DumpTextures)
+	{
+		ComputeHash();	// refresh texture_hash, which is passed to DumpTexture below
+		custom_texture.DumpTexture(texture_hash, upscaled_w, upscaled_h, tex_type, temp_tex_buffer);
+	}
+}
+
+void BaseTextureCacheData::CheckCustomTexture()	// Uploads a pending custom image, if one is ready, then frees it
+{
+	if (custom_load_in_progress == 0 && custom_image_data != NULL)	// act only when no load is in progress and image data is present
+	{
+		tex_type = TextureType::_8888;	// custom images are uploaded as 8888
+		UploadToGPU(custom_width, custom_height, custom_image_data);
+		delete [] custom_image_data;
+		custom_image_data = NULL;	// cleared so the same image is not uploaded or freed again
+	}
+}
--- a/core/rend/TexCache.h
+++ b/core/rend/TexCache.h
@ -1,5 +1,7 @@
 #pragma once
+#include <atomic>
 #include "oslib/oslib.h"
+#include "hw/pvr/ta_structs.h"

 extern u8* vq_codebook;
 extern u32 palette_index;
@ -620,5 +622,81 @@ template void texture_VQ<convBMP_TW<pp_565>, u16>(PixelBuffer<u16>* pb,u8* p_in,
 #define texPAL4_VQ32 texture_VQ<convPAL4_TW<pp_8888, u32>, u32>
 #define texPAL8_VQ32 texture_VQ<convPAL8_TW<pp_8888, u32>, u32>

+bool VramLockedWriteOffset(size_t offset);
 void DePosterize(u32* source, u32* dest, int width, int height);
 void UpscalexBRZ(int factor, u32* source, u32* dest, int width, int height, bool has_alpha);
+
+struct PvrTexInfo;	// defined in TexCache.cpp
+template <class pixel_type> class PixelBuffer;
+typedef void TexConvFP(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);	// converter writing 16-bit pixels; Update() passes the row stride as Width
+typedef void TexConvFP32(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);	// converter writing 32-bit pixels; Update() passes the row stride as Width
+enum class TextureType { _565, _5551, _4444, _8888 };	// decoded pixel layout; replaces the GL type constants previously used for tex_type
+
+struct BaseTextureCacheData	// Texture cache entry without GL types; derived classes supply GetId() and UploadToGPU()
+{
+	TSP tsp;        //dreamcast texture parameters
+	TCW tcw;		// texture control word: pixel format, address and layout flags are read from it
+
+	// Decoded/filtered texture format
+	TextureType tex_type;
+
+	u32 Lookups;	// reset to 0 by Create()
+
+	u32 sa;         //pixel data start address in vram (might be offset for mipmaps/etc)
+	u32 sa_tex;		//texture data start address in vram
+	u32 w,h;        //width & height of the texture
+	u32 size;       //size, in bytes, in vram
+
+	const PvrTexInfo* tex;	// format table row selected by Create()
+	TexConvFP*  texconv;	// 16 bpp converter for this texture, may be null
+	TexConvFP32*  texconv32;	// 32 bpp converter for this texture, may be null
+
+	u32 dirty;	// non-zero means the texture needs updating (set to FrameCount by Create(), cleared by Update())
+	vram_block* lock_block;	// handle returned by libCore_vramlock_Lock in Update(), released in Delete()
+
+	u32 Updates;	// incremented by every Update() call, reset by Create()
+
+	u32 palette_index;	// computed by Create() from tcw.PalSelect for paletted textures
+	//used for palette updates
+	u32 palette_hash;			// Palette hash at time of last update
+	u32 vq_codebook;            // VQ quantizers table for compressed textures
+	u32 texture_hash;			// xxhash of texture data, used for custom textures
+	u32 old_texture_hash;		// legacy hash
+	u8* volatile custom_image_data;		// loaded custom image data
+	volatile u32 custom_width;	// dimensions passed to UploadToGPU() together with custom_image_data
+	volatile u32 custom_height;
+	std::atomic_int custom_load_in_progress;	// while non-zero, Delete() returns false and CheckCustomTexture() does nothing
+
+	void PrintTextureName();
+	virtual std::string GetId() = 0;	// identifier text printed by PrintTextureName()
+
+	bool IsPaletted()
+	{
+		return tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8;
+	}
+
+	const char* GetPixelFormatName()	// name of tcw.PixelFmt; any value not listed below yields "unknown"
+	{
+		switch (tcw.PixelFmt)
+		{
+		case Pixel1555: return "1555";
+		case Pixel565: return "565";
+		case Pixel4444: return "4444";
+		case PixelYUV: return "yuv";
+		case PixelBumpMap: return "bumpmap";
+		case PixelPal4: return "pal4";
+		case PixelPal8: return "pal8";
+		default: return "unknown";
+		}
+	}
+
+	void Create();
+	void ComputeHash();
+	void Update();
+	virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) = 0;	// presumably interprets the buffer according to tex_type — confirm in the derived class
+	void CheckCustomTexture();
+	//true if : dirty or paletted texture and hashes don't match
+	bool NeedsUpdate();
+	virtual bool Delete();	// returns false when the entry cannot be released yet (a custom load is in progress)
+	virtual ~BaseTextureCacheData() {}
+};
--- a/core/rend/gles/gldraw.cpp
+++ b/core/rend/gles/gldraw.cpp
@ -1,7 +1,7 @@
 #include "glcache.h"
 #include "rend/rend.h"
+#include "rend/sorter.h"

-#include <algorithm>
 /*

 Drawing and related state management
@ -299,454 +299,12 @@ void DrawList(const List<PolyParam>& gply, int first, int count)
 	}
 }

-bool operator<(const PolyParam &left, const PolyParam &right)
+static vector<SortTrigDrawParam> pidx_sort;
+
+static void SortTriangles(int first, int count)
 {
-/* put any condition you want to sort on here */
-	return left.zvZ<right.zvZ;
-	//return left.zMin<right.zMax;
-}
-
-//Sort based on min-z of each strip
-void SortPParams(int first, int count)
-{
-	if (pvrrc.verts.used() == 0 || count <= 1)
-		return;
-
-	Vertex* vtx_base=pvrrc.verts.head();
-	u32* idx_base = pvrrc.idx.head();
-
-	PolyParam* pp = &pvrrc.global_param_tr.head()[first];
-	PolyParam* pp_end = pp + count;
-
-	while(pp!=pp_end)
-	{
-		if (pp->count<2)
-		{
-			pp->zvZ=0;
-		}
-		else
-		{
-			u32* idx = idx_base + pp->first;
-
-			Vertex* vtx=vtx_base+idx[0];
-			Vertex* vtx_end=vtx_base + idx[pp->count-1]+1;
-
-			u32 zv=0xFFFFFFFF;
-			while(vtx!=vtx_end)
-			{
-				zv=min(zv,(u32&)vtx->z);
-				vtx++;
-			}
-
-			pp->zvZ=(f32&)zv;
-		}
-		pp++;
-	}
-
-	std::stable_sort(pvrrc.global_param_tr.head() + first, pvrrc.global_param_tr.head() + first + count);
-}
-
-Vertex* vtx_sort_base;
-
-
-struct IndexTrig
-{
-	u32 id[3];
-	u16 pid;
-	f32 z;
-};
-
-
-struct SortTrigDrawParam
-{
-	PolyParam* ppid;
-	u32 first;
-	u32 count;
-};
-
-float min3(float v0,float v1,float v2)
-{
-	return min(min(v0,v1),v2);
-}
-
-float max3(float v0,float v1,float v2)
-{
-	return max(max(v0,v1),v2);
-}
-
-
-float minZ(Vertex* v, u32* mod)
-{
-	return min(min(v[mod[0]].z,v[mod[1]].z),v[mod[2]].z);
-}
-
-bool operator<(const IndexTrig &left, const IndexTrig &right)
-{
-	return left.z<right.z;
-}
-
-
-#if 0
-/*
-
-	Per triangle sorting experiments
-
-*/
-
-//approximate the triangle area
-float area_x2(Vertex* v)
-{
-	return 2/3*fabs( (v[0].x-v[2].x)*(v[1].y-v[0].y) - (v[0].x-v[1].x)*(v[2].y-v[0].y)) ;
-}
-
-//approximate the distance ^2
-float distance_apprx(Vertex* a, Vertex* b)
-{
-	float xd=a->x-b->x;
-	float yd=a->y-b->y;
-
-	return xd*xd+yd*yd;
-}
-
-//was good idea, but not really working ..
-bool Intersect(Vertex* a, Vertex* b)
-{
-	float a1=area_x2(a);
-	float a2=area_x2(b);
-
-	float d = distance_apprx(a,b);
-
-	return (a1+a1)>d;
-}
-
-//root for quick-union
-u16 rid(vector<u16>& v, u16 id)
-{
-	while(id!=v[id]) id=v[id];
-	return id;
-}
-
-struct TrigBounds
-{
-	float xs,xe;
-	float ys,ye;
-	float zs,ze;
-};
-
-//find 3d bounding box for triangle
-TrigBounds bound(Vertex* v)
-{
-	TrigBounds rv = {	min(min(v[0].x,v[1].x),v[2].x), max(max(v[0].x,v[1].x),v[2].x),
-						min(min(v[0].y,v[1].y),v[2].y), max(max(v[0].y,v[1].y),v[2].y),
-						min(min(v[0].z,v[1].z),v[2].z), max(max(v[0].z,v[1].z),v[2].z),
-					};
-
-	return rv;
-}
-
-//bounding box 2d intersection
-bool Intersect(TrigBounds& a, TrigBounds& b)
-{
-	return  ( !(a.xe<b.xs || a.xs>b.xe) && !(a.ye<b.ys || a.ys>b.ye) /*&& !(a.ze<b.zs || a.zs>b.ze)*/ );
-}
-
-
-bool operator<(const IndexTrig &left, const IndexTrig &right)
-{
-	/*
-	TrigBounds l=bound(vtx_sort_base+left.id);
-	TrigBounds r=bound(vtx_sort_base+right.id);
-
-	if (!Intersect(l,r))
-	{
-		return true;
-	}
-	else
-	{
-		return (l.zs + l.ze) < (r.zs + r.ze);
-	}*/
-
-	return minZ(&vtx_sort_base[left.id])<minZ(&vtx_sort_base[right.id]);
-}
-
-//Not really working cuz of broken intersect
-bool Intersect(const IndexTrig &left, const IndexTrig &right)
-{
-	TrigBounds l=bound(vtx_sort_base+left.id);
-	TrigBounds r=bound(vtx_sort_base+right.id);
-
-	return Intersect(l,r);
-}
-
-#endif
-
-//are two poly params the same?
-bool PP_EQ(PolyParam* pp0, PolyParam* pp1)
-{
-	return (pp0->pcw.full&PCW_DRAW_MASK)==(pp1->pcw.full&PCW_DRAW_MASK) && pp0->isp.full==pp1->isp.full && pp0->tcw.full==pp1->tcw.full && pp0->tsp.full==pp1->tsp.full && pp0->tileclip==pp1->tileclip;
-}
-
-static vector<SortTrigDrawParam>	pidx_sort;
-
-void fill_id(u32* d, Vertex* v0, Vertex* v1, Vertex* v2,  Vertex* vb)
-{
-	d[0]=v0-vb;
-	d[1]=v1-vb;
-	d[2]=v2-vb;
-}
-
-void GenSorted(int first, int count)
-{
-	u32 tess_gen=0;
-
-	pidx_sort.clear();
-
-	if (pvrrc.verts.used() == 0 || count <= 1)
-		return;
-
-	Vertex* vtx_base=pvrrc.verts.head();
-	u32* idx_base = pvrrc.idx.head();
-
-	PolyParam* pp_base = &pvrrc.global_param_tr.head()[first];
-	PolyParam* pp = pp_base;
-	PolyParam* pp_end = pp + count;
-	
-	Vertex* vtx_arr=vtx_base+idx_base[pp->first];
-	vtx_sort_base=vtx_base;
-
-	static u32 vtx_cnt;
-
-	int vtx_count=idx_base[pp_end[-1].first+pp_end[-1].count-1]-idx_base[pp->first];
-	if (vtx_count>vtx_cnt)
-		vtx_cnt=vtx_count;
-
-#if PRINT_SORT_STATS
-	printf("TVTX: %d || %d\n",vtx_cnt,vtx_count);
-#endif
-	
-	if (vtx_count<=0)
-		return;
-
-	//make lists of all triangles, with their pid and vid
-	static vector<IndexTrig> lst;
-	
-	lst.resize(vtx_count*4);
-	
-
-	int pfsti=0;
-
-	while(pp!=pp_end)
-	{
-		u32 ppid=(pp-pp_base);
-
-		if (pp->count>2)
-		{
-			u32* idx = idx_base + pp->first;
-
-			Vertex* vtx=vtx_base+idx[0];
-			Vertex* vtx_end=vtx_base + idx[pp->count-1]-1;
-			u32 flip=0;
-			while(vtx!=vtx_end)
-			{
-				Vertex* v0, * v1, * v2, * v3, * v4, * v5;
-
-				if (flip)
-				{
-					v0=&vtx[1];
-					v1=&vtx[0];
-					v2=&vtx[2];
-				}
-				else
-				{
-					v0=&vtx[0];
-					v1=&vtx[1];
-					v2=&vtx[2];
-				}
-#if 0
-				if (settings.pvr.subdivide_transp)
-				{
-					u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32;
-					u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32;
-
-					if (tess_x==1) tess_x=0;
-					if (tess_y==1) tess_y=0;
-
-					//bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2;
-
-					if (tess_x + tess_y)
-					{
-						v3=pvrrc.verts.Append(3);
-						v4=v3+1;
-						v5=v4+1;
-
-						//xyz
-						for (int i=0;i<3;i++)
-						{
-							((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f;
-							((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f;
-							((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f;
-						}
-
-						//*TODO* Make it perspective correct
-
-						//uv
-						for (int i=0;i<2;i++)
-						{
-							((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f;
-							((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f;
-							((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f;
-						}
-
-						//color
-						for (int i=0;i<4;i++)
-						{
-							v3->col[i]=v0->col[i]/2+v2->col[i]/2;
-							v4->col[i]=v0->col[i]/2+v1->col[i]/2;
-							v5->col[i]=v1->col[i]/2+v2->col[i]/2;
-						}
-
-						fill_id(lst[pfsti].id,v0,v3,v4,vtx_base);
-						lst[pfsti].pid= ppid ;
-						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
-						pfsti++;
-
-						fill_id(lst[pfsti].id,v2,v3,v5,vtx_base);
-						lst[pfsti].pid= ppid ;
-						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
-						pfsti++;
-
-						fill_id(lst[pfsti].id,v3,v4,v5,vtx_base);
-						lst[pfsti].pid= ppid ;
-						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
-						pfsti++;
-
-						fill_id(lst[pfsti].id,v5,v4,v1,vtx_base);
-						lst[pfsti].pid= ppid ;
-						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
-						pfsti++;
-
-						tess_gen+=3;
-					}
-					else
-					{
-						fill_id(lst[pfsti].id,v0,v1,v2,vtx_base);
-						lst[pfsti].pid= ppid ;
-						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
-						pfsti++;
-					}
-				}
-				else
-#endif
-				{
-					fill_id(lst[pfsti].id,v0,v1,v2,vtx_base);
-					lst[pfsti].pid= ppid ;
-					lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
-					pfsti++;
-				}
-
-				flip ^= 1;
-				
-				vtx++;
-			}
-		}
-		pp++;
-	}
-
-	u32 aused=pfsti;
-
-	lst.resize(aused);
-
-	//sort them
-#if 1
-	std::stable_sort(lst.begin(),lst.end());
-
-	//Merge pids/draw cmds if two different pids are actually equal
-	if (true)
-	{
-		for (u32 k=1;k<aused;k++)
-		{
-			if (lst[k].pid!=lst[k-1].pid)
-			{
-				if (PP_EQ(&pp_base[lst[k].pid],&pp_base[lst[k-1].pid]))
-				{
-					lst[k].pid=lst[k-1].pid;
-				}
-			}
-		}
-	}
-#endif
-
-	
-#if 0
-	//tries to optimise draw calls by reordering non-intersecting polygons
-	//uber slow and not very effective
-	{
-		int opid=lst[0].pid;
-
-		for (int k=1;k<aused;k++)
-		{
-			if (lst[k].pid!=opid)
-			{
-				if (opid>lst[k].pid)
-				{
-					//MOVE UP
-					for (int j=k;j>0 && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j--)
-					{
-						swap(lst[j],lst[j-1]);
-					}
-				}
-				else
-				{
-					//move down
-					for (int j=k+1;j<aused && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j++)
-					{
-						swap(lst[j],lst[j-1]);
-					}
-				}
-			}
-
-			opid=lst[k].pid;
-		}
-	}
-#endif
-
-	//re-assemble them into drawing commands
-	static vector<u32> vidx_sort;
-
-	vidx_sort.resize(aused*3);
-
-	int idx=-1;
-
-	for (u32 i=0; i<aused; i++)
-	{
-		int pid=lst[i].pid;
-		u32* midx = lst[i].id;
-
-		vidx_sort[i*3 + 0]=midx[0];
-		vidx_sort[i*3 + 1]=midx[1];
-		vidx_sort[i*3 + 2]=midx[2];
-
-		if (idx!=pid /* && !PP_EQ(&pp_base[pid],&pp_base[idx]) */ )
-		{
-			SortTrigDrawParam stdp = { pp_base + pid, i * 3, 0 };
-			
-			if (idx!=-1)
-			{
-				SortTrigDrawParam* last=&pidx_sort[pidx_sort.size()-1];
-				last->count=stdp.first-last->first;
-			}
-
-			pidx_sort.push_back(stdp);
-			idx=pid;
-		}
-	}
-
-	SortTrigDrawParam* stdp=&pidx_sort[pidx_sort.size()-1];
-	stdp->count=aused*3-stdp->first;
-
-#if PRINT_SORT_STATS
-	printf("Reassembled into %d from %d\n",pidx_sort.size(),pp_end-pp_base);
-#endif
+	vector<u32> vidx_sort;
+	GenSorted(first, count, pidx_sort, vidx_sort);

 	//Upload to GPU if needed
 	if (pidx_sort.size())
@ -767,8 +325,6 @@ void GenSorted(int first, int count)
 		else
 			glBufferData(GL_ELEMENT_ARRAY_BUFFER, vidx_sort.size() * sizeof(u32), &vidx_sort[0], GL_STREAM_DRAW);
 		glCheck();
-
-		if (tess_gen) DEBUG_LOG(RENDERER, "Generated %.2fK Triangles !", tess_gen / 1000.0);
 	}
 }

@ -1099,7 +655,7 @@ void DrawStrips()
            {
 				if (!settings.rend.PerStripSorting)
 				{
-					GenSorted(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count);
+					SortTriangles(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count);
 					DrawSorted(render_pass < pvrrc.render_passes.used() - 1);
 				}
 				else
--- a/core/rend/gles/gles.h
+++ b/core/rend/gles/gles.h
@ -2,6 +2,7 @@
 #include <unordered_map>
 #include <atomic>
 #include "rend/rend.h"
+#include "rend/TexCache.h"

 #if (defined(GLES) && HOST_OS != OS_DARWIN && !defined(USE_SDL)) || defined(__ANDROID__)
 #define USE_EGL
@ -168,7 +169,6 @@ text_info raw_GetTexture(TSP tsp, TCW tcw);
 void killtex();
 void CollectCleanup();
 void DoCleanup();
-void SortPParams(int first, int count);
 void SetCull(u32 CullMode);
 s32 SetTileClip(u32 val, GLint uniform);
 void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc);
@ -238,63 +238,13 @@ extern struct ShaderUniforms_t

 } ShaderUniforms;

-struct PvrTexInfo;
-template <class pixel_type> class PixelBuffer;
-typedef void TexConvFP(PixelBuffer<u16>* pb,u8* p_in,u32 Width,u32 Height);
-typedef void TexConvFP32(PixelBuffer<u32>* pb,u8* p_in,u32 Width,u32 Height);
-
-struct TextureCacheData
+struct TextureCacheData : BaseTextureCacheData
 {
-	TSP tsp;        //dreamcast texture parameters
-	TCW tcw;
-	
 	GLuint texID;   //gl texture
 	u16* pData;
-	int tex_type;
-	
-	u32 Lookups;
-	
-	//decoded texture info
-	u32 sa;         //pixel data start address in vram (might be offset for mipmaps/etc)
-	u32 sa_tex;		//texture data start address in vram
-	u32 w,h;        //width & height of the texture
-	u32 size;       //size, in bytes, in vram
-	
-	const PvrTexInfo* tex;
-	TexConvFP*  texconv;
-	TexConvFP32*  texconv32;
-	
-	u32 dirty;
-	vram_block* lock_block;
-	
-	u32 Updates;
-	
-	u32 palette_index;
-	//used for palette updates
-	u32 palette_hash;			// Palette hash at time of last update
-	u32 vq_codebook;            // VQ quantizers table for compressed textures
-	u32 texture_hash;			// xxhash of texture data, used for custom textures
-	u32 old_texture_hash;		// legacy hash
-	u8* volatile custom_image_data;		// loaded custom image data
-	volatile u32 custom_width;
-	volatile u32 custom_height;
-	std::atomic_int custom_load_in_progress;
-	
-	void PrintTextureName();
-	
-	bool IsPaletted()
-	{
-		return tcw.PixelFmt == PixelPal4 || tcw.PixelFmt == PixelPal8;
-	}
-	
-	void Create(bool isGL);
-	void ComputeHash();
-	void Update();
-	void UploadToGPU(GLuint textype, int width, int height, u8 *temp_tex_buffer);
-	void CheckCustomTexture();
-	//true if : dirty or paletted texture and hashes don't match
-	bool NeedsUpdate();
-	bool Delete();
+	virtual std::string GetId() override { return std::to_string(texID); }
+	virtual void UploadToGPU(int width, int height, u8 *temp_tex_buffer) override;
+	virtual bool Delete() override;
 };

 extern const u32 Zfunction[8];
--- a/core/rend/gles/gltex.cpp
+++ b/core/rend/gles/gltex.cpp
@ -3,8 +3,6 @@
 #include "rend/TexCache.h"
 #include "hw/pvr/pvr_mem.h"
 #include "hw/mem/_vmem.h"
-#include <xxhash.h>
-#include "CustomTexture.h"

 #include <png.h>

@ -32,50 +30,6 @@ Compression

 extern u32 decoded_colors[3][65536];

-struct PvrTexInfo
-{
-	const char* name;
-	int bpp;        //4/8 for pal. 16 for yuv, rgb, argb
-	GLuint type;
-	// Conversion to 16 bpp
-	TexConvFP *PL;
-	TexConvFP *TW;
-	TexConvFP *VQ;
-	// Conversion to 32 bpp
-	TexConvFP32 *PL32;
-	TexConvFP32 *TW32;
-	TexConvFP32 *VQ32;
-};
-
-static const PvrTexInfo format[8] =
-{	// name     bpp GL format				   Planar		Twiddled	 VQ				Planar(32b)    Twiddled(32b)  VQ (32b)
-	{"1555", 	16,	GL_UNSIGNED_SHORT_5_5_5_1, tex1555_PL,	tex1555_TW,  tex1555_VQ,	tex1555_PL32,  tex1555_TW32,  tex1555_VQ32 },	//1555
-	{"565", 	16, GL_UNSIGNED_SHORT_5_6_5,   tex565_PL,	tex565_TW,   tex565_VQ, 	tex565_PL32,   tex565_TW32,   tex565_VQ32 },	//565
-	{"4444", 	16, GL_UNSIGNED_SHORT_4_4_4_4, tex4444_PL,	tex4444_TW,  tex4444_VQ, 	tex4444_PL32,  tex4444_TW32,  tex4444_VQ32 },	//4444
-	{"yuv", 	16, GL_UNSIGNED_BYTE,          NULL, 		NULL, 		 NULL,			texYUV422_PL,  texYUV422_TW,  texYUV422_VQ },	//yuv
-	{"bumpmap", 16, GL_UNSIGNED_SHORT_4_4_4_4, texBMP_PL,	texBMP_TW,	 texBMP_VQ, 	NULL},											//bump map
-	{"pal4", 	4,	0,						   0,			texPAL4_TW,  texPAL4_VQ, 	NULL, 		   texPAL4_TW32,  texPAL4_VQ32 },	//pal4
-	{"pal8", 	8,	0,						   0,			texPAL8_TW,  texPAL8_VQ, 	NULL, 		   texPAL8_TW32,  texPAL8_VQ32 },	//pal8
-	{"ns/1555", 0},																														// Not supported (1555)
-};
-
-static const u32 MipPoint[8] =
-{
-	0x00006,//8
-	0x00016,//16
-	0x00056,//32
-	0x00156,//64
-	0x00556,//128
-	0x01556,//256
-	0x05556,//512
-	0x15556//1024
-};
-
-static const GLuint PAL_TYPE[4]=
-{GL_UNSIGNED_SHORT_5_5_5_1,GL_UNSIGNED_SHORT_5_6_5,GL_UNSIGNED_SHORT_4_4_4_4, GL_UNSIGNED_BYTE};
-
-static CustomTexture custom_texture;
-
 static void dumpRtTexture(u32 name, u32 w, u32 h) {
 	char sname[256];
 	sprintf(sname, "texdump/%x-%d.png", name, FrameCount);
@ -117,283 +71,44 @@ static void dumpRtTexture(u32 name, u32 w, u32 h) {
 	free(rows);
 }

-//Texture Cache :)
-void TextureCacheData::PrintTextureName()
+void TextureCacheData::UploadToGPU(int width, int height, u8 *temp_tex_buffer)
 {
-	char str[512];
-	sprintf(str, "Texture: %s ", tex ? tex->name : "?format?");
-
-	if (tcw.VQ_Comp)
-		strcat(str, " VQ");
-
-	if (tcw.ScanOrder==0)
-		strcat(str, " TW");
-
-	if (tcw.MipMapped)
-		strcat(str, " MM");
-
-	if (tcw.StrideSel)
-		strcat(str, " Stride");
-
-	sprintf(str + strlen(str), " %dx%d @ 0x%X", 8 << tsp.TexU, 8 << tsp.TexV, tcw.TexAddr << 3);
-	sprintf(str + strlen(str), " id=%d", texID);
-	DEBUG_LOG(RENDERER, "%s", str);
-}
-
-//Create GL texture from tsp/tcw
-void TextureCacheData::Create(bool isGL)
-{
-	//ask GL for texture ID
-	if (isGL) {
-		texID = glcache.GenTexture();
-	}
-	else {
-		texID = 0;
-	}
-
-	pData = 0;
-	tex_type = 0;
-
-	//Reset state info ..
-	Lookups=0;
-	Updates=0;
-	dirty=FrameCount;
-	lock_block=0;
-
-	//decode info from tsp/tcw into the texture struct
-	tex=&format[tcw.PixelFmt == PixelReserved ? Pixel1555 : tcw.PixelFmt];	//texture format table entry
-
-	sa_tex = (tcw.TexAddr<<3) & VRAM_MASK;	//texture start address
-	sa = sa_tex;							//data texture start address (modified for MIPs, as needed)
-	w=8<<tsp.TexU;                   //tex width
-	h=8<<tsp.TexV;                   //tex height
-
-	//PAL texture
-	if (tex->bpp == 4)
-		palette_index = tcw.PalSelect << 4;
-	else if (tex->bpp == 8)
-		palette_index = (tcw.PalSelect >> 4) << 8;
-
-	//VQ table (if VQ tex)
-	if (tcw.VQ_Comp)
-		vq_codebook = sa;
-
-	//Convert a pvr texture into OpenGL
-	switch (tcw.PixelFmt)
+	if (texID != 0)
 	{
-
-	case Pixel1555: 	//0     1555 value: 1 bit; RGB values: 5 bits each
-	case PixelReserved: //7     Reserved        Regarded as 1555
-	case Pixel565: 		//1     565      R value: 5 bits; G value: 6 bits; B value: 5 bits
-	case Pixel4444: 	//2     4444 value: 4 bits; RGB values: 4 bits each
-	case PixelYUV:		//3     YUV422 32 bits per 2 pixels; YUYV values: 8 bits each
-	case PixelBumpMap:	//4		Bump Map 	16 bits/pixel; S value: 8 bits; R value: 8 bits
-	case PixelPal4:		//5     4 BPP Palette   Palette texture with 4 bits/pixel
-	case PixelPal8:		//6     8 BPP Palette   Palette texture with 8 bits/pixel
-		if (tcw.ScanOrder && (tex->PL || tex->PL32))
-		{
-			//Texture is stored 'planar' in memory, no deswizzle is needed
-			//verify(tcw.VQ_Comp==0);
-			if (tcw.VQ_Comp != 0)
-				WARN_LOG(RENDERER, "Warning: planar texture with VQ set (invalid)");
-
-			//Planar textures support stride selection, mostly used for non power of 2 textures (videos)
-			int stride=w;
-			if (tcw.StrideSel)
-				stride=(TEXT_CONTROL&31)*32;
-			//Call the format specific conversion code
-			texconv = tex->PL;
-			texconv32 = tex->PL32;
-			//calculate the size, in bytes, for the locking
-			size=stride*h*tex->bpp/8;
-		}
-		else
-		{
-			// Quake 3 Arena uses one. Not sure if valid but no need to crash
-			//verify(w==h || !tcw.MipMapped); // are non square mipmaps supported ? i can't recall right now *WARN*
-
-			if (tcw.VQ_Comp)
-			{
-				verify(tex->VQ != NULL || tex->VQ32 != NULL);
-				vq_codebook = sa;
-				if (tcw.MipMapped)
-					sa+=MipPoint[tsp.TexU];
-				texconv = tex->VQ;
-				texconv32 = tex->VQ32;
-				size=w*h/8;
-			}
-			else
-			{
-				verify(tex->TW != NULL || tex->TW32 != NULL);
-				if (tcw.MipMapped)
-					sa+=MipPoint[tsp.TexU]*tex->bpp/2;
-				texconv = tex->TW;
-				texconv32 = tex->TW32;
-				size=w*h*tex->bpp/8;
-			}
-		}
-		break;
-	default:
-		WARN_LOG(RENDERER, "Unhandled texture format %d", tcw.PixelFmt);
-		size=w*h*2;
-		texconv = NULL;
-		texconv32 = NULL;
-	}
-}
-
-void TextureCacheData::ComputeHash()
-{
-	texture_hash = XXH32(&vram[sa], size, 7);
-	if (IsPaletted())
-		texture_hash ^= palette_hash;
-	old_texture_hash = texture_hash;
-	texture_hash ^= tcw.full;
-}
-	
-void TextureCacheData::Update()
-{
-	//texture state tracking stuff
-	Updates++;
-	dirty=0;
-
-	GLuint textype=tex->type;
-
-	bool has_alpha = false;
-	if (IsPaletted())
-	{
-		textype=PAL_TYPE[PAL_RAM_CTRL&3];
-		if (textype == GL_UNSIGNED_BYTE)
-			has_alpha = true;
-
-		// Get the palette hash to check for future updates
-		if (tcw.PixelFmt == PixelPal4)
-			palette_hash = pal_hash_16[tcw.PalSelect];
-		else
-			palette_hash = pal_hash_256[tcw.PalSelect >> 4];
-	}
-
-	::palette_index = this->palette_index; // might be used if pal. tex
-	::vq_codebook = &vram[vq_codebook];    // might be used if VQ tex
-
-	//texture conversion work
-	u32 stride=w;
-
-	if (tcw.StrideSel && tcw.ScanOrder && (tex->PL || tex->PL32))
-		stride=(TEXT_CONTROL&31)*32; //I think this needs +1 ?
-
-	PrintTextureName();
-	u32 original_h = h;
-	if (sa_tex > VRAM_SIZE || size == 0 || sa + size > VRAM_SIZE)
-	{
-		if (sa + size > VRAM_SIZE)
-		{
-			// Shenmue Space Harrier mini-arcade loads a texture that goes beyond the end of VRAM
-			// but only uses the top portion of it
-			h = (VRAM_SIZE - sa) * 8 / stride / tex->bpp;
-			size = stride * h * tex->bpp/8;
-		}
-		else
-		{
-			WARN_LOG(RENDERER, "Warning: invalid texture. Address %08X %08X size %d", sa_tex, sa, size);
-			return;
-		}
-	}
-	if (settings.rend.CustomTextures)
-		custom_texture.LoadCustomTextureAsync(this);
-
-	void *temp_tex_buffer = NULL;
-	u32 upscaled_w = w;
-	u32 upscaled_h = h;
-
-	PixelBuffer<u16> pb16;
-	PixelBuffer<u32> pb32;
-
-	// Figure out if we really need to use a 32-bit pixel buffer
-	bool need_32bit_buffer = true;
-	if ((settings.rend.TextureUpscale <= 1
-			|| w * h > settings.rend.MaxFilteredTextureSize
-				* settings.rend.MaxFilteredTextureSize		// Don't process textures that are too big
-			|| tcw.PixelFmt == PixelYUV)					// Don't process YUV textures
-		&& (!IsPaletted() || textype != GL_UNSIGNED_BYTE)
-		&& texconv != NULL)
-		need_32bit_buffer = false;
-	// TODO avoid upscaling/depost. textures that change too often
-
-	if (texconv32 != NULL && need_32bit_buffer)
-	{
-		// Force the texture type since that's the only 32-bit one we know
-		textype = GL_UNSIGNED_BYTE;
-
-		pb32.init(w, h);
-
-		texconv32(&pb32, (u8*)&vram[sa], stride, h);
-
-#ifdef DEPOSTERIZE
-		{
-			// Deposterization
-			PixelBuffer<u32> tmp_buf;
-			tmp_buf.init(w, h);
-
-			DePosterize(pb32.data(), tmp_buf.data(), w, h);
-			pb32.steal_data(tmp_buf);
-		}
-#endif
-
-		// xBRZ scaling
-		if (settings.rend.TextureUpscale > 1)
-		{
-			PixelBuffer<u32> tmp_buf;
-			tmp_buf.init(w * settings.rend.TextureUpscale, h * settings.rend.TextureUpscale);
-
-			if (tcw.PixelFmt == Pixel1555 || tcw.PixelFmt == Pixel4444)
-				// Alpha channel formats. Palettes with alpha are already handled
-				has_alpha = true;
-			UpscalexBRZ(settings.rend.TextureUpscale, pb32.data(), tmp_buf.data(), w, h, has_alpha);
-			pb32.steal_data(tmp_buf);
-			upscaled_w *= settings.rend.TextureUpscale;
-			upscaled_h *= settings.rend.TextureUpscale;
-		}
-		temp_tex_buffer = pb32.data();
-	}
-	else if (texconv != NULL)
-	{
-		pb16.init(w, h);
-
-		texconv(&pb16,(u8*)&vram[sa],stride,h);
-		temp_tex_buffer = pb16.data();
-	}
-	else
-	{
-		//fill it in with a temp color
-		WARN_LOG(RENDERER, "UNHANDLED TEXTURE");
-		pb16.init(w, h);
-		memset(pb16.data(), 0x80, w * h * 2);
-		temp_tex_buffer = pb16.data();
-	}
-	// Restore the original texture height if it was constrained to VRAM limits above
-	h = original_h;
-
-	//lock the texture to detect changes in it
-	lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this);
-
-	if (texID) {
 		//upload to OpenGL !
-		UploadToGPU(textype, upscaled_w, upscaled_h, (u8*)temp_tex_buffer);
-		if (settings.rend.DumpTextures)
+		glcache.BindTexture(GL_TEXTURE_2D, texID);
+		GLuint comps = GL_RGBA;
+		GLuint gltype;
+		switch (tex_type)
 		{
-			ComputeHash();
-			custom_texture.DumpTexture(texture_hash, upscaled_w, upscaled_h, textype, temp_tex_buffer);
+		case TextureType::_5551:
+			gltype = GL_UNSIGNED_SHORT_5_5_5_1;
+			break;
+		case TextureType::_565:
+			gltype = GL_UNSIGNED_SHORT_5_6_5;
+			comps = GL_RGB;
+			break;
+		case TextureType::_4444:
+			gltype = GL_UNSIGNED_SHORT_4_4_4_4;
+			break;
+		case TextureType::_8888:
+			gltype = GL_UNSIGNED_BYTE;
+			break;
 		}
+		glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, gltype, temp_tex_buffer);
+		if (tcw.MipMapped && settings.rend.UseMipmaps)
+			glGenerateMipmap(GL_TEXTURE_2D);
 	}
 	else {
 		#if FEAT_HAS_SOFTREND
-			if (textype == GL_UNSIGNED_SHORT_5_6_5)
+			/*
+			if (tex_type == TextureType::_565)
 				tex_type = 0;
-			else if (textype == GL_UNSIGNED_SHORT_5_5_5_1)
+			else if (tex_type == TextureType::_5551)
 				tex_type = 1;
-			else if (textype == GL_UNSIGNED_SHORT_4_4_4_4)
+			else if (tex_type == TextureType::_4444)
 				tex_type = 2;
-
+			*/
 			u16 *tex_data = (u16 *)temp_tex_buffer;
 			if (pData) {
 				_mm_free(pData);
@ -415,40 +130,12 @@ void TextureCacheData::Update()
 		#endif
 	}
 }
-
-void TextureCacheData::UploadToGPU(GLuint textype, int width, int height, u8 *temp_tex_buffer)
-{
-	//upload to OpenGL !
-	glcache.BindTexture(GL_TEXTURE_2D, texID);
-	GLuint comps=textype == GL_UNSIGNED_SHORT_5_6_5 ? GL_RGB : GL_RGBA;
-	glTexImage2D(GL_TEXTURE_2D, 0,comps, width, height, 0, comps, textype, temp_tex_buffer);
-	if (tcw.MipMapped && settings.rend.UseMipmaps)
-		glGenerateMipmap(GL_TEXTURE_2D);
-}
-
-void TextureCacheData::CheckCustomTexture()
-{
-	if (custom_load_in_progress == 0 && custom_image_data != NULL)
-	{
-		UploadToGPU(GL_UNSIGNED_BYTE, custom_width, custom_height, custom_image_data);
-		delete [] custom_image_data;
-		custom_image_data = NULL;
-	}
-}
-
-//true if : dirty or paletted texture and hashes don't match
-bool TextureCacheData::NeedsUpdate() {
-	bool rc = dirty
-			|| (tcw.PixelFmt == PixelPal4 && palette_hash != pal_hash_16[tcw.PalSelect])
-			|| (tcw.PixelFmt == PixelPal8 && palette_hash != pal_hash_256[tcw.PalSelect >> 4]);
-	return rc;
-}
 	
 bool TextureCacheData::Delete()
 {
-	if (custom_load_in_progress > 0)
+	if (!BaseTextureCacheData::Delete())
 		return false;
-	
+
 	if (pData) {
 		#if FEAT_HAS_SOFTREND
 			_mm_free(pData);
@ -461,17 +148,12 @@ bool TextureCacheData::Delete()
 	if (texID) {
 		glcache.DeleteTextures(1, &texID);
 	}
-	if (lock_block)
-		libCore_vramlock_Unlock_block(lock_block);
-	lock_block=0;
-
-	delete[] custom_image_data;
 	
 	return true;
 }

-static map<u64,TextureCacheData> TexCache;
-typedef map<u64,TextureCacheData>::iterator TexCacheIter;
+static std::unordered_map<u64, TextureCacheData> TexCache;
+typedef std::unordered_map<u64, TextureCacheData>::iterator TexCacheIter;

 static TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw);

@ -574,19 +256,13 @@ void ReadRTTBuffer() {
 	{
 		u32 tex_addr = gl.rtt.TexAddr << 3;

-		// Manually mark textures as dirty and remove all vram locks before calling glReadPixels
+		// Remove all vram locks before calling glReadPixels
 		// (deadlock on rpi)
-		for (TexCacheIter i = TexCache.begin(); i != TexCache.end(); i++)
-		{
-			if (i->second.sa_tex <= tex_addr + size - 1 && i->second.sa + i->second.size - 1 >= tex_addr) {
-				i->second.dirty = FrameCount;
-				if (i->second.lock_block != NULL) {
-					libCore_vramlock_Unlock_block(i->second.lock_block);
-					i->second.lock_block = NULL;
-				}
-			}
-		}
-		_vmem_unprotect_vram(0, VRAM_SIZE);
+		u32 page_tex_addr = tex_addr & PAGE_MASK;
+		u32 page_size = size + tex_addr - page_tex_addr;
+		page_size = ((page_size - 1) / PAGE_SIZE + 1) * PAGE_SIZE;
+		for (u32 page = page_tex_addr; page < page_tex_addr + page_size; page += PAGE_SIZE)
+			VramLockedWriteOffset(page);

 		glPixelStorei(GL_PACK_ALIGNMENT, 1);
 		u16 *dst = (u16 *)&vram[tex_addr];
@ -641,13 +317,6 @@ void ReadRTTBuffer() {
 				dst += (stride - w * 2) / 2;
 			}
 		}
-
-		// Restore VRAM locks
-		for (TexCacheIter i = TexCache.begin(); i != TexCache.end(); i++)
-		{
-			if (i->second.lock_block != NULL)
-				_vmem_protect_vram(i->second.sa_tex, i->second.sa + i->second.size - i->second.sa_tex);
-		}
 	}
 	else
 	{
@ -683,7 +352,7 @@ void ReadRTTBuffer() {
    	if (texture_data->texID != 0)
    		glcache.DeleteTextures(1, &texture_data->texID);
    	else
-    		texture_data->Create(false);
+    		texture_data->Create();
    	texture_data->texID = gl.rtt.tex;
    	texture_data->dirty = 0;
    	if (texture_data->lock_block == NULL)
@ -745,7 +414,10 @@ GLuint gl_GetTexture(TSP tsp, TCW tcw)
 	TextureCacheData* tf = getTextureCacheData(tsp, tcw);

 	if (tf->texID == 0)
-		tf->Create(true);
+	{
+		tf->Create();
+		tf->texID = glcache.GenTexture();
+	}

 	//update if needed
 	if (tf->NeedsUpdate())
@ -792,7 +464,7 @@ text_info raw_GetTexture(TSP tsp, TCW tcw)

 		tf->tsp = tsp;
 		tf->tcw = tcw;
-		tf->Create(false);
+		tf->Create();
 	}

 	//update if needed
@ -806,7 +478,7 @@ text_info raw_GetTexture(TSP tsp, TCW tcw)
 	rv.height = tf->h;
 	rv.width = tf->w;
 	rv.pdata = tf->pData;
-	rv.textype = tf->tex_type;
+	rv.textype = (u32)tf->tex_type;
 	
 	
 	return rv;
@ -817,21 +489,20 @@ void CollectCleanup() {

 	u32 TargetFrame = max((u32)120,FrameCount) - 120;

-	for (TexCacheIter i=TexCache.begin();i!=TexCache.end();i++)
+	for (const auto& pair : TexCache)
 	{
-		if ( i->second.dirty &&  i->second.dirty < TargetFrame) {
-			list.push_back(i->first);
-		}
+		if (pair.second.dirty && pair.second.dirty < TargetFrame)
+			list.push_back(pair.first);

 		if (list.size() > 5)
 			break;
 	}

-	for (size_t i=0; i<list.size(); i++) {
-		if (TexCache[list[i]].Delete())
+	for (u64 id : list) {
+		if (TexCache[id].Delete())
 		{
 			//printf("Deleting %d\n", TexCache[list[i]].texID);
-			TexCache.erase(list[i]);
+			TexCache.erase(id);
 		}
 	}
 }
@ -841,10 +512,8 @@ void DoCleanup() {
 }
 void killtex()
 {
-	for (TexCacheIter i=TexCache.begin();i!=TexCache.end();i++)
-	{
-		i->second.Delete();
-	}
+	for (auto& pair : TexCache)
+		pair.second.Delete();

 	TexCache.clear();
 	KillTex = false;
--- a/core/rend/sorter.cpp
+++ b/core/rend/sorter.cpp
@ -0,0 +1,455 @@
+/*
+	 This file is part of reicast.
+
+	 reicast is free software: you can redistribute it and/or modify
+	 it under the terms of the GNU General Public License as published by
+	 the Free Software Foundation, either version 2 of the License, or
+	 (at your option) any later version.
+
+	 reicast is distributed in the hope that it will be useful,
+	 but WITHOUT ANY WARRANTY; without even the implied warranty of
+	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	 GNU General Public License for more details.
+
+	 You should have received a copy of the GNU General Public License
+	 along with reicast.  If not, see <https://www.gnu.org/licenses/>.
+ */
+#include <algorithm>
+#include "sorter.h"
+
+// One triangle extracted from a translucent strip, as collected by GenSorted
+// before depth sorting.
+struct IndexTrig
+{
+	u32 id[3];	// the three vertex indices, relative to the vertex array base (see fill_id)
+	u32 pid;	// index of the owning PolyParam relative to the first sorted one;
+				// 32-bit so the u32 index assigned in GenSorted is never truncated
+	f32 z;		// sort key: smallest z of the three vertices (see minZ)
+};
+
+// Returns the smallest of the three values.
+float min3(float v0,float v1,float v2)
+{
+	return std::min({ v0, v1, v2 });
+}
+
+// Returns the largest of the three values.
+float max3(float v0,float v1,float v2)
+{
+	return std::max({ v0, v1, v2 });
+}
+
+// Returns the smallest z among the three vertices of array v selected by the
+// indices mod[0], mod[1] and mod[2].
+float minZ(Vertex* v, u32* mod)
+{
+	const float z0 = v[mod[0]].z;
+	const float z1 = v[mod[1]].z;
+	const float z2 = v[mod[2]].z;
+
+	return std::min(std::min(z0, z1), z2);
+}
+
+// Orders triangles by ascending z key; this is the comparison used when
+// GenSorted stable-sorts its triangle list.
+bool operator<(const IndexTrig &left, const IndexTrig &right)
+{
+	return right.z > left.z;
+}
+
+// Orders poly params by ascending zvZ, the key filled in by SortPParams.
+bool operator<(const PolyParam &left, const PolyParam &right)
+{
+	// Any other sorting criterion can be substituted here,
+	// e.g. left.zMin<right.zMax
+	return right.zvZ > left.zvZ;
+}
+
+// Per-strip sorting: computes a z key for each of the translucent poly params
+// [first, first + count) and stable-sorts them by that key (ascending).
+// The key is the smallest z found in the strip, compared through the raw bit
+// pattern of the float; strips with fewer than two vertices get a key of 0.
+void SortPParams(int first, int count)
+{
+	// Nothing to order without vertices or with fewer than two poly params
+	if (pvrrc.verts.used() == 0 || count <= 1)
+		return;
+
+	Vertex* const vertices = pvrrc.verts.head();
+	u32* const indices = pvrrc.idx.head();
+
+	PolyParam* const params_begin = pvrrc.global_param_tr.head() + first;
+	PolyParam* const params_end = params_begin + count;
+
+	for (PolyParam* param = params_begin; param != params_end; ++param)
+	{
+		if (param->count < 2)
+		{
+			param->zvZ = 0;
+			continue;
+		}
+
+		u32* strip = indices + param->first;
+
+		// Walks the vertex array from the strip's first to its last vertex
+		Vertex* const strip_end = vertices + strip[param->count - 1] + 1;
+
+		u32 lowest = 0xFFFFFFFF;
+		for (Vertex* cur = vertices + strip[0]; cur != strip_end; ++cur)
+			lowest = min(lowest, (u32&)cur->z);
+
+		param->zvZ = (f32&)lowest;
+	}
+
+	std::stable_sort(params_begin, params_end);
+}
+
+static Vertex* vtx_sort_base;	// vertex array base, set by GenSorted; only read by the disabled (#if 0) experiments below
+
+#if 0
+/*
+
+	Per triangle sorting experiments (compiled out by the enclosing #if 0)
+
+*/
+
+//approximate the triangle area. NOTE(review): 2/3 is an integer division (== 0), so this always returns 0
+float area_x2(Vertex* v)
+{
+	return 2/3*fabs( (v[0].x-v[2].x)*(v[1].y-v[0].y) - (v[0].x-v[1].x)*(v[2].y-v[0].y)) ;
+}
+
+//squared distance in x/y between vertices *a and *b (z is ignored)
+float distance_apprx(Vertex* a, Vertex* b)
+{
+	float xd=a->x-b->x;
+	float yd=a->y-b->y;
+
+	return xd*xd+yd*yd;
+}
+
+//was a good idea, but not really working. NOTE(review): a2 is computed but never used; a1+a1 was possibly meant to be a1+a2
+bool Intersect(Vertex* a, Vertex* b)
+{
+	float a1=area_x2(a);
+	float a2=area_x2(b);
+
+	float d = distance_apprx(a,b);
+
+	return (a1+a1)>d;
+}
+
+//root for quick-union: follows v[id] until an entry that points to itself is reached
+u16 rid(vector<u16>& v, u16 id)
+{
+	while(id!=v[id]) id=v[id];
+	return id;
+}
+
+struct TrigBounds
+{
+	float xs,xe;
+	float ys,ye;
+	float zs,ze;
+};
+
+//find the 3d bounding box (min/max of x, y and z) of the triangle v[0..2]
+TrigBounds bound(Vertex* v)
+{
+	TrigBounds rv = {	min(min(v[0].x,v[1].x),v[2].x), max(max(v[0].x,v[1].x),v[2].x),
+						min(min(v[0].y,v[1].y),v[2].y), max(max(v[0].y,v[1].y),v[2].y),
+						min(min(v[0].z,v[1].z),v[2].z), max(max(v[0].z,v[1].z),v[2].z),
+					};
+
+	return rv;
+}
+
+//bounding box 2d intersection (x/y only, the z range test is commented out)
+bool Intersect(TrigBounds& a, TrigBounds& b)
+{
+	return  ( !(a.xe<b.xs || a.xs>b.xe) && !(a.ye<b.ys || a.ys>b.ye) /*&& !(a.ze<b.zs || a.zs>b.ze)*/ );
+}
+
+
+bool operator<(const IndexTrig &left, const IndexTrig &right) // NOTE(review): same signature as the active operator< above; enabling this block would redefine it
+{
+	/*
+	TrigBounds l=bound(vtx_sort_base+left.id);
+	TrigBounds r=bound(vtx_sort_base+right.id);
+
+	if (!Intersect(l,r))
+	{
+		return true;
+	}
+	else
+	{
+		return (l.zs + l.ze) < (r.zs + r.ze);
+	}*/
+
+	return minZ(&vtx_sort_base[left.id])<minZ(&vtx_sort_base[right.id]); // NOTE(review): does not match the active minZ(Vertex*, u32*) signature
+}
+
+//Not really working because of the broken intersect test
+bool Intersect(const IndexTrig &left, const IndexTrig &right)
+{
+	TrigBounds l=bound(vtx_sort_base+left.id);
+	TrigBounds r=bound(vtx_sort_base+right.id);
+
+	return Intersect(l,r);
+}
+
+#endif
+
+//are two poly params the same?
+// Returns true when two poly params are interchangeable for drawing: the PCW
+// bits selected by PCW_DRAW_MASK, the ISP, TCW and TSP words and the tile
+// clipping value must all be equal.
+bool PP_EQ(PolyParam* pp0, PolyParam* pp1)
+{
+	if ((pp0->pcw.full&PCW_DRAW_MASK) != (pp1->pcw.full&PCW_DRAW_MASK))
+		return false;
+	if (pp0->isp.full != pp1->isp.full)
+		return false;
+	if (pp0->tcw.full != pp1->tcw.full)
+		return false;
+	if (pp0->tsp.full != pp1->tsp.full)
+		return false;
+
+	return pp0->tileclip == pp1->tileclip;
+}
+
+// Stores in d[0..2] the positions of v0, v1 and v2 relative to the vertex
+// array base vb, i.e. their indices in that array.
+void fill_id(u32* d, Vertex* v0, Vertex* v1, Vertex* v2,  Vertex* vb)
+{
+	Vertex* const corners[3] = { v0, v1, v2 };
+
+	for (int i = 0; i < 3; i++)
+		d[i] = corners[i] - vb;
+}
+
+// Per-triangle sorting of the translucent poly params [first, first + count)
+// of pvrrc.global_param_tr.
+//
+// Every strip with more than two vertices is split into individual triangles,
+// the triangles are stable-sorted by their smallest vertex z (ascending) and
+// then re-assembled into draw commands:
+//   pidx_sort - one entry per run of consecutive sorted triangles that share a
+//               poly param (ppid), giving the run's first index and index count
+//   vidx_sort - three vertex indices per triangle, in sorted order
+//
+// pidx_sort is always cleared first. On the early-return paths vidx_sort is
+// left untouched, so callers must rely on pidx_sort being empty.
+void GenSorted(int first, int count, vector<SortTrigDrawParam>& pidx_sort, vector<u32>& vidx_sort)
+{
+	u32 tess_gen=0;
+
+	pidx_sort.clear();
+
+	// NOTE(review): a pass with a single poly param also returns here with an
+	// empty pidx_sort - confirm that callers handle (or intend) that case.
+	if (pvrrc.verts.used() == 0 || count <= 1)
+		return;
+
+	Vertex* vtx_base=pvrrc.verts.head();
+	u32* idx_base = pvrrc.idx.head();
+
+	PolyParam* pp_base = &pvrrc.global_param_tr.head()[first];
+	PolyParam* pp = pp_base;
+	PolyParam* pp_end = pp + count;
+
+	vtx_sort_base=vtx_base;
+
+	// High-water mark of vtx_count, only reported by PRINT_SORT_STATS
+	static u32 vtx_cnt;
+
+	// Span between the first vertex of the first strip and the last vertex of
+	// the last strip; used as an upper bound for the number of triangles
+	int vtx_count=idx_base[pp_end[-1].first+pp_end[-1].count-1]-idx_base[pp->first];
+	// Only track positive counts: a negative value would otherwise be
+	// converted to a huge unsigned number by the comparison
+	if (vtx_count>0 && (u32)vtx_count>vtx_cnt)
+		vtx_cnt=vtx_count;
+
+#if PRINT_SORT_STATS
+	printf("TVTX: %d || %d\n",vtx_cnt,vtx_count);
+#endif
+
+	if (vtx_count<=0)
+		return;
+
+	//make lists of all triangles, with their pid and vid
+	static vector<IndexTrig> lst;
+
+	lst.resize(vtx_count*4);
+
+
+	int pfsti=0;
+
+	while(pp!=pp_end)
+	{
+		u32 ppid=(pp-pp_base);
+
+		if (pp->count>2)
+		{
+			u32* idx = idx_base + pp->first;
+
+			Vertex* vtx=vtx_base+idx[0];
+			Vertex* vtx_end=vtx_base + idx[pp->count-1]-1;
+			// Winding alternates from one triangle of the strip to the next
+			u32 flip=0;
+			while(vtx!=vtx_end)
+			{
+				Vertex* v0, * v1, * v2, * v3, * v4, * v5;
+
+				if (flip)
+				{
+					v0=&vtx[1];
+					v1=&vtx[0];
+					v2=&vtx[2];
+				}
+				else
+				{
+					v0=&vtx[0];
+					v1=&vtx[1];
+					v2=&vtx[2];
+				}
+#if 0
+				if (settings.pvr.subdivide_transp)
+				{
+					u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32;
+					u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32;
+
+					if (tess_x==1) tess_x=0;
+					if (tess_y==1) tess_y=0;
+
+					//bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2;
+
+					if (tess_x + tess_y)
+					{
+						v3=pvrrc.verts.Append(3);
+						v4=v3+1;
+						v5=v4+1;
+
+						//xyz
+						for (int i=0;i<3;i++)
+						{
+							((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f;
+							((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f;
+							((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f;
+						}
+
+						//*TODO* Make it perspective correct
+
+						//uv
+						for (int i=0;i<2;i++)
+						{
+							((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f;
+							((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f;
+							((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f;
+						}
+
+						//color
+						for (int i=0;i<4;i++)
+						{
+							v3->col[i]=v0->col[i]/2+v2->col[i]/2;
+							v4->col[i]=v0->col[i]/2+v1->col[i]/2;
+							v5->col[i]=v1->col[i]/2+v2->col[i]/2;
+						}
+
+						fill_id(lst[pfsti].id,v0,v3,v4,vtx_base);
+						lst[pfsti].pid= ppid ;
+						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
+						pfsti++;
+
+						fill_id(lst[pfsti].id,v2,v3,v5,vtx_base);
+						lst[pfsti].pid= ppid ;
+						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
+						pfsti++;
+
+						fill_id(lst[pfsti].id,v3,v4,v5,vtx_base);
+						lst[pfsti].pid= ppid ;
+						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
+						pfsti++;
+
+						fill_id(lst[pfsti].id,v5,v4,v1,vtx_base);
+						lst[pfsti].pid= ppid ;
+						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
+						pfsti++;
+
+						tess_gen+=3;
+					}
+					else
+					{
+						fill_id(lst[pfsti].id,v0,v1,v2,vtx_base);
+						lst[pfsti].pid= ppid ;
+						lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
+						pfsti++;
+					}
+				}
+				else
+#endif
+				{
+					fill_id(lst[pfsti].id,v0,v1,v2,vtx_base);
+					lst[pfsti].pid= ppid ;
+					lst[pfsti].z = minZ(vtx_base,lst[pfsti].id);
+					pfsti++;
+				}
+
+				flip ^= 1;
+
+				vtx++;
+			}
+		}
+		pp++;
+	}
+
+	// Number of triangles actually collected
+	u32 aused=pfsti;
+
+	lst.resize(aused);
+
+	//sort them
+#if 1
+	std::stable_sort(lst.begin(),lst.end());
+
+	//Merge pids/draw cmds if two different pids are actually equal
+	if (true)
+	{
+		for (u32 k=1;k<aused;k++)
+		{
+			if (lst[k].pid!=lst[k-1].pid)
+			{
+				if (PP_EQ(&pp_base[lst[k].pid],&pp_base[lst[k-1].pid]))
+				{
+					lst[k].pid=lst[k-1].pid;
+				}
+			}
+		}
+	}
+#endif
+
+
+#if 0
+	//tries to optimise draw calls by reordering non-intersecting polygons
+	//uber slow and not very effective
+	{
+		int opid=lst[0].pid;
+
+		for (int k=1;k<aused;k++)
+		{
+			if (lst[k].pid!=opid)
+			{
+				if (opid>lst[k].pid)
+				{
+					//MOVE UP
+					for (int j=k;j>0 && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j--)
+					{
+						swap(lst[j],lst[j-1]);
+					}
+				}
+				else
+				{
+					//move down
+					for (int j=k+1;j<aused && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j++)
+					{
+						swap(lst[j],lst[j-1]);
+					}
+				}
+			}
+
+			opid=lst[k].pid;
+		}
+	}
+#endif
+
+	//re-assemble them into drawing commands
+	vidx_sort.resize(aused*3);
+
+	int idx=-1;
+
+	for (u32 i=0; i<aused; i++)
+	{
+		int pid=lst[i].pid;
+		u32* midx = lst[i].id;
+
+		vidx_sort[i*3 + 0]=midx[0];
+		vidx_sort[i*3 + 1]=midx[1];
+		vidx_sort[i*3 + 2]=midx[2];
+
+		// Start a new draw command whenever the poly param changes
+		if (idx!=pid /* && !PP_EQ(&pp_base[pid],&pp_base[idx]) */ )
+		{
+			SortTrigDrawParam stdp = { pp_base + pid, i * 3, 0 };
+
+			if (idx!=-1)
+			{
+				// Close the previous command: it ends where this one starts
+				SortTrigDrawParam* last=&pidx_sort[pidx_sort.size()-1];
+				last->count=stdp.first-last->first;
+			}
+
+			pidx_sort.push_back(stdp);
+			idx=pid;
+		}
+	}
+
+	// Close the last draw command. pidx_sort is still empty when no strip had
+	// more than two vertices (aused == 0); indexing its last element in that
+	// case would be out of bounds, so there is nothing to fix up.
+	if (!pidx_sort.empty())
+	{
+		SortTrigDrawParam& last_cmd=pidx_sort.back();
+		last_cmd.count=aused*3-last_cmd.first;
+	}
+
+#if PRINT_SORT_STATS
+	printf("Reassembled into %d from %d\n",(int)pidx_sort.size(),(int)(pp_end-pp_base));
+#endif
+
+	if (tess_gen) DEBUG_LOG(RENDERER, "Generated %.2fK Triangles !", tess_gen / 1000.0);
+}
+
--- a/core/rend/sorter.h
+++ b/core/rend/sorter.h
@ -0,0 +1,32 @@
+/*
+	 This file is part of reicast.
+
+	 reicast is free software: you can redistribute it and/or modify
+	 it under the terms of the GNU General Public License as published by
+	 the Free Software Foundation, either version 2 of the License, or
+	 (at your option) any later version.
+
+	 reicast is distributed in the hope that it will be useful,
+	 but WITHOUT ANY WARRANTY; without even the implied warranty of
+	 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+	 GNU General Public License for more details.
+
+	 You should have received a copy of the GNU General Public License
+	 along with reicast.  If not, see <https://www.gnu.org/licenses/>.
+ */
+#pragma once
+#include "types.h"
+#include "hw/pvr/Renderer_if.h"
+
+//Sort based on min-z of each strip: stable-sorts the translucent poly params [first, first + count) by ascending key
+void SortPParams(int first, int count);
+
+struct SortTrigDrawParam	// one draw command produced by GenSorted
+{
+	PolyParam* ppid;	// poly param to draw this run of triangles with
+	u32 first;			// offset of the run's first entry in the sorted index list
+	u32 count;			// number of indices in the run (3 per triangle)
+};
+
+// Sort based on min-z of each triangle: fills sorted_pp with draw commands and sorted_idx with the reordered vertex indices for poly params [first, first + count)
+void GenSorted(int first, int count, vector<SortTrigDrawParam>& sorted_pp, vector<u32>& sorted_idx);