softrend: Resembling a pixel pipeline, textures, sort, cull

- Basic pixel pipeline, a bit better triangle tests, specialized render handlers
- Textures w/ point filtering. Not very smart, as it goes 32 -> 16 -> 32 bpp, but it works.
- The texture cache is shared, rather inelegantly, w/ the OpenGL one
- Culling
- PParam sorting (shared w/ GL)

The texturing and color blending paths are ugly and slow
This commit is contained in:
Stefanos Kornilios Mitsis Poiitidis 2015-08-05 17:02:34 +02:00 committed by TwistedUmbrella
parent fcf273dd11
commit e6a9d3e661
4 changed files with 923 additions and 551 deletions

View File

@ -419,7 +419,7 @@ union FPU_SHAD_SCALE_type
#define FPU_SHAD_SCALE PvrReg(FPU_SHAD_SCALE_addr,FPU_SHAD_SCALE_type) // RW Intensity Volume mode
#define FPU_CULL_VAL PvrReg(FPU_CULL_VAL_addr,u32) // RW Comparison value for culling
#define FPU_CULL_VAL PvrReg(FPU_CULL_VAL_addr,f32) // RW Comparison value for culling
#define FPU_PARAM_CFG PvrReg(FPU_PARAM_CFG_addr,u32) // RW Parameter read control
#define HALF_OFFSET PvrReg(HALF_OFFSET_addr,u32) // RW Pixel sampling control
#define FPU_PERP_VAL PvrReg(FPU_PERP_VAL_addr,u32) // RW Comparison value for perpendicular polygons

View File

@ -100,8 +100,17 @@ struct gl_ctx
extern gl_ctx gl;
GLuint gl_GetTexture(TSP tsp,TCW tcw);
// CPU-side description of a cached texture, as returned by raw_GetTexture().
// The fields are copied from the matching TextureCacheData entry.
struct text_info {
// 16-bit texels copied from the cache entry's pData. The cache entry owns and
// frees this buffer; it is null for entries created through the GL path,
// which never allocate pData.
u16* pdata;
// Texture width in texels (cache entry's w).
u32 width;
// Texture height in texels (cache entry's h).
u32 height;
// Texel format selector: 0 = 565, 1 = 1555 (set when the upload type is
// GL_UNSIGNED_SHORT_5_5_5_1), 2 = 4444.
u32 textype; // 0 565, 1 1555, 2 4444
};
text_info raw_GetTexture(TSP tsp, TCW tcw);
void CollectCleanup();
void DoCleanup();
void SortPParams();
void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt);
int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode,

View File

@ -69,6 +69,8 @@ struct TextureCacheData
TCW tcw;
GLuint texID; //gl texture
u16* pData;
int tex_type;
u32 Lookups;
@ -122,10 +124,18 @@ struct TextureCacheData
}
//Create GL texture from tsp/tcw
void Create()
void Create(bool isGL)
{
//ask GL for texture ID
glGenTextures(1,&texID);
if (isGL) {
glGenTextures(1, &texID);
}
else {
texID = 0;
}
pData = 0;
tex_type = 0;
//Reset state info ..
Lookups=0;
@ -141,19 +151,20 @@ struct TextureCacheData
w=8<<tsp.TexU; //tex width
h=8<<tsp.TexV; //tex height
if (texID) {
//bind texture to set modes
glBindTexture(GL_TEXTURE_2D,texID);
glBindTexture(GL_TEXTURE_2D, texID);
//set texture repeat mode
SetRepeatMode(GL_TEXTURE_WRAP_S,tsp.ClampU,tsp.FlipU); // glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (tsp.ClampU ? GL_CLAMP_TO_EDGE : (tsp.FlipU ? GL_MIRRORED_REPEAT : GL_REPEAT))) ;
SetRepeatMode(GL_TEXTURE_WRAP_T,tsp.ClampV,tsp.FlipV); // glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (tsp.ClampV ? GL_CLAMP_TO_EDGE : (tsp.FlipV ? GL_MIRRORED_REPEAT : GL_REPEAT))) ;
SetRepeatMode(GL_TEXTURE_WRAP_S, tsp.ClampU, tsp.FlipU); // glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, (tsp.ClampU ? GL_CLAMP_TO_EDGE : (tsp.FlipU ? GL_MIRRORED_REPEAT : GL_REPEAT))) ;
SetRepeatMode(GL_TEXTURE_WRAP_T, tsp.ClampV, tsp.FlipV); // glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, (tsp.ClampV ? GL_CLAMP_TO_EDGE : (tsp.FlipV ? GL_MIRRORED_REPEAT : GL_REPEAT))) ;
#ifdef GLES
glHint(GL_GENERATE_MIPMAP_HINT, GL_NICEST);
#endif
//set texture filter mode
if ( tsp.FilterMode == 0 )
if (tsp.FilterMode == 0)
{
//disable filtering, mipmaps
glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_NEAREST);
@ -166,6 +177,7 @@ struct TextureCacheData
glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER, (tcw.MipMapped && settings.rend.UseMipmaps)?GL_LINEAR_MIPMAP_NEAREST:GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_LINEAR);
}
}
//PAL texture
if (tex->bpp==4)
@ -286,21 +298,43 @@ struct TextureCacheData
//lock the texture to detect changes in it
lock_block = libCore_vramlock_Lock(sa_tex,sa+size-1,this);
if (texID) {
//upload to OpenGL !
glBindTexture(GL_TEXTURE_2D, texID);
GLuint comps=textype==GL_UNSIGNED_SHORT_5_6_5?GL_RGB:GL_RGBA;
glTexImage2D(GL_TEXTURE_2D, 0,comps , w, h, 0, comps, textype, temp_tex_buffer);
if (tcw.MipMapped && settings.rend.UseMipmaps)
glGenerateMipmap(GL_TEXTURE_2D);
}
else {
if (textype == GL_UNSIGNED_SHORT_5_6_5)
tex_type = 0;
else if (textype == GL_UNSIGNED_SHORT_5_5_5_1)
tex_type = 1;
else if (textype == GL_UNSIGNED_SHORT_4_4_4_4)
tex_type = 2;
if (pData) {
free(pData);
}
pData = (u16*)malloc(w * h * 2);
memcpy(pData, temp_tex_buffer, w * h * 2);
}
}
//true if : dirty or paletted texture and revs don't match
bool NeedsUpdate() { return (dirty) || (pal_table_rev!=0 && *pal_table_rev!=pal_local_rev); }
void Delete()
{
glDeleteTextures(1,&texID);
if (pData) {
free(pData);
pData = 0;
}
if (texID) {
glDeleteTextures(1, &texID);
}
if (lock_block)
libCore_vramlock_Unlock_block(lock_block);
lock_block=0;
@ -414,7 +448,7 @@ GLuint gl_GetTexture(TSP tsp, TCW tcw)
tf->tsp=tsp;
tf->tcw=tcw;
tf->Create();
tf->Create(true);
}
//update if needed
@ -428,6 +462,52 @@ GLuint gl_GetTexture(TSP tsp, TCW tcw)
return tf->texID;
}
// Look up (or create) the texture cache entry for tsp/tcw without touching
// OpenGL, refresh it if needed, and return a raw view of its texel data.
// Entries created here are made with Create(false), i.e. with no GL texture.
text_info raw_GetTexture(TSP tsp, TCW tcw)
{
	// Cache key: TCW word in the high 32 bits, TSP word in the low 32 bits.
	const u64 cache_key = ((u64)tcw.full << 32) | tsp.full;

	TexCacheIter it = TexCache.find(cache_key);
	if (it == TexCache.end())
	{
		// Not cached yet: insert a zeroed entry and initialise it in place.
		TextureCacheData blank = { 0 };
		TexCache[cache_key] = blank;
		it = TexCache.find(cache_key);

		it->second.tsp = tsp;
		it->second.tcw = tcw;
		it->second.Create(false);
	}

	TextureCacheData* entry = &it->second;

	// Refresh the texel data when the entry reports it needs an update.
	if (entry->NeedsUpdate())
		entry->Update();

	// Usage statistics for the cache.
	entry->Lookups++;

	// Hand back the raw texel view of the entry.
	text_info info = { 0 };
	info.pdata = entry->pData;
	info.width = entry->w;
	info.height = entry->h;
	info.textype = entry->tex_type;
	return info;
}
void CollectCleanup() {
vector<u64> list;

View File

@ -19,40 +19,28 @@
BITMAPINFOHEADER bi = { sizeof(BITMAPINFOHEADER), 0, 0, 1, 32, BI_RGB };
struct softrend : Renderer
#include "rend/gles/gles.h"
u32 decoded_colors[3][65536];
DECL_ALIGN(32) u32 render_buffer[640 * 480 * 2 * 4]; //Color + depth
DECL_ALIGN(32) u32 pixels[640 * 480 * 4];
static __m128 _mm_load_scaled_float(float v, float s)
{
virtual bool Process(TA_context* ctx) {
//disable RTTs for now ..
if (ctx->rend.isRTT)
return false;
ctx->rend_inuse.Lock();
ctx->MarkRend();
if (!ta_parse_vdrc(ctx))
return false;
return true;
}
DECL_ALIGN(32) u32 render_buffer[640 * 480 * 2 * 4]; //Color + depth
DECL_ALIGN(32) u32 pixels[640 * 480 * 4];
static __m128 _mm_load_scaled_float(float v, float s)
{
return _mm_setr_ps(v, v + s, v + s + s, v + s + s + s);
}
static __m128 _mm_broadcast_float(float v)
{
}
static __m128 _mm_broadcast_float(float v)
{
return _mm_setr_ps(v, v, v, v);
}
static __m128i _mm_broadcast_int(int v)
{
}
static __m128i _mm_broadcast_int(int v)
{
__m128i rv = _mm_cvtsi32_si128(v);
return _mm_shuffle_epi32(rv, 0);
}
static __m128 _mm_load_ps_r(float a, float b, float c, float d)
{
}
static __m128 _mm_load_ps_r(float a, float b, float c, float d)
{
static __declspec(align(128)) float v[4];
v[0] = a;
v[1] = b;
@ -60,31 +48,31 @@ struct softrend : Renderer
v[3] = d;
return _mm_load_ps(v);
}
}
__forceinline int iround(float x)
{
__forceinline int iround(float x)
{
return _mm_cvtt_ss2si(_mm_load_ss(&x));
}
}
float mmin(float a, float b, float c, float d)
{
float mmin(float a, float b, float c, float d)
{
int rv = min(a, b);
rv = min(c, rv);
return max(d, rv);
}
}
float mmax(float a, float b, float c, float d)
{
float mmax(float a, float b, float c, float d)
{
int rv = max(a, b);
rv = max(c, rv);
return min(d, rv);
}
}
//i think this gives false positives ...
//yup, if ANY of the 3 tests fail the ANY tests fails.
__forceinline void EvalHalfSpace(bool& all, bool& any, float cp, float sv, float lv)
{
//i think this gives false positives ...
//yup, if ANY of the 3 tests fail the ANY tests fails.
__forceinline void EvalHalfSpace(bool& all, bool& any, float cp, float sv, float lv)
{
//bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
//bool a10 = C1 + DX12 * y0 - DY12 * x0 > qDY12;
//bool a01 = C1 + DX12 * y0 - DY12 * x0 > -qDX12;
@ -99,29 +87,29 @@ struct softrend : Renderer
any &= a;
all &= b;
}
}
//return true if any is positive
__forceinline bool EvalHalfSpaceFAny(float cp12, float cp23, float cp31)
{
//return true if any is positive
__forceinline bool EvalHalfSpaceFAny(float cp12, float cp23, float cp31)
{
bool svt = cp12 > 0; //needed for ANY
svt |= cp23 > 0;
svt |= cp31 > 0;
return svt;
}
}
__forceinline bool EvalHalfSpaceFAll(float cp12, float cp23, float cp31, float lv12, float lv23, float lv31)
{
__forceinline bool EvalHalfSpaceFAll(float cp12, float cp23, float cp31, float lv12, float lv23, float lv31)
{
bool lvt = (cp12 - lv12) > 0;
lvt &= (cp23 - lv23) > 0;
lvt &= (cp31 - lv31) > 0; //needed for all
return lvt;
}
}
__forceinline void PlaneMinMax(float& MIN, float& MAX, float DX, float DY, float q)
{
__forceinline void PlaneMinMax(float& MIN, float& MAX, float DX, float DY, float q)
{
float q_fp = (q - 1);
float v1 = 0;
float v2 = q_fp*DY;
@ -130,10 +118,10 @@ struct softrend : Renderer
MIN = min(v1, min(v2, min(v3, v4)));
MAX = max(v1, max(v2, max(v3, v4)));
}
}
struct PlaneStepper
{
struct PlaneStepper
{
__m128 ddx, ddy;
__m128 c;
@ -196,19 +184,25 @@ struct softrend : Renderer
{
return _mm_add_ps(bas, ddx);
}
};
};
struct IPs
{
struct IPs
{
PlaneStepper ZUV;
PlaneStepper Col;
void Setup(const Vertex &v1, const Vertex &v2, const Vertex &v3, int minx, int miny, int q)
void Setup(PolyParam* pp, text_info* texture, const Vertex &v1, const Vertex &v2, const Vertex &v3, int minx, int miny, int q)
{
u32 w = 0, h = 0;
if (texture) {
w = texture->width;
h = texture->height;
}
ZUV.Setup(v1, v2, v3, minx, miny, q,
v1.z, v2.z, v3.z,
v1.u, v2.u, v3.u,
v1.v, v2.v, v3.v,
v1.u * w, v2.u * w, v3.u * w,
v1.v * h, v2.v * h, v3.v * h,
0, -1, 1);
Col.Setup(v1, v2, v3, minx, miny, q,
@ -218,16 +212,32 @@ struct softrend : Renderer
v1.col[3], v2.col[3], v3.col[3]
);
}
};
};
IPs __declspec(align(64)) ip;
IPs __declspec(align(64)) ip;
#define TPL_DECL_pixel template<bool useoldmsk, bool alpha_blend, bool pp_UseAlpha, bool pp_Texture, bool pp_IgnoreTexA, int pp_ShadInstr, bool pp_Offset >
#define TPL_DECL_triangle template<bool alpha_blend, bool pp_UseAlpha, bool pp_Texture, bool pp_IgnoreTexA, int pp_ShadInstr, bool pp_Offset >
#define TPL_PRMS_pixel(useoldmsk) <useoldmsk, alpha_blend, pp_UseAlpha, pp_Texture, pp_IgnoreTexA, pp_ShadInstr, pp_Offset >
#define TPL_PRMS_triangle <alpha_blend, pp_UseAlpha, pp_Texture, pp_IgnoreTexA, pp_ShadInstr, pp_Offset >
//<alpha_blend, pp_UseAlpha, pp_Texture, pp_IgnoreTexA, pp_ShadInstr, pp_Offset >
typedef void(*RendtriangleFn)(PolyParam* pp, int vertex_offset, const Vertex &v1, const Vertex &v2, const Vertex &v3, u32* colorBuffer);
RendtriangleFn RendtriangleFns[2][2][2][2][4][2];
template<bool useoldmsk, bool alpha_blend>
__forceinline void PixelFlush(__m128 x, __m128 y, u8* cb, __m128 oldmask)
{
// Mask with 0xFF in the top (alpha) byte of each of the four packed 32-bit pixels;
// OR-ing it into a pixel vector forces every pixel fully opaque.
// Built with _mm_set1_epi32 rather than a brace list: brace-initialising __m128i
// fills its leading byte lanes (on MSVC), so a list of 32-bit 0xFF000000 values
// is truncated per byte and does not produce this mask.
__m128i const_setAlpha = _mm_set1_epi32((int)0xFF000000);
__m128i shuffle_alpha = {
0x0E, 0x80, 0x0E, 0x80, 0x0E, 0x80, 0x0E, 0x80,
0x06, 0x80, 0x06, 0x80, 0x06, 0x80, 0x06, 0x80
};
TPL_DECL_pixel
static void PixelFlush(PolyParam* pp, text_info* texture, __m128 x, __m128 y, u8* cb, __m128 oldmask)
{
x = _mm_shuffle_ps(x, x, 0);
__m128 invW = ip.ZUV.Ip(x, y);
__m128 u = ip.ZUV.InStep(invW);
@ -259,19 +269,121 @@ struct softrend : Renderer
__m128 c = ip.Col.InStep(b);
__m128 d = ip.Col.InStep(c);
__m128i ui = _mm_cvttps_epi32(u);
__m128i vi = _mm_cvttps_epi32(v);
//(int)v<<x+(int)u
__m128i textadr = _mm_add_epi32(_mm_slli_epi32(vi, 8), ui);//texture addresses ! 4x of em !
//we need :
__m128i ab = _mm_packs_epi32(_mm_cvttps_epi32(a), _mm_cvttps_epi32(b));
__m128i cd = _mm_packs_epi32(_mm_cvttps_epi32(c), _mm_cvttps_epi32(d));
rv = _mm_packus_epi16(ab, cd);
//rv = _mm_xor_si128(rv,textadr);
if (!pp_UseAlpha) {
rv = _mm_or_si128(rv, const_setAlpha);
}
if (pp_Texture) {
__m128i ui = _mm_cvttps_epi32(u);
__m128i vi = _mm_cvttps_epi32(v);
//(int)v<<x+(int)u
__m128i textadr = _mm_add_epi32(_mm_slli_epi32(vi, 16), ui);//texture addresses ! 4x of em !
__m128i textel;
for (int i = 0; i < 4; i++) {
u32 u = textadr.m128i_i16[i * 2 + 0];
u32 v = textadr.m128i_i16[i * 2 + 1];
u32 textel_size = 2;
u %= texture->width;
v %= texture->height;
u32 pixel = decoded_colors[texture->textype][texture->pdata[(u + v * texture->width)]];
textel.m128i_i32[i] = pixel;
}
if (pp_IgnoreTexA) {
textel = _mm_or_si128(textel, const_setAlpha);
}
if (pp_ShadInstr == 0){
//color.rgb = texcol.rgb;
//color.a = texcol.a;
rv = textel;
}
else if (pp_ShadInstr == 1) {
//color.rgb *= texcol.rgb;
//color.a = texcol.a;
//color.a = 1
rv = _mm_or_si128(rv, const_setAlpha);
//color *= texcol
__m128i lo_rv = _mm_cvtepu8_epi16(rv);
__m128i hi_rv = _mm_cvtepu8_epi16(_mm_shuffle_epi32(rv, _MM_SHUFFLE(1, 0, 3, 2)));
__m128i lo_fb = _mm_cvtepu8_epi16(textel);
__m128i hi_fb = _mm_cvtepu8_epi16(_mm_shuffle_epi32(textel, _MM_SHUFFLE(1, 0, 3, 2)));
lo_rv = _mm_mullo_epi16(lo_rv, lo_fb);
hi_rv = _mm_mullo_epi16(hi_rv, hi_fb);
rv = _mm_packus_epi16(_mm_srli_epi16(lo_rv, 8), _mm_srli_epi16(hi_rv, 8));
}
else if (pp_ShadInstr == 2) {
//color.rgb=mix(color.rgb,texcol.rgb,texcol.a);
// a bit wrong atm, as it also mixes alphas
__m128i lo_rv = _mm_cvtepu8_epi16(rv);
__m128i hi_rv = _mm_cvtepu8_epi16(_mm_shuffle_epi32(rv, _MM_SHUFFLE(1, 0, 3, 2)));
__m128i lo_fb = _mm_cvtepu8_epi16(textel);
__m128i hi_fb = _mm_cvtepu8_epi16(_mm_shuffle_epi32(textel, _MM_SHUFFLE(1, 0, 3, 2)));
__m128i lo_rv_alpha = _mm_shuffle_epi8(lo_fb, shuffle_alpha);
__m128i hi_rv_alpha = _mm_shuffle_epi8(hi_fb, shuffle_alpha);
__m128i lo_fb_alpha = _mm_sub_epi16(_mm_set1_epi16(255), lo_rv_alpha);
__m128i hi_fb_alpha = _mm_sub_epi16(_mm_set1_epi16(255), hi_rv_alpha);
lo_rv = _mm_mullo_epi16(lo_rv, lo_rv_alpha);
hi_rv = _mm_mullo_epi16(hi_rv, hi_rv_alpha);
lo_fb = _mm_mullo_epi16(lo_fb, lo_fb_alpha);
hi_fb = _mm_mullo_epi16(hi_fb, hi_fb_alpha);
rv = _mm_packus_epi16(_mm_srli_epi16(_mm_adds_epu16(lo_rv, lo_fb), 8), _mm_srli_epi16(_mm_adds_epu16(hi_rv, hi_fb), 8));
}
else if (pp_ShadInstr == 3) {
//color*=texcol
__m128i lo_rv = _mm_cvtepu8_epi16(rv);
__m128i hi_rv = _mm_cvtepu8_epi16(_mm_shuffle_epi32(rv, _MM_SHUFFLE(1, 0, 3, 2)));
__m128i lo_fb = _mm_cvtepu8_epi16(textel);
__m128i hi_fb = _mm_cvtepu8_epi16(_mm_shuffle_epi32(textel, _MM_SHUFFLE(1, 0, 3, 2)));
lo_rv = _mm_mullo_epi16(lo_rv, lo_fb);
hi_rv = _mm_mullo_epi16(hi_rv, hi_fb);
rv = _mm_packus_epi16(_mm_srli_epi16(lo_rv, 8), _mm_srli_epi16(hi_rv, 8));
}
if (pp_Offset) {
//add offset
}
//textadr = _mm_add_epi32(textadr, _mm_setr_epi32(tex_addr, tex_addr, tex_addr, tex_addr));
//rv = textel; // _mm_xor_si128(rv, textadr);
}
}
//__m128i rv=ip.col;//_mm_xor_si128(_mm_cvtps_epi32(_mm_mul_ps(x,Z.c)),_mm_cvtps_epi32(y));
@ -279,18 +391,14 @@ struct softrend : Renderer
if (alpha_blend) {
__m128i fb = *(__m128i*)cb;
#if 0
for (int i = 0; i < 16; i+=4) {
for (int i = 0; i < 16; i += 4) {
u8 src_blend[4] = { rv.m128i_u8[i + 3], rv.m128i_u8[i + 3], rv.m128i_u8[i + 3], rv.m128i_u8[i + 3] };
u8 dst_blend[4] = { 255 - rv.m128i_u8[i + 3], 255 - rv.m128i_u8[i + 3], 255 - rv.m128i_u8[i + 3], 255 - rv.m128i_u8[i + 3] };
for (int j = 0; j < 4; j++) {
rv.m128i_u8[i + j] = (rv.m128i_u8[i + j] * src_blend[j])/256 + (fb.m128i_u8[i + j] * dst_blend[j])/256;
rv.m128i_u8[i + j] = (rv.m128i_u8[i + j] * src_blend[j]) / 256 + (fb.m128i_u8[i + j] * dst_blend[j]) / 256;
}
}
#else
static __m128i shuffle_alpha = {
0x0E, 0x80, 0x0E, 0x80, 0x0E, 0x80, 0x0E, 0x80,
0x06, 0x80, 0x06, 0x80, 0x06, 0x80, 0x06, 0x80
};
__m128i lo_rv = _mm_cvtepu8_epi16(rv);
@ -328,11 +436,18 @@ struct softrend : Renderer
}
*zb = invW;
*(__m128i*)cb = rv;
}
//u32 nok,fok;
TPL_DECL_triangle
static void Rendtriangle(PolyParam* pp, int vertex_offset, const Vertex &v1, const Vertex &v2, const Vertex &v3, u32* colorBuffer)
{
text_info texture = { 0 };
if (pp_Texture) {
texture = raw_GetTexture(pp->tsp, pp->tcw);
}
//u32 nok,fok;
template <bool alpha_blend>
void Rendtriangle(PolyParam* pp, int vertex_offset, const Vertex &v1, const Vertex &v2, const Vertex &v3, u32* colorBuffer)
{
const int stride = 640 * 4;
//Plane equation
@ -395,8 +510,8 @@ struct softrend : Renderer
const int q = 4;
// Bounding rectangle
int minx = iround(mmin(X1, X2, X3, 0) );// +0xF) >> 4;
int miny = iround(mmin(Y1, Y2, Y3, 0) );// +0xF) >> 4;
int minx = iround(mmin(X1, X2, X3, 0));// +0xF) >> 4;
int miny = iround(mmin(Y1, Y2, Y3, 0));// +0xF) >> 4;
// Start in corner of block
minx &= ~(q - 1);
@ -433,9 +548,9 @@ struct softrend : Renderer
const float FDX23mq = FDX23 + FDY23*q;
const float FDX31mq = FDX31 + FDY31*q;
float hs12 = C1 + FDX12 * (miny+0.5f) - FDY12 * (minx+0.5f) + FDqY12 - MIN_12;
float hs23 = C2 + FDX23 * (miny+0.5f) - FDY23 * (minx+0.5f) + FDqY23 - MIN_23;
float hs31 = C3 + FDX31 * (miny+0.5f) - FDY31 * (minx+0.5f) + FDqY31 - MIN_31;
float hs12 = C1 + FDX12 * (miny + 0.5f) - FDY12 * (minx + 0.5f) + FDqY12 - MIN_12;
float hs23 = C2 + FDX23 * (miny + 0.5f) - FDY23 * (minx + 0.5f) + FDqY23 - MIN_23;
float hs31 = C3 + FDX31 * (miny + 0.5f) - FDY31 * (minx + 0.5f) + FDqY31 - MIN_31;
MAX_12 -= MIN_12;
MAX_23 -= MIN_23;
@ -449,7 +564,7 @@ struct softrend : Renderer
u8* cb_y = (u8*)colorBuffer;
cb_y += miny*stride + minx*(q * 4);
ip.Setup(v1, v2, v3, minx, miny, q);
ip.Setup(pp, &texture, v1, v2, v3, minx, miny, q);
__m128 y_ps = _mm_broadcast_float(miny);
__m128 minx_ps = _mm_load_scaled_float(minx - q, 1);
static __declspec(align(16)) float ones_ps[4] = { 1, 1, 1, 1 };
@ -488,7 +603,7 @@ struct softrend : Renderer
__m128 yl_ps = y_ps;
for (int iy = q; iy > 0; iy--)
{
PixelFlush<false, alpha_blend>(x_ps, yl_ps, cb_x, x_ps);
PixelFlush TPL_PRMS_pixel(false) (pp, &texture, x_ps, yl_ps, cb_x, x_ps);
yl_ps = _mm_add_ps(yl_ps, *(__m128*)ones_ps);
cb_x += sizeof(__m128);
}
@ -523,7 +638,7 @@ struct softrend : Renderer
int msk = _mm_movemask_ps((__m128&)a);
if (msk != 0)
{
PixelFlush<true, alpha_blend>(x_ps, yl_ps, cb_x, *(__m128*)&a);
PixelFlush TPL_PRMS_pixel(true) (pp, &texture, x_ps, yl_ps, cb_x, *(__m128*)&a);
}
yl_ps = _mm_add_ps(yl_ps, *(__m128*)ones_ps);
@ -554,8 +669,27 @@ struct softrend : Renderer
cb_y += stride*q;
y_ps = _mm_add_ps(y_ps, *(__m128*)q_ps);
}
}
struct softrend : Renderer
{
// Prepare a TA context for rendering. Render-to-texture contexts are
// rejected; otherwise the context is locked, marked as being rendered, and
// parsed. Returns true only when parsing succeeds.
virtual bool Process(TA_context* ctx) {
	// RTTs are disabled for now ..
	if (ctx->rend.isRTT)
		return false;

	ctx->rend_inuse.Lock();
	ctx->MarkRend();

	// Success is exactly the parser's verdict.
	return ta_parse_vdrc(ctx) ? true : false;
}
template <bool alpha_blend>
void RenderParamList(List<PolyParam>* param_list) {
@ -572,8 +706,10 @@ struct softrend : Renderer
u16* poly_idx = &idx[params[i].first];
for (int v = 0; v < vertex_count; v++) {
////<alpha_blend, pp_UseAlpha, pp_Texture, pp_IgnoreTexA, pp_ShadInstr, pp_Offset >
RendtriangleFn fn = RendtriangleFns[alpha_blend][params[i].tsp.UseAlpha][params[i].pcw.Texture][params[i].tsp.IgnoreTexA][params[i].tsp.ShadInstr][params[i].pcw.Offset];
Rendtriangle<alpha_blend>(&params[i], v, verts[poly_idx[v]], verts[poly_idx[v + 1]], verts[poly_idx[v + 2]], render_buffer);
fn(&params[i], v, verts[poly_idx[v]], verts[poly_idx[v + 1]], verts[poly_idx[v + 2]], render_buffer);
}
}
}
@ -588,6 +724,9 @@ struct softrend : Renderer
RenderParamList<false>(&pvrrc.global_param_op);
RenderParamList<false>(&pvrrc.global_param_pt);
if (pvrrc.isAutoSort)
SortPParams();
RenderParamList<true>(&pvrrc.global_param_tr);
@ -628,6 +767,150 @@ struct softrend : Renderer
holdBMP = (HBITMAP)SelectObject(hmem, hBMP);
ReleaseDC(hWnd, hdc);
// Expand a 4/5/6-bit channel value to 8 bits (maximum input maps to 255).
#define REP_16(x) ((x)* 16 + (x))
#define REP_32(x) ((x)* 8 + (x)/4)
#define REP_64(x) ((x)* 4 + (x)/16)
// Build the 16bpp -> 32bpp lookup tables, indexed by text_info::textype.
// Output layout is A<<24 | R<<16 | G<<8 | B.
// The counter is unsigned so the "<< 24" shifts never overflow a signed int.
for (u32 c = 0; c < 65536; c++) {
	//565: R in bits 11-15, G in bits 5-10, B in bits 0-4, always opaque
	decoded_colors[0][c] = 0xFF000000 | (REP_32((c >> 11) % 32) << 16) | (REP_64((c >> 5) % 64) << 8) | (REP_32((c >> 0) % 32) << 0);
	//1555 (stored as GL 5_5_5_1): R in bits 11-15, G in bits 6-10, B in bits 1-5, A in bit 0.
	//Green and red were previously read one bit too low (>>5 and >>10).
	decoded_colors[1][c] = (((c >> 0) % 2) * 255 << 24) | (REP_32((c >> 11) % 32) << 16) | (REP_32((c >> 6) % 32) << 8) | (REP_32((c >> 1) % 32) << 0);
	//4444: R in bits 12-15, G in bits 8-11, B in bits 4-7, A in bits 0-3
	decoded_colors[2][c] = (REP_16((c >> 0) % 16) << 24) | (REP_16((c >> 12) % 16) << 16) | (REP_16((c >> 8) % 16) << 8) | (REP_16((c >> 4) % 16) << 0);
}
{
	// Fill every slot of the triangle-renderer dispatch table with the matching
	// template instantiation. Index / template-argument order is:
	//   [alpha_blend][pp_UseAlpha][pp_Texture][pp_IgnoreTexA][pp_ShadInstr][pp_Offset]
	// Each helper macro enumerates one dimension and delegates to the next,
	// giving all 2*2*2*2*4*2 = 128 combinations.
#define RTF_SET(ab, ua, tex, ita, si, off) \
	RendtriangleFns[ab][ua][tex][ita][si][off] = &Rendtriangle<ab, ua, tex, ita, si, off>
#define RTF_OFFSET(ab, ua, tex, ita, si) \
	RTF_SET(ab, ua, tex, ita, si, 0); \
	RTF_SET(ab, ua, tex, ita, si, 1)
#define RTF_SHADINSTR(ab, ua, tex, ita) \
	RTF_OFFSET(ab, ua, tex, ita, 0); \
	RTF_OFFSET(ab, ua, tex, ita, 1); \
	RTF_OFFSET(ab, ua, tex, ita, 2); \
	RTF_OFFSET(ab, ua, tex, ita, 3)
#define RTF_IGNORETEXA(ab, ua, tex) \
	RTF_SHADINSTR(ab, ua, tex, 0); \
	RTF_SHADINSTR(ab, ua, tex, 1)
#define RTF_TEXTURE(ab, ua) \
	RTF_IGNORETEXA(ab, ua, 0); \
	RTF_IGNORETEXA(ab, ua, 1)
#define RTF_USEALPHA(ab) \
	RTF_TEXTURE(ab, 0); \
	RTF_TEXTURE(ab, 1)

	RTF_USEALPHA(0);
	RTF_USEALPHA(1);

#undef RTF_USEALPHA
#undef RTF_TEXTURE
#undef RTF_IGNORETEXA
#undef RTF_SHADINSTR
#undef RTF_OFFSET
#undef RTF_SET
}
return true;
}