fix simple texture bleeding case when upscaling. Clean up

This commit is contained in:
Flyinghead 2020-03-11 17:55:13 +01:00
parent 8567c42b30
commit aff2b9e1af
6 changed files with 90 additions and 200 deletions

View File

@ -9,73 +9,6 @@
#include <zlib.h>
#include "deps/crypto/md5.h"
#if FEAT_HAS_NIXPROF
#include "profiler/profiler.h"
#endif
#define FRAME_MD5 0x1
FILE* fLogFrames;
FILE* fCheckFrames;
/*
rendv3 ideas
- multiple backends
- ESish
- OpenGL ES2.0
- OpenGL ES3.0
- OpenGL 3.1
- OpenGL 4.x
- Direct3D 10+ ?
- correct memory ordering model
- resource pools
- threaded ta
- threaded rendering
- rtts
- framebuffers
- overlays
PHASES
- TA submission (memops, dma)
- TA parsing (deferred, rend thread)
- CORE render (in-order, deferred, rend thread)
submission is done in-order
- Partial handling of TA values
- Gotchas with TA contexts
parsing is done on demand and out-of-order, and might be skipped
- output is only consumed by renderer
render is queued on RENDER_START, and won't stall the emulation or might be skipped
- VRAM integrity is an issue with out-of-order or delayed rendering.
- selective vram snapshots require ta parsing to complete in order with REND_START / REND_END
Complications
- For some apis (gles2, maybe gl31) texture allocation needs to happen on the gpu thread
- multiple versions of different time snapshots of the same texture are required
- ta parsing vs frameskip logic
Texture versioning and staging
A memory copy of the texture can be used to temporarily store the texture before upload to vram
This can be moved to another thread
If the api supports async resource creation, we don't need the extra copy
Texcache lookups need to be versioned
rendv2x hacks
- Only a single pending render. Any renders while still pending are dropped (before parsing)
- wait and block for parse/texcache. Render is async
*/
u32 VertexCount=0;
u32 FrameCount=1;
@ -91,8 +24,6 @@ cResetEvent rs, re;
static bool swap_pending;
static bool do_swap;
int max_idx,max_mvo,max_op,max_pt,max_tr,max_vtx,max_modt, ovrn;
static bool render_called = false;
u32 fb_watch_addr_start;
u32 fb_watch_addr_end;
@ -131,7 +62,7 @@ static void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref
if (vram_ref) {
src_vram = (u8*)malloc(VRAM_SIZE);
for (int i = 0; i < VRAM_SIZE; i++) {
for (u32 i = 0; i < VRAM_SIZE; i++) {
src_vram[i] = vram[i] ^ vram_ref[i];
}
}
@ -155,7 +86,7 @@ static void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref
free(compressed);
fwrite(&ctx->tad.render_pass_count, 1, sizeof(u32), fw);
for (int i = 0; i < ctx->tad.render_pass_count; i++) {
for (u32 i = 0; i < ctx->tad.render_pass_count; i++) {
u32 offset = ctx->tad.render_passes[i] - ctx->tad.thd_root;
fwrite(&offset, 1, sizeof(offset), fw);
}
@ -182,8 +113,8 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) {
fclose(fw);
return 0;
}
int sizeofPolyParam = sizeof(PolyParam);
int sizeofVertex = sizeof(Vertex);
u32 sizeofPolyParam = sizeof(PolyParam);
u32 sizeofVertex = sizeof(Vertex);
if (id0[7] == '3')
{
sizeofPolyParam -= 12;
@ -233,7 +164,7 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) {
if (fread(&t, 1, sizeof(t), fw) > 0) {
ctx->tad.render_pass_count = t;
for (int i = 0; i < t; i++) {
for (u32 i = 0; i < t; i++) {
u32 offset;
verify(fread(&offset, 1, sizeof(offset), fw) == sizeof(offset));
ctx->tad.render_passes[i] = ctx->tad.thd_root + offset;
@ -276,7 +207,7 @@ static bool rend_frame(TA_context* ctx)
bool rend_single_frame()
{
if (renderer_changed != settings.pvr.rend)
if ((u32)renderer_changed != settings.pvr.rend)
{
rend_term_renderer();
SwitchRenderApi(renderer_changed);
@ -465,90 +396,28 @@ void rend_start_render()
{
bool is_rtt=(FB_W_SOF1& 0x1000000)!=0 && !ctx->rend.isRenderFramebuffer;
if (fLogFrames || fCheckFrames) {
MD5Context md5;
u8 digest[16];
//tactx_Recycle(ctx); ctx = read_frame("frames/dcframe-SoA-intro-tr-autosort");
//printf("REP: %.2f ms\n",render_end_pending_cycles/200000.0);
if (!ctx->rend.isRenderFramebuffer)
FillBGP(ctx);
MD5Init(&md5);
MD5Update(&md5, ctx->tad.thd_root, ctx->tad.End() - ctx->tad.thd_root);
MD5Final(digest, &md5);
ctx->rend.isRTT=is_rtt;
if (fLogFrames) {
fputc(FRAME_MD5, fLogFrames);
fwrite(digest, 1, 16, fLogFrames);
fflush(fLogFrames);
}
ctx->rend.fb_X_CLIP=FB_X_CLIP;
ctx->rend.fb_Y_CLIP=FB_Y_CLIP;
if (fCheckFrames) {
u8 digest2[16];
int ch = fgetc(fCheckFrames);
ctx->rend.fog_clamp_min = FOG_CLAMP_MIN;
ctx->rend.fog_clamp_max = FOG_CLAMP_MAX;
if (ch == EOF) {
INFO_LOG(PVR, "Testing: TA Hash log matches, exiting");
exit(1);
}
verify(ch == FRAME_MD5);
verify(fread(digest2, 1, 16, fCheckFrames) == 16);
verify(memcmp(digest, digest2, 16) == 0);
}
/*
u8* dig = digest;
printf("FRAME: %02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X\n",
digest[0], digest[1], digest[2], digest[3], digest[4], digest[5], digest[6], digest[7],
digest[8], digest[9], digest[10], digest[11], digest[12], digest[13], digest[14], digest[15]
);
*/
}
if (!ctx->rend.Overrun)
if (QueueRender(ctx))
{
//tactx_Recycle(ctx); ctx = read_frame("frames/dcframe-SoA-intro-tr-autosort");
//printf("REP: %.2f ms\n",render_end_pending_cycles/200000.0);
if (!ctx->rend.isRenderFramebuffer)
FillBGP(ctx);
ctx->rend.isRTT=is_rtt;
ctx->rend.fb_X_CLIP=FB_X_CLIP;
ctx->rend.fb_Y_CLIP=FB_Y_CLIP;
ctx->rend.fog_clamp_min = FOG_CLAMP_MIN;
ctx->rend.fog_clamp_max = FOG_CLAMP_MAX;
max_idx=max(max_idx,ctx->rend.idx.used());
max_vtx=max(max_vtx,ctx->rend.verts.used());
max_op=max(max_op,ctx->rend.global_param_op.used());
max_pt=max(max_pt,ctx->rend.global_param_pt.used());
max_tr=max(max_tr,ctx->rend.global_param_tr.used());
max_mvo=max(max_mvo,ctx->rend.global_param_mvo.used());
max_modt=max(max_modt,ctx->rend.modtrig.used());
#if defined(_WIN32) && 0
printf("max: idx: %d, vtx: %d, op: %d, pt: %d, tr: %d, mvo: %d, modt: %d, ov: %d\n", max_idx, max_vtx, max_op, max_pt, max_tr, max_mvo, max_modt, ovrn);
#endif
if (QueueRender(ctx))
{
palette_update();
palette_update();
#if !defined(TARGET_NO_THREADS)
rs.Set();
rs.Set();
#else
rend_single_frame();
rend_single_frame();
#endif
pend_rend = true;
}
}
else
{
ovrn++;
INFO_LOG(PVR, "WARNING: Rendering context is overrun (%d), aborting frame", ovrn);
tactx_Recycle(ctx);
pend_rend = true;
}
}
}

View File

@ -277,17 +277,8 @@ void spg_Reset(bool hard)
// Requests the render-end scheduler event (render_end_schid) through
// sh4_sched_request, with a delay that depends on the TA context passed in.
// NOTE(review): this text comes from a diff hunk (17 lines before the change,
// 8 after), so the pre-change and post-change bodies appear interleaved below.
// Read literally, a non-null context without overrun reaches sh4_sched_request
// twice. Confirm against the actual file before relying on this listing.
void SetREP(TA_context* cntx)
{
// Delay derived from the number of vertices used: 60 per vertex plus a fixed 500000*3.
if (cntx && !cntx->rend.Overrun)
{
VertexCount+= cntx->rend.verts.used();
int render_end_pending_cycles= cntx->rend.verts.used()*60;
//if (render_end_pending_cycles<500000)
render_end_pending_cycles+=500000*3;
sh4_sched_request(render_end_schid,render_end_pending_cycles);
}
// Fixed delay whenever a context is supplied, regardless of its vertex count.
if (cntx)
sh4_sched_request(render_end_schid, 500000 * 3);
else
{
// No context: request the event with a short fixed delay.
sh4_sched_request(render_end_schid, 4096);
}
}

View File

@ -11,7 +11,7 @@
#include "Renderer_if.h"
u32 ta_type_lut[256];
extern int screen_height;
#define TACALL DYNACALL
#ifdef RELEASE
@ -368,22 +368,18 @@ public:
TA_EOL;
}
break;
//32B
case ParamType_User_Tile_Clip:
{
SetTileClip(data->data_32[3]&63,data->data_32[4]&31,data->data_32[5]&63,data->data_32[6]&31);
data+=SZ32;
}
SetTileClip(data->data_32[3] & 63, data->data_32[4] & 31, data->data_32[5] & 63, data->data_32[6] & 31);
data += SZ32;
break;
//32B
case ParamType_Object_List_Set:
{
INFO_LOG(PVR, "Unsupported list type: ParamType_Object_List_Set"); // NAOMI Virtual on Oratorio Tangram
// *cough* ignore it :p
data+=SZ32;
}
INFO_LOG(PVR, "Unsupported list type: ParamType_Object_List_Set"); // NAOMI Virtual on Oratorio Tangram
// *cough* ignore it :p
data += SZ32;
break;
//Global Parameter
@ -691,8 +687,8 @@ public:
memset(FaceOffsColor, 0xff, sizeof(FaceOffsColor));
memset(FaceBaseColor1, 0xff, sizeof(FaceBaseColor1));
memset(FaceOffsColor1, 0xff, sizeof(FaceOffsColor1));
SFaceBaseColor = 0xffffffff;
SFaceOffsColor = 0xffffffff;
SFaceBaseColor = 0;
SFaceOffsColor = 0;
lmr = NULL;
CurrentPP = NULL;
CurrentPPlist = NULL;
@ -1268,11 +1264,6 @@ public:
//cv[indx].base_int=1;\
//cv[indx].offset_int=1;
#define append_sprite_yz(indx,set,st2) \
cv[indx].y=sv->y##set; \
cv[indx].z=sv->z##st2; \
update_fz(sv->z##st2);
#define sprite_uv(indx,u_name,v_name) \
cv[indx].u = f16(sv->u_name);\
cv[indx].v = f16(sv->v_name);
@ -1494,6 +1485,49 @@ FifoSplitter<0> TAFifo0;
int ta_parse_cnt = 0;
static void fix_texture_bleeding(const List<PolyParam> *list)
{
for (const PolyParam *pp = list->head(); pp <= list->LastPtr(); pp++)
{
if (!pp->pcw.Texture || pp->count < 3)
continue;
// Find polygons that are facing the camera (constant z)
// and only use 0 and 1 for U and V (some tolerance around 1 for SA2)
// then apply a half-pixel correction on U and V.
const u32 first = vd_rc.idx.head()[pp->first];
const u32 last = vd_rc.idx.head()[pp->first + pp->count - 1];
bool need_fixing = true;
float z;
for (u32 idx = first; idx <= last && need_fixing; idx++)
{
Vertex& vtx = vd_rc.verts.head()[idx];
if (vtx.u != 0.f && (vtx.u <= 0.995f || vtx.u > 1.f))
need_fixing = false;
else if (vtx.v != 0.f && (vtx.v <= 0.995f || vtx.v > 1.f))
need_fixing = false;
else if (idx == first)
z = vtx.z;
else if (z != vtx.z)
need_fixing = false;
}
if (!need_fixing)
continue;
u32 tex_width = 8 << pp->tsp.TexU;
u32 tex_height = 8 << pp->tsp.TexV;
for (u32 idx = first; idx <= last; idx++)
{
Vertex& vtx = vd_rc.verts.head()[idx];
if (vtx.u > 0.995f)
vtx.u = 1.f;
vtx.u = (0.5f + vtx.u * (tex_width - 1)) / tex_width;
if (vtx.v > 0.995f)
vtx.v = 1.f;
vtx.v = (0.5f + vtx.v * (tex_height - 1)) / tex_height;
}
}
}
/*
Also: gotta stage textures here
*/
@ -1525,6 +1559,9 @@ bool ta_parse_vdrc(TA_context* ctx)
}
while(ta_data<=ta_data_end);
if (ctx->rend.Overrun)
break;
bool empty_pass = vd_rc.global_param_op.used() == (pass == 0 ? 1 : vd_rc.render_passes.LastPtr()->op_count)
&& vd_rc.global_param_pt.used() == (pass == 0 ? 0 : vd_rc.render_passes.LastPtr()->pt_count)
&& vd_rc.global_param_tr.used() == (pass == 0 ? 0 : vd_rc.render_passes.LastPtr()->tr_count);
@ -1541,10 +1578,18 @@ bool ta_parse_vdrc(TA_context* ctx)
render_pass->autosort = UsingAutoSort(pass);
render_pass->z_clear = ClearZBeforePass(pass);
}
if (screen_height > 480)
{
fix_texture_bleeding(&vd_rc.global_param_op);
fix_texture_bleeding(&vd_rc.global_param_pt);
fix_texture_bleeding(&vd_rc.global_param_tr);
}
}
rv = !empty_context;
}
bool overrun = ctx->rend.Overrun;
if (overrun)
WARN_LOG(PVR, "ERROR: TA context overrun");
vd_ctx->rend = vd_rc;
vd_ctx = 0;
@ -1552,7 +1597,7 @@ bool ta_parse_vdrc(TA_context* ctx)
ctx->rend.Overrun = overrun;
return rv;
return rv && !overrun;
}

View File

@ -977,10 +977,7 @@ bool ProcessFrame(TA_context* ctx)
}
TexCache.CollectCleanup();
if (ctx->rend.Overrun)
WARN_LOG(PVR, "ERROR: TA context overrun");
return !ctx->rend.Overrun;
return true;
}
static void upload_vertex_indices()

View File

@ -163,15 +163,9 @@ public:
textureCache.CollectCleanup();
if (ctx->rend.Overrun)
WARN_LOG(PVR, "ERROR: TA context overrun");
result = result && !ctx->rend.Overrun;
if (result)
CheckFogTexture();
if (!result)
else
texCommandPool.EndFrame();
return result;

View File

@ -150,15 +150,9 @@ public:
textureCache.CollectCleanup();
if (ctx->rend.Overrun)
WARN_LOG(PVR, "ERROR: TA context overrun");
result = result && !ctx->rend.Overrun;
if (result)
CheckFogTexture();
if (!result)
else
texCommandPool.EndFrame();
return result;