From aff2b9e1af05ce7f45e5b23578a11dfdfedd7bfd Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 11 Mar 2020 17:55:13 +0100 Subject: [PATCH] fix simple texture bleeding case when upscaling. Clean up --- core/hw/pvr/Renderer_if.cpp | 171 +++----------------------- core/hw/pvr/spg.cpp | 13 +- core/hw/pvr/ta_vtx.cpp | 85 ++++++++++--- core/rend/gles/gles.cpp | 5 +- core/rend/vulkan/oit/oit_renderer.cpp | 8 +- core/rend/vulkan/vulkan_renderer.cpp | 8 +- 6 files changed, 90 insertions(+), 200 deletions(-) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index 9bd688c70..a14cf2fe6 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -9,73 +9,6 @@ #include -#include "deps/crypto/md5.h" - -#if FEAT_HAS_NIXPROF -#include "profiler/profiler.h" -#endif - -#define FRAME_MD5 0x1 -FILE* fLogFrames; -FILE* fCheckFrames; - -/* - - rendv3 ideas - - multiple backends - - ESish - - OpenGL ES2.0 - - OpenGL ES3.0 - - OpenGL 3.1 - - OpenGL 4.x - - Direct3D 10+ ? - - correct memory ordering model - - resource pools - - threaded ta - - threaded rendering - - rtts - - framebuffers - - overlays - - - PHASES - - TA submition (memops, dma) - - - TA parsing (defered, rend thread) - - - CORE render (in-order, defered, rend thread) - - - submition is done in-order - - Partial handling of TA values - - Gotchas with TA contexts - - parsing is done on demand and out-of-order, and might be skipped - - output is only consumed by renderer - - render is queued on RENDER_START, and won't stall the emulation or might be skipped - - VRAM integrity is an issue with out-of-order or delayed rendering. - - selective vram snapshots require ta parsing to complete in order with REND_START / REND_END - - - Complications - - For some apis (gles2, maybe gl31) texture allocation needs to happen on the gpu thread - - multiple versions of different time snapshots of the same texture are required - - ta parsing vs frameskip logic - - - Texture versioning and staging - A memory copy of the texture can be used to temporary store the texture before upload to vram - This can be moved to another thread - If the api supports async resource creation, we don't need the extra copy - Texcache lookups need to be versioned - - - rendv2x hacks - - Only a single pending render. Any renders while still pending are dropped (before parsing) - - wait and block for parse/texcache. Render is async -*/ - u32 VertexCount=0; u32 FrameCount=1; @@ -91,8 +24,6 @@ cResetEvent rs, re; static bool swap_pending; static bool do_swap; -int max_idx,max_mvo,max_op,max_pt,max_tr,max_vtx,max_modt, ovrn; - static bool render_called = false; u32 fb_watch_addr_start; u32 fb_watch_addr_end; @@ -131,7 +62,7 @@ static void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref if (vram_ref) { src_vram = (u8*)malloc(VRAM_SIZE); - for (int i = 0; i < VRAM_SIZE; i++) { + for (u32 i = 0; i < VRAM_SIZE; i++) { src_vram[i] = vram[i] ^ vram_ref[i]; } } @@ -155,7 +86,7 @@ static void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref free(compressed); fwrite(&ctx->tad.render_pass_count, 1, sizeof(u32), fw); - for (int i = 0; i < ctx->tad.render_pass_count; i++) { + for (u32 i = 0; i < ctx->tad.render_pass_count; i++) { u32 offset = ctx->tad.render_passes[i] - ctx->tad.thd_root; fwrite(&offset, 1, sizeof(offset), fw); } @@ -182,8 +113,8 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { fclose(fw); return 0; } - int sizeofPolyParam = sizeof(PolyParam); - int sizeofVertex = sizeof(Vertex); + u32 sizeofPolyParam = sizeof(PolyParam); + u32 sizeofVertex = sizeof(Vertex); if (id0[7] == '3') { sizeofPolyParam -= 12; @@ -233,7 +164,7 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { if (fread(&t, 1, sizeof(t), fw) > 0) { ctx->tad.render_pass_count = t; - for (int i = 0; i < t; i++) { + for (u32 i = 0; i < t; i++) { u32 offset; verify(fread(&offset, 1, sizeof(offset), fw) == sizeof(offset)); ctx->tad.render_passes[i] = ctx->tad.thd_root + offset; @@ -276,7 +207,7 @@ static bool rend_frame(TA_context* ctx) bool rend_single_frame() { - if (renderer_changed != settings.pvr.rend) + if ((u32)renderer_changed != settings.pvr.rend) { rend_term_renderer(); SwitchRenderApi(renderer_changed); @@ -465,90 +396,28 @@ void rend_start_render() { bool is_rtt=(FB_W_SOF1& 0x1000000)!=0 && !ctx->rend.isRenderFramebuffer; - if (fLogFrames || fCheckFrames) { - MD5Context md5; - u8 digest[16]; + //tactx_Recycle(ctx); ctx = read_frame("frames/dcframe-SoA-intro-tr-autosort"); + //printf("REP: %.2f ms\n",render_end_pending_cycles/200000.0); + if (!ctx->rend.isRenderFramebuffer) + FillBGP(ctx); - MD5Init(&md5); - MD5Update(&md5, ctx->tad.thd_root, ctx->tad.End() - ctx->tad.thd_root); - MD5Final(digest, &md5); + ctx->rend.isRTT=is_rtt; - if (fLogFrames) { - fputc(FRAME_MD5, fLogFrames); - fwrite(digest, 1, 16, fLogFrames); - fflush(fLogFrames); - } + ctx->rend.fb_X_CLIP=FB_X_CLIP; + ctx->rend.fb_Y_CLIP=FB_Y_CLIP; - if (fCheckFrames) { - u8 digest2[16]; - int ch = fgetc(fCheckFrames); + ctx->rend.fog_clamp_min = FOG_CLAMP_MIN; + ctx->rend.fog_clamp_max = FOG_CLAMP_MAX; - if (ch == EOF) { - INFO_LOG(PVR, "Testing: TA Hash log matches, exiting"); - exit(1); - } - - verify(ch == FRAME_MD5); - - verify(fread(digest2, 1, 16, fCheckFrames) == 16); - - verify(memcmp(digest, digest2, 16) == 0); - - - } - - /* - u8* dig = digest; - printf("FRAME: %02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X-%02X\n", - digest[0], digest[1], digest[2], digest[3], digest[4], digest[5], digest[6], digest[7], - digest[8], digest[9], digest[10], digest[11], digest[12], digest[13], digest[14], digest[15] - ); - */ - } - - if (!ctx->rend.Overrun) + if (QueueRender(ctx)) { - //tactx_Recycle(ctx); ctx = read_frame("frames/dcframe-SoA-intro-tr-autosort"); - //printf("REP: %.2f ms\n",render_end_pending_cycles/200000.0); - if (!ctx->rend.isRenderFramebuffer) - FillBGP(ctx); - - ctx->rend.isRTT=is_rtt; - - ctx->rend.fb_X_CLIP=FB_X_CLIP; - ctx->rend.fb_Y_CLIP=FB_Y_CLIP; - - ctx->rend.fog_clamp_min = FOG_CLAMP_MIN; - ctx->rend.fog_clamp_max = FOG_CLAMP_MAX; - - max_idx=max(max_idx,ctx->rend.idx.used()); - max_vtx=max(max_vtx,ctx->rend.verts.used()); - max_op=max(max_op,ctx->rend.global_param_op.used()); - max_pt=max(max_pt,ctx->rend.global_param_pt.used()); - max_tr=max(max_tr,ctx->rend.global_param_tr.used()); - - max_mvo=max(max_mvo,ctx->rend.global_param_mvo.used()); - max_modt=max(max_modt,ctx->rend.modtrig.used()); - -#if defined(_WIN32) && 0 - printf("max: idx: %d, vtx: %d, op: %d, pt: %d, tr: %d, mvo: %d, modt: %d, ov: %d\n", max_idx, max_vtx, max_op, max_pt, max_tr, max_mvo, max_modt, ovrn); -#endif - if (QueueRender(ctx)) - { - palette_update(); + palette_update(); #if !defined(TARGET_NO_THREADS) - rs.Set(); + rs.Set(); #else - rend_single_frame(); + rend_single_frame(); #endif - pend_rend = true; - } - } - else - { - ovrn++; - INFO_LOG(PVR, "WARNING: Rendering context is overrun (%d), aborting frame", ovrn); - tactx_Recycle(ctx); + pend_rend = true; } } } diff --git a/core/hw/pvr/spg.cpp b/core/hw/pvr/spg.cpp index 476fbc4bf..a0f8837ed 100755 --- a/core/hw/pvr/spg.cpp +++ b/core/hw/pvr/spg.cpp @@ -277,17 +277,8 @@ void spg_Reset(bool hard) void SetREP(TA_context* cntx) { - if (cntx && !cntx->rend.Overrun) - { - VertexCount+= cntx->rend.verts.used(); - int render_end_pending_cycles= cntx->rend.verts.used()*60; - //if (render_end_pending_cycles<500000) - render_end_pending_cycles+=500000*3; - - sh4_sched_request(render_end_schid,render_end_pending_cycles); - } + if (cntx) + sh4_sched_request(render_end_schid, 500000 * 3); else - { sh4_sched_request(render_end_schid, 4096); - } } diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 5ce3a34d5..a035ab278 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -11,7 +11,7 @@ #include "Renderer_if.h" u32 ta_type_lut[256]; - +extern int screen_height; #define TACALL DYNACALL #ifdef RELEASE @@ -368,22 +368,18 @@ public: TA_EOL; } break; + //32B case ParamType_User_Tile_Clip: - { - - SetTileClip(data->data_32[3]&63,data->data_32[4]&31,data->data_32[5]&63,data->data_32[6]&31); - data+=SZ32; - } + SetTileClip(data->data_32[3] & 63, data->data_32[4] & 31, data->data_32[5] & 63, data->data_32[6] & 31); + data += SZ32; break; + //32B case ParamType_Object_List_Set: - { - INFO_LOG(PVR, "Unsupported list type: ParamType_Object_List_Set"); // NAOMI Virtual on Oratorio Tangram - - // *cough* ignore it :p - data+=SZ32; - } + INFO_LOG(PVR, "Unsupported list type: ParamType_Object_List_Set"); // NAOMI Virtual on Oratorio Tangram + // *cough* ignore it :p + data += SZ32; break; //Global Parameter @@ -691,8 +687,8 @@ public: memset(FaceOffsColor, 0xff, sizeof(FaceOffsColor)); memset(FaceBaseColor1, 0xff, sizeof(FaceBaseColor1)); memset(FaceOffsColor1, 0xff, sizeof(FaceOffsColor1)); - SFaceBaseColor = 0xffffffff; - SFaceOffsColor = 0xffffffff; + SFaceBaseColor = 0; + SFaceOffsColor = 0; lmr = NULL; CurrentPP = NULL; CurrentPPlist = NULL; @@ -1268,11 +1264,6 @@ public: //cv[indx].base_int=1;\ //cv[indx].offset_int=1; - #define append_sprite_yz(indx,set,st2) \ - cv[indx].y=sv->y##set; \ - cv[indx].z=sv->z##st2; \ - update_fz(sv->z##st2); - #define sprite_uv(indx,u_name,v_name) \ cv[indx].u = f16(sv->u_name);\ cv[indx].v = f16(sv->v_name); @@ -1494,6 +1485,49 @@ FifoSplitter<0> TAFifo0; int ta_parse_cnt = 0; +static void fix_texture_bleeding(const List *list) +{ + for (const PolyParam *pp = list->head(); pp <= list->LastPtr(); pp++) + { + if (!pp->pcw.Texture || pp->count < 3) + continue; + // Find polygons that are facing the camera (constant z) + // and only use 0 and 1 for U and V (some tolerance around 1 for SA2) + // then apply a half-pixel correction on U and V. + const u32 first = vd_rc.idx.head()[pp->first]; + const u32 last = vd_rc.idx.head()[pp->first + pp->count - 1]; + bool need_fixing = true; + float z; + for (u32 idx = first; idx <= last && need_fixing; idx++) + { + Vertex& vtx = vd_rc.verts.head()[idx]; + + if (vtx.u != 0.f && (vtx.u <= 0.995f || vtx.u > 1.f)) + need_fixing = false; + else if (vtx.v != 0.f && (vtx.v <= 0.995f || vtx.v > 1.f)) + need_fixing = false; + else if (idx == first) + z = vtx.z; + else if (z != vtx.z) + need_fixing = false; + } + if (!need_fixing) + continue; + u32 tex_width = 8 << pp->tsp.TexU; + u32 tex_height = 8 << pp->tsp.TexV; + for (u32 idx = first; idx <= last; idx++) + { + Vertex& vtx = vd_rc.verts.head()[idx]; + if (vtx.u > 0.995f) + vtx.u = 1.f; + vtx.u = (0.5f + vtx.u * (tex_width - 1)) / tex_width; + if (vtx.v > 0.995f) + vtx.v = 1.f; + vtx.v = (0.5f + vtx.v * (tex_height - 1)) / tex_height; + } + } +} + /* Also: gotta stage textures here */ @@ -1525,6 +1559,9 @@ bool ta_parse_vdrc(TA_context* ctx) } while(ta_data<=ta_data_end); + if (ctx->rend.Overrun) + break; + bool empty_pass = vd_rc.global_param_op.used() == (pass == 0 ? 1 : vd_rc.render_passes.LastPtr()->op_count) && vd_rc.global_param_pt.used() == (pass == 0 ? 0 : vd_rc.render_passes.LastPtr()->pt_count) && vd_rc.global_param_tr.used() == (pass == 0 ? 0 : vd_rc.render_passes.LastPtr()->tr_count); @@ -1541,10 +1578,18 @@ bool ta_parse_vdrc(TA_context* ctx) render_pass->autosort = UsingAutoSort(pass); render_pass->z_clear = ClearZBeforePass(pass); } + if (screen_height > 480) + { + fix_texture_bleeding(&vd_rc.global_param_op); + fix_texture_bleeding(&vd_rc.global_param_pt); + fix_texture_bleeding(&vd_rc.global_param_tr); + } } rv = !empty_context; } bool overrun = ctx->rend.Overrun; + if (overrun) + WARN_LOG(PVR, "ERROR: TA context overrun"); vd_ctx->rend = vd_rc; vd_ctx = 0; @@ -1552,7 +1597,7 @@ bool ta_parse_vdrc(TA_context* ctx) ctx->rend.Overrun = overrun; - return rv; + return rv && !overrun; } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index af7e5d143..7949e92de 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -977,10 +977,7 @@ bool ProcessFrame(TA_context* ctx) } TexCache.CollectCleanup(); - if (ctx->rend.Overrun) - WARN_LOG(PVR, "ERROR: TA context overrun"); - - return !ctx->rend.Overrun; + return true; } static void upload_vertex_indices() diff --git a/core/rend/vulkan/oit/oit_renderer.cpp b/core/rend/vulkan/oit/oit_renderer.cpp index f67c17530..95c500326 100644 --- a/core/rend/vulkan/oit/oit_renderer.cpp +++ b/core/rend/vulkan/oit/oit_renderer.cpp @@ -163,15 +163,9 @@ public: textureCache.CollectCleanup(); - if (ctx->rend.Overrun) - WARN_LOG(PVR, "ERROR: TA context overrun"); - - result = result && !ctx->rend.Overrun; - if (result) CheckFogTexture(); - - if (!result) + else texCommandPool.EndFrame(); return result; diff --git a/core/rend/vulkan/vulkan_renderer.cpp b/core/rend/vulkan/vulkan_renderer.cpp index 238c8cf1e..6cbfad324 100644 --- a/core/rend/vulkan/vulkan_renderer.cpp +++ b/core/rend/vulkan/vulkan_renderer.cpp @@ -150,15 +150,9 @@ public: textureCache.CollectCleanup(); - if (ctx->rend.Overrun) - WARN_LOG(PVR, "ERROR: TA context overrun"); - - result = result && !ctx->rend.Overrun; - if (result) CheckFogTexture(); - - if (!result) + else texCommandPool.EndFrame(); return result;