pvr: use render pass # to read the right tile when marking blocks

Fixes Unreal Tournament flashing/black screen.
gl: mark vram area in rtt to avoid overwriting data (Worms World Party)
Force Worms World Party to use CopyToVRam (regression due to
6a5db32d5d)
Don't calculate precise aspect ratio and use 4/3 instead (or more if
widescreen or stretch).
Fix clipping issue when using SCALER_CTL.vscalefactor. Avoid crash when
clip values are >= width or height.
vulkan: RenderFramebuffer wasn't working at all if !EmulateFramebuffer
This commit is contained in:
Flyinghead 2022-10-30 12:10:24 +01:00
parent b935bef906
commit 40dbf53792
20 changed files with 129 additions and 71 deletions

View File

@ -93,7 +93,9 @@ static void loadSpecialSettings()
// JSR (JP)
|| prod_id == "HDR-0078"
// JSR (EU)
|| prod_id == "MK-5105850")
|| prod_id == "MK-5105850"
// Worms World Party
|| prod_id == "T7016D 50")
{
INFO_LOG(BOOT, "Enabling RTT Copy to VRAM for game %s", prod_id.c_str());
config::RenderToTextureBuffer.override(true);

View File

@ -176,7 +176,7 @@ private:
#ifdef LIBRETRO
if (renderToScreen)
retro_resize_renderer(_pvrrc->rend.framebufferWidth, _pvrrc->rend.framebufferHeight,
getOutputFramebufferAspectRatio(_pvrrc->rend));
getOutputFramebufferAspectRatio());
#endif
bool proc = renderer->Process(_pvrrc);
if (!proc || renderToScreen)
@ -330,8 +330,12 @@ void rend_start_render()
linkedCtx->nextContext = tactx_Pop(addresses[i]);
if (linkedCtx->nextContext != nullptr)
linkedCtx = linkedCtx->nextContext;
else
INFO_LOG(PVR, "rend_start_render: Context%d @ %x not found", i, addresses[i]);
}
}
else
INFO_LOG(PVR, "rend_start_render: Context0 @ %x not found", addresses[0]);
}
scheduleRenderDone(ctx);

View File

@ -29,6 +29,7 @@
// ta.cpp
extern u8 ta_fsm[2049]; //[2048] stores the current state
extern u32 ta_fsm_cl;
extern u32 taRenderPass;
// pvr_regs.cpp
extern bool fog_needs_update;
extern bool pal_needs_update;
@ -72,6 +73,7 @@ void serialize(Serializer& ser)
ser << ta_fsm[2048];
ser << ta_fsm_cl;
ser << taRenderPass;
SerializeTAContext(ser);
@ -106,6 +108,10 @@ void deserialize(Deserializer& deser)
deser >> ta_fsm[2048];
deser >> ta_fsm_cl;
if (deser.version() >= Deserializer::V29)
deser >> taRenderPass;
else
taRenderPass = 0;
if (deser.version() >= Deserializer::V5_LIBRETRO && deser.version() < Deserializer::V9_LIBRETRO)
{
deser.skip<bool>(); // pal_needs_update

View File

@ -137,7 +137,7 @@ void pvr_WriteReg(u32 paddr,u32 data)
case TA_LIST_INIT_addr:
if (data >> 31)
{
ta_vtx_ListInit();
ta_vtx_ListInit(false);
TA_NEXT_OPB = TA_NEXT_OPB_INIT;
TA_ITP_CURRENT = TA_ISP_BASE;
}
@ -150,7 +150,7 @@ void pvr_WriteReg(u32 paddr,u32 data)
case TA_LIST_CONT_addr:
//a write of anything works ?
ta_vtx_ListInit();
ta_vtx_ListInit(true);
break;
case SPG_CONTROL_addr:

View File

@ -72,6 +72,7 @@ enum ta_state
u8 ta_fsm[2049]; //[2048] stores the current state
u32 ta_fsm_cl=7;
u32 taRenderPass;
static void fill_fsm(ta_state st, s8 pt, s8 obj, ta_state next, u32 proc=0, u32 sz64=0)
{
@ -465,35 +466,39 @@ static u32 opbSize(int n)
return n == 0 ? 0 : 16 << n;
}
static void markObjectListBlocks()
static void markObjectListBlocks(int renderPass)
{
u32 addr;
u32 tile_size;
getRegionTileAddrAndSize(addr, tile_size);
addr += tile_size * renderPass;
// Read the opaque pointer of the first tile and check that it's non-null (Naomi doom)
u32 opbAddr = pvr_read32p<u32>(addr + 4);
bool emptyOpaqueList = (opbAddr & 0x80000000) != 0;
addr = TA_OL_BASE;
const int tileCount = (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1);
// opaque
u32 opBlockSize = opbSize(TA_ALLOC_CTRL & 3);
if (emptyOpaqueList)
addr += opBlockSize * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1);
addr += opBlockSize * tileCount;
if (opBlockSize == 0 || emptyOpaqueList)
{
// skip modvols OPBs
addr += opbSize((TA_ALLOC_CTRL >> 4) & 3) * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1);
addr += opbSize((TA_ALLOC_CTRL >> 4) & 3) * tileCount;
// transparent
opBlockSize = opbSize((TA_ALLOC_CTRL >> 8) & 3);
if (opBlockSize == 0)
{
// skip TR modvols OPBs
addr += opbSize((TA_ALLOC_CTRL >> 12) & 3) * (TA_GLOB_TILE_CLIP.tile_y_num + 1) * (TA_GLOB_TILE_CLIP.tile_x_num + 1);
addr += opbSize((TA_ALLOC_CTRL >> 12) & 3) * tileCount;
// punch-through
opBlockSize = opbSize((TA_ALLOC_CTRL >> 16) & 3);
if (opBlockSize == 0)
if (opBlockSize == 0) {
INFO_LOG(PVR, "markObjectListBlocks: all lists are empty");
return;
}
}
}
for (int y = 0; y <= TA_GLOB_TILE_CLIP.tile_y_num; y++)
@ -504,11 +509,15 @@ static void markObjectListBlocks()
}
}
void ta_vtx_ListInit()
void ta_vtx_ListInit(bool continuation)
{
if (!continuation)
taRenderPass = 0;
else
taRenderPass++;
SetCurrentTARC(TA_OL_BASE);
ta_tad.ClearPartial();
markObjectListBlocks();
markObjectListBlocks(taRenderPass);
ta_cur_state = TAS_NS;
ta_fsm_cl = 7;

View File

@ -8,7 +8,7 @@ constexpr u32 SZ64 = 2;
struct TA_context;
void ta_vtx_ListInit();
void ta_vtx_ListInit(bool continuation);
void ta_vtx_SoftReset();
void DYNACALL ta_vtx_data32(const SQBuffer *data);

View File

@ -443,7 +443,7 @@ bool DX11Renderer::Render()
}
else
{
aspectRatio = getOutputFramebufferAspectRatio(pvrrc);
aspectRatio = getOutputFramebufferAspectRatio();
#ifndef LIBRETRO
deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr);
displayFramebuffer();
@ -1197,6 +1197,8 @@ void DX11Renderer::writeFramebufferToVRAM()
yscale = 1.f;
ComPtr<ID3D11Texture2D> fbTexture = fbTex;
FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP;
FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP;
if (xscale != 1.f || yscale != 1.f)
{
@ -1236,6 +1238,11 @@ void DX11Renderer::writeFramebufferToVRAM()
width = scaledW;
height = scaledH;
fbTexture = fbScaledTexture;
// FB_Y_CLIP is applied before vscalefactor if > 1, so it must be scaled here
if (yscale > 1) {
yClip.min = std::round(yClip.min * yscale);
yClip.max = std::round(yClip.max * yscale);
}
}
u32 texAddress = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect
u32 linestride = pvrrc.fb_W_LINESTRIDE * 8;
@ -1281,7 +1288,11 @@ void DX11Renderer::writeFramebufferToVRAM()
}
deviceContext->Unmap(stagingTex, 0);
WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP);
xClip.min = std::min(xClip.min, width - 1);
xClip.max = std::min(xClip.max, width - 1);
yClip.min = std::min(yClip.min, height - 1);
yClip.max = std::min(yClip.max, height - 1);
WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, xClip, yClip);
}
Renderer *rend_DirectX11()

View File

@ -672,7 +672,7 @@ struct DX11OITRenderer : public DX11Renderer
}
else
{
aspectRatio = getOutputFramebufferAspectRatio(pvrrc);
aspectRatio = getOutputFramebufferAspectRatio();
#ifndef LIBRETRO
deviceContext->OMSetRenderTargets(1, &theDX11Context.getRenderTarget().get(), nullptr);
displayFramebuffer();

View File

@ -1089,7 +1089,7 @@ bool D3DRenderer::Render()
}
else
{
aspectRatio = getOutputFramebufferAspectRatio(pvrrc);
aspectRatio = getOutputFramebufferAspectRatio();
displayFramebuffer();
DrawOSD(false);
frameRendered = true;
@ -1250,6 +1250,8 @@ void D3DRenderer::writeFramebufferToVRAM()
yscale = 1.f;
ComPtr<IDirect3DSurface9> fbSurface = framebufferSurface;
FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP;
FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP;
if (xscale != 1.f || yscale != 1.f)
{
@ -1276,6 +1278,11 @@ void D3DRenderer::writeFramebufferToVRAM()
width = scaledW;
height = scaledH;
fbSurface = fbScaledSurface;
// FB_Y_CLIP is applied before vscalefactor if > 1, so it must be scaled here
if (yscale > 1) {
yClip.min = std::round(yClip.min * yscale);
yClip.max = std::round(yClip.max * yscale);
}
}
u32 texAddress = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect
u32 linestride = pvrrc.fb_W_LINESTRIDE * 8;
@ -1305,7 +1312,11 @@ void D3DRenderer::writeFramebufferToVRAM()
}
verifyWin(offscreenSurface->UnlockRect());
WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP);
xClip.min = std::min(xClip.min, width - 1);
xClip.max = std::min(xClip.max, width - 1);
yClip.min = std::min(yClip.min, height - 1);
yClip.max = std::min(yClip.max, height - 1);
WriteFramebuffer<2, 1, 0, 3>(width, height, (u8 *)tmp_buf.data(), texAddress, pvrrc.fb_W_CTRL, linestride, xClip, yClip);
}
Renderer* rend_DirectX9()

View File

@ -941,7 +941,7 @@ static bool RenderFrame(int width, int height)
writeFramebufferToVRAM();
#ifndef LIBRETRO
else {
gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend);
gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio();
render_output_framebuffer();
}
#endif

View File

@ -747,6 +747,8 @@ void writeFramebufferToVRAM()
float yscale = 1024.f / pvrrc.scaler_ctl.vscalefactor;
if (std::abs(yscale - 1.f) < 0.01)
yscale = 1.f;
FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP;
FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP;
if (xscale != 1.f || yscale != 1.f)
{
@ -761,6 +763,7 @@ void writeFramebufferToVRAM()
gl.ofbo.framebuffer->bind(GL_READ_FRAMEBUFFER);
gl.fbscaling.framebuffer->bind(GL_DRAW_FRAMEBUFFER);
glcache.Disable(GL_SCISSOR_TEST);
glBlitFramebuffer(0, 0, width, height,
0, 0, scaledW, scaledH,
GL_COLOR_BUFFER_BIT, GL_LINEAR);
@ -768,6 +771,11 @@ void writeFramebufferToVRAM()
width = scaledW;
height = scaledH;
// FB_Y_CLIP is applied before vscalefactor if > 1, so it must be scaled here
if (yscale > 1) {
yClip.min = std::round(yClip.min * yscale);
yClip.max = std::round(yClip.max * yscale);
}
}
u32 tex_addr = pvrrc.fb_W_SOF1 & VRAM_MASK; // TODO SCALER_CTL.interlace, SCALER_CTL.fieldselect
@ -780,7 +788,11 @@ void writeFramebufferToVRAM()
u8 *p = (u8 *)tmp_buf.data();
glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, p);
WriteFramebuffer(width, height, p, tex_addr, pvrrc.fb_W_CTRL, linestride, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP);
xClip.min = std::min(xClip.min, width - 1);
xClip.max = std::min(xClip.max, width - 1);
yClip.min = std::min(yClip.min, height - 1);
yClip.max = std::min(yClip.max, height - 1);
WriteFramebuffer(width, height, p, tex_addr, pvrrc.fb_W_CTRL, linestride, xClip, yClip);
glBindFramebuffer(GL_FRAMEBUFFER, gl.ofbo.origFbo);
glCheck();

View File

@ -1382,7 +1382,7 @@ bool RenderFrame(int width, int height)
writeFramebufferToVRAM();
#ifndef LIBRETRO
else {
gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend);
gl.ofbo.aspectRatio = getOutputFramebufferAspectRatio();
render_output_framebuffer();
}
#endif

View File

@ -199,6 +199,8 @@ GLuint BindRTT(bool withDepthBuffer)
return gl.rtt.framebuffer->getFramebuffer();
}
constexpr u32 MAGIC_NUMBER = 0xbaadf00d;
void ReadRTTBuffer()
{
u32 w = pvrrc.getFramebufferWidth();
@ -242,6 +244,8 @@ void ReadRTTBuffer()
{
gl.rtt.directXfer = true;
glReadPixels(0, 0, w, h, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, dst);
if (dst == nullptr)
*(u32 *)&vram[tex_addr] = MAGIC_NUMBER;
}
else
{
@ -250,6 +254,7 @@ void ReadRTTBuffer()
{
gl.rtt.fb_w_ctrl = pvrrc.fb_W_CTRL;
glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_BYTE, 0);
*(u32 *)&vram[tex_addr] = MAGIC_NUMBER;
}
else
{
@ -303,13 +308,16 @@ static void readAsyncPixelBuffer(u32 addr)
return;
}
u16 *dst = (u16 *)&vram[tex_addr];
if (gl.rtt.directXfer)
// Can be read directly into vram
memcpy(dst, ptr, gl.rtt.width * gl.rtt.height * 2);
else
WriteTextureToVRam(gl.rtt.width, gl.rtt.height, ptr, dst, gl.rtt.fb_w_ctrl, gl.rtt.linestride);
// Make sure the vram region hasn't been overwritten already, otherwise we skip the copy
// (Worms World Party intro)
if (*(u32 *)dst == MAGIC_NUMBER)
{
if (gl.rtt.directXfer)
// Can be read directly into vram
memcpy(dst, ptr, gl.rtt.width * gl.rtt.height * 2);
else
WriteTextureToVRam(gl.rtt.width, gl.rtt.height, ptr, dst, gl.rtt.fb_w_ctrl, gl.rtt.linestride);
}
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
#endif

View File

@ -226,8 +226,13 @@ private:
{
if (renderingContext->scaler_ctl.hscale)
scale_x *= 2.f;
// vscalefactor is applied after scissoring if > 1
if (renderingContext->scaler_ctl.vscalefactor > 0x401 || renderingContext->scaler_ctl.vscalefactor < 0x400)
scale_y *= renderingContext->scaler_ctl.vscalefactor / 1024.f;
{
float vscalefactor = 1024.f / renderingContext->scaler_ctl.vscalefactor;
if (vscalefactor < 1)
scale_y /= vscalefactor;
}
}
}
@ -264,44 +269,28 @@ inline static void getScaledFramebufferSize(const rend_context& rendCtx, int& wi
}
}
inline static float getOutputFramebufferAspectRatio(const rend_context& rendCtx)
inline static float getOutputFramebufferAspectRatio()
{
int w,h;
getPvrFramebufferSize(rendCtx, w, h);
float width = w;
float height = h;
width *= 1 + VO_CONTROL.pixel_double;
width /= 1 + rendCtx.scaler_ctl.hscale;
height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0);
height *= 1 + (FB_R_CTRL.fb_line_double);
if (rendCtx.scaler_ctl.vscalefactor != 0
&& (rendCtx.scaler_ctl.vscalefactor > 1025 || rendCtx.scaler_ctl.vscalefactor < 1024)
&& SPG_CONTROL.interlace == 0)
{
if (config::EmulateFramebuffer)
height *= 1024.f / rendCtx.scaler_ctl.vscalefactor;
else if (rendCtx.scaler_ctl.vscalefactor > 1025)
height *= std::round(1024.f / rendCtx.scaler_ctl.vscalefactor);
}
float renderAR = width / height;
float aspectRatio;
if (config::Rotate90)
{
renderAR = 1 / renderAR;
aspectRatio = 3.f / 4.f;
}
else
{
if (config::Widescreen && !config::EmulateFramebuffer)
{
if (config::SuperWidescreen)
renderAR = (float)settings.display.width / settings.display.height;
aspectRatio = (float)settings.display.width / settings.display.height;
else
renderAR *= 4 / 3.f;
aspectRatio = 16.f / 9.f;
}
else
{
aspectRatio = 4.f / 3.f;
}
}
return renderAR * config::ScreenStretching / 100.f;
return aspectRatio * config::ScreenStretching / 100.f;
}
inline static void getDCFramebufferReadSize(int& width, int& height)
@ -326,16 +315,6 @@ inline static void getDCFramebufferReadSize(int& width, int& height)
inline static float getDCFramebufferAspectRatio()
{
int width;
int height;
getDCFramebufferReadSize(width, height);
width *= 1 + VO_CONTROL.pixel_double;
height *= 1 + (FB_R_CTRL.vclk_div == 0 && SPG_CONTROL.interlace == 0);
height *= 1 + (FB_R_CTRL.fb_line_double);
height *= 1 + SPG_CONTROL.interlace;
float aspectRatio = (float)width / height;
if (config::Rotate90)
aspectRatio = 1 / aspectRatio;
float aspectRatio = config::Rotate90 ? 3.f / 4.f : 4.f / 3.f;
return aspectRatio * config::ScreenStretching / 100.f;
}

View File

@ -114,6 +114,8 @@ void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, Frame
yscale = 1.f;
FramebufferAttachment *scaledFB = nullptr;
FB_X_CLIP_type xClip = pvrrc.fb_X_CLIP;
FB_Y_CLIP_type yClip = pvrrc.fb_Y_CLIP;
if (xscale != 1.f || yscale != 1.f)
{
@ -140,6 +142,11 @@ void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, Frame
finalFB = scaledFB;
width = scaledW;
height = scaledH;
// FB_Y_CLIP is applied before vscalefactor if > 1, so it must be scaled here
if (yscale > 1) {
yClip.min = std::round(yClip.min * yscale);
yClip.max = std::round(yClip.max * yscale);
}
}
vk::BufferImageCopy copyRegion(0, width, height, vk::ImageSubresourceLayers(vk::ImageAspectFlagBits::eColor, 0, 0, 1), vk::Offset3D(0, 0, 0),
@ -167,8 +174,12 @@ void BaseDrawer::scaleAndWriteFramebuffer(vk::CommandBuffer commandBuffer, Frame
tmpBuf.init(width, height);
finalFB->GetBufferData()->download(width * height * 4, tmpBuf.data());
xClip.min = std::min(xClip.min, width - 1);
xClip.max = std::min(xClip.max, width - 1);
yClip.min = std::min(yClip.min, height - 1);
yClip.max = std::min(yClip.max, height - 1);
WriteFramebuffer(width, height, (u8 *)tmpBuf.data(), pvrrc.fb_W_SOF1 & VRAM_MASK,
pvrrc.fb_W_CTRL, pvrrc.fb_W_LINESTRIDE * 8, pvrrc.fb_X_CLIP, pvrrc.fb_Y_CLIP);
pvrrc.fb_W_CTRL, pvrrc.fb_W_LINESTRIDE * 8, xClip, yClip);
delete scaledFB;
}
@ -744,7 +755,7 @@ void ScreenDrawer::EndRenderPass()
{
currentCommandBuffer.end();
commandPool->EndFrame();
aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend);
aspectRatio = getOutputFramebufferAspectRatio();
}
currentCommandBuffer = nullptr;
Drawer::EndRenderPass();

View File

@ -190,7 +190,7 @@ public:
{
currentCommandBuffer.end();
commandPool->EndFrame();
aspectRatio = getOutputFramebufferAspectRatio(_pvrrc->rend);
aspectRatio = getOutputFramebufferAspectRatio();
}
currentCommandBuffer = nullptr;
OITDrawer::EndFrame();

View File

@ -100,7 +100,7 @@ public:
bool Present() override
{
if (config::EmulateFramebuffer)
if (config::EmulateFramebuffer || framebufferRendered)
return presentFramebuffer();
else
return screenDrawer.PresentFrame();

View File

@ -91,7 +91,7 @@ public:
bool Present() override
{
if (config::EmulateFramebuffer)
if (config::EmulateFramebuffer || framebufferRendered)
return presentFramebuffer();
else
return screenDrawer.PresentFrame();

View File

@ -145,6 +145,8 @@ public:
CheckFogTexture();
CheckPaletteTexture();
texCommandBuffer.end();
if (!ctx->rend.isRTT)
framebufferRendered = false;
}
else
{
@ -246,6 +248,7 @@ public:
curTexture->SetCommandBuffer(nullptr);
commandBuffer.end();
fbCommandPool.EndFrame();
framebufferRendered = true;
}
protected:
@ -324,6 +327,7 @@ protected:
std::unique_ptr<QuadPipeline> quadPipeline;
std::unique_ptr<QuadDrawer> framebufferDrawer;
CommandPool fbCommandPool;
bool framebufferRendered = false;
#ifdef LIBRETRO
std::unique_ptr<VulkanOverlay> overlay;
#endif

View File

@ -64,7 +64,8 @@ public:
V26,
V27,
V28,
Current = V28,
V29,
Current = V29,
Next = Current + 1,
};