Direct framebuffer writes support

This commit is contained in:
Flyinghead 2018-08-26 16:58:10 +02:00
parent bb3753dc86
commit e59d7eaf3d
10 changed files with 369 additions and 75 deletions

View File

@ -85,6 +85,13 @@ cResetEvent re(false,true);
int max_idx,max_mvo,max_op,max_pt,max_tr,max_vtx,max_modt, ovrn;
static bool render_called = false;
u32 fb1_watch_addr_start;
u32 fb1_watch_addr_end;
u32 fb2_watch_addr_start;
u32 fb2_watch_addr_end;
bool fb_dirty;
TA_context* _pvrrc;
void SetREP(TA_context* cntx);
@ -231,7 +238,7 @@ bool rend_frame(TA_context* ctx, bool draw_osd) {
}
bool proc = renderer->Process(ctx);
#if !defined(TARGET_NO_THREADS)
if (!proc || !ctx->rend.isRTT)
if (!proc || (!ctx->rend.isRTT && !ctx->rend.isRenderFramebuffer))
// If rendering to texture, continue locking until the frame is rendered
re.Set();
#endif
@ -332,14 +339,18 @@ void rend_resize(int width, int height) {
void rend_start_render()
{
render_called = true;
pend_rend = false;
bool is_rtt=(FB_W_SOF1& 0x1000000)!=0;
TA_context* ctx = tactx_Pop(CORE_CURRENT_CTX);
SetREP(ctx);
// No end of render interrupt when rendering the framebuffer
if (!ctx || !ctx->rend.isRenderFramebuffer)
SetREP(ctx);
if (ctx)
{
bool is_rtt=(FB_W_SOF1& 0x1000000)!=0 && !ctx->rend.isRenderFramebuffer;
if (fLogFrames || fCheckFrames) {
MD5Context md5;
u8 digest[16];
@ -386,7 +397,8 @@ void rend_start_render()
{
//tactx_Recycle(ctx); ctx = read_frame("frames/dcframe-SoA-intro-tr-autosort");
//printf("REP: %.2f ms\n",render_end_pending_cycles/200000.0);
FillBGP(ctx);
if (!ctx->rend.isRenderFramebuffer)
FillBGP(ctx);
ctx->rend.isRTT=is_rtt;
@ -551,5 +563,24 @@ void rend_term()
// Called on every vertical blank.
//
// When no render was started since the previous vblank (render_called is
// still false), the watched framebuffer memory has been written (fb_dirty)
// and FB_R_CTRL.fb_enable is set, a render of the current TA context is
// started with isRenderFramebuffer set so the framebuffer contents get drawn.
// Afterwards the per-frame flag is reset and the watched ranges are refreshed.
void rend_vblank()
{
	const bool present_direct_writes = !render_called && fb_dirty && FB_R_CTRL.fb_enable;

	if (present_direct_writes)
	{
		SetCurrentTARC(CORE_CURRENT_CTX);
		ta_ctx->rend.isRenderFramebuffer = true;
		rend_start_render();	// sets render_called, cleared again just below
		fb_dirty = false;
	}

	// Re-arm the "was a render started this frame" detection.
	render_called = false;
	check_framebuffer_write();
	os_DoEvents();
}
void check_framebuffer_write()
{
u32 fb_size = (FB_R_SIZE.fb_y_size + 1) * (FB_R_SIZE.fb_x_size + FB_R_SIZE.fb_modulus) / 4;
fb1_watch_addr_start = FB_R_SOF1;
fb1_watch_addr_end = FB_R_SOF1 + fb_size - 1;
fb2_watch_addr_start = FB_R_SOF2;
fb2_watch_addr_end = FB_R_SOF2 + fb_size - 1;
}

View File

@ -54,4 +54,12 @@ extern Renderer* renderer;
Renderer* rend_D3D11();
Renderer* rend_GLES2();
Renderer* rend_norend();
Renderer* rend_softrend();
Renderer* rend_softrend();
extern u32 fb1_watch_addr_start;
extern u32 fb1_watch_addr_end;
extern u32 fb2_watch_addr_start;
extern u32 fb2_watch_addr_end;
extern bool fb_dirty;
void check_framebuffer_write();

View File

@ -209,10 +209,24 @@ void DYNACALL pvr_write_area1_8(u32 addr,u8 data)
}
// 16-bit write to VRAM through area 1.
// Before storing, flags the framebuffer as dirty when the (masked) target
// address lies inside one of the two watched framebuffer ranges. The range
// test is skipped once fb_dirty is already set.
void DYNACALL pvr_write_area1_16(u32 addr,u16 data)
{
	if (!fb_dirty)
	{
		const u32 offset = addr & VRAM_MASK;
		const bool in_fb1 = offset >= fb1_watch_addr_start && offset < fb1_watch_addr_end;
		const bool in_fb2 = offset >= fb2_watch_addr_start && offset < fb2_watch_addr_end;

		if (in_fb1 || in_fb2)
			fb_dirty = true;
	}

	u16* target = (u16*)&vram[pvr_map32(addr) & VRAM_MASK];
	*target = data;
}
// 32-bit write to VRAM through area 1.
// Before storing, flags the framebuffer as dirty when the (masked) target
// address lies inside one of the two watched framebuffer ranges. The range
// test is skipped once fb_dirty is already set.
void DYNACALL pvr_write_area1_32(u32 addr,u32 data)
{
	if (!fb_dirty)
	{
		const u32 offset = addr & VRAM_MASK;
		const bool in_fb1 = offset >= fb1_watch_addr_start && offset < fb1_watch_addr_end;
		const bool in_fb2 = offset >= fb2_watch_addr_start && offset < fb2_watch_addr_end;

		if (in_fb1 || in_fb2)
			fb_dirty = true;
	}

	u32* target = (u32*)&vram[pvr_map32(addr) & VRAM_MASK];
	*target = data;
}

View File

@ -61,15 +61,33 @@ void pvr_WriteReg(u32 paddr,u32 data)
ta_vtx_ListCont();
}
if (addr == FB_R_CTRL_addr ||
addr == SPG_CONTROL_addr ||
addr == SPG_LOAD_addr)
if (addr == SPG_CONTROL_addr || addr == SPG_LOAD_addr)
{
PvrReg(addr,u32)=data;
CalculateSync();
if (PvrReg(addr, u32) != data)
{
PvrReg(addr, u32) = data;
CalculateSync();
}
return;
}
if (addr == FB_R_CTRL_addr)
{
bool vclk_div_changed = (PvrReg(addr, u32) ^ data) & (1 << 23);
PvrReg(addr, u32) = data;
if (vclk_div_changed)
CalculateSync();
return;
}
if (addr == FB_R_SIZE_addr)
{
if (PvrReg(addr, u32) != data)
{
PvrReg(addr, u32) = data;
fb_dirty = false;
check_framebuffer_write();
}
return;
}
if (addr == TA_YUV_TEX_BASE_addr)
{
PvrReg(addr, u32) = data;

View File

@ -110,7 +110,8 @@ struct rend_context
bool Overrun;
bool isRTT;
bool isRenderFramebuffer;
double early;
FB_X_CLIP_type fb_X_CLIP;
@ -140,6 +141,7 @@ struct rend_context
Overrun=false;
fZ_min= 1000000.0f;
fZ_max= 1.0f;
isRenderFramebuffer = false;
}
};

View File

@ -1243,17 +1243,15 @@ public:
__forceinline
static void AppendSpriteVertexA(TA_Sprite1A* sv)
{
u16* idx=vdrc.idx.Append(6);
u16* idx=vdrc.idx.Append(4);
u32 vbase=vdrc.verts.used();
idx[0]=vbase+0;
idx[1]=vbase+1;
idx[2]=vbase+2;
idx[3]=vbase+3;
idx[4]=vbase+3;
idx[5]=vbase+4;
CurrentPP->count=vdrc.idx.used()-CurrentPP->first-2;
CurrentPP->count=vdrc.idx.used()-CurrentPP->first;
Vertex* cv = vdrc.verts.Append(4);
@ -1465,7 +1463,7 @@ bool ta_parse_vdrc(TA_context* ctx)
{
TAFifo0.vdec_init();
for (int pass = 0; pass <= ctx->tad.render_pass_count; pass++)
for (int pass = 0; pass <= ctx->tad.render_pass_count; pass++)
{
ctx->MarkRend(pass);
vd_rc.proc_start = ctx->rend.proc_start;
@ -1489,8 +1487,21 @@ bool ta_parse_vdrc(TA_context* ctx)
render_pass->autosort = UsingAutoSort(pass);
render_pass->z_clear = ClearZBeforePass(pass);
}
rv = true; //whatever
bool empty_context = true;
// Don't draw empty contexts.
// Apparently the background plane is only drawn if at least one polygon is drawn.
for (PolyParam *pp = vd_rc.global_param_op.head() + 1;
empty_context && pp < vd_rc.global_param_op.LastPtr(0); pp++)
if (pp->count > 2)
empty_context = false;
for (PolyParam *pp = vd_rc.global_param_pt.head(); empty_context && pp < vd_rc.global_param_pt.LastPtr(0); pp++)
if (pp->count > 2)
empty_context = false;
for (PolyParam *pp = vd_rc.global_param_tr.head(); empty_context && pp < vd_rc.global_param_tr.LastPtr(0); pp++)
if (pp->count > 2)
empty_context = false;
rv = !empty_context;
}
bool overrun = ctx->rend.Overrun;
@ -1635,6 +1646,7 @@ void FillBGP(TA_context* ctx)
bgpp->pcw.Gouraud=bgpp->isp.Gouraud;
bgpp->pcw.Offset=bgpp->isp.Offset;
bgpp->pcw.Texture=bgpp->isp.Texture;
bgpp->pcw.Shadow = ISP_BACKGND_T.shadow;
float scale_x= (SCALER_CTL.hscale) ? 2.f:1.f; //if AA hack the hacked pos value hacks
for (int i=0;i<3;i++)

View File

@ -1078,3 +1078,66 @@ void DrawStrips()
previous_pass = current_pass;
}
}
// Draws the texture fbTextureId as a textured quad covering (0,0)-(w,h),
// then deletes that texture and resets fbTextureId to 0.
//
// w, h: extent of the quad, used directly as vertex coordinates.
//
// Scissor, depth, stencil, culling and blending are disabled through glcache
// before drawing; they are not restored here.
void DrawFramebuffer(float w, float h)
{
// Four corners of the quad with white base colour and zero offset colour.
// NOTE(review): initializer order is presumably x, y, z, col, spc, u, v —
// confirm against the Vertex declaration.
struct Vertex vertices[] = {
{ 0, h, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 },
{ 0, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 },
{ w, h, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 },
{ w, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 },
};
// Five strip indices: triangles (0,1,2), (1,2,1) [degenerate] and (2,1,3).
GLushort indices[] = { 0, 1, 2, 1, 3 };
glcache.Disable(GL_SCISSOR_TEST);
glcache.Disable(GL_DEPTH_TEST);
glcache.Disable(GL_STENCIL_TEST);
glcache.Disable(GL_CULL_FACE);
glcache.Disable(GL_BLEND);
ShaderUniforms.trilinear_alpha = 1.0;
// Select the pipeline shader variant for this draw; compile it on first use.
// NOTE(review): in the compile branch the program is neither bound nor are the
// uniforms set here — presumably CompilePipelineShader does both; confirm.
PipelineShader *shader = &gl.pogram_table[GetProgramID(0, 1, 1, 0, 1, 0, 0, 2, false, false)];
if (shader->program == -1)
CompilePipelineShader(shader);
else
{
glcache.UseProgram(shader->program);
ShaderUniforms.Set(shader);
}
glActiveTexture(GL_TEXTURE0);
glcache.BindTexture(GL_TEXTURE_2D, fbTextureId);
#ifndef GLES
glBindVertexArray(gl.vbo.vao);
#endif
// FIXME Unbinding the element array buffer makes glDrawElements fail on OSX,
// so the indices are uploaded into the shared index buffer instead.
//glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STREAM_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.geometry);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW);
// Set up the vertex attribute pointers (position, base colour, offset colour, UV)
glEnableVertexAttribArray(VERTEX_POS_ARRAY);
glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, x));
glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY);
glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, col));
glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY);
glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, spc));
glEnableVertexAttribArray(VERTEX_UV_ARRAY);
glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, u));
// FIXME Passing the client-side index array directly fails on OSX
// glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices);
// Indices are sourced from the bound element array buffer, starting at offset 0.
glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, (void *)0);
// The texture is single-use: it is deleted after every draw, and a zero id
// makes RenderFramebuffer generate a new one next time.
glcache.DeleteTextures(1, &fbTextureId);
fbTextureId = 0;
}

View File

@ -1470,9 +1470,16 @@ bool ProcessFrame(TA_context* ctx)
printf("Texture cache cleared\n");
}
if (!ta_parse_vdrc(ctx))
return false;
if (ctx->rend.isRenderFramebuffer)
{
RenderFramebuffer();
ctx->rend_inuse.Unlock();
}
else
{
if (!ta_parse_vdrc(ctx))
return false;
}
CollectCleanup();
if (ctx->rend.Overrun)
@ -1602,7 +1609,7 @@ bool RenderFrame()
float scissoring_scale_x = 1;
if (!is_rtt)
if (!is_rtt && !pvrrc.isRenderFramebuffer)
{
scale_x=fb_scale_x;
scale_y=fb_scale_y;
@ -1785,66 +1792,72 @@ bool RenderFrame()
//move vertex to gpu
//Main VBO
glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.geometry); glCheck();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs); glCheck();
glBufferData(GL_ARRAY_BUFFER,pvrrc.verts.bytes(),pvrrc.verts.head(),GL_STREAM_DRAW); glCheck();
glBufferData(GL_ELEMENT_ARRAY_BUFFER,pvrrc.idx.bytes(),pvrrc.idx.head(),GL_STREAM_DRAW);
//Modvol VBO
if (pvrrc.modtrig.used())
if (!pvrrc.isRenderFramebuffer)
{
glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.modvols); glCheck();
glBufferData(GL_ARRAY_BUFFER,pvrrc.modtrig.bytes(),pvrrc.modtrig.head(),GL_STREAM_DRAW); glCheck();
}
//Main VBO
glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.geometry); glCheck();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs); glCheck();
int offs_x=ds2s_offs_x+0.5f;
//this needs to be scaled
glBufferData(GL_ARRAY_BUFFER,pvrrc.verts.bytes(),pvrrc.verts.head(),GL_STREAM_DRAW); glCheck();
//not all scaling affects pixel operations, scale to adjust for that
scale_x *= scissoring_scale_x;
glBufferData(GL_ELEMENT_ARRAY_BUFFER,pvrrc.idx.bytes(),pvrrc.idx.head(),GL_STREAM_DRAW);
#if 0
//handy to debug really stupid render-not-working issues ...
printf("SS: %dx%d\n", screen_width, screen_height);
printf("SCI: %d, %f\n", pvrrc.fb_X_CLIP.max, dc2s_scale_h);
printf("SCI: %f, %f, %f, %f\n", offs_x+pvrrc.fb_X_CLIP.min/scale_x,(pvrrc.fb_Y_CLIP.min/scale_y)*dc2s_scale_h,(pvrrc.fb_X_CLIP.max-pvrrc.fb_X_CLIP.min+1)/scale_x*dc2s_scale_h,(pvrrc.fb_Y_CLIP.max-pvrrc.fb_Y_CLIP.min+1)/scale_y*dc2s_scale_h);
#endif
if (!wide_screen_on)
{
float width = (pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1) / scale_x;
float height = (pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1) / scale_y;
float min_x = pvrrc.fb_X_CLIP.min / scale_x;
float min_y = pvrrc.fb_Y_CLIP.min / scale_y;
if (!is_rtt)
//Modvol VBO
if (pvrrc.modtrig.used())
{
// Add x offset for aspect ratio > 4/3
min_x = min_x * dc2s_scale_h + offs_x;
// Invert y coordinates when rendering to screen
min_y = screen_height - (min_y + height) * dc2s_scale_h;
width *= dc2s_scale_h;
height *= dc2s_scale_h;
}
else if (settings.rend.RenderToTextureUpscale > 1 && !settings.rend.RenderToTextureBuffer)
{
min_x *= settings.rend.RenderToTextureUpscale;
min_y *= settings.rend.RenderToTextureUpscale;
width *= settings.rend.RenderToTextureUpscale;
height *= settings.rend.RenderToTextureUpscale;
glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.modvols); glCheck();
glBufferData(GL_ARRAY_BUFFER,pvrrc.modtrig.bytes(),pvrrc.modtrig.head(),GL_STREAM_DRAW); glCheck();
}
glScissor(min_x, min_y, width, height);
glcache.Enable(GL_SCISSOR_TEST);
int offs_x=ds2s_offs_x+0.5f;
//this needs to be scaled
//not all scaling affects pixel operations, scale to adjust for that
scale_x *= scissoring_scale_x;
#if 0
//handy to debug really stupid render-not-working issues ...
printf("SS: %dx%d\n", screen_width, screen_height);
printf("SCI: %d, %f\n", pvrrc.fb_X_CLIP.max, dc2s_scale_h);
printf("SCI: %f, %f, %f, %f\n", offs_x+pvrrc.fb_X_CLIP.min/scale_x,(pvrrc.fb_Y_CLIP.min/scale_y)*dc2s_scale_h,(pvrrc.fb_X_CLIP.max-pvrrc.fb_X_CLIP.min+1)/scale_x*dc2s_scale_h,(pvrrc.fb_Y_CLIP.max-pvrrc.fb_Y_CLIP.min+1)/scale_y*dc2s_scale_h);
#endif
if (!wide_screen_on)
{
float width = (pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1) / scale_x;
float height = (pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1) / scale_y;
float min_x = pvrrc.fb_X_CLIP.min / scale_x;
float min_y = pvrrc.fb_Y_CLIP.min / scale_y;
if (!is_rtt)
{
// Add x offset for aspect ratio > 4/3
min_x = min_x * dc2s_scale_h + offs_x;
// Invert y coordinates when rendering to screen
min_y = screen_height - (min_y + height) * dc2s_scale_h;
width *= dc2s_scale_h;
height *= dc2s_scale_h;
}
else if (settings.rend.RenderToTextureUpscale > 1 && !settings.rend.RenderToTextureBuffer)
{
min_x *= settings.rend.RenderToTextureUpscale;
min_y *= settings.rend.RenderToTextureUpscale;
width *= settings.rend.RenderToTextureUpscale;
height *= settings.rend.RenderToTextureUpscale;
}
glScissor(min_x, min_y, width, height);
glcache.Enable(GL_SCISSOR_TEST);
}
//restore scale_x
scale_x /= scissoring_scale_x;
DrawStrips();
}
else
{
DrawFramebuffer(dc_width, dc_height);
}
//restore scale_x
scale_x /= scissoring_scale_x;
DrawStrips();
#if HOST_OS==OS_WINDOWS
//Sleep(40); //to test MT stability
#endif

View File

@ -115,6 +115,7 @@ struct gl_ctx
};
extern gl_ctx gl;
extern GLuint fbTextureId;
GLuint gl_GetTexture(TSP tsp,TCW tcw);
struct text_info {
@ -131,6 +132,9 @@ void SortPParams(int first, int count);
void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt);
void ReadRTTBuffer();
void RenderFramebuffer();
void DrawFramebuffer(float w, float h);
int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode,
u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset,
u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap);

View File

@ -949,3 +949,132 @@ void rend_text_invl(vram_block* bl)
libCore_vramlock_Unlock_block_wb(bl);
}
GLuint fbTextureId;
// Converts the emulated framebuffer in VRAM to RGBA8888 and uploads it into
// the GL texture fbTextureId (generated here if it is currently 0).
//
// Geometry and pixel format come from FB_R_SIZE and FB_R_CTRL.fb_depth; the
// start address is FB_R_SOF1 or FB_R_SOF2 (see below). Does nothing when
// fb_x_size or fb_y_size is 0.
void RenderFramebuffer()
{
if (FB_R_SIZE.fb_x_size == 0 || FB_R_SIZE.fb_y_size == 0)
return;
int width = (FB_R_SIZE.fb_x_size + 1) << 1; // in 16-bit words
int height = FB_R_SIZE.fb_y_size + 1;
// Gap skipped at the end of every line, in the same unit as width.
int modulus = (FB_R_SIZE.fb_modulus - 1) << 1;
// Bytes per source pixel; for the 16-bit formats width/modulus already
// count pixels, for the others they are converted to pixels below.
int bpp;
switch (FB_R_CTRL.fb_depth)
{
case fbde_0555:
case fbde_565:
bpp = 2;
break;
case fbde_888:
bpp = 3;
width = (width * 2) / 3; // in pixels
modulus = (modulus * 2) / 3; // in pixels
break;
case fbde_C888:
bpp = 4;
width /= 2; // in pixels
modulus /= 2; // in pixels
break;
default:
die("Invalid framebuffer format\n");
bpp = 4;
break;
}
if (fbTextureId == 0)
fbTextureId = glcache.GenTexture();
glcache.BindTexture(GL_TEXTURE_2D, fbTextureId);
// Clamp at the edges and use linear filtering
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
// Read from FB_R_SOF2 when interlacing is on and fieldnum is 0, otherwise
// from FB_R_SOF1.
u32 addr = SPG_CONTROL.interlace && !SPG_STATUS.fieldnum ? FB_R_SOF2 : FB_R_SOF1;
PixelBuffer<u32> pb;
pb.init(width, height);
// Output is written bytewise as R, G, B, A (uploaded below as GL_RGBA /
// GL_UNSIGNED_BYTE), with alpha always 0xFF.
u8 *dst = (u8*)pb.data();
switch (FB_R_CTRL.fb_depth)
{
case fbde_0555: // 555 RGB
for (int y = 0; y < height; y++)
{
for (int i = 0; i < width; i++)
{
u16 src = pvr_read_area1_16(addr);
// 5-bit channels are shifted up to 8 bits; fb_concat is added into the low bits.
*dst++ = (((src >> 10) & 0x1F) << 3) + FB_R_CTRL.fb_concat;
*dst++ = (((src >> 5) & 0x1F) << 3) + FB_R_CTRL.fb_concat;
*dst++ = (((src >> 0) & 0x1F) << 3) + FB_R_CTRL.fb_concat;
*dst++ = 0xFF;
addr += bpp;
}
// Skip the inter-line gap
addr += modulus * bpp;
}
break;
case fbde_565: // 565 RGB
for (int y = 0; y < height; y++)
{
for (int i = 0; i < width; i++)
{
u16 src = pvr_read_area1_16(addr);
// Green has 6 bits, so only 2 low bits are filled (fb_concat >> 1).
*dst++ = (((src >> 11) & 0x1F) << 3) + FB_R_CTRL.fb_concat;
*dst++ = (((src >> 5) & 0x3F) << 2) + (FB_R_CTRL.fb_concat >> 1);
*dst++ = (((src >> 0) & 0x1F) << 3) + FB_R_CTRL.fb_concat;
*dst++ = 0xFF;
addr += bpp;
}
addr += modulus * bpp;
}
break;
case fbde_888: // 888 RGB
for (int y = 0; y < height; y++)
{
for (int i = 0; i < width; i++)
{
// Packed 3-byte pixels: for odd addresses the 32-bit read is done at
// addr - 1 and the three low bytes are used; otherwise the three high
// bytes of the word at addr are used.
if (addr & 1)
{
u32 src = pvr_read_area1_32(addr - 1);
*dst++ = src >> 16;
*dst++ = src >> 8;
*dst++ = src;
}
else
{
u32 src = pvr_read_area1_32(addr);
*dst++ = src >> 24;
*dst++ = src >> 16;
*dst++ = src >> 8;
}
*dst++ = 0xFF;
addr += bpp;
}
addr += modulus * bpp;
}
break;
case fbde_C888: // 0888 RGB
for (int y = 0; y < height; y++)
{
for (int i = 0; i < width; i++)
{
// The top byte of the source word is ignored.
u32 src = pvr_read_area1_32(addr);
*dst++ = src >> 16;
*dst++ = src >> 8;
*dst++ = src;
*dst++ = 0xFF;
addr += bpp;
}
addr += modulus * bpp;
}
break;
}
// Upload the converted image into the texture bound above.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, pb.data());
}