Added RTT support

This commit is contained in:
Flyinghead 2018-05-08 18:47:00 +02:00
parent 9f13ded502
commit 2e4ec3a1ed
5 changed files with 114 additions and 50 deletions

View File

@ -134,9 +134,17 @@ bool QueueRender(TA_context* ctx)
}
if (rqueue) {
tactx_Recycle(ctx);
fskip++;
return false;
// If the queued frame is for rendering to a texture, we can't skip it, so we wait
if (ctx->rend.isRTT) {
frame_finished.Wait();
verify(!rqueue);
}
else
{
tactx_Recycle(ctx);
fskip++;
return false;
}
}
frame_finished.Reset();

View File

@ -143,15 +143,22 @@ s32 SetTileClip(u32 val, bool set)
return 0;
if (set && clip_mode) {
csy = 480 - csy;
cey = 480 - cey;
float dc2s_scale_h = screen_height / 480.0f;
float ds2s_offs_x = (screen_width - dc2s_scale_h * 640) / 2;
csx = csx * dc2s_scale_h * scale_x + ds2s_offs_x;
cex = cex * dc2s_scale_h * scale_x + ds2s_offs_x;
csy = csy * dc2s_scale_h * scale_y;
cey = cey * dc2s_scale_h * scale_y;
glUniform4f(CurrentShader->pp_ClipTest, csx, cey, cex, csy);
csx *= scale_x;
csy *= scale_y;
cex *= scale_x;
cey *= scale_y;
if (!pvrrc.isRTT) {
float t = cey;
cey = 480 - csy;
csy = 480 - t;
float dc2s_scale_h = screen_height / 480.0f;
float ds2s_offs_x = (screen_width - dc2s_scale_h * 640) / 2;
csx = csx * dc2s_scale_h + ds2s_offs_x;
cex = cex * dc2s_scale_h + ds2s_offs_x;
csy = csy * dc2s_scale_h;
cey = cey * dc2s_scale_h;
}
glUniform4f(CurrentShader->pp_ClipTest, csx, csy, cex, cey);
}
return clip_mode;

View File

@ -1431,10 +1431,6 @@ void OSD_DRAW()
bool ProcessFrame(TA_context* ctx)
{
//disable RTTs for now ..
if (ctx->rend.isRTT)
return false;
ctx->rend_inuse.Lock();
ctx->MarkRend();
@ -1563,10 +1559,10 @@ bool RenderFrame()
//For some reason this produces wrong results
//so for now its hacked based like on the d3d code
/*
dc_width=FB_X_CLIP.max-FB_X_CLIP.min+1;
dc_height=FB_Y_CLIP.max-FB_Y_CLIP.min+1;
u32 pvr_stride=(FB_W_LINESTRIDE.stride)*8;
*/
dc_width = FB_X_CLIP.max - FB_X_CLIP.min + 1;
dc_height = FB_Y_CLIP.max - FB_Y_CLIP.min + 1;
}
scale_x = 1;
@ -1597,8 +1593,6 @@ bool RenderFrame()
dc_width *= scale_x;
dc_height *= scale_y;
glUseProgram(gl.modvol_shader.program);
/*
float vnear=0;
@ -1626,12 +1620,12 @@ bool RenderFrame()
/*
Handle Dc to screen scaling
*/
float dc2s_scale_h=screen_height/480.0f;
float ds2s_offs_x=(screen_width-dc2s_scale_h*640)/2;
float dc2s_scale_h = is_rtt ? (screen_width / dc_width) : (screen_height / 480.0);
float ds2s_offs_x = is_rtt ? 0 : ((screen_width - dc2s_scale_h * 640.0) / 2);
//-1 -> too much to left
ShaderUniforms.scale_coefs[0]=2.0f/(screen_width/dc2s_scale_h*scale_x);
ShaderUniforms.scale_coefs[1]=(is_rtt?2:-2)/dc_height;
ShaderUniforms.scale_coefs[1]=(is_rtt ? 2 : -2) / min(480.0, dc_height); // FIXME Is that min() right? due to global clipping (TA_GLOB_TILE_CLIP)?
ShaderUniforms.scale_coefs[2]=1-2*ds2s_offs_x/(screen_width);
ShaderUniforms.scale_coefs[3]=(is_rtt?1:-1);
@ -1641,7 +1635,7 @@ bool RenderFrame()
ShaderUniforms.depth_coefs[2]=0;
ShaderUniforms.depth_coefs[3]=0;
//printf("scale: %f, %f, %f, %f\n",scale_coefs[0],scale_coefs[1],scale_coefs[2],scale_coefs[3]);
//printf("scale: %f, %f, %f, %f\n",ShaderUniforms.scale_coefs[0],ShaderUniforms.scale_coefs[1],ShaderUniforms.scale_coefs[2],ShaderUniforms.scale_coefs[3]);
//VERT and RAM fog color constants
@ -1720,7 +1714,7 @@ bool RenderFrame()
case 2: //0x2 4444 ARGB 16 bit
channels=GL_RGBA;
format=GL_UNSIGNED_SHORT_5_5_5_1;
format=GL_UNSIGNED_SHORT_4_4_4_4;
break;
case 3://0x3 1555 ARGB 16 bit The alpha value is determined by comparison with the value of fb_alpha_threshold.
@ -1729,25 +1723,18 @@ bool RenderFrame()
break;
case 4: //0x4 888 RGB 24 bit packed
channels=GL_RGB;
format=GL_UNSIGNED_SHORT_5_6_5;
break;
case 5: //0x5 0888 KRGB 32 bit K is the value of fk_kval.
channels=GL_RGBA;
format=GL_UNSIGNED_SHORT_4_4_4_4;
break;
case 6: //0x6 8888 ARGB 32 bit
channels=GL_RGBA;
format=GL_UNSIGNED_SHORT_4_4_4_4;
break;
fprintf(stderr, "Unsupported render to texture format: %d\n", FB_W_CTRL.fb_packmode);
return false;
case 7: //7 invalid
die("7 is not valid");
break;
}
BindRTT(FB_W_SOF1&VRAM_MASK,FB_X_CLIP.max-FB_X_CLIP.min+1,FB_Y_CLIP.max-FB_Y_CLIP.min+1,channels,format);
//printf("RTT packmode=%d stride=%d - %d,%d -> %d,%d\n", FB_W_CTRL.fb_packmode, FB_W_LINESTRIDE.stride * 4,
// FB_X_CLIP.min, FB_Y_CLIP.min, FB_X_CLIP.max, FB_Y_CLIP.max);
BindRTT(FB_W_SOF1 & VRAM_MASK, dc_width, dc_height, channels, format);
}
else
{
@ -1755,11 +1742,12 @@ bool RenderFrame()
//Fix this in a proper way
glBindFramebuffer(GL_FRAMEBUFFER,0);
#endif
glViewport(0, 0, screen_width, screen_height);
}
//Clear depth
//Color is cleared by the bgp
if (settings.rend.WideScreen)
if (!is_rtt && settings.rend.WideScreen)
glClearColor(pvrrc.verts.head()->col[2]/255.0f,pvrrc.verts.head()->col[1]/255.0f,pvrrc.verts.head()->col[0]/255.0f,1.0f);
else
glClearColor(0,0,0,1.0f);
@ -1802,14 +1790,18 @@ bool RenderFrame()
printf("SCI: %f, %f, %f, %f\n", offs_x+pvrrc.fb_X_CLIP.min/scale_x,(pvrrc.fb_Y_CLIP.min/scale_y)*dc2s_scale_h,(pvrrc.fb_X_CLIP.max-pvrrc.fb_X_CLIP.min+1)/scale_x*dc2s_scale_h,(pvrrc.fb_Y_CLIP.max-pvrrc.fb_Y_CLIP.min+1)/scale_y*dc2s_scale_h);
#endif
glScissor(offs_x+pvrrc.fb_X_CLIP.min/scale_x,(pvrrc.fb_Y_CLIP.min/scale_y)*dc2s_scale_h,(pvrrc.fb_X_CLIP.max-pvrrc.fb_X_CLIP.min+1)/scale_x*dc2s_scale_h,(pvrrc.fb_Y_CLIP.max-pvrrc.fb_Y_CLIP.min+1)/scale_y*dc2s_scale_h);
if (settings.rend.WideScreen && pvrrc.fb_X_CLIP.min==0 && ((pvrrc.fb_X_CLIP.max+1)/scale_x==640) && (pvrrc.fb_Y_CLIP.min==0) && ((pvrrc.fb_Y_CLIP.max+1)/scale_y==480 ) )
if (!is_rtt && settings.rend.WideScreen && pvrrc.fb_X_CLIP.min==0 && ((pvrrc.fb_X_CLIP.max+1)/scale_x==640) && (pvrrc.fb_Y_CLIP.min==0) && ((pvrrc.fb_Y_CLIP.max+1)/scale_y==480 ) )
{
glDisable(GL_SCISSOR_TEST);
}
else
{
glScissor(offs_x + pvrrc.fb_X_CLIP.min / scale_x,
pvrrc.fb_Y_CLIP.min / scale_y * (is_rtt ? 1 : dc2s_scale_h),
(pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1) / scale_x * (is_rtt ? 1 : dc2s_scale_h),
(pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1) / scale_y * (is_rtt ? 1 : dc2s_scale_h));
glEnable(GL_SCISSOR_TEST);
}
//restore scale_x
scale_x /= scissoring_scale_x;
@ -1824,6 +1816,9 @@ bool RenderFrame()
KillTex=false;
if (is_rtt)
ReadRTTBuffer();
return !is_rtt;
}

View File

@ -118,6 +118,7 @@ void DoCleanup();
void SortPParams();
void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt);
void ReadRTTBuffer();
int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode,
u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset,
u32 pp_FogCtrl);

View File

@ -392,6 +392,9 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
rv.TexAddr=addy>>3;
// Round the framebuffer height up to the smallest power of two that is >= fbh (minimum 2)
int fbh2 = 2;
while (fbh2 < fbh)
fbh2 *= 2;
// Get the currently bound frame buffer object. On most platforms this just gives 0.
//glGetIntegerv(GL_FRAMEBUFFER_BINDING, &m_i32OriginalFbo);
@ -407,20 +410,20 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
*/
#ifdef GLES
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, fbw, fbh);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, fbw, fbh2);
#else
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, fbw, fbh);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, fbw, fbh2);
#endif
glGenRenderbuffers(1, &rv.stencilb);
glBindRenderbuffer(GL_RENDERBUFFER, rv.stencilb);
glRenderbufferStorage(GL_RENDERBUFFER, GL_STENCIL_INDEX8, fbw, fbh);
glRenderbufferStorage(GL_RENDERBUFFER, GL_STENCIL_INDEX8, fbw, fbh2);
// Create a texture for rendering to
glGenTextures(1, &rv.tex);
glBindTexture(GL_TEXTURE_2D, rv.tex);
glTexImage2D(GL_TEXTURE_2D, 0, channels, fbw, fbh, 0, channels, fmt, 0);
glTexImage2D(GL_TEXTURE_2D, 0, channels, fbw, fbh2, 0, channels, fmt, 0);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
@ -441,15 +444,65 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt)
GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER);
verify(uStatus == GL_FRAMEBUFFER_COMPLETE);
glViewport(0, 0, fbw, fbh); // TODO CLIP_X/Y min?
}
void ReadRTTBuffer() {
for (TexCacheIter i = TexCache.begin(); i != TexCache.end(); i++)
{
if (i->second.sa_tex == fb_rtt.TexAddr << 3)
i->second.dirty = FrameCount;
}
u32 w = pvrrc.fb_X_CLIP.max - pvrrc.fb_X_CLIP.min + 1;
u32 h = pvrrc.fb_Y_CLIP.max - pvrrc.fb_Y_CLIP.min + 1;
// FIXME stride
glPixelStorei(GL_PACK_ALIGNMENT, 1);
u16 *src = temp_tex_buffer;
u16 *dst = (u16 *)&vram[fb_rtt.TexAddr << 3];
switch(FB_W_CTRL.fb_packmode)
{
case 0: //0x0 0555 KRGB 16 bit (default) Bit 15 is the value of fb_kval[7].
// Untested: read into temp (5551) and copy/convert to 1555
glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1, temp_tex_buffer);
for (u32 i = 0; i < w * h; i++) {
*dst++ = ((*src++ >> 1) & 0x7FFF) | ((FB_W_CTRL.fb_kval & 0x80) << 8);
}
break;
case 1: //0x1 565 RGB 16 bit
// Can be read directly into vram
glReadPixels(0, 0, w, h, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, &vram[fb_rtt.TexAddr << 3]);
break;
case 2: //0x2 4444 ARGB 16 bit
// Untested: read into temp (rgba_4444) and copy/convert to argb_4444
glReadPixels(0, 0, w, h, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4, temp_tex_buffer);
for (u32 i = 0; i < w * h; i++) {
*dst++ = ((*src >> 4) & 0xFFF) | ((*src & 0xF) << 12);
src++;
}
break;
case 3://0x3 1555 ARGB 16 bit The alpha value is determined by comparison with the value of fb_alpha_threshold.
// TODO
memset(dst, '\0', w * h * 2);
break;
}
if (fb_rtt.fbo) { glDeleteFramebuffers(1,&fb_rtt.fbo); fb_rtt.fbo = 0; }
if (fb_rtt.tex) { glDeleteTextures(1,&fb_rtt.tex); fb_rtt.tex = 0; }
if (fb_rtt.depthb) { glDeleteRenderbuffers(1,&fb_rtt.depthb); fb_rtt.depthb = 0; }
if (fb_rtt.stencilb) { glDeleteRenderbuffers(1,&fb_rtt.stencilb); fb_rtt.stencilb = 0; }
}
GLuint gl_GetTexture(TSP tsp, TCW tcw)
{
if (tcw.TexAddr==fb_rtt.TexAddr && fb_rtt.tex)
{
return fb_rtt.tex;
}
//lookup texture
TextureCacheData* tf;
//= TexCache.Find(tcw.full,tsp.full);