diff --git a/src/xenia/gpu/gl4/blitter.cc b/src/xenia/gpu/gl4/blitter.cc index 25af6446e..373bad6f2 100644 --- a/src/xenia/gpu/gl4/blitter.cc +++ b/src/xenia/gpu/gl4/blitter.cc @@ -50,7 +50,7 @@ struct VertexData { \n\ }; \n\ "; const std::string vs_source = header + - "\n\ + "\n\ layout(location = 0) uniform vec4 src_uv_params; \n\ out gl_PerVertex { \n\ vec4 gl_Position; \n\ @@ -240,6 +240,9 @@ void Blitter::Draw(GLuint src_texture, uint32_t src_x, uint32_t src_y, src_width / float(src_texture_width), src_height / float(src_texture_height)); + // Useful for seeing the entire framebuffer/etc: + // glProgramUniform4f(vertex_program_, 0, 0.0f, 0.0f, 1.0f, 1.0f); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index d458db9d8..dd1ca303f 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -405,6 +405,10 @@ bool CommandProcessor::SetupGL() { return false; } + glEnable(GL_SCISSOR_TEST); + glClipControl(GL_UPPER_LEFT, GL_NEGATIVE_ONE_TO_ONE); + glPointParameteri(GL_POINT_SPRITE_COORD_ORIGIN, GL_UPPER_LEFT); + return true; } @@ -568,6 +572,9 @@ void CommandProcessor::IssueSwap() { // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // HACK: just use whatever our current framebuffer is. swap_params.framebuffer_texture = last_framebuffer_texture_; + /*swap_params.framebuffer_texture = active_framebuffer_ + ? active_framebuffer_->color_targets[0] + : last_framebuffer_texture_;*/ // Guess frontbuffer dimensions. // Command buffer seems to set these right before the XE_SWAP. @@ -578,10 +585,6 @@ void CommandProcessor::IssueSwap() { swap_params.width = window_scissor_br & 0x7FFF - swap_params.x; swap_params.height = (window_scissor_br >> 16) & 0x7FFF - swap_params.y; - // This is just so that we draw reasonable garbage when drawing garbage. 
- swap_params.width = std::min(swap_params.width, 2560u); - swap_params.height = std::min(swap_params.height, 2560u); - PrepareForWait(); swap_handler_(swap_params); ReturnFromWait(); @@ -1712,8 +1715,6 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRenderTargets() { // TODO(benvanik): can we do this all named? // TODO(benvanik): do we want this on READ too? glBindFramebuffer(GL_DRAW_FRAMEBUFFER, cached_framebuffer->framebuffer); - - glViewport(0, 0, 2560, 2560); } return UpdateStatus::kMismatch; @@ -1771,62 +1772,16 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() { // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h // See r200UpdateWindow: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + uint32_t window_offset_x = 0; + uint32_t window_offset_y = 0; if ((pa_su_sc_mode_cntl >> 17) & 1) { uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - draw_batcher_.set_window_offset(window_offset & 0x7FFF, - (window_offset >> 16) & 0x7FFF); + window_offset_x = window_offset & 0x7FFF; + window_offset_y = (window_offset >> 16) & 0x7FFF; + draw_batcher_.set_window_offset(window_offset_x, window_offset_y); } else { draw_batcher_.set_window_offset(0, 0); } - uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; - uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; - draw_batcher_.set_window_scissor( - window_scissor_tl & 0x7FFF, (window_scissor_tl >> 16) & 0x7FFF, - window_scissor_br & 0x7FFF, (window_scissor_br >> 16) & 0x7FFF); - - // HACK: no clue where to get these values. - // RB_SURFACE_INFO - uint32_t surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; - uint32_t surface_pitch = surface_info & 0x3FFF; - auto surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3); - // TODO(benvanik): ?? 
- float window_width_scalar = 1; - float window_height_scalar = 1; - switch (surface_msaa) { - case MsaaSamples::k1X: - break; - case MsaaSamples::k2X: - window_width_scalar = 2; - break; - case MsaaSamples::k4X: - window_width_scalar = 2; - window_height_scalar = 2; - break; - } - draw_batcher_.set_window_scalar(window_width_scalar, window_height_scalar); - - // Whether each of the viewport settings is enabled. - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; - bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; - bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; - bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; - bool vport_yoffset_enable = (vte_control & (1 << 3)) > 0; - bool vport_zscale_enable = (vte_control & (1 << 4)) > 0; - bool vport_zoffset_enable = (vte_control & (1 << 5)) > 0; - assert_true(vport_xscale_enable == vport_yscale_enable == - vport_zscale_enable == vport_xoffset_enable == - vport_yoffset_enable == vport_zoffset_enable); - - // Viewport scaling. Only enabled if the flags are all set. - draw_batcher_.set_viewport_offset( - vport_xoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 : 0, - vport_yoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 : 0, - vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0); - draw_batcher_.set_viewport_scale( - vport_xscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 : 1, - vport_yscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 : 1, - vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1); // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. @@ -1835,6 +1790,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() { // = false: multiply the Z coordinate by 1/W0. // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to // get 1/W0. 
+ uint32_t vte_control = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; draw_batcher_.set_vtx_fmt((vte_control >> 8) & 0x1 ? 1.0f : 0.0f, (vte_control >> 9) & 0x1 ? 1.0f : 0.0f, (vte_control >> 10) & 0x1 ? 1.0f : 0.0f); @@ -1844,7 +1800,32 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() { bool dirty = false; // dirty |= SetShadowRegister(state_regs.pa_cl_clip_cntl, // XE_GPU_REG_PA_CL_CLIP_CNTL); + dirty |= + SetShadowRegister(state_regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= + SetShadowRegister(state_regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); + dirty |= SetShadowRegister(state_regs.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + dirty |= SetShadowRegister(state_regs.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + dirty |= SetShadowRegister(state_regs.pa_cl_vport_xoffset, + XE_GPU_REG_PA_CL_VPORT_XOFFSET); + dirty |= SetShadowRegister(state_regs.pa_cl_vport_yoffset, + XE_GPU_REG_PA_CL_VPORT_YOFFSET); + dirty |= SetShadowRegister(state_regs.pa_cl_vport_zoffset, + XE_GPU_REG_PA_CL_VPORT_ZOFFSET); + dirty |= SetShadowRegister(state_regs.pa_cl_vport_xscale, + XE_GPU_REG_PA_CL_VPORT_XSCALE); + dirty |= SetShadowRegister(state_regs.pa_cl_vport_yscale, + XE_GPU_REG_PA_CL_VPORT_YSCALE); + dirty |= SetShadowRegister(state_regs.pa_cl_vport_zscale, + XE_GPU_REG_PA_CL_VPORT_ZSCALE); if (!dirty) { + if ((state_regs.pa_cl_vte_cntl & (1 << 0)) > 0) { + draw_batcher_.set_window_scalar(1.0f, 1.0f); + } else { + draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); + } return UpdateStatus::kCompatible; } @@ -1861,6 +1842,69 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() { // glClipControl(GL_LOWER_LEFT, GL_NEGATIVE_ONE_TO_ONE); //} + GLint ws_x = state_regs.pa_sc_window_scissor_tl & 0x7FFF; + GLint ws_y = (state_regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; + GLsizei ws_w = (state_regs.pa_sc_window_scissor_br & 0x7FFF) - ws_x; + GLsizei ws_h = 
((state_regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; + glScissorIndexed(0, ws_x, ws_y, ws_w, ws_h); + + // HACK: no clue where to get these values. + // RB_SURFACE_INFO + auto surface_msaa = + static_cast<MsaaSamples>((state_regs.rb_surface_info >> 16) & 0x3); + // TODO(benvanik): ?? + float window_width_scalar = 1; + float window_height_scalar = 1; + switch (surface_msaa) { + case MsaaSamples::k1X: + break; + case MsaaSamples::k2X: + // window_width_scalar = 2; + break; + case MsaaSamples::k4X: + window_width_scalar = 2; + window_height_scalar = 2; + break; + } + + // Whether each of the viewport settings are enabled. + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + bool vport_xscale_enable = (state_regs.pa_cl_vte_cntl & (1 << 0)) > 0; + bool vport_xoffset_enable = (state_regs.pa_cl_vte_cntl & (1 << 1)) > 0; + bool vport_yscale_enable = (state_regs.pa_cl_vte_cntl & (1 << 2)) > 0; + bool vport_yoffset_enable = (state_regs.pa_cl_vte_cntl & (1 << 3)) > 0; + bool vport_zscale_enable = (state_regs.pa_cl_vte_cntl & (1 << 4)) > 0; + bool vport_zoffset_enable = (state_regs.pa_cl_vte_cntl & (1 << 5)) > 0; + assert_true(vport_xscale_enable == vport_yscale_enable == + vport_zscale_enable == vport_xoffset_enable == + vport_yoffset_enable == vport_zoffset_enable); + + if (vport_xscale_enable) { + float texel_offset_x = 0.0f; + float texel_offset_y = 0.0f; + float vox = vport_xoffset_enable ? state_regs.pa_cl_vport_xoffset : 0; + float voy = vport_yoffset_enable ? state_regs.pa_cl_vport_yoffset : 0; + float voz = vport_zoffset_enable ? state_regs.pa_cl_vport_zoffset : 0; + float vsx = vport_xscale_enable ? state_regs.pa_cl_vport_xscale : 1; + float vsy = vport_yscale_enable ? state_regs.pa_cl_vport_yscale : 1; + float vsz = vport_zscale_enable ? 
state_regs.pa_cl_vport_zscale : 1; + float vpw = 2 * window_width_scalar * vsx; + float vph = -2 * window_height_scalar * vsy; + float vpx = window_width_scalar * vox - vpw / 2; + float vpy = window_height_scalar * voy - vph / 2; + glViewportIndexedf(0, vpx + texel_offset_x, vpy + texel_offset_y, vpw, vph); + draw_batcher_.set_window_scalar(1.0f, 1.0f); + } else { + float texel_offset_x = 0.0f; + float texel_offset_y = 0.0f; + float vpw = 2 * 2560.0f * window_width_scalar; + float vph = 2 * 2560.0f * window_height_scalar; + float vpx = -2560.0f * window_width_scalar; + float vpy = -2560.0f * window_height_scalar; + glViewportIndexedf(0, vpx + texel_offset_x, vpy + texel_offset_y, vpw, vph); + draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); + } + return UpdateStatus::kMismatch; } @@ -1883,9 +1927,11 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() { draw_batcher_.Flush(DrawBatcher::FlushMode::kStateChange); // Scissoring. + // TODO(benvanik): is this used? we are using scissoring for window scissor. if (regs.pa_sc_screen_scissor_tl != 0 && regs.pa_sc_screen_scissor_br != 0x20002000) { - glEnable(GL_SCISSOR_TEST); + assert_always(); + // glEnable(GL_SCISSOR_TEST); // TODO(benvanik): signed? int32_t screen_scissor_x = regs.pa_sc_screen_scissor_tl & 0x7FFF; int32_t screen_scissor_y = (regs.pa_sc_screen_scissor_tl >> 16) & 0x7FFF; @@ -1896,7 +1942,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() { glScissor(screen_scissor_x, screen_scissor_y, screen_scissor_w, screen_scissor_h); } else { - glDisable(GL_SCISSOR_TEST); + // glDisable(GL_SCISSOR_TEST); } switch (regs.pa_su_sc_mode_cntl & 0x3) { @@ -1912,7 +1958,6 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() { glCullFace(GL_BACK); break; } - if (regs.pa_su_sc_mode_cntl & 0x4) { glFrontFace(GL_CW); } else { @@ -2486,7 +2531,7 @@ bool CommandProcessor::IssueCopy() { // but I can't seem to find something similar. 
// Maybe scissor rect/window offset? uint32_t x = 0; - uint32_t y = 2560 - copy_dest_height; + uint32_t y = 0; uint32_t w = copy_dest_pitch; uint32_t h = copy_dest_height; diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index 0b0e54f08..cdc57ff1c 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -298,7 +298,17 @@ class CommandProcessor { void Reset() { std::memset(this, 0, sizeof(*this)); } } update_render_targets_regs_; struct UpdateViewportStateRegisters { - uint32_t pa_cl_clip_cntl; + // uint32_t pa_cl_clip_cntl; + uint32_t rb_surface_info; + uint32_t pa_cl_vte_cntl; + uint32_t pa_sc_window_scissor_tl; + uint32_t pa_sc_window_scissor_br; + float pa_cl_vport_xoffset; + float pa_cl_vport_yoffset; + float pa_cl_vport_zoffset; + float pa_cl_vport_xscale; + float pa_cl_vport_yscale; + float pa_cl_vport_zscale; UpdateViewportStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } diff --git a/src/xenia/gpu/gl4/draw_batcher.h b/src/xenia/gpu/gl4/draw_batcher.h index b1c52dbf5..4b2fb960b 100644 --- a/src/xenia/gpu/gl4/draw_batcher.h +++ b/src/xenia/gpu/gl4/draw_batcher.h @@ -81,27 +81,10 @@ class DrawBatcher { active_draw_.header->window_offset.x = float(x); active_draw_.header->window_offset.y = float(y); } - void set_window_scissor(uint32_t left, uint32_t top, uint32_t right, - uint32_t bottom) { - active_draw_.header->window_scissor.x = float(left); - active_draw_.header->window_scissor.y = float(top); - active_draw_.header->window_scissor.z = float(right); - active_draw_.header->window_scissor.w = float(bottom); - } void set_window_scalar(float width_scalar, float height_scalar) { active_draw_.header->window_offset.z = width_scalar; active_draw_.header->window_offset.w = height_scalar; } - void set_viewport_offset(float offset_x, float offset_y, float offset_z) { - active_draw_.header->viewport_offset.x = offset_x; - 
active_draw_.header->viewport_offset.y = offset_y; - active_draw_.header->viewport_offset.z = offset_z; - } - void set_viewport_scale(float scale_x, float scale_y, float scale_z) { - active_draw_.header->viewport_scale.x = scale_x; - active_draw_.header->viewport_scale.y = scale_y; - active_draw_.header->viewport_scale.z = scale_z; - } void set_vtx_fmt(float xy, float z, float w) { active_draw_.header->vtx_fmt.x = xy; active_draw_.header->vtx_fmt.y = xy; @@ -193,9 +176,6 @@ class DrawBatcher { // This must match GL4Shader's header. struct CommonHeader { float4 window_offset; // tx,ty,sx,sy - float4 window_scissor; // x0,y0,x1,y1 - float4 viewport_offset; // tx,ty,tz,? - float4 viewport_scale; // sx,sy,sz,? float4 vtx_fmt; // float4 alpha_test; // alpha test enable, func, ref, ? diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index 14ff61438..7f78dcab3 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -54,9 +54,6 @@ std::string GL4Shader::GetHeader() { // This must match DrawBatcher::CommonHeader. "struct StateData {\n" " vec4 window_offset;\n" - " vec4 window_scissor;\n" - " vec4 viewport_offset;\n" - " vec4 viewport_scale;\n" " vec4 vtx_fmt;\n" " vec4 alpha_test;\n" // TODO(benvanik): variable length. @@ -185,40 +182,21 @@ bool GL4Shader::PrepareVertexShader( } std::string apply_transform = - "vec4 applyTransform(const in StateData state, vec4 Pclip) {\n" - " // Clip->NDC with perspective divide.\n" - " // We do this here because it's programmable on the 360.\n" - " if (state.vtx_fmt.w != 0.0) {\n" - " // w is not 1/W0. 
Common case.\n" - " Pclip.w = 1.0 / Pclip.w;\n" + "vec4 applyTransform(const in StateData state, vec4 pos) {\n" + " if (state.vtx_fmt.w == 0.0) {\n" + " // w is 1/W0, so fix it.\n" + " pos.w = 1.0 / pos.w;\n" " }\n" - " vec3 Pndc = Pclip.xyz;\n" - " if (state.vtx_fmt.x == 0.0) {\n" - " // Need to multiply by 1/W0.\n" - " Pndc.xy *= Pclip.w;\n" + " if (state.vtx_fmt.x != 0.0) {\n" + " // Already multiplied by 1/W0, so pull it out.\n" + " pos.xy /= pos.w;\n" " }\n" - " if (state.vtx_fmt.z == 0.0) {\n" - " // Need to multiply by 1/W0.\n" - " Pndc.z *= Pclip.w;\n" + " if (state.vtx_fmt.z != 0.0) {\n" + " // Already multiplied by 1/W0, so pull it out.\n" + " pos.z /= pos.w;\n" " }\n" - " // Perform clipping, lest we get weird geometry.\n" - // TODO(benvanik): is this right? dxclip mode may change this? - " Pclip.w = 1.0;\n" - " if (Pndc.z < gl_DepthRange.near || Pndc.z > gl_DepthRange.far) {\n" - " // Clipped! w=0 will kill it in the hardware persp divide.\n" - " Pclip.w = 0.0;\n" - " }\n" - " vec3 Pwnd = Pndc.xyz * state.viewport_scale.xyz + \n" - " state.viewport_offset.xyz;\n" - " // 1px padding required for pixel offset issue.\n" - " Pwnd.xy += 1.0;\n" - " vec3 Pwnd2 = vec3(Pwnd.xy * state.window_offset.zw + \n" - " state.window_offset.xy, Pwnd.z);\n" - " Pwnd2.y = 2560.0 - Pwnd2.y;\n" - " vec3 fb_offset = vec3(2560.0 / 2.0, 2560.0 / 2.0, 0.0);\n" - " vec3 fb_scale = vec3(2560.0 / 2.0, 2560.0 / 2.0, 1.0);\n" - " vec3 Pndc2 = (Pwnd2.xyz - fb_offset.xyz) / fb_scale.xyz;\n" - " return vec4(Pndc2.xy, Pndc2.z, Pclip.w);\n" + " pos.xy *= state.window_offset.zw;\n" + " return pos;\n" "}\n"; std::string source = GetHeader() + apply_transform + @@ -275,13 +253,6 @@ bool GL4Shader::PreparePixelShader( "void processFragment(const in StateData state);\n" "void main() {\n" + " const StateData state = states[draw_id];\n" - " // Custom scissoring. 
Doing it here avoids the need for glScissor.\n" - " if (gl_FragCoord.x < state.window_scissor.x ||\n" - " gl_FragCoord.x > state.window_scissor.z ||\n" - " gl_FragCoord.y < state.window_scissor.y ||\n" - " gl_FragCoord.y > state.window_scissor.w) {\n" - " discard;\n" - " }\n" " processFragment(state);\n" "}\n"; diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.cc b/src/xenia/gpu/gl4/gl4_shader_translator.cc index 712d6d094..21ab96e8b 100644 --- a/src/xenia/gpu/gl4/gl4_shader_translator.cc +++ b/src/xenia/gpu/gl4/gl4_shader_translator.cc @@ -855,6 +855,26 @@ bool GL4ShaderTranslator::TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu) { return true; } +bool GL4ShaderTranslator::TranslateALU_RECIP_CLAMP(const instr_alu_t& alu) { + // if result == -inf result = -flt_max + // if result == +inf result = flt_max + BeginAppendScalarOp(alu); + Append("1.0 / "); + AppendScalarOpSrcReg(alu, 3); + EndAppendScalarOp(alu); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_RECIP_FF(const instr_alu_t& alu) { + // if result == -inf result = -zero + // if result == +inf result = zero + BeginAppendScalarOp(alu); + Append("1.0 / "); + AppendScalarOpSrcReg(alu, 3); + EndAppendScalarOp(alu); + return true; +} + bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { BeginAppendScalarOp(alu); Append("1.0 / "); @@ -863,10 +883,34 @@ bool GL4ShaderTranslator::TranslateALU_RECIP_IEEE(const instr_alu_t& alu) { return true; } +bool GL4ShaderTranslator::TranslateALU_RECIPSQ_CLAMP( + const ucode::instr_alu_t& alu) { + // if result == -inf result = -flt_max + // if result == +inf result = flt_max + BeginAppendScalarOp(alu); + Append("inversesqrt("); + AppendScalarOpSrcReg(alu, 3); + Append(".x)"); + EndAppendScalarOp(alu); + return true; +} + +bool GL4ShaderTranslator::TranslateALU_RECIPSQ_FF( + const ucode::instr_alu_t& alu) { + // if result == -inf result = -zero + // if result == +inf result = zero + BeginAppendScalarOp(alu); + Append("inversesqrt("); + 
AppendScalarOpSrcReg(alu, 3); + Append(".x)"); + EndAppendScalarOp(alu); + return true; +} + bool GL4ShaderTranslator::TranslateALU_RECIPSQ_IEEE( const ucode::instr_alu_t& alu) { BeginAppendScalarOp(alu); - Append("1.0 / sqrt("); + Append("inversesqrt("); AppendScalarOpSrcReg(alu, 3); Append(".x)"); EndAppendScalarOp(alu); @@ -1097,11 +1141,11 @@ bool GL4ShaderTranslator::TranslateALU(const instr_alu_t* alu, int sync) { ALU_INSTR_IMPL(EXP_IEEE, 1), // 14 ALU_INSTR(LOG_CLAMP, 1), // 15 ALU_INSTR_IMPL(LOG_IEEE, 1), // 16 - ALU_INSTR(RECIP_CLAMP, 1), // 17 - ALU_INSTR(RECIP_FF, 1), // 18 + ALU_INSTR_IMPL(RECIP_CLAMP, 1), // 17 + ALU_INSTR_IMPL(RECIP_FF, 1), // 18 ALU_INSTR_IMPL(RECIP_IEEE, 1), // 19 - ALU_INSTR(RECIPSQ_CLAMP, 1), // 20 - ALU_INSTR(RECIPSQ_FF, 1), // 21 + ALU_INSTR_IMPL(RECIPSQ_CLAMP, 1), // 20 + ALU_INSTR_IMPL(RECIPSQ_FF, 1), // 21 ALU_INSTR_IMPL(RECIPSQ_IEEE, 1), // 22 ALU_INSTR(MOVAs, 1), // 23 ALU_INSTR(MOVA_FLOORs, 1), // 24 diff --git a/src/xenia/gpu/gl4/gl4_shader_translator.h b/src/xenia/gpu/gl4/gl4_shader_translator.h index 99819a132..224aacf6f 100644 --- a/src/xenia/gpu/gl4/gl4_shader_translator.h +++ b/src/xenia/gpu/gl4/gl4_shader_translator.h @@ -110,7 +110,11 @@ class GL4ShaderTranslator { bool TranslateALU_FLOORs(const ucode::instr_alu_t& alu); bool TranslateALU_EXP_IEEE(const ucode::instr_alu_t& alu); bool TranslateALU_LOG_IEEE(const ucode::instr_alu_t& alu); + bool TranslateALU_RECIP_CLAMP(const ucode::instr_alu_t& alu); + bool TranslateALU_RECIP_FF(const ucode::instr_alu_t& alu); bool TranslateALU_RECIP_IEEE(const ucode::instr_alu_t& alu); + bool TranslateALU_RECIPSQ_CLAMP(const ucode::instr_alu_t& alu); + bool TranslateALU_RECIPSQ_FF(const ucode::instr_alu_t& alu); bool TranslateALU_RECIPSQ_IEEE(const ucode::instr_alu_t& alu); // ... bool TranslateALU_SUBs(const ucode::instr_alu_t& alu);