diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index 5a4fc7f68..c1446bdbb 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -1521,12 +1521,18 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { auto state_data = draw_command->state_data; + uint32_t mode_control = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; + // Window parameters. // See r200UpdateWindow: // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - state_data->window_offset.x = float(window_offset & 0x7FFF); - state_data->window_offset.y = float((window_offset >> 16) & 0x7FFF); + if ((mode_control >> 17) & 1) { + uint32_t window_offset = regs[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; + state_data->window_offset.x = float(window_offset & 0x7FFF); + state_data->window_offset.y = float((window_offset >> 16) & 0x7FFF); + } else { + state_data->window_offset.x = state_data->window_offset.y = 0; + } uint32_t window_scissor_tl = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; uint32_t window_scissor_br = regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; state_data->window_scissor.x = float(window_scissor_tl & 0x7FFF); @@ -1540,19 +1546,21 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { uint32_t surface_pitch = surface_info & 0x3FFF; auto surface_msaa = static_cast((surface_info >> 16) & 0x3); // TODO(benvanik): ?? - float viewport_width_scalar = 1; - float viewport_height_scalar = 1; + float window_width_scalar = 1; + float window_height_scalar = 1; switch (surface_msaa) { case MsaaSamples::k1X: break; case MsaaSamples::k2X: - viewport_width_scalar /= 2; + window_width_scalar = 2; break; case MsaaSamples::k4X: - viewport_width_scalar /= 2; - viewport_height_scalar /= 2; + window_width_scalar = 2; + window_height_scalar = 2; break; } + state_data->window_offset.z = window_width_scalar; + state_data->window_offset.w = window_height_scalar; glViewport(0, 0, 1280, 720); // Whether each of the viewport settings is enabled. @@ -1584,27 +1592,26 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { vport_zscale_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1; // 1 state_data->viewport_offset.z = vport_zoffset_enable ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0; // 0 + + // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0. // = false: multiply the X, Y coordinates by 1/W0. - bool vtx_xy_fmt = (vte_control >> 8) & 0x1; + state_data->vtx_fmt.x = state_data->vtx_fmt.y = + (vte_control >> 8) & 0x1 ? 1.0f : 0.0f; // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. // = false: multiply the Z coordinate by 1/W0. - bool vtx_z_fmt = (vte_control >> 9) & 0x1; + state_data->vtx_fmt.z = (vte_control >> 9) & 0x1 ? 1.0f : 0.0f; // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to // get 1/W0. - bool vtx_w0_fmt = (vte_control >> 10) & 0x1; - // TODO(benvanik): pass to shaders? disable transform? etc? - if (vtx_xy_fmt) { - state_data->pretransform.x = 0; - state_data->pretransform.y = 0; - state_data->pretransform.z = viewport_width_scalar; - state_data->pretransform.w = viewport_height_scalar; - } else { - state_data->pretransform.x = -1.0; - state_data->pretransform.y = 1.0; - state_data->pretransform.z = 1280.0f / 2.0f * viewport_width_scalar; - state_data->pretransform.w = -720.0f / 2.0f * viewport_height_scalar; - } + state_data->vtx_fmt.w = (vte_control >> 10) & 0x1 ? 1.0f : 0.0f; + + // Clipping. + // https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/14/yamato_genenum.h#L1587 + uint32_t clip_control = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; + bool clip_enabled = ((clip_control >> 17) & 0x1) == 0; + //assert_true(clip_enabled); + bool dx_clip = ((clip_control >> 20) & 0x1) == 0x1; + //assert_true(dx_clip); // Scissoring. int32_t screen_scissor_tl = regs[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL].u32; @@ -1624,7 +1631,6 @@ bool CommandProcessor::UpdateState(DrawCommand* draw_command) { } // Rasterizer state. - uint32_t mode_control = regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32; if (draw_command->prim_type == PrimitiveType::kRectangleList) { // Rect lists aren't culled. There may be other things they skip too. glDisable(GL_CULL_FACE); diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index f404365f0..0d5a39c64 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -50,9 +50,9 @@ struct UniformDataBlock { }; }; - float4 pretransform; - float4 window_offset; // tx,ty,rt_w,rt_h + float4 window_offset; // tx,ty,sx,sy float4 window_scissor; // x0,y0,x1,y1 + float4 vtx_fmt; float4 viewport_offset; // tx,ty,tz,? float4 viewport_scale; // sx,sy,sz,? // TODO(benvanik): vertex format xyzw? diff --git a/src/xenia/gpu/gl4/gl4_shader.cc b/src/xenia/gpu/gl4/gl4_shader.cc index 57ba19db6..90c6178eb 100644 --- a/src/xenia/gpu/gl4/gl4_shader.cc +++ b/src/xenia/gpu/gl4/gl4_shader.cc @@ -40,9 +40,9 @@ const std::string header = "layout(std140, column_major) uniform;\n" "layout(std430, column_major) buffer;\n" "struct StateData {\n" - " vec4 pretransform;\n" " vec4 window_offset;\n" " vec4 window_scissor;\n" + " vec4 vtx_fmt;\n" " vec4 viewport_offset;\n" " vec4 viewport_scale;\n" " vec4 alpha_test;\n" @@ -67,20 +67,45 @@ bool GL4Shader::PrepareVertexShader( } has_prepared_ = true; - std::string apply_viewport = - "vec4 applyViewport(vec4 pos) {\n" - " pos.xy = pos.xy / state.pretransform.zw + state.pretransform.xy;\n" + std::string apply_transform = + "vec4 applyTransform(vec4 pos) {\n" + " // Clip->NDC with perspective divide.\n" + " // We do this here because it's programmable on the 360.\n" + " float w = pos.w;\n" + " if (state.vtx_fmt.w == 0.0) {\n" + " // w is not 1/W0. Common case.\n" + " w = 1.0 / w;\n" + " }\n" + " if (state.vtx_fmt.x == 0.0) {\n" + " // Need to multiply by 1/W0.\n" + " pos.xy /= w;\n" + " }\n" + " if (state.vtx_fmt.z == 0.0) {\n" + " // Need to multiply by 1/W0.\n" + " pos.z /= w;\n" + " }\n" + " pos.w = 1.0;\n" + " // Perform clipping, lest we get weird geometry.\n" + // TODO(benvanik): is this right? dxclip mode may change this? + " if (pos.z < gl_DepthRange.near || pos.z > gl_DepthRange.far) {\n" + " // Clipped! w=0 will kill it in the hardware persp divide.\n" + " pos.w = 0.0;\n" + " }\n" + " // NDC transform.\n" " pos.x = pos.x * state.viewport_scale.x + \n" " state.viewport_offset.x;\n" " pos.y = pos.y * state.viewport_scale.y + \n" " state.viewport_offset.y;\n" " pos.z = pos.z * state.viewport_scale.z + \n" " state.viewport_offset.z;\n" - " pos.xy += state.window_offset.xy;\n" + " // NDC->Window with viewport.\n" + " pos.xy = pos.xy * state.window_offset.zw + state.window_offset.xy;\n" + " pos.xy = pos.xy / (vec2(1280.0 - 1.0, -720.0 + 1.0) / 2.0) + vec2(-1.0, 1.0);\n" + " // Window adjustment.\n" " return pos;\n" "}\n"; std::string source = - header + apply_viewport + + header + apply_transform + "out gl_PerVertex {\n" " vec4 gl_Position;\n" " float gl_PointSize;\n" @@ -96,7 +121,7 @@ bool GL4Shader::PrepareVertexShader( " vtx.o[i] = vec4(0.0, 0.0, 0.0, 0.0);\n" " }\n" " processVertex();\n" - " gl_Position = applyViewport(gl_Position);\n" + " gl_Position = applyTransform(gl_Position);\n" "}\n"; std::string translated_source =