diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 3dcdf4da9..c24d263a6 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -702,6 +702,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case PrimitiveType::kTriangleList: + case PrimitiveType::kRectangleList: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case PrimitiveType::kTriangleStrip: @@ -926,21 +927,23 @@ void D3D12CommandProcessor::UpdateFixedFunctionState( // the NDC in PC APIs, we use a viewport of the largest possible size, and // divide the position by it in translated shaders. uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; - float viewport_scale_x = (pa_cl_vte_cntl & (1 << 0)) - ? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 - : 1280.0f; - float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2)) - ? -regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32 - : -1280.0f; + float viewport_scale_x = + (pa_cl_vte_cntl & (1 << 0)) + ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32) + : 1280.0f; + float viewport_scale_y = + (pa_cl_vte_cntl & (1 << 2)) + ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32) + : 1280.0f; float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 : 1.0f; float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 - : viewport_scale_x; + : std::abs(viewport_scale_x); float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : viewport_scale_y; + : std::abs(viewport_scale_y); float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5)) ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 : 0.0f; @@ -1077,26 +1080,35 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules. 
// TODO(Triang3l): Check if pixel coordinates need to be offset depending on a // different register (and if there's such register at all). + float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; + float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; bool gl_clip_space_def = !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); - float ndc_scale_x = (pa_cl_vte_cntl & (1 << 0)) ? 1.0f : 1.0f / 1280.0f; - float ndc_scale_y = (pa_cl_vte_cntl & (1 << 2)) ? 1.0f : 1.0f / 1280.0f; + float ndc_scale_x, ndc_scale_y; + if (pa_cl_vte_cntl & (1 << 0)) { + ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f; + } else { + ndc_scale_x = 1.0f / 1280.0f; + } + if (pa_cl_vte_cntl & (1 << 2)) { + ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f; + } else { + ndc_scale_y = -1.0f / 1280.0f; + } float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; - float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : -1.0f; + float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f; float ndc_offset_z = gl_clip_space_def ? 
0.5f : 0.0f; float pixel_half_pixel_offset = 0.0f; if (!(pa_su_vtx_cntl & (1 << 0))) { if (pa_cl_vte_cntl & (1 << 0)) { - float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; if (viewport_scale_x != 0.0f) { - ndc_offset_x -= 0.5f / viewport_scale_x; + ndc_offset_x += 0.5f / viewport_scale_x; } } else { - ndc_offset_x -= 1.0f / 2560.0f; + ndc_offset_x += 1.0f / 2560.0f; } if (pa_cl_vte_cntl & (1 << 2)) { - float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; if (viewport_scale_y != 0.0f) { ndc_offset_y -= 0.5f / viewport_scale_y; } @@ -1143,7 +1155,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( render_targets[i].guest_render_target; } - cbuffer_bindings_system_.up_to_date &= dirty; + cbuffer_bindings_system_.up_to_date &= !dirty; } bool D3D12CommandProcessor::UpdateBindings( diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index dd83e80a5..f10ae3d0e 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -26,6 +26,9 @@ namespace xe { namespace gpu { namespace d3d12 { +// Generated with `xb buildhlsl`. 
+#include "xenia/gpu/d3d12/shaders/bin/primitive_rectangle_list_gs.h" + PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, RegisterFile* register_file) : command_processor_(command_processor), register_file_(register_file) { @@ -254,6 +257,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( primitive_type == PrimitiveType::kLineLoop || primitive_type == PrimitiveType::k2DLineStrip; dirty |= regs.primitive_topology_is_line != primitive_topology_is_line; + if (primitive_type == PrimitiveType::kRectangleList) { + dirty |= regs.geometry_shader_primitive_type != primitive_type; + regs.geometry_shader_primitive_type = primitive_type; + } else { + dirty |= regs.geometry_shader_primitive_type != PrimitiveType::kNone; + regs.geometry_shader_primitive_type = PrimitiveType::kNone; + } XXH64_update(&hash_state_, &regs, sizeof(regs)); if (!dirty) { return UpdateStatus::kCompatible; @@ -294,9 +304,16 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( update_desc_.PS.pShaderBytecode = nullptr; update_desc_.PS.BytecodeLength = 0; } - // TODO(Triang3l): Geometry shaders. - update_desc_.GS.pShaderBytecode = nullptr; - update_desc_.GS.BytecodeLength = 0; + switch (primitive_type) { + case PrimitiveType::kRectangleList: + update_desc_.GS.pShaderBytecode = primitive_rectangle_list_gs; + update_desc_.GS.BytecodeLength = sizeof(primitive_rectangle_list_gs); + break; + default: + // TODO(Triang3l): More geometry shaders for various primitive types. + update_desc_.GS.pShaderBytecode = nullptr; + update_desc_.GS.BytecodeLength = 0; + } update_desc_.PrimitiveTopologyType = primitive_topology_is_line ? 
D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE : D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 8cd5a7877..fba3a2b4d 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -113,6 +113,8 @@ class PipelineCache { D3D12Shader* pixel_shader; uint32_t sq_program_cntl; bool primitive_topology_is_line; + // Primitive type if it needs a geometry shader, or kNone. + PrimitiveType geometry_shader_primitive_type; UpdateShaderStagesRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index 075a95ae4..f60a398a8 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -401,7 +401,7 @@ bool RenderTargetCache::UpdateRenderTargets() { : 1280.0f; float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 - : viewport_scale_y; + : std::abs(viewport_scale_y); if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { viewport_offset_y += float(window_offset_y); } diff --git a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl new file mode 100644 index 000000000..877c08093 --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl @@ -0,0 +1,85 @@ +struct XeVertex { + float4 position : SV_Position; + float4 interpolators[16] : TEXCOORD; + float point_size : PSIZE; +}; + +[maxvertexcount(6)] +void main(triangle XeVertex xe_in[3], inout TriangleStream<XeVertex> xe_stream) { + XeVertex xe_out; + + xe_out.position = xe_in[0].position; + xe_out.interpolators = xe_in[0].interpolators; + xe_out.point_size = xe_in[0].point_size; + xe_stream.Append(xe_out); + xe_out.position = xe_in[1].position; + xe_out.interpolators = xe_in[1].interpolators; + xe_out.point_size = 
xe_in[1].point_size; + xe_stream.Append(xe_out); + xe_out.position = xe_in[2].position; + xe_out.interpolators = xe_in[2].interpolators; + xe_out.point_size = xe_in[2].point_size; + xe_stream.Append(xe_out); + xe_stream.RestartStrip(); + + // Most games use a left-aligned form. + [branch] if (all(xe_in[0].position.xy == + float2(xe_in[2].position.x, xe_in[1].position.y)) || + all(xe_in[0].position.xy == + float2(xe_in[1].position.x, xe_in[2].position.y))) { + // 0 ------ 1 0: -1,-1 + // | - | 1: 1,-1 + // | // | 2: -1, 1 + // | - | 3: [ 1, 1 ] + // 2 ----- [3] + // + // 0 ------ 2 0: -1,-1 + // | - | 1: -1, 1 + // | // | 2: 1,-1 + // | - | 3: [ 1, 1 ] + // 1 ------[3] + xe_out.position = xe_in[2].position; + xe_out.interpolators = xe_in[2].interpolators; + xe_out.point_size = xe_in[2].point_size; + xe_stream.Append(xe_out); + xe_out.position = xe_in[1].position; + xe_out.interpolators = xe_in[1].interpolators; + xe_out.point_size = xe_in[1].point_size; + xe_stream.Append(xe_out); + xe_out.position = float4(xe_in[1].position.xy - + xe_in[0].position.xy + + xe_in[2].position.xy, + xe_in[2].position.zw); + [unroll] for (int i = 0; i < 16; ++i) { + xe_out.interpolators[i] = xe_in[1].interpolators[i] - + xe_in[0].interpolators[i] + + xe_in[2].interpolators[i]; + } + } else { + // 0 ------ 1 0: -1,-1 + // | - | 1: 1,-1 + // | \\ | 2: 1, 1 + // | - | 3: [-1, 1 ] + // [3] ----- 2 + xe_out.position = xe_in[0].position; + xe_out.interpolators = xe_in[0].interpolators; + xe_out.point_size = xe_in[0].point_size; + xe_stream.Append(xe_out); + xe_out.position = xe_in[2].position; + xe_out.interpolators = xe_in[2].interpolators; + xe_out.point_size = xe_in[2].point_size; + xe_stream.Append(xe_out); + xe_out.position = float4(xe_in[0].position.xy - + xe_in[1].position.xy + + xe_in[2].position.xy, + xe_in[2].position.zw); + [unroll] for (int i = 0; i < 16; ++i) { + xe_out.interpolators[i] = xe_in[0].interpolators[i] - + xe_in[1].interpolators[i] + + xe_in[2].interpolators[i]; + } + 
} + xe_out.point_size = xe_in[2].point_size; + xe_stream.Append(xe_out); + xe_stream.RestartStrip(); +}