From 6d48b856b9469990c8978a16cd54f4fd15e642f3 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 27 Aug 2018 12:35:44 +0300 Subject: [PATCH] [D3D12] Point sprites and color exponent bias --- .../gpu/d3d12/d3d12_command_processor.cc | 47 ++++++++++++++++--- src/xenia/gpu/d3d12/pipeline_cache.cc | 8 +++- .../shaders/primitive_point_list.gs.hlsl | 29 ++++++++++++ .../d3d12/shaders/primitive_quad_list.gs.hlsl | 9 +--- .../shaders/primitive_rectangle_list.gs.hlsl | 12 +++-- src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli | 34 ++++++++++++++ src/xenia/gpu/hlsl_shader_translator.cc | 45 ++++++++++++------ src/xenia/gpu/hlsl_shader_translator.h | 16 +++++-- 8 files changed, 162 insertions(+), 38 deletions(-) create mode 100644 src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl create mode 100644 src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index aa493f2df..741e7b6b8 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -940,6 +940,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Set the primitive topology. 
D3D_PRIMITIVE_TOPOLOGY primitive_topology; switch (primitive_type) { + case PrimitiveType::kPointList: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + break; case PrimitiveType::kLineList: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; @@ -1308,6 +1311,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32; uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32; + uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32; uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32; uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32; @@ -1405,6 +1409,15 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.ndc_offset[2] = ndc_offset_z; system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset; + // Point size. + float point_size[2]; + point_size[0] = float(pa_su_point_size >> 16) * 0.125f; + point_size[1] = float(pa_su_point_size & 0xFFFF) * 0.125f; + dirty |= system_constants_.point_size[0] != point_size[0]; + dirty |= system_constants_.point_size[1] != point_size[1]; + system_constants_.point_size[0] = point_size[0]; + system_constants_.point_size[1] = point_size[1]; + // Pixel position register. uint32_t pixel_pos_reg = (sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX; @@ -1421,9 +1434,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y; // Alpha test. - uint32_t alpha_test_enabled = (rb_colorcontrol & 0x8) ? 1 : 0; - dirty |= system_constants_.alpha_test_enabled != alpha_test_enabled; - system_constants_.alpha_test_enabled = alpha_test_enabled; + int32_t alpha_test = 0; if (rb_colorcontrol & 0x8) { uint32_t alpha_test_function = rb_colorcontrol & 0x7; // 0: Never - fail in [-inf, +inf]. 
@@ -1434,6 +1445,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( // 5: Not equal - fail in [ref, ref]. // 6: Greater or equal - pass in [ref, +inf]. // 7: Always - pass in [-inf, +inf]. + alpha_test = (alpha_test_function & 0x2) ? 1 : -1; uint32_t alpha_test_range_start = (alpha_test_function == 1 || alpha_test_function == 2 || alpha_test_function == 5 || alpha_test_function == 6) @@ -1444,17 +1456,38 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( alpha_test_function == 4 || alpha_test_function == 5) ? rb_alpha_ref : 0x7F800000u; - uint32_t alpha_test_range_pass = (alpha_test_function & 0x2) ? 1 : 0; dirty |= system_constants_.alpha_test_range[0] != alpha_test_range_start; dirty |= system_constants_.alpha_test_range[1] != alpha_test_range_end; - dirty |= system_constants_.alpha_test_range_pass != alpha_test_range_pass; system_constants_.alpha_test_range[0] = alpha_test_range_start; system_constants_.alpha_test_range[1] = alpha_test_range_end; - system_constants_.alpha_test_range_pass = alpha_test_range_pass; + } else { + alpha_test = 0; } + dirty |= system_constants_.alpha_test != alpha_test; + system_constants_.alpha_test = alpha_test; - // Color output index mapping. + // Color exponent bias and output index mapping. for (uint32_t i = 0; i < 4; ++i) { + uint32_t color_info; + switch (i) { + case 1: + color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32; + break; + case 2: + color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32; + break; + case 3: + color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32; + break; + default: + color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32; + } + float color_exp_bias; + // Exponent bias is in bits 20:25 of RB_COLOR_INFO.
+ *reinterpret_cast<int32_t*>(&color_exp_bias) = + 0x3F800000 + (int32_t((color_info & (0x3F << 20)) << 6) >> 3); + dirty |= system_constants_.color_exp_bias[i] != color_exp_bias; + system_constants_.color_exp_bias[i] = color_exp_bias; dirty |= system_constants_.color_output_map[i] != render_targets[i].guest_render_target; system_constants_.color_output_map[i] = diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 00f95f0f2..c8abe2fa8 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -27,6 +27,7 @@ namespace gpu { namespace d3d12 { // Generated with `xb buildhlsl`. +#include "xenia/gpu/d3d12/shaders/bin/primitive_point_list_gs.h" #include "xenia/gpu/d3d12/shaders/bin/primitive_quad_list_gs.h" #include "xenia/gpu/d3d12/shaders/bin/primitive_rectangle_list_gs.h" @@ -294,7 +295,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( primitive_topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; }; dirty |= regs.primitive_topology_type != primitive_topology_type; - if (primitive_type == PrimitiveType::kRectangleList || + if (primitive_type == PrimitiveType::kPointList || + primitive_type == PrimitiveType::kRectangleList || primitive_type == PrimitiveType::kQuadList) { dirty |= regs.geometry_shader_primitive_type != primitive_type; regs.geometry_shader_primitive_type = primitive_type; @@ -326,6 +328,10 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( update_desc_.PS.BytecodeLength = 0; } switch (primitive_type) { + case PrimitiveType::kPointList: + update_desc_.GS.pShaderBytecode = primitive_point_list_gs; + update_desc_.GS.BytecodeLength = sizeof(primitive_point_list_gs); + break; case PrimitiveType::kRectangleList: update_desc_.GS.pShaderBytecode = primitive_rectangle_list_gs; update_desc_.GS.BytecodeLength = sizeof(primitive_rectangle_list_gs); diff --git a/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl
b/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl new file mode 100644 index 000000000..58a84eced --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/primitive_point_list.gs.hlsl @@ -0,0 +1,29 @@ +#include "xenos_draw.hlsli" + +[maxvertexcount(4)] +void main(point XeVertex xe_in[1], inout TriangleStream<XeVertex> xe_stream) { + XeVertex xe_out; + xe_out.interpolators = xe_in[0].interpolators; + xe_out.position.zw = xe_in[0].position.zw; + xe_out.point_size = xe_in[0].point_size; + + // Shader header writes -1.0f to point_size by default, so any positive value + // means that it was overwritten by the translated vertex shader. + float2 point_size = + (xe_in[0].point_size > 0.0f ? xe_in[0].point_size.xx : xe_point_size) * + xe_ndc_scale.xy; + + xe_out.point_coord = float2(0.0, 1.0); + xe_out.position.xy = xe_in[0].position.xy + float2(-1.0, 1.0) * point_size; + xe_stream.Append(xe_out); + xe_out.point_coord = float2(1.0, 1.0); + xe_out.position.xy = xe_in[0].position.xy + point_size; + xe_stream.Append(xe_out); + xe_out.point_coord = float2(0.0, 0.0); + xe_out.position.xy = xe_in[0].position.xy - point_size; + xe_stream.Append(xe_out); + xe_out.point_coord = float2(1.0, 0.0); + xe_out.position.xy = xe_in[0].position.xy + float2(1.0, -1.0) * point_size; + xe_stream.Append(xe_out); + xe_stream.RestartStrip(); +} diff --git a/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl index 0b4c5b524..882e29ef0 100644 --- a/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/primitive_quad_list.gs.hlsl @@ -1,12 +1,7 @@ -struct XeVertex { - float4 interpolators[16] : TEXCOORD; - float4 position : SV_Position; - float point_size : PSIZE; -}; +#include "xenos_draw.hlsli" [maxvertexcount(4)] -void main(lineadj XeVertex xe_in[4], - inout TriangleStream<XeVertex> xe_stream) { +void main(lineadj XeVertex xe_in[4], inout TriangleStream<XeVertex> xe_stream) { xe_stream.Append(xe_in[0]);
xe_stream.Append(xe_in[1]); xe_stream.Append(xe_in[3]); diff --git a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl index 26d82c6c7..a4384911f 100644 --- a/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl +++ b/src/xenia/gpu/d3d12/shaders/primitive_rectangle_list.gs.hlsl @@ -1,8 +1,4 @@ -struct XeVertex { - float4 interpolators[16] : TEXCOORD; - float4 position : SV_Position; - float point_size : PSIZE; -}; +#include "xenos_draw.hlsli" [maxvertexcount(6)] void main(triangle XeVertex xe_in[3], @@ -37,6 +33,9 @@ void main(triangle XeVertex xe_in[3], xe_in[0].interpolators[i] + xe_in[2].interpolators[i]; } + xe_out.point_coord = xe_in[1].point_coord + + xe_in[0].point_coord - + xe_in[2].point_coord; xe_out.position = float4(xe_in[1].position.xy - xe_in[0].position.xy + xe_in[2].position.xy, @@ -54,6 +53,9 @@ void main(triangle XeVertex xe_in[3], xe_in[1].interpolators[i] + xe_in[2].interpolators[i]; } + xe_out.point_coord = xe_in[0].point_coord + + xe_in[1].point_coord - + xe_in[2].point_coord; xe_out.position = float4(xe_in[0].position.xy - xe_in[1].position.xy + xe_in[2].position.xy, diff --git a/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli new file mode 100644 index 000000000..4994d59ce --- /dev/null +++ b/src/xenia/gpu/d3d12/shaders/xenos_draw.hlsli @@ -0,0 +1,34 @@ +#ifndef XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_ +#define XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_ + +cbuffer XeSystemConstants : register(b0) { + // vec4 0 + float3 xe_mul_rcp_w; + uint xe_vertex_base_index; + // vec4 1 + float3 xe_ndc_scale; + uint xe_vertex_index_endian; + // vec4 2 + float3 xe_ndc_offset; + float xe_pixel_half_pixel_offset; + // vec4 3 + float2 xe_point_size; + float2 xe_ssaa_inv_scale; + // vec4 4 + uint xe_pixel_pos_reg; + int xe_alpha_test; + float2 xe_alpha_test_range; + // vec4 5 + float4 xe_color_exp_bias; + // vec4 6 + uint4 
xe_color_output_map; +}; + +struct XeVertex { + float4 interpolators[16] : TEXCOORD0; + float2 point_coord : TEXCOORD16; + float4 position : SV_Position; + float point_size : PSIZE; +}; + +#endif // XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_ diff --git a/src/xenia/gpu/hlsl_shader_translator.cc b/src/xenia/gpu/hlsl_shader_translator.cc index 9b17b4cb5..b81923a4c 100644 --- a/src/xenia/gpu/hlsl_shader_translator.cc +++ b/src/xenia/gpu/hlsl_shader_translator.cc @@ -170,27 +170,35 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // Bool and loop constants are quadrupled to allow dynamic indexing (constant // registers are vectors). source.Append( - "cbuffer xe_system_constants : register(b0) {\n" + "cbuffer XeSystemConstants : register(b0) {\n" + // vec4 0 " float3 xe_mul_rcp_w;\n" " uint xe_vertex_base_index;\n" + // vec4 1 " float3 xe_ndc_scale;\n" " uint xe_vertex_index_endian;\n" + // vec4 2 " float3 xe_ndc_offset;\n" " float xe_pixel_half_pixel_offset;\n" + // vec4 3 + " float2 xe_point_size;\n" " float2 xe_ssaa_inv_scale;\n" + // vec4 4 " uint xe_pixel_pos_reg;\n" - " bool xe_alpha_test_enabled;\n" + " int xe_alpha_test;\n" " float2 xe_alpha_test_range;\n" - " bool xe_alpha_test_range_pass;\n" + // vec4 5 + " float4 xe_color_exp_bias;\n" + // vec4 6 " uint4 xe_color_output_map;\n" "};\n" "\n" - "cbuffer xe_loop_bool_constants : register(b1) {\n" + "cbuffer XeLoopBoolConstants : register(b1) {\n" " uint4 xe_bool_constants[8];\n" " uint4 xe_loop_constants[32];\n" "};\n" "\n" - "cbuffer xe_fetch_constants : register(b2) {\n" + "cbuffer XeFetchConstants : register(b2) {\n" " uint4 xe_fetch[48];\n" "};\n" "\n" @@ -260,7 +268,8 @@ std::vector HlslShaderTranslator::CompleteTranslation() { "XE_BYTE_SWAP_OVERLOAD(uint4)\n" "\n" "struct XeVertexShaderOutput {\n" - " float4 interpolators[%u] : TEXCOORD;\n" + " float4 interpolators[%u] : TEXCOORD0;\n" + " float2 point_coord : TEXCOORD16;\n" " float4 position : SV_Position;\n" " float point_size : PSIZE;\n" "};\n" 
@@ -273,6 +282,8 @@ std::vector HlslShaderTranslator::CompleteTranslation() { " uint4 xe_vertex_element;\n" " xe_r[0].r = float(xe_vertex_index);\n" " XeVertexShaderOutput xe_output;\n" + // point_coord is written by the geometry shader. + " xe_output.point_coord = float2(0.0, 0.0);\n" " xe_output.position = float4(0.0, 0.0, 0.0, 1.0);\n" " xe_output.point_size = -1.0;\n", kMaxInterpolators, register_count()); @@ -285,7 +296,8 @@ std::vector HlslShaderTranslator::CompleteTranslation() { // XE_PIXEL_SHADER_WRITES_DEPTH in the beginning of the final output. source.AppendFormat( "struct XePixelShaderInput {\n" - " float4 interpolators[%u] : TEXCOORD;\n" + " float4 interpolators[%u] : TEXCOORD0;\n" + " float2 point_coord : TEXCOORD16;\n" " float4 position : SV_Position;\n" "};\n" "\n" @@ -314,13 +326,13 @@ std::vector HlslShaderTranslator::CompleteTranslation() { for (uint32_t i = 0; i < interpolator_register_count; ++i) { source.AppendFormat(" xe_r[%u] = xe_input.interpolators[%u];\n", i, i); } - // Write pixel position to the register specified by ps_param_gen. + // Write pixel position and point coordinate to the register specified by + // ps_param_gen. source.AppendFormat( " [branch] if (xe_pixel_pos_reg < %uu) {\n" - " float4 xe_pixel_pos = xe_input.position;\n" - " xe_pixel_pos.xy = xe_pixel_pos.xy * xe_ssaa_inv_scale +\n" - " xe_pixel_half_pixel_offset;\n" - " xe_r[xe_pixel_pos_reg] = xe_pixel_pos;\n" + " xe_r[xe_pixel_pos_reg] =\n" + " float4(xe_input.position.xy * xe_ssaa_inv_scale +\n" + " xe_pixel_half_pixel_offset, xe_input.point_coord);\n" " }\n", register_count()); } @@ -390,14 +402,19 @@ std::vector HlslShaderTranslator::CompleteTranslation() { " xe_ndc_offset * xe_output.position.www;\n"); } else if (is_pixel_shader()) { source.Append( + // Apply the exponent bias. 
+ " xe_color_output[0] *= xe_color_exp_bias.x;\n" + " xe_color_output[1] *= xe_color_exp_bias.y;\n" + " xe_color_output[2] *= xe_color_exp_bias.z;\n" + " xe_color_output[3] *= xe_color_exp_bias.w;\n" // Perform alpha test - check if the alpha is within the specified // bounds (inclusively), fail or pass depending on comparison mode and // on the results of the bound test. - " [branch] if (xe_alpha_test_enabled) {\n" + " [branch] if (xe_alpha_test != 0) {\n" " bool xe_alpha_test_failed =\n" " xe_color_output[0u].a >= xe_alpha_test_range.x &&\n" " xe_color_output[0u].a <= xe_alpha_test_range.y;\n" - " [flatten] if (xe_alpha_test_range_pass) {\n" + " [flatten] if (xe_alpha_test > 0) {\n" " xe_alpha_test_failed = !xe_alpha_test_failed;\n" " }\n" " if (xe_alpha_test_failed) {\n" diff --git a/src/xenia/gpu/hlsl_shader_translator.h b/src/xenia/gpu/hlsl_shader_translator.h index 941a8f36b..aca46c696 100644 --- a/src/xenia/gpu/hlsl_shader_translator.h +++ b/src/xenia/gpu/hlsl_shader_translator.h @@ -28,22 +28,30 @@ class HlslShaderTranslator : public ShaderTranslator { // vec4 0 float mul_rcp_w[3]; uint32_t vertex_base_index; + // vec4 1 float ndc_scale[3]; uint32_t vertex_index_endian; + // vec4 2 float ndc_offset[3]; float pixel_half_pixel_offset; + // vec4 3 + float point_size[2]; float ssaa_inv_scale[2]; + + // vec4 4 uint32_t pixel_pos_reg; - uint32_t alpha_test_enabled; - // vec4 4 + // 0 - disabled, 1 - passes if in range, -1 - fails if in range. + int32_t alpha_test; // The range is floats as uints so it's easier to pass infinity. uint32_t alpha_test_range[2]; - uint32_t alpha_test_range_pass; - uint32_t padding_4; + // vec4 5 + float color_exp_bias[4]; + + // vec4 6 uint32_t color_output_map[4]; };