[D3D12] Point sprites and color exponent bias

This commit is contained in:
Triang3l 2018-08-27 12:35:44 +03:00
parent 1818905366
commit 6d48b856b9
8 changed files with 162 additions and 38 deletions

View File

@ -940,6 +940,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Set the primitive topology.
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
switch (primitive_type) {
case PrimitiveType::kPointList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
break;
case PrimitiveType::kLineList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
break;
@ -1308,6 +1311,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32;
uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32;
uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32;
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
@ -1405,6 +1409,15 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.ndc_offset[2] = ndc_offset_z;
system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset;
// Point size.
float point_size[2];
point_size[0] = float(pa_su_point_size >> 16) * 0.125f;
point_size[1] = float(pa_su_point_size & 0xFFFF) * 0.125f;
dirty |= system_constants_.point_size[0] != point_size[0];
dirty |= system_constants_.point_size[1] != point_size[1];
system_constants_.point_size[0] = point_size[0];
system_constants_.point_size[1] = point_size[1];
// Pixel position register.
uint32_t pixel_pos_reg =
(sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX;
@ -1421,9 +1434,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y;
// Alpha test.
uint32_t alpha_test_enabled = (rb_colorcontrol & 0x8) ? 1 : 0;
dirty |= system_constants_.alpha_test_enabled != alpha_test_enabled;
system_constants_.alpha_test_enabled = alpha_test_enabled;
int32_t alpha_test = 0;
if (rb_colorcontrol & 0x8) {
uint32_t alpha_test_function = rb_colorcontrol & 0x7;
// 0: Never - fail in [-inf, +inf].
@ -1434,6 +1445,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// 5: Not equal - fail in [ref, ref].
// 6: Greater or equal - pass in [ref, +inf].
// 7: Always - pass in [-inf, +inf].
int32_t alpha_test = (alpha_test_function & 0x2) ? 1 : -1;
uint32_t alpha_test_range_start =
(alpha_test_function == 1 || alpha_test_function == 2 ||
alpha_test_function == 5 || alpha_test_function == 6)
@ -1444,17 +1456,38 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
alpha_test_function == 4 || alpha_test_function == 5)
? rb_alpha_ref
: 0x7F800000u;
uint32_t alpha_test_range_pass = (alpha_test_function & 0x2) ? 1 : 0;
dirty |= system_constants_.alpha_test_range[0] != alpha_test_range_start;
dirty |= system_constants_.alpha_test_range[1] != alpha_test_range_end;
dirty |= system_constants_.alpha_test_range_pass != alpha_test_range_pass;
system_constants_.alpha_test_range[0] = alpha_test_range_start;
system_constants_.alpha_test_range[1] = alpha_test_range_end;
system_constants_.alpha_test_range_pass = alpha_test_range_pass;
} else {
alpha_test = 0;
}
dirty |= system_constants_.alpha_test != alpha_test;
system_constants_.alpha_test = alpha_test;
// Color output index mapping.
// Color exponent bias and output index mapping.
for (uint32_t i = 0; i < 4; ++i) {
uint32_t color_info;
switch (i) {
case 1:
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
break;
case 2:
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
break;
case 3:
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
break;
default:
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
}
float color_exp_bias;
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
*reinterpret_cast<int32_t*>(&color_exp_bias) =
0x3F800000 + (int32_t((color_info & (0x3F << 20)) << 6) >> 3);
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias;
system_constants_.color_exp_bias[i] = color_exp_bias;
dirty |= system_constants_.color_output_map[i] !=
render_targets[i].guest_render_target;
system_constants_.color_output_map[i] =

View File

@ -27,6 +27,7 @@ namespace gpu {
namespace d3d12 {
// Generated with `xb buildhlsl`.
#include "xenia/gpu/d3d12/shaders/bin/primitive_point_list_gs.h"
#include "xenia/gpu/d3d12/shaders/bin/primitive_quad_list_gs.h"
#include "xenia/gpu/d3d12/shaders/bin/primitive_rectangle_list_gs.h"
@ -294,7 +295,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
primitive_topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
};
dirty |= regs.primitive_topology_type != primitive_topology_type;
if (primitive_type == PrimitiveType::kRectangleList ||
if (primitive_type == PrimitiveType::kPointList ||
primitive_type == PrimitiveType::kRectangleList ||
primitive_type == PrimitiveType::kQuadList) {
dirty |= regs.geometry_shader_primitive_type != primitive_type;
regs.geometry_shader_primitive_type = primitive_type;
@ -326,6 +328,10 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
update_desc_.PS.BytecodeLength = 0;
}
switch (primitive_type) {
case PrimitiveType::kPointList:
update_desc_.GS.pShaderBytecode = primitive_point_list_gs;
update_desc_.GS.BytecodeLength = sizeof(primitive_point_list_gs);
break;
case PrimitiveType::kRectangleList:
update_desc_.GS.pShaderBytecode = primitive_rectangle_list_gs;
update_desc_.GS.BytecodeLength = sizeof(primitive_rectangle_list_gs);

View File

@ -0,0 +1,29 @@
#include "xenos_draw.hlsli"

// Geometry shader for point lists: turns every input point into a 4-vertex
// triangle strip (a quad) centered on the point, and writes the point sprite
// coordinate of each corner to point_coord.
[maxvertexcount(4)]
void main(point XeVertex xe_in[1], inout TriangleStream<XeVertex> xe_stream) {
  // Quad corners in strip order: the direction of each corner from the center
  // (scaled by the point extent below) and its point sprite coordinate.
  static const float2 kCornerDirections[4] = {
      float2(-1.0, 1.0), float2(1.0, 1.0), float2(-1.0, -1.0),
      float2(1.0, -1.0)};
  static const float2 kCornerCoords[4] = {
      float2(0.0, 1.0), float2(1.0, 1.0), float2(0.0, 0.0), float2(1.0, 0.0)};

  // The shader header writes -1.0f to point_size by default, so a positive
  // value means the translated vertex shader has overwritten it; otherwise the
  // size from the system constants is used.
  float2 xe_extent = xe_point_size;
  if (xe_in[0].point_size > 0.0f) {
    xe_extent = xe_in[0].point_size.xx;
  }
  xe_extent *= xe_ndc_scale.xy;

  // Interpolators, position.zw and point_size are passed through unchanged;
  // only position.xy and point_coord differ between the corners.
  XeVertex xe_corner = xe_in[0];
  [unroll] for (uint i = 0u; i < 4u; ++i) {
    xe_corner.point_coord = kCornerCoords[i];
    xe_corner.position.xy =
        xe_in[0].position.xy + kCornerDirections[i] * xe_extent;
    xe_stream.Append(xe_corner);
  }
  xe_stream.RestartStrip();
}

View File

@ -1,12 +1,7 @@
struct XeVertex {
float4 interpolators[16] : TEXCOORD;
float4 position : SV_Position;
float point_size : PSIZE;
};
#include "xenos_draw.hlsli"
[maxvertexcount(4)]
void main(lineadj XeVertex xe_in[4],
inout TriangleStream<XeVertex> xe_stream) {
void main(lineadj XeVertex xe_in[4], inout TriangleStream<XeVertex> xe_stream) {
xe_stream.Append(xe_in[0]);
xe_stream.Append(xe_in[1]);
xe_stream.Append(xe_in[3]);

View File

@ -1,8 +1,4 @@
struct XeVertex {
float4 interpolators[16] : TEXCOORD;
float4 position : SV_Position;
float point_size : PSIZE;
};
#include "xenos_draw.hlsli"
[maxvertexcount(6)]
void main(triangle XeVertex xe_in[3],
@ -37,6 +33,9 @@ void main(triangle XeVertex xe_in[3],
xe_in[0].interpolators[i] +
xe_in[2].interpolators[i];
}
// Extrapolate with the same signs as the interpolators and the position of
// this vertex (v1 - v0 + v2) so all attributes describe the same corner.
xe_out.point_coord = xe_in[1].point_coord -
                     xe_in[0].point_coord +
                     xe_in[2].point_coord;
xe_out.position = float4(xe_in[1].position.xy -
xe_in[0].position.xy +
xe_in[2].position.xy,
@ -54,6 +53,9 @@ void main(triangle XeVertex xe_in[3],
xe_in[1].interpolators[i] +
xe_in[2].interpolators[i];
}
// Extrapolate with the same signs as the interpolators and the position of
// this vertex (v0 - v1 + v2) so all attributes describe the same corner.
xe_out.point_coord = xe_in[0].point_coord -
                     xe_in[1].point_coord +
                     xe_in[2].point_coord;
xe_out.position = float4(xe_in[0].position.xy -
xe_in[1].position.xy +
xe_in[2].position.xy,

View File

@ -0,0 +1,34 @@
// Declarations shared by the hand-written draw shaders (the primitive geometry
// shaders include this file).
#ifndef XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_
#define XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_
// System constants bound at b0. The member order and the vec4 grouping mirror
// the cbuffer emitted by HlslShaderTranslator and the CPU-side SystemConstants
// struct, so any change here has to be made in those places as well.
cbuffer XeSystemConstants : register(b0) {
// vec4 0
float3 xe_mul_rcp_w;
uint xe_vertex_base_index;
// vec4 1
// Scale applied to the vertex position; also used to scale the point size in
// the point list geometry shader.
float3 xe_ndc_scale;
uint xe_vertex_index_endian;
// vec4 2
// Offset applied to the vertex position (multiplied by W).
float3 xe_ndc_offset;
// Added to the pixel position passed to the pixel shader.
float xe_pixel_half_pixel_offset;
// vec4 3
// Point size taken from the PA_SU_POINT_SIZE register; used when the vertex
// shader has not written its own point size.
float2 xe_point_size;
// Multiplier for the XY of the pixel position passed to the pixel shader.
float2 xe_ssaa_inv_scale;
// vec4 4
// Index of the register receiving the pixel position and the point
// coordinate; ignored when not below the shader's register count.
uint xe_pixel_pos_reg;
// 0 - disabled, 1 - passes if in range, -1 - fails if in range.
int xe_alpha_test;
// Inclusive alpha range (start, end) that the alpha test checks against.
float2 xe_alpha_test_range;
// vec4 5
// Per-render-target multipliers applied to the pixel shader color outputs.
float4 xe_color_exp_bias;
// vec4 6
uint4 xe_color_output_map;
};
// Vertex data passed between the vertex, geometry and pixel shader stages.
// NOTE(review): presumably has to match XeVertexShaderOutput generated by the
// shader translator (which sizes interpolators with kMaxInterpolators) -
// confirm that it is 16.
struct XeVertex {
float4 interpolators[16] : TEXCOORD0;
// Point sprite coordinate; zero from the vertex shader, written by the point
// list geometry shader.
float2 point_coord : TEXCOORD16;
float4 position : SV_Position;
// -1.0 by default; a positive value means the vertex shader has written it.
float point_size : PSIZE;
};
#endif // XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_

View File

@ -170,27 +170,35 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// Bool and loop constants are quadrupled to allow dynamic indexing (constant
// registers are vectors).
source.Append(
"cbuffer xe_system_constants : register(b0) {\n"
"cbuffer XeSystemConstants : register(b0) {\n"
// vec4 0
" float3 xe_mul_rcp_w;\n"
" uint xe_vertex_base_index;\n"
// vec4 1
" float3 xe_ndc_scale;\n"
" uint xe_vertex_index_endian;\n"
// vec4 2
" float3 xe_ndc_offset;\n"
" float xe_pixel_half_pixel_offset;\n"
// vec4 3
" float2 xe_point_size;\n"
" float2 xe_ssaa_inv_scale;\n"
// vec4 4
" uint xe_pixel_pos_reg;\n"
" bool xe_alpha_test_enabled;\n"
" int xe_alpha_test;\n"
" float2 xe_alpha_test_range;\n"
" bool xe_alpha_test_range_pass;\n"
// vec4 5
" float4 xe_color_exp_bias;\n"
// vec4 6
" uint4 xe_color_output_map;\n"
"};\n"
"\n"
"cbuffer xe_loop_bool_constants : register(b1) {\n"
"cbuffer XeLoopBoolConstants : register(b1) {\n"
" uint4 xe_bool_constants[8];\n"
" uint4 xe_loop_constants[32];\n"
"};\n"
"\n"
"cbuffer xe_fetch_constants : register(b2) {\n"
"cbuffer XeFetchConstants : register(b2) {\n"
" uint4 xe_fetch[48];\n"
"};\n"
"\n"
@ -260,7 +268,8 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
"XE_BYTE_SWAP_OVERLOAD(uint4)\n"
"\n"
"struct XeVertexShaderOutput {\n"
" float4 interpolators[%u] : TEXCOORD;\n"
" float4 interpolators[%u] : TEXCOORD0;\n"
" float2 point_coord : TEXCOORD16;\n"
" float4 position : SV_Position;\n"
" float point_size : PSIZE;\n"
"};\n"
@ -273,6 +282,8 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
" uint4 xe_vertex_element;\n"
" xe_r[0].r = float(xe_vertex_index);\n"
" XeVertexShaderOutput xe_output;\n"
// point_coord is written by the geometry shader.
" xe_output.point_coord = float2(0.0, 0.0);\n"
" xe_output.position = float4(0.0, 0.0, 0.0, 1.0);\n"
" xe_output.point_size = -1.0;\n",
kMaxInterpolators, register_count());
@ -285,7 +296,8 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
// XE_PIXEL_SHADER_WRITES_DEPTH in the beginning of the final output.
source.AppendFormat(
"struct XePixelShaderInput {\n"
" float4 interpolators[%u] : TEXCOORD;\n"
" float4 interpolators[%u] : TEXCOORD0;\n"
" float2 point_coord : TEXCOORD16;\n"
" float4 position : SV_Position;\n"
"};\n"
"\n"
@ -314,13 +326,13 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
for (uint32_t i = 0; i < interpolator_register_count; ++i) {
source.AppendFormat(" xe_r[%u] = xe_input.interpolators[%u];\n", i, i);
}
// Write pixel position to the register specified by ps_param_gen.
// Write pixel position and point coordinate to the register specified by
// ps_param_gen.
source.AppendFormat(
" [branch] if (xe_pixel_pos_reg < %uu) {\n"
" float4 xe_pixel_pos = xe_input.position;\n"
" xe_pixel_pos.xy = xe_pixel_pos.xy * xe_ssaa_inv_scale +\n"
" xe_pixel_half_pixel_offset;\n"
" xe_r[xe_pixel_pos_reg] = xe_pixel_pos;\n"
" xe_r[xe_pixel_pos_reg] =\n"
" float4(xe_input.position.xy * xe_ssaa_inv_scale +\n"
" xe_pixel_half_pixel_offset, xe_input.point_coord);\n"
" }\n",
register_count());
}
@ -390,14 +402,19 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
" xe_ndc_offset * xe_output.position.www;\n");
} else if (is_pixel_shader()) {
source.Append(
// Apply the exponent bias.
" xe_color_output[0] *= xe_color_exp_bias.x;\n"
" xe_color_output[1] *= xe_color_exp_bias.y;\n"
" xe_color_output[2] *= xe_color_exp_bias.z;\n"
" xe_color_output[3] *= xe_color_exp_bias.w;\n"
// Perform alpha test - check if the alpha is within the specified
// bounds (inclusively), fail or pass depending on comparison mode and
// on the results of the bound test.
" [branch] if (xe_alpha_test_enabled) {\n"
" [branch] if (xe_alpha_test != 0) {\n"
" bool xe_alpha_test_failed =\n"
" xe_color_output[0u].a >= xe_alpha_test_range.x &&\n"
" xe_color_output[0u].a <= xe_alpha_test_range.y;\n"
" [flatten] if (xe_alpha_test_range_pass) {\n"
" [flatten] if (xe_alpha_test > 0) {\n"
" xe_alpha_test_failed = !xe_alpha_test_failed;\n"
" }\n"
" if (xe_alpha_test_failed) {\n"

View File

@ -28,22 +28,30 @@ class HlslShaderTranslator : public ShaderTranslator {
// vec4 0
float mul_rcp_w[3];
uint32_t vertex_base_index;
// vec4 1
float ndc_scale[3];
uint32_t vertex_index_endian;
// vec4 2
float ndc_offset[3];
float pixel_half_pixel_offset;
// vec4 3
float point_size[2];
float ssaa_inv_scale[2];
// vec3 4
uint32_t pixel_pos_reg;
uint32_t alpha_test_enabled;
// vec4 4
// 0 - disabled, 1 - passes if in range, -1 - fails if in range.
int32_t alpha_test;
// The range is floats as uints so it's easier to pass infinity.
uint32_t alpha_test_range[2];
uint32_t alpha_test_range_pass;
uint32_t padding_4;
// vec4 5
float color_exp_bias[4];
// vec4 6
uint32_t color_output_map[4];
};