[D3D12] Point sprites and color exponent bias
This commit is contained in:
parent
1818905366
commit
6d48b856b9
|
@ -940,6 +940,9 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
|||
// Set the primitive topology.
|
||||
D3D_PRIMITIVE_TOPOLOGY primitive_topology;
|
||||
switch (primitive_type) {
|
||||
case PrimitiveType::kPointList:
|
||||
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
break;
|
||||
case PrimitiveType::kLineList:
|
||||
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
break;
|
||||
|
@ -1308,6 +1311,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
|
||||
uint32_t pa_cl_clip_cntl = regs[XE_GPU_REG_PA_CL_CLIP_CNTL].u32;
|
||||
uint32_t pa_su_vtx_cntl = regs[XE_GPU_REG_PA_SU_VTX_CNTL].u32;
|
||||
uint32_t pa_su_point_size = regs[XE_GPU_REG_PA_SU_POINT_SIZE].u32;
|
||||
uint32_t sq_program_cntl = regs[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
|
||||
uint32_t sq_context_misc = regs[XE_GPU_REG_SQ_CONTEXT_MISC].u32;
|
||||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||
|
@ -1405,6 +1409,15 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
system_constants_.ndc_offset[2] = ndc_offset_z;
|
||||
system_constants_.pixel_half_pixel_offset = pixel_half_pixel_offset;
|
||||
|
||||
// Point size.
|
||||
float point_size[2];
|
||||
point_size[0] = float(pa_su_point_size >> 16) * 0.125f;
|
||||
point_size[1] = float(pa_su_point_size & 0xFFFF) * 0.125f;
|
||||
dirty |= system_constants_.point_size[0] != point_size[0];
|
||||
dirty |= system_constants_.point_size[1] != point_size[1];
|
||||
system_constants_.point_size[0] = point_size[0];
|
||||
system_constants_.point_size[1] = point_size[1];
|
||||
|
||||
// Pixel position register.
|
||||
uint32_t pixel_pos_reg =
|
||||
(sq_program_cntl & (1 << 18)) ? (sq_context_misc >> 8) & 0xFF : UINT_MAX;
|
||||
|
@ -1421,9 +1434,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
system_constants_.ssaa_inv_scale[1] = ssaa_inv_scale_y;
|
||||
|
||||
// Alpha test.
|
||||
uint32_t alpha_test_enabled = (rb_colorcontrol & 0x8) ? 1 : 0;
|
||||
dirty |= system_constants_.alpha_test_enabled != alpha_test_enabled;
|
||||
system_constants_.alpha_test_enabled = alpha_test_enabled;
|
||||
int32_t alpha_test = 0;
|
||||
if (rb_colorcontrol & 0x8) {
|
||||
uint32_t alpha_test_function = rb_colorcontrol & 0x7;
|
||||
// 0: Never - fail in [-inf, +inf].
|
||||
|
@ -1434,6 +1445,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
// 5: Not equal - fail in [ref, ref].
|
||||
// 6: Greater or equal - pass in [ref, +inf].
|
||||
// 7: Always - pass in [-inf, +inf].
|
||||
int32_t alpha_test = (alpha_test_function & 0x2) ? 1 : -1;
|
||||
uint32_t alpha_test_range_start =
|
||||
(alpha_test_function == 1 || alpha_test_function == 2 ||
|
||||
alpha_test_function == 5 || alpha_test_function == 6)
|
||||
|
@ -1444,17 +1456,38 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
|||
alpha_test_function == 4 || alpha_test_function == 5)
|
||||
? rb_alpha_ref
|
||||
: 0x7F800000u;
|
||||
uint32_t alpha_test_range_pass = (alpha_test_function & 0x2) ? 1 : 0;
|
||||
dirty |= system_constants_.alpha_test_range[0] != alpha_test_range_start;
|
||||
dirty |= system_constants_.alpha_test_range[1] != alpha_test_range_end;
|
||||
dirty |= system_constants_.alpha_test_range_pass != alpha_test_range_pass;
|
||||
system_constants_.alpha_test_range[0] = alpha_test_range_start;
|
||||
system_constants_.alpha_test_range[1] = alpha_test_range_end;
|
||||
system_constants_.alpha_test_range_pass = alpha_test_range_pass;
|
||||
} else {
|
||||
alpha_test = 0;
|
||||
}
|
||||
dirty |= system_constants_.alpha_test != alpha_test;
|
||||
system_constants_.alpha_test = alpha_test;
|
||||
|
||||
// Color output index mapping.
|
||||
// Color exponent bias and output index mapping.
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint32_t color_info;
|
||||
switch (i) {
|
||||
case 1:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
|
||||
break;
|
||||
case 2:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
|
||||
break;
|
||||
case 3:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
|
||||
break;
|
||||
default:
|
||||
color_info = regs[XE_GPU_REG_RB_COLOR_INFO].u32;
|
||||
}
|
||||
float color_exp_bias;
|
||||
// Exponent bias is in bits 20:25 of RB_COLOR_INFO.
|
||||
*reinterpret_cast<int32_t*>(&color_exp_bias) =
|
||||
0x3F800000 + (int32_t((color_info & (0x3F << 20)) << 6) >> 3);
|
||||
dirty |= system_constants_.color_exp_bias[i] != color_exp_bias;
|
||||
system_constants_.color_exp_bias[i] = color_exp_bias;
|
||||
dirty |= system_constants_.color_output_map[i] !=
|
||||
render_targets[i].guest_render_target;
|
||||
system_constants_.color_output_map[i] =
|
||||
|
|
|
@ -27,6 +27,7 @@ namespace gpu {
|
|||
namespace d3d12 {
|
||||
|
||||
// Generated with `xb buildhlsl`.
|
||||
#include "xenia/gpu/d3d12/shaders/bin/primitive_point_list_gs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/primitive_quad_list_gs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/primitive_rectangle_list_gs.h"
|
||||
|
||||
|
@ -294,7 +295,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
primitive_topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
||||
};
|
||||
dirty |= regs.primitive_topology_type != primitive_topology_type;
|
||||
if (primitive_type == PrimitiveType::kRectangleList ||
|
||||
if (primitive_type == PrimitiveType::kPointList ||
|
||||
primitive_type == PrimitiveType::kRectangleList ||
|
||||
primitive_type == PrimitiveType::kQuadList) {
|
||||
dirty |= regs.geometry_shader_primitive_type != primitive_type;
|
||||
regs.geometry_shader_primitive_type = primitive_type;
|
||||
|
@ -326,6 +328,10 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
|
|||
update_desc_.PS.BytecodeLength = 0;
|
||||
}
|
||||
switch (primitive_type) {
|
||||
case PrimitiveType::kPointList:
|
||||
update_desc_.GS.pShaderBytecode = primitive_point_list_gs;
|
||||
update_desc_.GS.BytecodeLength = sizeof(primitive_point_list_gs);
|
||||
break;
|
||||
case PrimitiveType::kRectangleList:
|
||||
update_desc_.GS.pShaderBytecode = primitive_rectangle_list_gs;
|
||||
update_desc_.GS.BytecodeLength = sizeof(primitive_rectangle_list_gs);
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
#include "xenos_draw.hlsli"
|
||||
|
||||
// Point list geometry shader: expands every input point into a screen-aligned
// quad emitted as a single 4-vertex triangle strip, and generates the
// point_coord value for each corner.
//
// xe_in     - the single input point vertex.
// xe_stream - receives the four corner vertices of the quad.
[maxvertexcount(4)]
|
||||
void main(point XeVertex xe_in[1], inout TriangleStream<XeVertex> xe_stream) {
|
||||
// Everything except position.xy and point_coord is the same for all corners,
// so it is copied from the input point once.
XeVertex xe_out;
|
||||
xe_out.interpolators = xe_in[0].interpolators;
|
||||
xe_out.position.zw = xe_in[0].position.zw;
|
||||
xe_out.point_size = xe_in[0].point_size;
|
||||
|
||||
// Shader header writes -1.0f to point_size by default, so any positive value
|
||||
// means that it was overwritten by the translated vertex shader.
|
||||
// Otherwise the size from the system constants (xe_point_size) is used. The
// result is scaled by xe_ndc_scale.xy and used as the offset from the point to
// each corner, so the quad extends point_size in both directions on each axis.
float2 point_size =
|
||||
(xe_in[0].point_size > 0.0f ? xe_in[0].point_size.xx : xe_point_size) *
|
||||
xe_ndc_scale.xy;
|
||||
|
||||
// Corner at (-x, +y) offset, point_coord (0, 1).
xe_out.point_coord = float2(0.0, 1.0);
|
||||
xe_out.position.xy = xe_in[0].position.xy + float2(-1.0, 1.0) * point_size;
|
||||
xe_stream.Append(xe_out);
|
||||
// Corner at (+x, +y) offset, point_coord (1, 1).
xe_out.point_coord = float2(1.0, 1.0);
|
||||
xe_out.position.xy = xe_in[0].position.xy + point_size;
|
||||
xe_stream.Append(xe_out);
|
||||
// Corner at (-x, -y) offset, point_coord (0, 0).
xe_out.point_coord = float2(0.0, 0.0);
|
||||
xe_out.position.xy = xe_in[0].position.xy - point_size;
|
||||
xe_stream.Append(xe_out);
|
||||
// Corner at (+x, -y) offset, point_coord (1, 0).
xe_out.point_coord = float2(1.0, 0.0);
|
||||
xe_out.position.xy = xe_in[0].position.xy + float2(1.0, -1.0) * point_size;
|
||||
xe_stream.Append(xe_out);
|
||||
// End the strip so the quad of the next point starts a new one.
xe_stream.RestartStrip();
|
||||
}
|
|
@ -1,12 +1,7 @@
|
|||
struct XeVertex {
|
||||
float4 interpolators[16] : TEXCOORD;
|
||||
float4 position : SV_Position;
|
||||
float point_size : PSIZE;
|
||||
};
|
||||
#include "xenos_draw.hlsli"
|
||||
|
||||
[maxvertexcount(4)]
|
||||
void main(lineadj XeVertex xe_in[4],
|
||||
inout TriangleStream<XeVertex> xe_stream) {
|
||||
void main(lineadj XeVertex xe_in[4], inout TriangleStream<XeVertex> xe_stream) {
|
||||
xe_stream.Append(xe_in[0]);
|
||||
xe_stream.Append(xe_in[1]);
|
||||
xe_stream.Append(xe_in[3]);
|
||||
|
|
|
@ -1,8 +1,4 @@
|
|||
struct XeVertex {
|
||||
float4 interpolators[16] : TEXCOORD;
|
||||
float4 position : SV_Position;
|
||||
float point_size : PSIZE;
|
||||
};
|
||||
#include "xenos_draw.hlsli"
|
||||
|
||||
[maxvertexcount(6)]
|
||||
void main(triangle XeVertex xe_in[3],
|
||||
|
@ -37,6 +33,9 @@ void main(triangle XeVertex xe_in[3],
|
|||
xe_in[0].interpolators[i] +
|
||||
xe_in[2].interpolators[i];
|
||||
}
|
||||
xe_out.point_coord = xe_in[1].point_coord +
|
||||
xe_in[0].point_coord -
|
||||
xe_in[2].point_coord;
|
||||
xe_out.position = float4(xe_in[1].position.xy -
|
||||
xe_in[0].position.xy +
|
||||
xe_in[2].position.xy,
|
||||
|
@ -54,6 +53,9 @@ void main(triangle XeVertex xe_in[3],
|
|||
xe_in[1].interpolators[i] +
|
||||
xe_in[2].interpolators[i];
|
||||
}
|
||||
xe_out.point_coord = xe_in[0].point_coord +
|
||||
xe_in[1].point_coord -
|
||||
xe_in[2].point_coord;
|
||||
xe_out.position = float4(xe_in[0].position.xy -
|
||||
xe_in[1].position.xy +
|
||||
xe_in[2].position.xy,
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#ifndef XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_
|
||||
#define XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_
|
||||
|
||||
// System constants shared by the draw shaders, bound at register b0.
// The member order and grouping into vec4s mirrors the XeSystemConstants
// declaration emitted by the HLSL shader translator and the CPU-side
// system constants structure, so any change here has to be made in all three.
cbuffer XeSystemConstants : register(b0) {
|
||||
// vec4 0
|
||||
float3 xe_mul_rcp_w;
|
||||
uint xe_vertex_base_index;
|
||||
// vec4 1
|
||||
float3 xe_ndc_scale;
|
||||
uint xe_vertex_index_endian;
|
||||
// vec4 2
|
||||
float3 xe_ndc_offset;
|
||||
float xe_pixel_half_pixel_offset;
|
||||
// vec4 3
|
||||
// Point size used when the vertex shader does not write its own. The CPU side
// fills x from the upper 16 bits and y from the lower 16 bits of
// PA_SU_POINT_SIZE, each multiplied by 0.125.
float2 xe_point_size;
|
||||
float2 xe_ssaa_inv_scale;
|
||||
// vec4 4
|
||||
// Pixel shader register that receives the pixel position and point coordinate;
// the CPU side sets it to UINT_MAX when no register should be written.
uint xe_pixel_pos_reg;
|
||||
// 0 - disabled, 1 - passes if in range, -1 - fails if in range.
int xe_alpha_test;
|
||||
// Inclusive lower (x) and upper (y) bounds of the alpha test range.
float2 xe_alpha_test_range;
|
||||
// vec4 5
|
||||
// Per-render-target multiplier applied to the pixel shader color outputs.
float4 xe_color_exp_bias;
|
||||
// vec4 6
|
||||
uint4 xe_color_output_map;
|
||||
};
|
||||
|
||||
// Vertex layout passed between the translated vertex shader, the primitive
// geometry shaders and the pixel shader.
struct XeVertex {
|
||||
// 16 interpolator vectors starting at TEXCOORD0.
float4 interpolators[16] : TEXCOORD0;
|
||||
// Coordinate within a point sprite; placed after the interpolators
// (TEXCOORD16) and written by the geometry shaders.
float2 point_coord : TEXCOORD16;
|
||||
float4 position : SV_Position;
|
||||
// Per-vertex point size; the translated vertex shader header initializes it to
// -1.0, so a positive value means the guest shader wrote it.
float point_size : PSIZE;
|
||||
};
|
||||
|
||||
#endif // XENIA_GPU_D3D12_SHADERS_XENOS_DRAW_HLSLI_
|
|
@ -170,27 +170,35 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
// Bool and loop constants are quadrupled to allow dynamic indexing (constant
|
||||
// registers are vectors).
|
||||
source.Append(
|
||||
"cbuffer xe_system_constants : register(b0) {\n"
|
||||
"cbuffer XeSystemConstants : register(b0) {\n"
|
||||
// vec4 0
|
||||
" float3 xe_mul_rcp_w;\n"
|
||||
" uint xe_vertex_base_index;\n"
|
||||
// vec4 1
|
||||
" float3 xe_ndc_scale;\n"
|
||||
" uint xe_vertex_index_endian;\n"
|
||||
// vec4 2
|
||||
" float3 xe_ndc_offset;\n"
|
||||
" float xe_pixel_half_pixel_offset;\n"
|
||||
// vec4 3
|
||||
" float2 xe_point_size;\n"
|
||||
" float2 xe_ssaa_inv_scale;\n"
|
||||
// vec4 4
|
||||
" uint xe_pixel_pos_reg;\n"
|
||||
" bool xe_alpha_test_enabled;\n"
|
||||
" int xe_alpha_test;\n"
|
||||
" float2 xe_alpha_test_range;\n"
|
||||
" bool xe_alpha_test_range_pass;\n"
|
||||
// vec4 5
|
||||
" float4 xe_color_exp_bias;\n"
|
||||
// vec4 6
|
||||
" uint4 xe_color_output_map;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"cbuffer xe_loop_bool_constants : register(b1) {\n"
|
||||
"cbuffer XeLoopBoolConstants : register(b1) {\n"
|
||||
" uint4 xe_bool_constants[8];\n"
|
||||
" uint4 xe_loop_constants[32];\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
"cbuffer xe_fetch_constants : register(b2) {\n"
|
||||
"cbuffer XeFetchConstants : register(b2) {\n"
|
||||
" uint4 xe_fetch[48];\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
|
@ -260,7 +268,8 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
"XE_BYTE_SWAP_OVERLOAD(uint4)\n"
|
||||
"\n"
|
||||
"struct XeVertexShaderOutput {\n"
|
||||
" float4 interpolators[%u] : TEXCOORD;\n"
|
||||
" float4 interpolators[%u] : TEXCOORD0;\n"
|
||||
" float2 point_coord : TEXCOORD16;\n"
|
||||
" float4 position : SV_Position;\n"
|
||||
" float point_size : PSIZE;\n"
|
||||
"};\n"
|
||||
|
@ -273,6 +282,8 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
" uint4 xe_vertex_element;\n"
|
||||
" xe_r[0].r = float(xe_vertex_index);\n"
|
||||
" XeVertexShaderOutput xe_output;\n"
|
||||
// point_coord is written by the geometry shader.
|
||||
" xe_output.point_coord = float2(0.0, 0.0);\n"
|
||||
" xe_output.position = float4(0.0, 0.0, 0.0, 1.0);\n"
|
||||
" xe_output.point_size = -1.0;\n",
|
||||
kMaxInterpolators, register_count());
|
||||
|
@ -285,7 +296,8 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
// XE_PIXEL_SHADER_WRITES_DEPTH in the beginning of the final output.
|
||||
source.AppendFormat(
|
||||
"struct XePixelShaderInput {\n"
|
||||
" float4 interpolators[%u] : TEXCOORD;\n"
|
||||
" float4 interpolators[%u] : TEXCOORD0;\n"
|
||||
" float2 point_coord : TEXCOORD16;\n"
|
||||
" float4 position : SV_Position;\n"
|
||||
"};\n"
|
||||
"\n"
|
||||
|
@ -314,13 +326,13 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
for (uint32_t i = 0; i < interpolator_register_count; ++i) {
|
||||
source.AppendFormat(" xe_r[%u] = xe_input.interpolators[%u];\n", i, i);
|
||||
}
|
||||
// Write pixel position to the register specified by ps_param_gen.
|
||||
// Write pixel position and point coordinate to the register specified by
|
||||
// ps_param_gen.
|
||||
source.AppendFormat(
|
||||
" [branch] if (xe_pixel_pos_reg < %uu) {\n"
|
||||
" float4 xe_pixel_pos = xe_input.position;\n"
|
||||
" xe_pixel_pos.xy = xe_pixel_pos.xy * xe_ssaa_inv_scale +\n"
|
||||
" xe_pixel_half_pixel_offset;\n"
|
||||
" xe_r[xe_pixel_pos_reg] = xe_pixel_pos;\n"
|
||||
" xe_r[xe_pixel_pos_reg] =\n"
|
||||
" float4(xe_input.position.xy * xe_ssaa_inv_scale +\n"
|
||||
" xe_pixel_half_pixel_offset, xe_input.point_coord);\n"
|
||||
" }\n",
|
||||
register_count());
|
||||
}
|
||||
|
@ -390,14 +402,19 @@ std::vector<uint8_t> HlslShaderTranslator::CompleteTranslation() {
|
|||
" xe_ndc_offset * xe_output.position.www;\n");
|
||||
} else if (is_pixel_shader()) {
|
||||
source.Append(
|
||||
// Apply the exponent bias.
|
||||
" xe_color_output[0] *= xe_color_exp_bias.x;\n"
|
||||
" xe_color_output[1] *= xe_color_exp_bias.y;\n"
|
||||
" xe_color_output[2] *= xe_color_exp_bias.z;\n"
|
||||
" xe_color_output[3] *= xe_color_exp_bias.w;\n"
|
||||
// Perform alpha test - check if the alpha is within the specified
|
||||
// bounds (inclusively), fail or pass depending on comparison mode and
|
||||
// on the results of the bound test.
|
||||
" [branch] if (xe_alpha_test_enabled) {\n"
|
||||
" [branch] if (xe_alpha_test != 0) {\n"
|
||||
" bool xe_alpha_test_failed =\n"
|
||||
" xe_color_output[0u].a >= xe_alpha_test_range.x &&\n"
|
||||
" xe_color_output[0u].a <= xe_alpha_test_range.y;\n"
|
||||
" [flatten] if (xe_alpha_test_range_pass) {\n"
|
||||
" [flatten] if (xe_alpha_test > 0) {\n"
|
||||
" xe_alpha_test_failed = !xe_alpha_test_failed;\n"
|
||||
" }\n"
|
||||
" if (xe_alpha_test_failed) {\n"
|
||||
|
|
|
@ -28,22 +28,30 @@ class HlslShaderTranslator : public ShaderTranslator {
|
|||
// vec4 0
|
||||
float mul_rcp_w[3];
|
||||
uint32_t vertex_base_index;
|
||||
|
||||
// vec4 1
|
||||
float ndc_scale[3];
|
||||
uint32_t vertex_index_endian;
|
||||
|
||||
// vec4 2
|
||||
float ndc_offset[3];
|
||||
float pixel_half_pixel_offset;
|
||||
|
||||
// vec4 3
|
||||
float point_size[2];
|
||||
float ssaa_inv_scale[2];
|
||||
|
||||
// vec3 4
|
||||
uint32_t pixel_pos_reg;
|
||||
uint32_t alpha_test_enabled;
|
||||
// vec4 4
|
||||
// 0 - disabled, 1 - passes if in range, -1 - fails if in range.
|
||||
int32_t alpha_test;
|
||||
// The range is floats as uints so it's easier to pass infinity.
|
||||
uint32_t alpha_test_range[2];
|
||||
uint32_t alpha_test_range_pass;
|
||||
uint32_t padding_4;
|
||||
|
||||
// vec4 5
|
||||
float color_exp_bias[4];
|
||||
|
||||
// vec4 6
|
||||
uint32_t color_output_map[4];
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue