[D3D12] Rectangle list geometry shader and viewport fixes

This commit is contained in:
Triang3l 2018-08-14 17:21:18 +03:00
parent 1200eaae24
commit 17fb60a97a
5 changed files with 136 additions and 20 deletions

View File

@ -702,6 +702,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
break; break;
case PrimitiveType::kTriangleList: case PrimitiveType::kTriangleList:
case PrimitiveType::kRectangleList:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
break; break;
case PrimitiveType::kTriangleStrip: case PrimitiveType::kTriangleStrip:
@ -926,21 +927,23 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(
// the NDC in PC APIs, we use a viewport of the largest possible size, and // the NDC in PC APIs, we use a viewport of the largest possible size, and
// divide the position by it in translated shaders. // divide the position by it in translated shaders.
uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32; uint32_t pa_cl_vte_cntl = regs[XE_GPU_REG_PA_CL_VTE_CNTL].u32;
float viewport_scale_x = (pa_cl_vte_cntl & (1 << 0)) float viewport_scale_x =
? regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32 (pa_cl_vte_cntl & (1 << 0))
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
: 1280.0f;
float viewport_scale_y =
(pa_cl_vte_cntl & (1 << 2))
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
: 1280.0f; : 1280.0f;
float viewport_scale_y = (pa_cl_vte_cntl & (1 << 2))
? -regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32
: -1280.0f;
float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4)) float viewport_scale_z = (pa_cl_vte_cntl & (1 << 4))
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
: 1.0f; : 1.0f;
float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1)) float viewport_offset_x = (pa_cl_vte_cntl & (1 << 1))
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
: viewport_scale_x; : std::abs(viewport_scale_x);
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3))
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: viewport_scale_y; : std::abs(viewport_scale_y);
float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5)) float viewport_offset_z = (pa_cl_vte_cntl & (1 << 5))
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
: 0.0f; : 0.0f;
@ -1077,26 +1080,35 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules. // Also apply half-pixel offset to reproduce Direct3D 9 rasterization rules.
// TODO(Triang3l): Check if pixel coordinates need to be offset depending on a // TODO(Triang3l): Check if pixel coordinates need to be offset depending on a
// different register (and if there's such register at all). // different register (and if there's such register at all).
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
bool gl_clip_space_def = bool gl_clip_space_def =
!(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4)); !(pa_cl_clip_cntl & (1 << 19)) && (pa_cl_vte_cntl & (1 << 4));
float ndc_scale_x = (pa_cl_vte_cntl & (1 << 0)) ? 1.0f : 1.0f / 1280.0f; float ndc_scale_x, ndc_scale_y;
float ndc_scale_y = (pa_cl_vte_cntl & (1 << 2)) ? 1.0f : 1.0f / 1280.0f; if (pa_cl_vte_cntl & (1 << 0)) {
ndc_scale_x = viewport_scale_x >= 0.0f ? 1.0f : -1.0f;
} else {
ndc_scale_x = 1.0f / 1280.0f;
}
if (pa_cl_vte_cntl & (1 << 2)) {
ndc_scale_y = viewport_scale_y >= 0.0f ? -1.0f : 1.0f;
} else {
ndc_scale_y = -1.0f / 1280.0f;
}
float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f; float ndc_scale_z = gl_clip_space_def ? 0.5f : 1.0f;
float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f; float ndc_offset_x = (pa_cl_vte_cntl & (1 << 1)) ? 0.0f : -1.0f;
float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : -1.0f; float ndc_offset_y = (pa_cl_vte_cntl & (1 << 3)) ? 0.0f : 1.0f;
float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f; float ndc_offset_z = gl_clip_space_def ? 0.5f : 0.0f;
float pixel_half_pixel_offset = 0.0f; float pixel_half_pixel_offset = 0.0f;
if (!(pa_su_vtx_cntl & (1 << 0))) { if (!(pa_su_vtx_cntl & (1 << 0))) {
if (pa_cl_vte_cntl & (1 << 0)) { if (pa_cl_vte_cntl & (1 << 0)) {
float viewport_scale_x = regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32;
if (viewport_scale_x != 0.0f) { if (viewport_scale_x != 0.0f) {
ndc_offset_x -= 0.5f / viewport_scale_x; ndc_offset_x += 0.5f / viewport_scale_x;
} }
} else { } else {
ndc_offset_x -= 1.0f / 2560.0f; ndc_offset_x += 1.0f / 2560.0f;
} }
if (pa_cl_vte_cntl & (1 << 2)) { if (pa_cl_vte_cntl & (1 << 2)) {
float viewport_scale_y = regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32;
if (viewport_scale_y != 0.0f) { if (viewport_scale_y != 0.0f) {
ndc_offset_y -= 0.5f / viewport_scale_y; ndc_offset_y -= 0.5f / viewport_scale_y;
} }
@ -1143,7 +1155,7 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
render_targets[i].guest_render_target; render_targets[i].guest_render_target;
} }
cbuffer_bindings_system_.up_to_date &= dirty; cbuffer_bindings_system_.up_to_date &= !dirty;
} }
bool D3D12CommandProcessor::UpdateBindings( bool D3D12CommandProcessor::UpdateBindings(

View File

@ -26,6 +26,9 @@ namespace xe {
namespace gpu { namespace gpu {
namespace d3d12 { namespace d3d12 {
// Generated with `xb buildhlsl`.
#include "xenia/gpu/d3d12/shaders/bin/primitive_rectangle_list_gs.h"
PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor, PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file) RegisterFile* register_file)
: command_processor_(command_processor), register_file_(register_file) { : command_processor_(command_processor), register_file_(register_file) {
@ -254,6 +257,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
primitive_type == PrimitiveType::kLineLoop || primitive_type == PrimitiveType::kLineLoop ||
primitive_type == PrimitiveType::k2DLineStrip; primitive_type == PrimitiveType::k2DLineStrip;
dirty |= regs.primitive_topology_is_line != primitive_topology_is_line; dirty |= regs.primitive_topology_is_line != primitive_topology_is_line;
if (primitive_type == PrimitiveType::kRectangleList) {
dirty |= regs.geometry_shader_primitive_type != primitive_type;
regs.geometry_shader_primitive_type = primitive_type;
} else {
dirty |= regs.geometry_shader_primitive_type != PrimitiveType::kNone;
regs.geometry_shader_primitive_type = PrimitiveType::kNone;
}
XXH64_update(&hash_state_, &regs, sizeof(regs)); XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) { if (!dirty) {
return UpdateStatus::kCompatible; return UpdateStatus::kCompatible;
@ -294,9 +304,16 @@ PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages(
update_desc_.PS.pShaderBytecode = nullptr; update_desc_.PS.pShaderBytecode = nullptr;
update_desc_.PS.BytecodeLength = 0; update_desc_.PS.BytecodeLength = 0;
} }
// TODO(Triang3l): Geometry shaders. switch (primitive_type) {
case PrimitiveType::kRectangleList:
update_desc_.GS.pShaderBytecode = primitive_rectangle_list_gs;
update_desc_.GS.BytecodeLength = sizeof(primitive_rectangle_list_gs);
break;
default:
// TODO(Triang3l): More geometry shaders for various primitive types.
update_desc_.GS.pShaderBytecode = nullptr; update_desc_.GS.pShaderBytecode = nullptr;
update_desc_.GS.BytecodeLength = 0; update_desc_.GS.BytecodeLength = 0;
}
update_desc_.PrimitiveTopologyType = update_desc_.PrimitiveTopologyType =
primitive_topology_is_line ? D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE primitive_topology_is_line ? D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE
: D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; : D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

View File

@ -113,6 +113,8 @@ class PipelineCache {
D3D12Shader* pixel_shader; D3D12Shader* pixel_shader;
uint32_t sq_program_cntl; uint32_t sq_program_cntl;
bool primitive_topology_is_line; bool primitive_topology_is_line;
// Primitive type if it needs a geometry shader, or kNone.
PrimitiveType geometry_shader_primitive_type;
UpdateShaderStagesRegisters() { Reset(); } UpdateShaderStagesRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); } void Reset() { std::memset(this, 0, sizeof(*this)); }

View File

@ -401,7 +401,7 @@ bool RenderTargetCache::UpdateRenderTargets() {
: 1280.0f; : 1280.0f;
float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3)) float viewport_offset_y = (pa_cl_vte_cntl & (1 << 3))
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32 ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: viewport_scale_y; : std::abs(viewport_scale_y);
if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) { if (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 16)) {
viewport_offset_y += float(window_offset_y); viewport_offset_y += float(window_offset_y);
} }

View File

@ -0,0 +1,85 @@
// Vertex layout used by the rectangle list geometry shader in this file, both
// as the element type of its triangle input and as the element type of its
// output triangle stream.
// NOTE(review): presumably has to match the output signature of the stage
// feeding this geometry shader and the input of the pixel shader - confirm.
struct XeVertex {
// Position bound to SV_Position. The geometry shader compares and combines the
// xy components of the input positions and reuses zw of the third vertex.
float4 position : SV_Position;
// 16 four-component values bound to the TEXCOORD semantic.
float4 interpolators[16] : TEXCOORD;
// Scalar bound to the PSIZE semantic, copied through by the geometry shader.
float point_size : PSIZE;
};
// Builds the rectangle corner that is not present in the input triangle as
// xe_plus - xe_minus + xe_base, applied to position.xy and to each of the 16
// interpolators. position.zw and the point size are copied from xe_base.
XeVertex XeRectangleFourthCorner(XeVertex xe_plus, XeVertex xe_minus,
                                 XeVertex xe_base) {
  XeVertex xe_corner;
  xe_corner.position = float4(
      xe_plus.position.xy - xe_minus.position.xy + xe_base.position.xy,
      xe_base.position.zw);
  [unroll] for (int i = 0; i < 16; ++i) {
    xe_corner.interpolators[i] = xe_plus.interpolators[i] -
                                 xe_minus.interpolators[i] +
                                 xe_base.interpolators[i];
  }
  xe_corner.point_size = xe_base.point_size;
  return xe_corner;
}

// Turns one rectangle list primitive, received as a triangle of three corners,
// into two separate triangles: the input triangle as is, followed by a second
// triangle made of two input vertices and the reconstructed fourth corner.
[maxvertexcount(6)]
void main(triangle XeVertex xe_in[3], inout TriangleStream<XeVertex> xe_stream) {
  // First triangle - the three input vertices, passed through unchanged.
  xe_stream.Append(xe_in[0]);
  xe_stream.Append(xe_in[1]);
  xe_stream.Append(xe_in[2]);
  xe_stream.RestartStrip();

  // Vertex 0 is treated as the corner opposite to the missing one when it
  // shares one coordinate with vertex 1 and the other coordinate with vertex 2
  // (exact comparison). Most games use a left-aligned form.
  bool xe_v0_has_x2_y1 = all(
      xe_in[0].position.xy == float2(xe_in[2].position.x, xe_in[1].position.y));
  bool xe_v0_has_x1_y2 = all(
      xe_in[0].position.xy == float2(xe_in[1].position.x, xe_in[2].position.y));

  XeVertex xe_fourth;
  [branch] if (xe_v0_has_x2_y1 || xe_v0_has_x1_y2) {
    //  0 ------ 1   0:  -1,-1
    //  |      - |   1:   1,-1
    //  |   //   |   2:  -1, 1
    //  | -      |   3: [ 1, 1 ]
    //  2 ----- [3]
    //
    //  0 ------ 2   0:  -1,-1
    //  |      - |   1:  -1, 1
    //  |   //   |   2:   1,-1
    //  | -      |   3: [ 1, 1 ]
    //  1 ------[3]
    // Second triangle is 2, 1, [3], where [3] = 1 - 0 + 2.
    xe_stream.Append(xe_in[2]);
    xe_stream.Append(xe_in[1]);
    xe_fourth = XeRectangleFourthCorner(xe_in[1], xe_in[0], xe_in[2]);
  } else {
    //  0 ------ 1   0:  -1,-1
    //  | -      |   1:   1,-1
    //  |   \\   |   2:   1, 1
    //  |      - |   3: [-1, 1 ]
    // [3] ----- 2
    // Second triangle is 0, 2, [3], where [3] = 0 - 1 + 2.
    xe_stream.Append(xe_in[0]);
    xe_stream.Append(xe_in[2]);
    xe_fourth = XeRectangleFourthCorner(xe_in[0], xe_in[1], xe_in[2]);
  }
  xe_stream.Append(xe_fourth);
  xe_stream.RestartStrip();
}