diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc index 42c1b55b0..5984631fe 100644 --- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc @@ -136,9 +136,11 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader, " float4 o[%d] : XE_O;\n", D3D11Shader::MAX_INTERPOLATORS); } + if (alloc_counts.point_size) { + output->Append( + " float4 oPointSize : PSIZE;\n"); + } output->Append( - // TODO(benvanik): only pull in point size if required. - " float4 oPointSize : PSIZE;\n" "};\n"); output->Append( @@ -163,6 +165,8 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, return 1; } + auto alloc_counts = vertex_shader->alloc_counts(); + // TODO(benvanik): fetch default point size from register and use that if // the VS doesn't write oPointSize. // TODO(benvanik): clamp to min/max. @@ -176,8 +180,19 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, " float2( 1.0, 1.0),\n" " float2(-1.0, -1.0),\n" " float2( 1.0, -1.0),\n" - " };\n" - " float psize = max(input[0].oPointSize.x, 1.0);\n" + " };\n"); + if (alloc_counts.point_size) { + // Point size specified in input. + // TODO(benvanik): pull in psize min/max. + output->Append( + " float psize = max(input[0].oPointSize.x, 1.0);\n"); + } else { + // Point size from register. + // TODO(benvanik): pull in psize. + output->Append( + " float psize = 1.0;\n"); + } + output->Append( " for (uint n = 0; n < 4; n++) {\n" " VERTEX v = input[0];\n" " v.oPos.xy += offsets[n] * psize;\n" @@ -204,6 +219,8 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader, return 1; } + auto alloc_counts = vertex_shader->alloc_counts(); + output->Append( "[maxvertexcount(4)]\n" "void main(triangle VERTEX input[3], inout TriangleStream output) {\n" @@ -212,10 +229,11 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader, " output.Append(v);\n" " }\n" " VERTEX v = input[2];\n" - " v.oPos += input[1].oPos - input[0].oPos;\n" - // TODO(benvanik): only if needed? - " v.oPointSize += input[1].oPointSize - input[0].oPointSize;\n"); - auto alloc_counts = vertex_shader->alloc_counts(); + " v.oPos += input[1].oPos - input[0].oPos;\n"); + if (alloc_counts.point_size) { + output->Append( + " v.oPointSize += input[1].oPointSize - input[0].oPointSize;\n"); + } for (uint32_t n = 0; n < alloc_counts.params; n++) { // TODO(benvanik): this may be wrong - the count is a bad metric. output->Append( diff --git a/src/xenia/gpu/d3d11/d3d11_shader.cc b/src/xenia/gpu/d3d11/d3d11_shader.cc index 9e20284fb..ee89d12bb 100644 --- a/src/xenia/gpu/d3d11/d3d11_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_shader.cc @@ -449,8 +449,10 @@ const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) { " float4 o[%d] : XE_O;\n", MAX_INTERPOLATORS); } - output->append( - " float4 oPointSize : PSIZE;\n"); + if (alloc_counts_.point_size) { + output->append( + " float4 oPointSize : PSIZE;\n"); + } output->append( "};\n"); @@ -461,8 +463,11 @@ const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) { // Always write position, as some shaders seem to only write certain values. output->append( - " o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n" - " o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n"); + " o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n"); + if (alloc_counts_.point_size) { + output->append( + " o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n"); + } // TODO(benvanik): remove this, if possible (though the compiler may be smart // enough to do it for us). diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc index b98c12443..8774c5fea 100644 --- a/src/xenia/gpu/shader.cc +++ b/src/xenia/gpu/shader.cc @@ -127,8 +127,18 @@ void Shader::GatherExec(const instr_cf_exec_t* cf) { } } else { // TODO(benvanik): gather registers used, predicate bits used, etc. - /*const instr_alu_t* alu = - (const instr_alu_t*)(dwords_ + alu_off * 3);*/ + const instr_alu_t* alu = + (const instr_alu_t*)(dwords_ + alu_off * 3); + if (alu->vector_write_mask) { + if (alu->export_data && alu->vector_dest == 63) { + alloc_counts_.point_size = true; + } + } + if (alu->scalar_write_mask || !alu->vector_write_mask) { + if (alu->export_data && alu->scalar_dest == 63) { + alloc_counts_.point_size = true; + } + } } sequence >>= 2; } diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index ff26175ec..a02cd7f4e 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -57,6 +57,7 @@ public: uint32_t positions; uint32_t params; uint32_t memories; + bool point_size; } alloc_counts_t; const alloc_counts_t& alloc_counts() const { return alloc_counts_; }