Detect writes to psize and include oPointSize conditionally.

This commit is contained in:
Ben Vanik 2014-01-20 11:07:14 -08:00
parent 0efd7b4a62
commit e469d87678
4 changed files with 48 additions and 14 deletions

View File

@ -136,9 +136,11 @@ int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader,
" float4 o[%d] : XE_O;\n",
D3D11Shader::MAX_INTERPOLATORS);
}
if (alloc_counts.point_size) {
output->Append(
" float4 oPointSize : PSIZE;\n");
}
output->Append(
// TODO(benvanik): only pull in point size if required.
" float4 oPointSize : PSIZE;\n"
"};\n");
output->Append(
@ -163,6 +165,8 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
return 1;
}
auto alloc_counts = vertex_shader->alloc_counts();
// TODO(benvanik): fetch default point size from register and use that if
// the VS doesn't write oPointSize.
// TODO(benvanik): clamp to min/max.
@ -176,8 +180,19 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader,
" float2( 1.0, 1.0),\n"
" float2(-1.0, -1.0),\n"
" float2( 1.0, -1.0),\n"
" };\n"
" float psize = max(input[0].oPointSize.x, 1.0);\n"
" };\n");
if (alloc_counts.point_size) {
// Point size specified in input.
// TODO(benvanik): pull in psize min/max.
output->Append(
" float psize = max(input[0].oPointSize.x, 1.0);\n");
} else {
// Point size from register.
// TODO(benvanik): pull in psize.
output->Append(
" float psize = 1.0;\n");
}
output->Append(
" for (uint n = 0; n < 4; n++) {\n"
" VERTEX v = input[0];\n"
" v.oPos.xy += offsets[n] * psize;\n"
@ -204,6 +219,8 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
return 1;
}
auto alloc_counts = vertex_shader->alloc_counts();
output->Append(
"[maxvertexcount(4)]\n"
"void main(triangle VERTEX input[3], inout TriangleStream<VERTEX> output) {\n"
@ -212,10 +229,11 @@ int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader,
" output.Append(v);\n"
" }\n"
" VERTEX v = input[2];\n"
" v.oPos += input[1].oPos - input[0].oPos;\n"
// TODO(benvanik): only if needed?
" v.oPointSize += input[1].oPointSize - input[0].oPointSize;\n");
auto alloc_counts = vertex_shader->alloc_counts();
" v.oPos += input[1].oPos - input[0].oPos;\n");
if (alloc_counts.point_size) {
output->Append(
" v.oPointSize += input[1].oPointSize - input[0].oPointSize;\n");
}
for (uint32_t n = 0; n < alloc_counts.params; n++) {
// TODO(benvanik): this may be wrong - the count is a bad metric.
output->Append(

View File

@ -449,8 +449,10 @@ const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) {
" float4 o[%d] : XE_O;\n",
MAX_INTERPOLATORS);
}
output->append(
" float4 oPointSize : PSIZE;\n");
if (alloc_counts_.point_size) {
output->append(
" float4 oPointSize : PSIZE;\n");
}
output->append(
"};\n");
@ -461,8 +463,11 @@ const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) {
// Always write position, as some shaders seem to only write certain values.
output->append(
" o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n"
" o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n");
" o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n");
if (alloc_counts_.point_size) {
output->append(
" o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n");
}
// TODO(benvanik): remove this, if possible (though the compiler may be smart
// enough to do it for us).

View File

@ -127,8 +127,18 @@ void Shader::GatherExec(const instr_cf_exec_t* cf) {
}
} else {
// TODO(benvanik): gather registers used, predicate bits used, etc.
/*const instr_alu_t* alu =
(const instr_alu_t*)(dwords_ + alu_off * 3);*/
const instr_alu_t* alu =
(const instr_alu_t*)(dwords_ + alu_off * 3);
if (alu->vector_write_mask) {
if (alu->export_data && alu->vector_dest == 63) {
alloc_counts_.point_size = true;
}
}
if (alu->scalar_write_mask || !alu->vector_write_mask) {
if (alu->export_data && alu->scalar_dest == 63) {
alloc_counts_.point_size = true;
}
}
}
sequence >>= 2;
}

View File

@ -57,6 +57,7 @@ public:
uint32_t positions;
uint32_t params;
uint32_t memories;
bool point_size;
} alloc_counts_t;
const alloc_counts_t& alloc_counts() const { return alloc_counts_; }