diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc index 308270f27..7eebf3247 100644 --- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc @@ -124,6 +124,46 @@ ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) { return shader_blob; } +int D3D11GeometryShader::Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output) { + output->Append( + "struct VERTEX {\n" + " float4 oPos : SV_POSITION;\n"); + auto alloc_counts = vertex_shader->alloc_counts(); + if (alloc_counts.params) { + // TODO(benvanik): only add used ones? + output->Append( + " float4 o[%d] : XE_O;\n", + D3D11Shader::MAX_INTERPOLATORS); + } + output->Append( + // TODO(benvanik): only pull in point size if required. + " float4 oPointSize : PSIZE;\n" + "};\n"); + + output->Append( + "cbuffer geo_consts {\n" + " float4 window;\n" // x,y,w,h + " float4 viewport_z_enable;\n" // min,(max - min),?,enabled + " float4 viewport_size;\n" // x,y,w,h + "};" + "float4 applyViewport(float4 pos) {\n" + " if (viewport_z_enable.w) {\n" + //" pos.x = (pos.x + 1) * viewport_size.z * 0.5 + viewport_size.x;\n" + //" pos.y = (1 - pos.y) * viewport_size.w * 0.5 + viewport_size.y;\n" + //" pos.z = viewport_z_enable.x + pos.z * viewport_z_enable.y;\n" + // w? + " } else {\n" + " pos.xy = pos.xy / float2(window.z / 2.0, -window.w / 2.0) + float2(-1.0, 1.0);\n" + " pos.zw = float2(0.0, 1.0);\n" + " }\n" + " pos.xy += window.xy;\n" + " return pos;\n" + "}\n"); + + return 0; +} + D3D11PointSpriteGeometryShader::D3D11PointSpriteGeometryShader( ID3D11Device* device, uint64_t hash) : @@ -135,24 +175,15 @@ D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() { int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, alloy::StringBuffer* output) { + if (D3D11GeometryShader::Generate(vertex_shader, output)) { + return 1; + } + // TODO(benvanik): fetch default point size from register and use that if - // the VS doesn't write oPointSize. + // the VS doesn't write oPointSize. // TODO(benvanik): clamp to min/max. // TODO(benvanik): figure out how to see which interpolator gets adjusted. - output->Append( - "struct VERTEX {\n" - " float4 oPos : SV_POSITION;\n"); - auto alloc_counts = vertex_shader->alloc_counts(); - if (alloc_counts.params) { - output->Append( - " float4 o[%d] : XE_O;\n", - D3D11Shader::MAX_INTERPOLATORS); - } - output->Append( - " float4 oPointSize : PSIZE;\n" - "};\n"); - output->Append( "[maxvertexcount(4)]\n" "void main(point VERTEX input[1], inout TriangleStream output) {\n" @@ -163,9 +194,8 @@ int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, " float2( 1.0, -1.0),\n" " };\n" " float psize = max(input[0].oPointSize.x, 1.0);\n" - " VERTEX v;\n" " for (uint n = 0; n < 4; n++) {\n" - " v = input[0];\n" + " VERTEX v = input[0];\n" " v.oPos.xy += offsets[n] * psize;\n" " output.Append(v);\n" " }\n" @@ -186,29 +216,23 @@ D3D11RectListGeometryShader::~D3D11RectListGeometryShader() { int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader, alloy::StringBuffer* output) { - output->Append( - "struct VERTEX {\n" - " float4 oPos : SV_POSITION;\n"); - auto alloc_counts = vertex_shader->alloc_counts(); - if (alloc_counts.params) { - output->Append( - " float4 o[%d] : XE_O;\n", - D3D11Shader::MAX_INTERPOLATORS); + if (D3D11GeometryShader::Generate(vertex_shader, output)) { + return 1; } - output->Append( - " float4 oPointSize : PSIZE;\n" - "};\n"); - + output->Append( "[maxvertexcount(4)]\n" "void main(triangle VERTEX input[3], inout TriangleStream output) {\n" - " output.Append(input[0]);\n" - " output.Append(input[1]);\n" - " output.Append(input[2]);\n" + " for (uint n = 0; n < 3; n++) {\n" + " VERTEX v = input[n];\n" + " v.oPos = applyViewport(v.oPos);\n" + " output.Append(v);\n" + " }\n" " VERTEX v = input[2];\n" - " v.oPos += input[1].oPos - input[0].oPos;\n" + " v.oPos = applyViewport(v.oPos + input[1].oPos - input[0].oPos);\n" // TODO(benvanik): only if needed? " v.oPointSize += input[1].oPointSize - input[0].oPointSize;\n"); + auto alloc_counts = vertex_shader->alloc_counts(); for (uint32_t n = 0; n < alloc_counts.params; n++) { // TODO(benvanik): this may be wrong - the count is a bad metric. output->Append( @@ -234,27 +258,19 @@ D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() { int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader, alloy::StringBuffer* output) { - output->Append( - "struct VERTEX {\n" - " float4 oPos : SV_POSITION;\n"); - auto alloc_counts = vertex_shader->alloc_counts(); - if (alloc_counts.params) { - output->Append( - " float4 o[%d] : XE_O;\n", - D3D11Shader::MAX_INTERPOLATORS); + if (D3D11GeometryShader::Generate(vertex_shader, output)) { + return 1; } - output->Append( - " float4 oPointSize : PSIZE;\n" - "};\n"); output->Append( "[maxvertexcount(4)]\n" - "void main(lineadj VERTEX input[4], inout TriangleStream output,\n" - " uint primitive_id : SV_PrimitiveID) {\n" - " output.Append(input[0]);\n" - " output.Append(input[1]);\n" - " output.Append(input[3]);\n" - " output.Append(input[2]);\n" + "void main(lineadj VERTEX input[4], inout TriangleStream output) {\n" + " const uint order[4] = { 0, 1, 3, 2 };\n" + " for (uint n = 0; n < 4; n++) {\n" + " VERTEX v = input[order[n]];\n" + " v.oPos = applyViewport(v.oPos);\n" + " output.Append(v);\n" + " }\n" " output.RestartStrip();\n" "}\n"); diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.h b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h index 2ff85102f..cdfebad5f 100644 --- a/src/xenia/gpu/d3d11/d3d11_geometry_shader.h +++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h @@ -38,7 +38,7 @@ protected: ID3D10Blob* Compile(const char* shader_source); virtual int Generate(D3D11VertexShader* vertex_shader, - alloy::StringBuffer* output) = 0; + alloy::StringBuffer* output); protected: ID3D11Device* device_; diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc index c428fbd99..0775e3623 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc @@ -50,6 +50,9 @@ D3D11GraphicsDriver::D3D11GraphicsDriver( buffer_desc.ByteWidth = (32) * sizeof(int); hr = device_->CreateBuffer( &buffer_desc, NULL, &state_.constant_buffers.loop_constants); + buffer_desc.ByteWidth = (32) * sizeof(int); + hr = device_->CreateBuffer( + &buffer_desc, NULL, &state_.constant_buffers.geo_constants); } D3D11GraphicsDriver::~D3D11GraphicsDriver() { @@ -57,6 +60,7 @@ D3D11GraphicsDriver::~D3D11GraphicsDriver() { XESAFERELEASE(state_.constant_buffers.float_constants); XESAFERELEASE(state_.constant_buffers.bool_constants); XESAFERELEASE(state_.constant_buffers.loop_constants); + XESAFERELEASE(state_.constant_buffers.geo_constants); delete shader_cache_; XESAFERELEASE(context_); XESAFERELEASE(device_); @@ -186,8 +190,13 @@ int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) { } context_->IASetPrimitiveTopology(primitive_topology); - context_->GSSetShader( - geometry_shader ? geometry_shader->handle() : NULL, NULL, NULL); + if (geometry_shader) { + context_->GSSetShader(geometry_shader->handle(), NULL, NULL); + context_->GSSetConstantBuffers( + 0, 1, &state_.constant_buffers.geo_constants); + } else { + context_->GSSetShader(NULL, NULL, NULL); + } // Setup all fetchers (vertices/textures). if (PrepareFetchers()) { @@ -344,9 +353,9 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { uint32_t window_scissor_tl = rf.values[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32; uint32_t window_scissor_br = rf.values[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32; //uint32_t window_width = - // (window_scissor_br & 0xFFFF) - (window_scissor_tl & 0xFFFF); + // (window_scissor_br & 0x7FFF) - (window_scissor_tl & 0x7FFF); //uint32_t window_height = - // (window_scissor_br >> 16) - (window_scissor_tl >> 16); + // ((window_scissor_br >> 16) & 0x7FFF) - ((window_scissor_tl >> 16) & 0x7FFF); uint32_t window_width = 1280; uint32_t window_height = 720; if (RebuildRenderTargets(window_width, window_height)) { @@ -440,25 +449,71 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { // Viewport. // If we have resized the window we will want to change this. uint32_t window_offset = rf.values[XE_GPU_REG_PA_SC_WINDOW_OFFSET].u32; - // ? - D3D11_VIEWPORT viewport; - viewport.MinDepth = 0.0f; - viewport.MaxDepth = 1.0f; - viewport.TopLeftX = 0; - viewport.TopLeftY = 0; - viewport.Width = 1280; - viewport.Height = 720; - context_->RSSetViewports(1, &viewport); + // signed? + uint32_t window_offset_x = window_offset & 0x7FFF; + uint32_t window_offset_y = (window_offset >> 16) & 0x7FFF; // ? // TODO(benvanik): figure out how to emulate viewports in D3D11. Could use // viewport above to scale, though that doesn't support negatives/etc. - float vport_xoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640 + uint32_t vte_control = rf.values[XE_GPU_REG_PA_CL_VTE_CNTL].u32; + bool vport_xscale_enable = (vte_control & (1 << 0)) > 0; float vport_xscale = rf.values[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32; // 640 - float vport_yoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360 + bool vport_xoffset_enable = (vte_control & (1 << 1)) > 0; + float vport_xoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32; // 640 + bool vport_yscale_enable = (vte_control & (1 << 2)) > 0; float vport_yscale = rf.values[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32; // -360 - float vport_zoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0 + bool vport_yoffset_enable = (vte_control & (1 << 3)) > 0; + float vport_yoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32; // 360 + bool vport_zscale_enable = (vte_control & (1 << 4)) > 0; float vport_zscale = rf.values[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32; // 1 + bool vport_zoffset_enable = (vte_control & (1 << 5)) > 0; + float vport_zoffset = rf.values[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32; // 0 + + // TODO(benvanik): compute viewport values. + D3D11_VIEWPORT viewport; + if (vport_xscale_enable) { + // Viewport enabled. + viewport.MinDepth = 0.0f; + viewport.MaxDepth = 1.0f; + viewport.TopLeftX = 0; + viewport.TopLeftY = 0; + viewport.Width = 1280; + viewport.Height = 720; + } else { + // Viewport disabled. Geometry shaders will compensate for this. + viewport.MinDepth = 0.0f; + viewport.MaxDepth = 1.0f; + viewport.TopLeftX = 0; + viewport.TopLeftY = 0; + viewport.Width = 1280; + viewport.Height = 720; + } + context_->RSSetViewports(1, &viewport); + + //"cbuffer geo_consts {\n" + //" float4 window;\n" // x,y,w,h + //" float4 viewport_z_enable;\n" // min,(max - min),?,enabled + //" float4 viewport_size;\n" // x,y,w,h + //"};" + D3D11_MAPPED_SUBRESOURCE res; + context_->Map( + state_.constant_buffers.geo_constants, 0, + D3D11_MAP_WRITE_DISCARD, 0, &res); + float* geo_buffer = (float*)res.pData; + geo_buffer[0] = (float)window_offset_x; + geo_buffer[1] = (float)window_offset_y; + geo_buffer[2] = (float)window_width; + geo_buffer[3] = (float)window_height; + geo_buffer[4] = viewport.MinDepth; + geo_buffer[5] = viewport.MaxDepth - viewport.MinDepth; + geo_buffer[6] = 0; // unused + geo_buffer[7] = vport_xscale_enable ? 1.0f : 0.0f; + geo_buffer[8] = viewport.TopLeftX; + geo_buffer[9] = viewport.TopLeftY; + geo_buffer[10] = viewport.Width; + geo_buffer[11] = viewport.Height; + context_->Unmap(state_.constant_buffers.geo_constants, 0); // Scissoring. // TODO(benvanik): pull from scissor registers. @@ -467,10 +522,10 @@ int D3D11GraphicsDriver::UpdateState(uint32_t state_overrides) { uint32_t screen_scissor_br = rf.values[XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR].u32; if (screen_scissor_tl != 0 && screen_scissor_br != 0x20002000) { D3D11_RECT scissor_rect; - scissor_rect.top = screen_scissor_tl >> 16; - scissor_rect.left = screen_scissor_tl & 0xFFFF; - scissor_rect.bottom = screen_scissor_br >> 16; - scissor_rect.right = screen_scissor_br & 0xFFFF; + scissor_rect.top = (screen_scissor_tl >> 16) & 0x7FFF; + scissor_rect.left = screen_scissor_tl & 0x7FFF; + scissor_rect.bottom = (screen_scissor_br >> 16) & 0x7FFF; + scissor_rect.right = screen_scissor_br & 0x7FFF; context_->RSSetScissorRects(1, &scissor_rect); } else { context_->RSSetScissorRects(0, NULL); diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h index 483589e93..971b0a361 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h @@ -95,6 +95,7 @@ private: ID3D11Buffer* float_constants; ID3D11Buffer* bool_constants; ID3D11Buffer* loop_constants; + ID3D11Buffer* geo_constants; } constant_buffers; } state_;