diff --git a/docs/gpu.md b/docs/gpu.md index 077f55522..cf630b6e4 100644 --- a/docs/gpu.md +++ b/docs/gpu.md @@ -8,3 +8,11 @@ * [LLVM R600 Tables](https://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600Instructions.td) ** The opcode formats don't match, but the name->psuedo code is correct. + +## Tools + +### apitrace + +[apitrace](http://apitrace.github.io/) can be used to capture and replay D3D11 +call streams. To disable stdout spew first set `XE_OPTION_ENABLE_LOGGING` to 0 +in `logging.h`. diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc new file mode 100644 index 000000000..1e931f16b --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.cc @@ -0,0 +1,237 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include + +#include +#include +#include + +#include + + +using namespace xe; +using namespace xe::gpu; +using namespace xe::gpu::d3d11; +using namespace xe::gpu::xenos; + + +D3D11GeometryShader::D3D11GeometryShader(ID3D11Device* device, uint64_t hash) : + hash_(hash), handle_(NULL) { + device_ = device; + device_->AddRef(); +} + +D3D11GeometryShader::~D3D11GeometryShader() { + XESAFERELEASE(handle_); + XESAFERELEASE(device_); +} + +// Generates HLSL via Generate(), compiles it, and creates the D3D11 geometry +// shader. Returns 0 on success (or if already prepared) and 1 on any failure. +int D3D11GeometryShader::Prepare(D3D11VertexShader* vertex_shader) { + if (handle_) { + return 0; + } + + // TODO(benvanik): look in file based on hash/etc. + void* byte_code = NULL; + size_t byte_code_length = 0; + + // Translate and compile source. 
+ auto output = new alloy::StringBuffer(); + if (Generate(vertex_shader, output)) { + delete output; + return 1; + } + ID3D10Blob* shader_blob = Compile(output->GetString()); + delete output; + if (!shader_blob) { + return 1; + } + byte_code_length = shader_blob->GetBufferSize(); + byte_code = xe_malloc(byte_code_length); + xe_copy_struct( + byte_code, shader_blob->GetBufferPointer(), byte_code_length); + XESAFERELEASE(shader_blob); + + // Create shader. + HRESULT hr = device_->CreateGeometryShader( + byte_code, byte_code_length, + NULL, + &handle_); + // The copied bytecode is not used after this call; free it on success too. + xe_free(byte_code); + if (FAILED(hr)) { + XELOGE("D3D11: failed to create geometry shader"); + return 1; + } + + return 0; +} + +// Compiles HLSL source for the gs_5_0 target, first dumping it to a file when +// FLAGS_dump_shaders is set. Returns NULL on failure; the caller releases it. +ID3D10Blob* D3D11GeometryShader::Compile(const char* shader_source) { + // TODO(benvanik): pick shared runtime mode defines. + D3D10_SHADER_MACRO defines[] = { + "TEST_DEFINE", "1", + 0, 0, + }; + + uint32_t flags1 = 0; + flags1 |= D3D10_SHADER_DEBUG; + flags1 |= D3D10_SHADER_ENABLE_STRICTNESS; + uint32_t flags2 = 0; + + // Create a name. + const char* base_path = ""; + if (FLAGS_dump_shaders.size()) { + base_path = FLAGS_dump_shaders.c_str(); + } + char file_name[XE_MAX_PATH]; + xesnprintfa(file_name, XECOUNT(file_name), + "%s/gen_%.16llX.gs", + base_path, + hash_); + + if (FLAGS_dump_shaders.size()) { + FILE* f = fopen(file_name, "w"); + if (f) { + // Write the source verbatim; it must never be used as a format string. + fputs(shader_source, f); + fclose(f); + } + } + + // Compile shader to bytecode blob. 
+ ID3D10Blob* shader_blob = 0; + ID3D10Blob* error_blob = 0; + HRESULT hr = D3DCompile( + shader_source, strlen(shader_source), + file_name, + defines, NULL, + "main", + "gs_5_0", + flags1, flags2, + &shader_blob, &error_blob); + if (error_blob) { + char* msg = (char*)error_blob->GetBufferPointer(); + XELOGE("D3D11: shader compile failed with %s", msg); + } + XESAFERELEASE(error_blob); + if (FAILED(hr)) { + return NULL; + } + return shader_blob; +} + + +D3D11PointSpriteGeometryShader::D3D11PointSpriteGeometryShader( + ID3D11Device* device, uint64_t hash) : + D3D11GeometryShader(device, hash) { +} + +D3D11PointSpriteGeometryShader::~D3D11PointSpriteGeometryShader() { +} + +// Not implemented yet: appends no HLSL and reports success. +int D3D11PointSpriteGeometryShader::Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output) { + return 0; +} + + +D3D11RectListGeometryShader::D3D11RectListGeometryShader( + ID3D11Device* device, uint64_t hash) : + D3D11GeometryShader(device, hash) { +} + +D3D11RectListGeometryShader::~D3D11RectListGeometryShader() { +} + +// Emits HLSL that expands each 3-vertex rectangle into a 4-vertex strip, +// extrapolating the fourth vertex as input[2] + (input[1] - input[0]). +int D3D11RectListGeometryShader::Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output) { + output->Append( + "struct VERTEX {\n" + " float4 oPos : SV_POSITION;\n"); + auto alloc_counts = vertex_shader->alloc_counts(); + if (alloc_counts.params) { + output->Append( + " float4 o[%d] : XE_O;\n", + D3D11Shader::MAX_INTERPOLATORS); + } + output->Append( + " float4 oPointSize : PSIZE;\n" + "};\n"); + + output->Append( + "[maxvertexcount(4)]\n" + "void main(triangle VERTEX input[3], inout TriangleStream<VERTEX> output) {\n" + " output.Append(input[0]);\n" + " output.Append(input[1]);\n" + " output.Append(input[2]);\n" + " VERTEX v = input[2];\n" + " v.oPos += input[1].oPos - input[0].oPos;\n" + // TODO(benvanik): only if needed? + " v.oPointSize += input[1].oPointSize - input[0].oPointSize;\n"); + for (uint32_t n = 0; n < alloc_counts.params; n++) { + // TODO(benvanik): this may be wrong - the count is a bad metric. 
+ output->Append( + " v.o[%d] += input[1].o[%d] - input[0].o[%d];\n", + n, n, n); + } + output->Append( + " output.Append(v);\n" + " output.RestartStrip();\n" + "}\n"); + + return 0; +} + + +D3D11QuadListGeometryShader::D3D11QuadListGeometryShader( + ID3D11Device* device, uint64_t hash) : + D3D11GeometryShader(device, hash) { +} + +D3D11QuadListGeometryShader::~D3D11QuadListGeometryShader() { +} + +// Emits HLSL that re-emits each 4-vertex lineadj input as one triangle strip. +int D3D11QuadListGeometryShader::Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output) { + output->Append( + "struct VERTEX {\n" + " float4 oPos : SV_POSITION;\n"); + auto alloc_counts = vertex_shader->alloc_counts(); + if (alloc_counts.params) { + output->Append( + " float4 o[%d] : XE_O;\n", + D3D11Shader::MAX_INTERPOLATORS); + } + output->Append( + " float4 oPointSize : PSIZE;\n" + "};\n"); + + output->Append( + "[maxvertexcount(4)]\n" + "void main(lineadj VERTEX input[4], inout TriangleStream<VERTEX> output,\n" + " uint primitive_id : SV_PrimitiveID) {\n" + " output.Append(input[0]);\n" + " output.Append(input[1]);\n" + " output.Append(input[3]);\n" + " output.Append(input[2]);\n" + " output.RestartStrip();\n" + "}\n"); + + return 0; +} diff --git a/src/xenia/gpu/d3d11/d3d11_geometry_shader.h b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h new file mode 100644 index 000000000..2ff85102f --- /dev/null +++ b/src/xenia/gpu/d3d11/d3d11_geometry_shader.h @@ -0,0 +1,88 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2014 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_GPU_D3D11_D3D11_GEOMETRY_SHADER_H_ +#define XENIA_GPU_D3D11_D3D11_GEOMETRY_SHADER_H_ + +#include + +#include + +#include + + +namespace xe { +namespace gpu { +namespace d3d11 { + +class D3D11VertexShader; + + +class D3D11GeometryShader { /* Base for geometry shaders built on demand; subclasses supply the HLSL via Generate(). */ +public: + virtual ~D3D11GeometryShader(); + + ID3D11GeometryShader* handle() const { return handle_; } + + int Prepare(D3D11VertexShader* vertex_shader); /* 0 on success, nonzero on failure */ + +protected: + D3D11GeometryShader(ID3D11Device* device, uint64_t hash); + + ID3D10Blob* Compile(const char* shader_source); /* NULL on failure */ + + virtual int Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output) = 0; /* appends HLSL to output; a nonzero return makes Prepare() fail */ + +protected: + ID3D11Device* device_; + uint64_t hash_; + ID3D11GeometryShader* handle_; +}; + + +class D3D11PointSpriteGeometryShader : public D3D11GeometryShader { +public: + D3D11PointSpriteGeometryShader(ID3D11Device* device, uint64_t hash); + virtual ~D3D11PointSpriteGeometryShader(); + +protected: + virtual int Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output); +}; + + +class D3D11RectListGeometryShader : public D3D11GeometryShader { +public: + D3D11RectListGeometryShader(ID3D11Device* device, uint64_t hash); + virtual ~D3D11RectListGeometryShader(); + +protected: + virtual int Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output); +}; + + +class D3D11QuadListGeometryShader : public D3D11GeometryShader { +public: + D3D11QuadListGeometryShader(ID3D11Device* device, uint64_t hash); + virtual ~D3D11QuadListGeometryShader(); + +protected: + virtual int Generate(D3D11VertexShader* vertex_shader, + alloy::StringBuffer* output); +}; + + +} // namespace d3d11 +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_D3D11_D3D11_GEOMETRY_SHADER_H_ diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc index 07dd7edb3..b3b59c41b 100644 --- 
a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -49,40 +50,6 @@ D3D11GraphicsDriver::D3D11GraphicsDriver( buffer_desc.ByteWidth = (32) * sizeof(int); hr = device_->CreateBuffer( &buffer_desc, NULL, &state_.constant_buffers.loop_constants); - - //const char* shader_source = - // "" - // "[maxvertexcount(4)]\n" - // "void main(triange RectVert input[3], inout TriangleStream output) {" - // " output.Append(input[0]);" - // " output.Append(input[1]);" - // " output.Append(input[2]);" - // " /* compute 3 */" - // " output.RestartStrip();" - // "}"; - //ID3D10Blob* shader_blob = 0; - //ID3D10Blob* error_blob = 0; - //HRESULT hr = D3DCompile( - // shader_source, strlen(shader_source), - // "d3d11_rect_shader.gs", - // NULL, NULL, - // "main", - // "gs_5_0", - // D3D10_SHADER_DEBUG | D3D10_SHADER_ENABLE_STRICTNESS, 0, - // &shader_blob, &error_blob); - //if (error_blob) { - // char* msg = (char*)error_blob->GetBufferPointer(); - // XELOGE("D3D11: rect shader compile failed with %s", msg); - //} - //XESAFERELEASE(error_blob); - - //byte* rect_shader_bytes = 0; - //size_t rect_shader_length = 0; - //ID3D11GeometryShader* rect_shader; - //hr = device_->CreateGeometryShader( - // shader_blob->GetBufferPointer(), - // shader_blob->GetBufferSize(), - // NULL, &rect_shader); } D3D11GraphicsDriver::~D3D11GraphicsDriver() { @@ -162,9 +129,16 @@ int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) { // Switch primitive topology. // Some are unsupported on D3D11 and must be emulated. 
D3D11_PRIMITIVE_TOPOLOGY primitive_topology; + D3D11GeometryShader* geometry_shader = NULL; switch (prim_type) { case XE_GPU_PRIMITIVE_TYPE_POINT_LIST: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + //if (state_.vertex_shader) { + // if (state_.vertex_shader->DemandGeometryShader( + // D3D11VertexShader::POINT_SPRITE_SHADER, &geometry_shader)) { + // return 1; + // } + //} break; case XE_GPU_PRIMITIVE_TYPE_LINE_LIST: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; @@ -179,20 +153,36 @@ int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) { primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; case XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST: - XELOGW("D3D11: faking RECTANGLE_LIST as a tri list"); - primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; // NOTE(review): the rect-list GS takes 3-vertex triangles; confirm strip (not list) topology is intended. + if (state_.vertex_shader) { + if (state_.vertex_shader->DemandGeometryShader( + D3D11VertexShader::RECT_LIST_SHADER, &geometry_shader)) { + return 1; + } + } + break; + case XE_GPU_PRIMITIVE_TYPE_QUAD_LIST: + primitive_topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ; // 4 vertices per primitive, matching the quad-list GS's lineadj input. + if (state_.vertex_shader) { + if (state_.vertex_shader->DemandGeometryShader( + D3D11VertexShader::QUAD_LIST_SHADER, &geometry_shader)) { + return 1; + } + } break; default: case XE_GPU_PRIMITIVE_TYPE_TRIANGLE_FAN: case XE_GPU_PRIMITIVE_TYPE_UNKNOWN_07: case XE_GPU_PRIMITIVE_TYPE_LINE_LOOP: - case XE_GPU_PRIMITIVE_TYPE_UNKNOWN_0D: primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; XELOGE("D3D11: unsupported primitive type %d", prim_type); break; } context_->IASetPrimitiveTopology(primitive_topology); + context_->GSSetShader( + geometry_shader ? geometry_shader->handle() : NULL, NULL, NULL); // A NULL shader leaves the GS stage disabled for this draw. + // Setup all fetchers (vertices/textures). 
if (PrepareFetchers()) { return 1; @@ -429,7 +419,7 @@ int D3D11GraphicsDriver::UpdateState() { rasterizer_desc.DepthBias = 0; rasterizer_desc.DepthBiasClamp = 0; rasterizer_desc.SlopeScaledDepthBias = 0; - rasterizer_desc.DepthClipEnable = true; + rasterizer_desc.DepthClipEnable = false; // TODO(review): confirm depth clipping is meant to be disabled. rasterizer_desc.ScissorEnable = false; rasterizer_desc.MultisampleEnable = false; rasterizer_desc.AntialiasedLineEnable = false; diff --git a/src/xenia/gpu/d3d11/d3d11_shader.cc b/src/xenia/gpu/d3d11/d3d11_shader.cc index 3b6598a69..3d8aec046 100644 --- a/src/xenia/gpu/d3d11/d3d11_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_shader.cc @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -23,8 +24,6 @@ using namespace xe::gpu::xenos; namespace { -const uint32_t MAX_INTERPOLATORS = 16; - const int OUTPUT_CAPACITY = 64 * 1024; int GetFormatComponentCount(uint32_t format) { @@ -210,9 +209,13 @@ D3D11VertexShader::D3D11VertexShader( handle_(0), input_layout_(0), D3D11Shader(device, XE_GPU_SHADER_TYPE_VERTEX, src_ptr, length, hash) { + xe_zero_struct(geometry_shaders_, sizeof(geometry_shaders_)); } D3D11VertexShader::~D3D11VertexShader() { + for (size_t n = 0; n < XECOUNT(geometry_shaders_); n++) { + delete geometry_shaders_[n]; // Slots never demanded are presumably still NULL from the constructor; delete on NULL is a no-op. + } XESAFERELEASE(input_layout_); XESAFERELEASE(handle_); } @@ -372,7 +375,6 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) { return 0; } - const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) { Output* output = new Output(); xe_gpu_translate_ctx_t ctx; @@ -437,7 +439,8 @@ const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) { // Always write position, as some shaders seem to only write certain values. output->append( - " o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n"); + " o.oPos = float4(0.0, 0.0, 0.0, 0.0);\n" + " o.oPointSize = float4(1.0, 0.0, 0.0, 0.0);\n"); // TODO(benvanik): remove this, if possible (though the compiler may be smart // enough to do it for us). 
@@ -477,6 +480,50 @@ const char* D3D11VertexShader::Translate(xe_gpu_program_cntl_t* program_cntl) { return translated_src_; } +// Returns the geometry shader of the given type for this vertex shader, +// generating and caching it on first use. Returns 0 on success, 1 on failure. +int D3D11VertexShader::DemandGeometryShader(GeometryShaderType type, + D3D11GeometryShader** out_shader) { + // Reject out-of-range types before they are used to index the cache. + if (static_cast<size_t>(type) >= XECOUNT(geometry_shaders_)) { + XEASSERTALWAYS(); + return 1; + } + if (geometry_shaders_[type]) { + *out_shader = geometry_shaders_[type]; + return 0; + } + + // Demand generate. + D3D11GeometryShader* shader = NULL; + switch (type) { + case POINT_SPRITE_SHADER: + shader = new D3D11PointSpriteGeometryShader(device_, hash_); + break; + case RECT_LIST_SHADER: + shader = new D3D11RectListGeometryShader(device_, hash_); + break; + case QUAD_LIST_SHADER: + shader = new D3D11QuadListGeometryShader(device_, hash_); + break; + default: + XEASSERTALWAYS(); + return 1; + } + if (!shader) { + return 1; + } + + if (shader->Prepare(this)) { + delete shader; + return 1; + } + + geometry_shaders_[type] = shader; + *out_shader = geometry_shaders_[type]; + return 0; +} + D3D11PixelShader::D3D11PixelShader( ID3D11Device* device, diff --git a/src/xenia/gpu/d3d11/d3d11_shader.h b/src/xenia/gpu/d3d11/d3d11_shader.h index 79ecd9460..4e6abad46 100644 --- a/src/xenia/gpu/d3d11/d3d11_shader.h +++ b/src/xenia/gpu/d3d11/d3d11_shader.h @@ -29,11 +29,15 @@ typedef struct { xenos::XE_GPU_SHADER_TYPE type; } xe_gpu_translate_ctx_t; +class D3D11GeometryShader; + class D3D11Shader : public Shader { public: virtual ~D3D11Shader(); + const static uint32_t MAX_INTERPOLATORS = 16; + protected: D3D11Shader( ID3D11Device* device, @@ -69,12 +73,23 @@ public: int Prepare(xenos::xe_gpu_program_cntl_t* program_cntl); + enum GeometryShaderType { + POINT_SPRITE_SHADER, + RECT_LIST_SHADER, + QUAD_LIST_SHADER, + + MAX_GEOMETRY_SHADER_TYPE, // Not a real type: the count that sizes geometry_shaders_. + }; + int DemandGeometryShader(GeometryShaderType type, + D3D11GeometryShader** out_shader); + private: const char* Translate(xenos::xe_gpu_program_cntl_t* program_cntl); private: - ID3D11VertexShader* handle_; - ID3D11InputLayout* input_layout_; + ID3D11VertexShader* handle_; + ID3D11InputLayout*
input_layout_; + D3D11GeometryShader* geometry_shaders_[MAX_GEOMETRY_SHADER_TYPE]; }; diff --git a/src/xenia/gpu/d3d11/sources.gypi b/src/xenia/gpu/d3d11/sources.gypi index 3e3afa8e9..b1ad47ff4 100644 --- a/src/xenia/gpu/d3d11/sources.gypi +++ b/src/xenia/gpu/d3d11/sources.gypi @@ -1,6 +1,8 @@ # Copyright 2013 Ben Vanik. All Rights Reserved. { 'sources': [ + 'd3d11_geometry_shader.cc', + 'd3d11_geometry_shader.h', 'd3d11_gpu-private.h', 'd3d11_gpu.cc', 'd3d11_gpu.h', diff --git a/src/xenia/gpu/xenos/xenos.h b/src/xenia/gpu/xenos/xenos.h index af530c014..8eafb7a51 100644 --- a/src/xenia/gpu/xenos/xenos.h +++ b/src/xenia/gpu/xenos/xenos.h @@ -40,7 +40,7 @@ typedef enum { XE_GPU_PRIMITIVE_TYPE_UNKNOWN_07 = 0x07, XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST = 0x08, XE_GPU_PRIMITIVE_TYPE_LINE_LOOP = 0x0C, - XE_GPU_PRIMITIVE_TYPE_UNKNOWN_0D = 0x0D, + XE_GPU_PRIMITIVE_TYPE_QUAD_LIST = 0x0D, } XE_GPU_PRIMITIVE_TYPE; typedef enum {