From 0ef278325f84c0f745165729bb36c67534a3f3d0 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 12 Oct 2013 15:07:34 -0700 Subject: [PATCH] Hacking to get first triangle drawn. --- src/xenia/gpu/d3d11/d3d11_graphics_driver.cc | 150 +++++++++++++++---- src/xenia/gpu/d3d11/d3d11_graphics_driver.h | 20 +-- src/xenia/gpu/d3d11/d3d11_shader.cc | 133 +++++++++++++++- src/xenia/gpu/d3d11/d3d11_window.cc | 12 +- src/xenia/gpu/graphics_driver.h | 4 +- src/xenia/gpu/nop/nop_graphics_driver.cc | 2 +- src/xenia/gpu/nop/nop_graphics_driver.h | 2 +- src/xenia/gpu/ring_buffer_worker.cc | 4 +- src/xenia/gpu/shader.cc | 92 ++++++++++++ src/xenia/gpu/shader.h | 15 +- src/xenia/gpu/xenos/ucode.h | 19 +++ src/xenia/gpu/xenos/ucode_disassembler.cc | 26 +--- src/xenia/malloc.h | 2 + 13 files changed, 396 insertions(+), 85 deletions(-) diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc index c36f867a8..a686862b0 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc @@ -104,7 +104,7 @@ void D3D11GraphicsDriver::SetShader( } } -void D3D11GraphicsDriver::DrawAutoIndexed( +void D3D11GraphicsDriver::DrawIndexAuto( XE_GPU_PRIMITIVE_TYPE prim_type, uint32_t index_count) { RegisterFile& rf = register_file_; @@ -113,13 +113,19 @@ void D3D11GraphicsDriver::DrawAutoIndexed( prim_type, index_count); // Misc state. - UpdateState(); + if (UpdateState()) { + return; + } // Build constant buffers. - UpdateConstantBuffers(); + if (UpdateConstantBuffers()) { + return; + } // Bind shaders. - BindShaders(); + if (BindShaders()) { + return; + } // Switch primitive topology. // Some are unsupported on D3D11 and must be emulated. @@ -151,26 +157,66 @@ void D3D11GraphicsDriver::DrawAutoIndexed( context_->IASetPrimitiveTopology(primitive_topology); // Setup all fetchers (vertices/textures). - PrepareFetchers(); + if (PrepareFetchers()) { + return; + } // Setup index buffer. - PrepareIndexBuffer(); + if (PrepareIndexBuffer()) { + return; + } // Issue draw. uint32_t start_index = rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32; uint32_t base_vertex = 0; //context_->DrawIndexed(index_count, start_index, base_vertex); + context_->Draw(index_count, 0); } -void D3D11GraphicsDriver::UpdateState() { - //context_->OMSetBlendState(blend_state, blend_factor, sample_mask); +int D3D11GraphicsDriver::UpdateState() { + // General rasterizer state. + ID3D11RasterizerState* rasterizer_state = 0; + D3D11_RASTERIZER_DESC rasterizer_desc; + xe_zero_struct(&rasterizer_desc, sizeof(rasterizer_desc)); + rasterizer_desc.FillMode = D3D11_FILL_SOLID; // D3D11_FILL_WIREFRAME; + rasterizer_desc.CullMode = D3D11_CULL_NONE; // D3D11_CULL_FRONT BACK + rasterizer_desc.FrontCounterClockwise = false; + rasterizer_desc.DepthBias = 0; + rasterizer_desc.DepthBiasClamp = 0; + rasterizer_desc.SlopeScaledDepthBias = 0; + rasterizer_desc.DepthClipEnable = true; + rasterizer_desc.ScissorEnable = false; + rasterizer_desc.MultisampleEnable = false; + rasterizer_desc.AntialiasedLineEnable = false; + device_->CreateRasterizerState(&rasterizer_desc, &rasterizer_state); + context_->RSSetState(rasterizer_state); + XESAFERELEASE(rasterizer_state); + + // Depth-stencil state. //context_->OMSetDepthStencilState - //context_->RSSetScissorRects - //context_->RSSetState - //context_->RSSetViewports + + // Blend state. + //context_->OMSetBlendState(blend_state, blend_factor, sample_mask); + + // Scissoring. + // TODO(benvanik): pull from scissor registers. + context_->RSSetScissorRects(0, NULL); + + // Viewport. + // If we have resized the window we will want to change this. + D3D11_VIEWPORT viewport; + viewport.MinDepth = 0.0f; + viewport.MaxDepth = 1.0f; + viewport.TopLeftX = 0; + viewport.TopLeftY = 0; + viewport.Width = 1280; + viewport.Height = 720; + context_->RSSetViewports(1, &viewport); + + return 0; } -void D3D11GraphicsDriver::UpdateConstantBuffers() { +int D3D11GraphicsDriver::UpdateConstantBuffers() { RegisterFile& rf = register_file_; D3D11_MAPPED_SUBRESOURCE res; @@ -197,9 +243,11 @@ void D3D11GraphicsDriver::UpdateConstantBuffers() { &rf.values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031], (8) * sizeof(int)); context_->Unmap(state_.constant_buffers.bool_constants, 0); + + return 0; } -void D3D11GraphicsDriver::BindShaders() { +int D3D11GraphicsDriver::BindShaders() { RegisterFile& rf = register_file_; xe_gpu_program_cntl_t program_cntl; program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; @@ -212,7 +260,7 @@ void D3D11GraphicsDriver::BindShaders() { if (vs->Prepare(&program_cntl)) { XELOGGPU("D3D11: failed to prepare vertex shader"); state_.vertex_shader = NULL; - return; + return 1; } } @@ -230,6 +278,10 @@ void D3D11GraphicsDriver::BindShaders() { //context_->VSSetSamplers //context_->VSSetShaderResources + } else { + context_->VSSetShader(NULL, NULL, 0); + context_->IASetInputLayout(NULL); + return 1; } // Pixel shader setup. @@ -240,7 +292,7 @@ void D3D11GraphicsDriver::BindShaders() { if (ps->Prepare(&program_cntl)) { XELOGGPU("D3D11: failed to prepare pixel shader"); state_.pixel_shader = NULL; - return; + return 1; } } @@ -255,33 +307,48 @@ void D3D11GraphicsDriver::BindShaders() { //context_->PSSetSamplers //context_->PSSetShaderResources + } else { + context_->PSSetShader(NULL, NULL, 0); + return 1; } + + return 0; } -void D3D11GraphicsDriver::PrepareFetchers() { +int D3D11GraphicsDriver::PrepareFetchers() { RegisterFile& rf = register_file_; for (int n = 0; n < 32; n++) { int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + n * 6; xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r]; if (group->type_0 == 0x2) { - PrepareTextureFetcher(n, &group->texture_fetch); + if (PrepareTextureFetcher(n, &group->texture_fetch)) { + return 1; + } } else { // TODO(benvanik): verify register numbering. if (group->type_0 == 0x3) { - PrepareVertexFetcher(n * 3 + 0, &group->vertex_fetch_0); + if (PrepareVertexFetcher(n * 3 + 0, &group->vertex_fetch_0)) { + return 1; + } } if (group->type_1 == 0x3) { - PrepareVertexFetcher(n * 3 + 1, &group->vertex_fetch_1); + if (PrepareVertexFetcher(n * 3 + 1, &group->vertex_fetch_1)) { + return 1; + } } if (group->type_2 == 0x3) { - PrepareVertexFetcher(n * 3 + 2, &group->vertex_fetch_2); + if (PrepareVertexFetcher(n * 3 + 2, &group->vertex_fetch_2)) { + return 1; + } } } } + + return 0; } -void D3D11GraphicsDriver::PrepareVertexFetcher( - int slot, xe_gpu_vertex_fetch_t* fetch) { +int D3D11GraphicsDriver::PrepareVertexFetcher( + int fetch_slot, xe_gpu_vertex_fetch_t* fetch) { uint32_t address = (fetch->address << 2) + address_translation_; uint32_t size_dwords = fetch->size; @@ -292,9 +359,18 @@ void D3D11GraphicsDriver::PrepareVertexFetcher( buffer_desc.Usage = D3D11_USAGE_DYNAMIC; buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - device_->CreateBuffer(&buffer_desc, NULL, &buffer); + HRESULT hr = device_->CreateBuffer(&buffer_desc, NULL, &buffer); + if (FAILED(hr)) { + XELOGE("D3D11: unable to create vertex fetch buffer"); + return 1; + } D3D11_MAPPED_SUBRESOURCE res; - context_->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + hr = context_->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); + if (FAILED(hr)) { + XELOGE("D3D11: unable to map vertex fetch buffer"); + XESAFERELEASE(buffer); + return 1; + } uint32_t* src = (uint32_t*)xe_memory_addr(memory_, address); uint32_t* dest = (uint32_t*)res.pData; for (uint32_t n = 0; n < size_dwords; n++) { @@ -308,19 +384,31 @@ void D3D11GraphicsDriver::PrepareVertexFetcher( } context_->Unmap(buffer, 0); - // TODO(benvanik): fetch from VS. - /*uint32_t stride = 0; + D3D11VertexShader* vs = state_.vertex_shader; + if (!vs) { + return 1; + } + const instr_fetch_vtx_t* vtx = vs->GetFetchVtxBySlot(fetch_slot); + if (!vtx->must_be_one) { + return 1; + } + // TODO(benvanik): always dword aligned? + uint32_t stride = vtx->stride * 4; uint32_t offset = 0; - context_->IASetVertexBuffers(slot, 1, &buffer, &stride, &offset);*/ + int vb_slot = 95 - fetch_slot; + context_->IASetVertexBuffers(vb_slot, 1, &buffer, &stride, &offset); buffer->Release(); + + return 0; } -void D3D11GraphicsDriver::PrepareTextureFetcher( - int slot, xe_gpu_texture_fetch_t* fetch) { +int D3D11GraphicsDriver::PrepareTextureFetcher( + int fetch_slot, xe_gpu_texture_fetch_t* fetch) { + return 0; } -void D3D11GraphicsDriver::PrepareIndexBuffer() { +int D3D11GraphicsDriver::PrepareIndexBuffer() { RegisterFile& rf = register_file_; /* @@ -353,4 +441,6 @@ void D3D11GraphicsDriver::PrepareIndexBuffer() { context_->IASetIndexBuffer(buffer, format, 0); buffer->Release();*/ + + return 0; } diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h index b49ae0d20..76bacdf33 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h @@ -42,20 +42,20 @@ public: uint32_t address, uint32_t start, uint32_t length); - virtual void DrawAutoIndexed( + virtual void DrawIndexAuto( xenos::XE_GPU_PRIMITIVE_TYPE prim_type, uint32_t index_count); private: - void UpdateState(); - void UpdateConstantBuffers(); - void BindShaders(); - void PrepareFetchers(); - void PrepareVertexFetcher( - int slot, xenos::xe_gpu_vertex_fetch_t* fetch); - void PrepareTextureFetcher( - int slot, xenos::xe_gpu_texture_fetch_t* fetch); - void PrepareIndexBuffer(); + int UpdateState(); + int UpdateConstantBuffers(); + int BindShaders(); + int PrepareFetchers(); + int PrepareVertexFetcher( + int fetch_slot, xenos::xe_gpu_vertex_fetch_t* fetch); + int PrepareTextureFetcher( + int fetch_slot, xenos::xe_gpu_texture_fetch_t* fetch); + int PrepareIndexBuffer(); private: ID3D11Device* device_; diff --git a/src/xenia/gpu/d3d11/d3d11_shader.cc b/src/xenia/gpu/d3d11/d3d11_shader.cc index ebc2d4dfe..9bb207148 100644 --- a/src/xenia/gpu/d3d11/d3d11_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_shader.cc @@ -9,6 +9,10 @@ #include +#include + +#include + using namespace xe; using namespace xe::gpu; @@ -53,6 +57,11 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) { void* byte_code = NULL; size_t byte_code_length = 0; + + if (!byte_code) { + return 1; + } + // Create shader. HRESULT hr = device_->CreateVertexShader( byte_code, byte_code_length, @@ -60,22 +69,131 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) { &handle_); if (FAILED(hr)) { XELOGE("D3D11: failed to create vertex shader"); + xe_free(byte_code); return 1; } // Create input layout. - uint32_t element_count = 0; - D3D11_INPUT_ELEMENT_DESC* element_descs = 0; + size_t element_count = fetch_vtxs_.size(); + D3D11_INPUT_ELEMENT_DESC* element_descs = + (D3D11_INPUT_ELEMENT_DESC*)xe_alloca( + sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count); + int n = 0; + for (std::vector::iterator it = fetch_vtxs_.begin(); + it != fetch_vtxs_.end(); ++it, ++n) { + const instr_fetch_vtx_t& vtx = *it; + DXGI_FORMAT vtx_format; + switch (vtx.format) { + case FMT_1_REVERSE: + vtx_format = DXGI_FORMAT_R1_UNORM; // ? + break; + case FMT_8: + if (!vtx.num_format_all) { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R8_SNORM : DXGI_FORMAT_R8_UNORM; + } else { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R8_SINT : DXGI_FORMAT_R8_UINT; + } + break; + case FMT_8_8_8_8: + if (!vtx.num_format_all) { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM; + } else { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT; + } + break; + case FMT_8_8: + if (!vtx.num_format_all) { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R8G8_SNORM : DXGI_FORMAT_R8G8_UNORM; + } else { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R8G8_SINT : DXGI_FORMAT_R8G8_UINT; + } + break; + case FMT_16: + if (!vtx.num_format_all) { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R16_SNORM : DXGI_FORMAT_R16_UNORM; + } else { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R16_SINT : DXGI_FORMAT_R16_UINT; + } + break; + case FMT_16_16: + if (!vtx.num_format_all) { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM; + } else { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT; + } + break; + case FMT_16_16_16_16: + if (!vtx.num_format_all) { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM; + } else { + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT; + } + break; + case FMT_32: + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT; + break; + case FMT_32_32: + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT; + break; + case FMT_32_32_32_32: + vtx_format = vtx.format_comp_all ? + DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT; + break; + case FMT_32_FLOAT: + vtx_format = DXGI_FORMAT_R32_FLOAT; + break; + case FMT_32_32_FLOAT: + vtx_format = DXGI_FORMAT_R32G32_FLOAT; + break; + case FMT_32_32_32_32_FLOAT: + vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT; + break; + case FMT_32_32_32_FLOAT: + vtx_format = DXGI_FORMAT_R32G32B32_FLOAT; + break; + default: + XEASSERTALWAYS(); + break; + } + element_descs[n].SemanticName = "XEVF"; + element_descs[n].SemanticIndex = n; + element_descs[n].Format = vtx_format; + // TODO(benvanik): pick slot in same way that driver does. + // CONST(31, 2) = reg 31, index 2 = rf([31] * 6 + [2] * 2) + uint32_t fetch_slot = vtx.const_index * 3 + vtx.const_index_sel; + uint32_t vb_slot = 95 - fetch_slot; + element_descs[n].InputSlot = vb_slot; + element_descs[n].AlignedByteOffset = vtx.offset * 4; + element_descs[n].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA; + element_descs[n].InstanceDataStepRate = 0; + } hr = device_->CreateInputLayout( element_descs, - element_count, + (UINT)element_count, byte_code, byte_code_length, &input_layout_); if (FAILED(hr)) { XELOGE("D3D11: failed to create vertex shader input layout"); + xe_free(byte_code); return 1; } + xe_free(byte_code); + is_prepared_ = true; return 0; } @@ -102,6 +220,11 @@ int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl) { void* byte_code = NULL; size_t byte_code_length = 0; + + if (!byte_code) { + return 1; + } + // Create shader. HRESULT hr = device_->CreatePixelShader( byte_code, byte_code_length, @@ -109,8 +232,12 @@ int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl) { &handle_); if (FAILED(hr)) { XELOGE("D3D11: failed to create vertex shader"); + xe_free(byte_code); return 1; } + + xe_free(byte_code); + is_prepared_ = true; return 0; } diff --git a/src/xenia/gpu/d3d11/d3d11_window.cc b/src/xenia/gpu/d3d11/d3d11_window.cc index a08ec8c5f..3f88f8fd7 100644 --- a/src/xenia/gpu/d3d11/d3d11_window.cc +++ b/src/xenia/gpu/d3d11/d3d11_window.cc @@ -38,7 +38,7 @@ D3D11Window::D3D11Window( desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; // Setup buffers. - desc.BufferCount = 2; + desc.BufferCount = 1; desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; desc.BufferDesc.Width = width_; desc.BufferDesc.Height = height_; @@ -93,16 +93,6 @@ D3D11Window::~D3D11Window() { } void D3D11Window::Swap() { - // Setup the viewport. - //D3D11_VIEWPORT viewport; - //viewport.MinDepth = 0.0f; - //viewport.MaxDepth = 1.0f; - //viewport.TopLeftX = 0; - //viewport.TopLeftY = 0; - //viewport.Width = (FLOAT)width_; - //viewport.Height = (FLOAT)height_; - //context_->RSSetViewports(1, &viewport); - // Swap buffers. // TODO(benvanik): control vsync with flag. bool vsync = true; diff --git a/src/xenia/gpu/graphics_driver.h b/src/xenia/gpu/graphics_driver.h index ae1b47b34..d0d0286ac 100644 --- a/src/xenia/gpu/graphics_driver.h +++ b/src/xenia/gpu/graphics_driver.h @@ -38,7 +38,9 @@ public: uint32_t address, uint32_t start, uint32_t length) = 0; - virtual void DrawAutoIndexed( + //virtual void DrawIndex(); + //virtual void DrawIndexImmediate(); + virtual void DrawIndexAuto( xenos::XE_GPU_PRIMITIVE_TYPE prim_type, uint32_t index_count) = 0; diff --git a/src/xenia/gpu/nop/nop_graphics_driver.cc b/src/xenia/gpu/nop/nop_graphics_driver.cc index 023efda2c..eb55f064c 100644 --- a/src/xenia/gpu/nop/nop_graphics_driver.cc +++ b/src/xenia/gpu/nop/nop_graphics_driver.cc @@ -66,7 +66,7 @@ void NopGraphicsDriver::SetShader( } } -void NopGraphicsDriver::DrawAutoIndexed( +void NopGraphicsDriver::DrawIndexAuto( XE_GPU_PRIMITIVE_TYPE prim_type, uint32_t index_count) { XELOGGPU("NOP: draw indexed %d (%d indicies)", diff --git a/src/xenia/gpu/nop/nop_graphics_driver.h b/src/xenia/gpu/nop/nop_graphics_driver.h index 9da009cbb..996af59c8 100644 --- a/src/xenia/gpu/nop/nop_graphics_driver.h +++ b/src/xenia/gpu/nop/nop_graphics_driver.h @@ -39,7 +39,7 @@ public: uint32_t address, uint32_t start, uint32_t length); - virtual void DrawAutoIndexed( + virtual void DrawIndexAuto( xenos::XE_GPU_PRIMITIVE_TYPE prim_type, uint32_t index_count); diff --git a/src/xenia/gpu/ring_buffer_worker.cc b/src/xenia/gpu/ring_buffer_worker.cc index 4fee6da61..cbd28fa83 100644 --- a/src/xenia/gpu/ring_buffer_worker.cc +++ b/src/xenia/gpu/ring_buffer_worker.cc @@ -258,7 +258,7 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { uint32_t prim_type = d1 & 0x3F; uint32_t src_sel = (d1 >> 6) & 0x3; XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex' - driver_->DrawAutoIndexed( + driver_->DrawIndexAuto( (XE_GPU_PRIMITIVE_TYPE)prim_type, index_count); } @@ -273,7 +273,7 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { uint32_t prim_type = d0 & 0x3F; uint32_t src_sel = (d0 >> 6) & 0x3; XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex' - driver_->DrawAutoIndexed( + driver_->DrawIndexAuto( (XE_GPU_PRIMITIVE_TYPE)prim_type, index_count); } diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc index 556593d89..128955fc7 100644 --- a/src/xenia/gpu/shader.cc +++ b/src/xenia/gpu/shader.cc @@ -22,6 +22,8 @@ Shader::Shader( const uint8_t* src_ptr, size_t length, uint64_t hash) : type_(type), hash_(hash), is_prepared_(false) { + xe_zero_struct(fetch_vtx_slots_, sizeof(fetch_vtx_slots_)); + // Verify. dword_count_ = length / 4; XEASSERT(dword_count_ <= 512); @@ -32,12 +34,102 @@ Shader::Shader( for (uint32_t n = 0; n < dword_count_; n++) { dwords_[n] = XEGETUINT32BE(src_ptr + n * 4); } + + // Gather input/output registers/etc. + GatherIO(); } Shader::~Shader() { xe_free(dwords_); } +void Shader::GatherIO() { + // Process all execution blocks. + instr_cf_t cfa; + instr_cf_t cfb; + for (int idx = 0; idx < dword_count_; idx += 3) { + uint32_t dword_0 = dwords_[idx + 0]; + uint32_t dword_1 = dwords_[idx + 1]; + uint32_t dword_2 = dwords_[idx + 2]; + cfa.dword_0 = dword_0; + cfa.dword_1 = dword_1 & 0xFFFF; + cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); + cfb.dword_1 = dword_2 >> 16; + if (cfa.opc == ALLOC) { + GatherAlloc(&cfa.alloc); + } else if (cfa.is_exec()) { + GatherExec(&cfa.exec); + } + if (cfb.opc == ALLOC) { + GatherAlloc(&cfb.alloc); + } else if (cfb.is_exec()) { + GatherExec(&cfb.exec); + } + if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { + break; + } + } +} + +void Shader::GatherAlloc(const instr_cf_alloc_t* cf) { + allocs_.push_back(*cf); +} + +void Shader::GatherExec(const instr_cf_exec_t* cf) { + uint32_t sequence = cf->serialize; + for (uint32_t i = 0; i < cf->count; i++) { + uint32_t alu_off = (cf->address + i); + int sync = sequence & 0x2; + if (sequence & 0x1) { + const instr_fetch_t* fetch = + (const instr_fetch_t*)(dwords_ + alu_off * 3); + switch (fetch->opc) { + case VTX_FETCH: + GatherVertexFetch(&fetch->vtx); + break; + case TEX_FETCH: + case TEX_GET_BORDER_COLOR_FRAC: + case TEX_GET_COMP_TEX_LOD: + case TEX_GET_GRADIENTS: + case TEX_GET_WEIGHTS: + case TEX_SET_TEX_LOD: + case TEX_SET_GRADIENTS_H: + case TEX_SET_GRADIENTS_V: + default: + XEASSERTALWAYS(); + break; + } + } else { + // TODO(benvanik): gather registers used, predicate bits used, etc. + /*const instr_alu_t* alu = + (const instr_alu_t*)(dwords_ + alu_off * 3);*/ + } + sequence >>= 2; + } +} + +void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) { + // dst_reg/dst_swiz + // src_reg/src_swiz + // format = a2xx_sq_surfaceformat + // format_comp_all ? signed : unsigned + // num_format_all ? normalized + // stride + // offset + // const_index/const_index_sel -- fetch constant register + // num_format_all ? integer : fraction + // exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default + + fetch_vtxs_.push_back(*vtx); + + uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel; + fetch_vtx_slots_[fetch_slot] = *vtx; +} + +const instr_fetch_vtx_t* Shader::GetFetchVtxBySlot(uint32_t fetch_slot) { + return &fetch_vtx_slots_[fetch_slot]; +} + char* Shader::Disassemble() { return DisassembleShader(type_, dwords_, dword_count_); } diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 4dacfa295..549363e52 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -11,6 +11,7 @@ #define XENIA_GPU_SHADER_H_ #include +#include #include @@ -31,19 +32,27 @@ public: uint64_t hash() const { return hash_; } bool is_prepared() const { return is_prepared_; } - // vfetch formats - // sampler formats - // constants/registers/etc used + const xenos::instr_fetch_vtx_t* GetFetchVtxBySlot(uint32_t fetch_slot); // NOTE: xe_free() the returned string! char* Disassemble(); +private: + void GatherIO(); + void GatherAlloc(const xenos::instr_cf_alloc_t* cf); + void GatherExec(const xenos::instr_cf_exec_t* cf); + void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx); + protected: xenos::XE_GPU_SHADER_TYPE type_; uint32_t* dwords_; size_t dword_count_; uint64_t hash_; bool is_prepared_; + + std::vector allocs_; + std::vector fetch_vtxs_; + xenos::instr_fetch_vtx_t fetch_vtx_slots_[96]; }; diff --git a/src/xenia/gpu/xenos/ucode.h b/src/xenia/gpu/xenos/ucode.h index 1e525c18c..4aaa7448a 100644 --- a/src/xenia/gpu/xenos/ucode.h +++ b/src/xenia/gpu/xenos/ucode.h @@ -361,6 +361,25 @@ XEPACKEDUNION(instr_cf_t, { uint32_t dword_0; uint32_t dword_1; }); + + bool is_exec() const { + return (this->opc == EXEC) || + (this->opc == EXEC_END) || + (this->opc == COND_EXEC) || + (this->opc == COND_EXEC_END) || + (this->opc == COND_PRED_EXEC) || + (this->opc == COND_PRED_EXEC_END) || + (this->opc == COND_EXEC_PRED_CLEAN) || + (this->opc == COND_EXEC_PRED_CLEAN_END); + } + bool is_cond_exec() const { + return (this->opc == COND_EXEC) || + (this->opc == COND_EXEC_END) || + (this->opc == COND_PRED_EXEC) || + (this->opc == COND_PRED_EXEC_END) || + (this->opc == COND_EXEC_PRED_CLEAN) || + (this->opc == COND_EXEC_PRED_CLEAN_END); + } }); diff --git a/src/xenia/gpu/xenos/ucode_disassembler.cc b/src/xenia/gpu/xenos/ucode_disassembler.cc index 4ba7ecba3..58f7122aa 100644 --- a/src/xenia/gpu/xenos/ucode_disassembler.cc +++ b/src/xenia/gpu/xenos/ucode_disassembler.cc @@ -571,26 +571,6 @@ int disasm_fetch( return 0; } -int cf_exec(const instr_cf_t* cf) { - return (cf->opc == EXEC) || - (cf->opc == EXEC_END) || - (cf->opc == COND_EXEC) || - (cf->opc == COND_EXEC_END) || - (cf->opc == COND_PRED_EXEC) || - (cf->opc == COND_PRED_EXEC_END) || - (cf->opc == COND_EXEC_PRED_CLEAN) || - (cf->opc == COND_EXEC_PRED_CLEAN_END); -} - -int cf_cond_exec(const instr_cf_t* cf) { - return (cf->opc == COND_EXEC) || - (cf->opc == COND_EXEC_END) || - (cf->opc == COND_PRED_EXEC) || - (cf->opc == COND_PRED_EXEC_END) || - (cf->opc == COND_EXEC_PRED_CLEAN) || - (cf->opc == COND_EXEC_PRED_CLEAN_END); -} - void print_cf_nop(Output* output, const instr_cf_t* cf) { } @@ -609,7 +589,7 @@ void print_cf_exec(Output* output, const instr_cf_t* cf) { if (cf->exec.address_mode == ABSOLUTE_ADDR) { output->append(" ABSOLUTE_ADDR"); } - if (cf_cond_exec(cf)) { + if (cf->is_cond_exec()) { output->append(" COND(%d)", cf->exec.condition); } } @@ -732,11 +712,11 @@ char* xenos::DisassembleShader( cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); cfb.dword_1 = dword_2 >> 16; print_cf(output, &cfa, 0); - if (cf_exec(&cfa)) { + if (cfa.is_exec()) { disasm_exec(output, dwords, dword_count, 0, type, &cfa); } print_cf(output, &cfb, 0); - if (cf_exec(&cfb)) { + if (cfb.is_exec()) { disasm_exec(output, dwords, dword_count, 0, type, &cfb); } if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { diff --git a/src/xenia/malloc.h b/src/xenia/malloc.h index da8d5a44a..c3f5ec754 100644 --- a/src/xenia/malloc.h +++ b/src/xenia/malloc.h @@ -13,6 +13,8 @@ #include +#define xe_alloca(size) alloca(size) + void *xe_malloc(const size_t size); void *xe_calloc(const size_t size); void *xe_realloc(void *ptr, const size_t old_size, const size_t new_size);