Hacking to get first triangle drawn.

This commit is contained in:
Ben Vanik 2013-10-12 15:07:34 -07:00
parent 83d7523da1
commit 0ef278325f
13 changed files with 396 additions and 85 deletions

View File

@ -104,7 +104,7 @@ void D3D11GraphicsDriver::SetShader(
} }
} }
void D3D11GraphicsDriver::DrawAutoIndexed( void D3D11GraphicsDriver::DrawIndexAuto(
XE_GPU_PRIMITIVE_TYPE prim_type, XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count) { uint32_t index_count) {
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
@ -113,13 +113,19 @@ void D3D11GraphicsDriver::DrawAutoIndexed(
prim_type, index_count); prim_type, index_count);
// Misc state. // Misc state.
UpdateState(); if (UpdateState()) {
return;
}
// Build constant buffers. // Build constant buffers.
UpdateConstantBuffers(); if (UpdateConstantBuffers()) {
return;
}
// Bind shaders. // Bind shaders.
BindShaders(); if (BindShaders()) {
return;
}
// Switch primitive topology. // Switch primitive topology.
// Some are unsupported on D3D11 and must be emulated. // Some are unsupported on D3D11 and must be emulated.
@ -151,26 +157,66 @@ void D3D11GraphicsDriver::DrawAutoIndexed(
context_->IASetPrimitiveTopology(primitive_topology); context_->IASetPrimitiveTopology(primitive_topology);
// Setup all fetchers (vertices/textures). // Setup all fetchers (vertices/textures).
PrepareFetchers(); if (PrepareFetchers()) {
return;
}
// Setup index buffer. // Setup index buffer.
PrepareIndexBuffer(); if (PrepareIndexBuffer()) {
return;
}
// Issue draw. // Issue draw.
uint32_t start_index = rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32; uint32_t start_index = rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
uint32_t base_vertex = 0; uint32_t base_vertex = 0;
//context_->DrawIndexed(index_count, start_index, base_vertex); //context_->DrawIndexed(index_count, start_index, base_vertex);
context_->Draw(index_count, 0);
} }
void D3D11GraphicsDriver::UpdateState() { int D3D11GraphicsDriver::UpdateState() {
//context_->OMSetBlendState(blend_state, blend_factor, sample_mask); // General rasterizer state.
ID3D11RasterizerState* rasterizer_state = 0;
D3D11_RASTERIZER_DESC rasterizer_desc;
xe_zero_struct(&rasterizer_desc, sizeof(rasterizer_desc));
rasterizer_desc.FillMode = D3D11_FILL_SOLID; // D3D11_FILL_WIREFRAME;
rasterizer_desc.CullMode = D3D11_CULL_NONE; // D3D11_CULL_FRONT BACK
rasterizer_desc.FrontCounterClockwise = false;
rasterizer_desc.DepthBias = 0;
rasterizer_desc.DepthBiasClamp = 0;
rasterizer_desc.SlopeScaledDepthBias = 0;
rasterizer_desc.DepthClipEnable = true;
rasterizer_desc.ScissorEnable = false;
rasterizer_desc.MultisampleEnable = false;
rasterizer_desc.AntialiasedLineEnable = false;
device_->CreateRasterizerState(&rasterizer_desc, &rasterizer_state);
context_->RSSetState(rasterizer_state);
XESAFERELEASE(rasterizer_state);
// Depth-stencil state.
//context_->OMSetDepthStencilState //context_->OMSetDepthStencilState
//context_->RSSetScissorRects
//context_->RSSetState // Blend state.
//context_->RSSetViewports //context_->OMSetBlendState(blend_state, blend_factor, sample_mask);
// Scissoring.
// TODO(benvanik): pull from scissor registers.
context_->RSSetScissorRects(0, NULL);
// Viewport.
// If we have resized the window we will want to change this.
D3D11_VIEWPORT viewport;
viewport.MinDepth = 0.0f;
viewport.MaxDepth = 1.0f;
viewport.TopLeftX = 0;
viewport.TopLeftY = 0;
viewport.Width = 1280;
viewport.Height = 720;
context_->RSSetViewports(1, &viewport);
return 0;
} }
void D3D11GraphicsDriver::UpdateConstantBuffers() { int D3D11GraphicsDriver::UpdateConstantBuffers() {
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
D3D11_MAPPED_SUBRESOURCE res; D3D11_MAPPED_SUBRESOURCE res;
@ -197,9 +243,11 @@ void D3D11GraphicsDriver::UpdateConstantBuffers() {
&rf.values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031], &rf.values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031],
(8) * sizeof(int)); (8) * sizeof(int));
context_->Unmap(state_.constant_buffers.bool_constants, 0); context_->Unmap(state_.constant_buffers.bool_constants, 0);
return 0;
} }
void D3D11GraphicsDriver::BindShaders() { int D3D11GraphicsDriver::BindShaders() {
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
xe_gpu_program_cntl_t program_cntl; xe_gpu_program_cntl_t program_cntl;
program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32; program_cntl.dword_0 = rf.values[XE_GPU_REG_SQ_PROGRAM_CNTL].u32;
@ -212,7 +260,7 @@ void D3D11GraphicsDriver::BindShaders() {
if (vs->Prepare(&program_cntl)) { if (vs->Prepare(&program_cntl)) {
XELOGGPU("D3D11: failed to prepare vertex shader"); XELOGGPU("D3D11: failed to prepare vertex shader");
state_.vertex_shader = NULL; state_.vertex_shader = NULL;
return; return 1;
} }
} }
@ -230,6 +278,10 @@ void D3D11GraphicsDriver::BindShaders() {
//context_->VSSetSamplers //context_->VSSetSamplers
//context_->VSSetShaderResources //context_->VSSetShaderResources
} else {
context_->VSSetShader(NULL, NULL, 0);
context_->IASetInputLayout(NULL);
return 1;
} }
// Pixel shader setup. // Pixel shader setup.
@ -240,7 +292,7 @@ void D3D11GraphicsDriver::BindShaders() {
if (ps->Prepare(&program_cntl)) { if (ps->Prepare(&program_cntl)) {
XELOGGPU("D3D11: failed to prepare pixel shader"); XELOGGPU("D3D11: failed to prepare pixel shader");
state_.pixel_shader = NULL; state_.pixel_shader = NULL;
return; return 1;
} }
} }
@ -255,33 +307,48 @@ void D3D11GraphicsDriver::BindShaders() {
//context_->PSSetSamplers //context_->PSSetSamplers
//context_->PSSetShaderResources //context_->PSSetShaderResources
} else {
context_->PSSetShader(NULL, NULL, 0);
return 1;
} }
return 0;
} }
void D3D11GraphicsDriver::PrepareFetchers() { int D3D11GraphicsDriver::PrepareFetchers() {
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
for (int n = 0; n < 32; n++) { for (int n = 0; n < 32; n++) {
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + n * 6; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + n * 6;
xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r]; xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
if (group->type_0 == 0x2) { if (group->type_0 == 0x2) {
PrepareTextureFetcher(n, &group->texture_fetch); if (PrepareTextureFetcher(n, &group->texture_fetch)) {
return 1;
}
} else { } else {
// TODO(benvanik): verify register numbering. // TODO(benvanik): verify register numbering.
if (group->type_0 == 0x3) { if (group->type_0 == 0x3) {
PrepareVertexFetcher(n * 3 + 0, &group->vertex_fetch_0); if (PrepareVertexFetcher(n * 3 + 0, &group->vertex_fetch_0)) {
return 1;
}
} }
if (group->type_1 == 0x3) { if (group->type_1 == 0x3) {
PrepareVertexFetcher(n * 3 + 1, &group->vertex_fetch_1); if (PrepareVertexFetcher(n * 3 + 1, &group->vertex_fetch_1)) {
return 1;
}
} }
if (group->type_2 == 0x3) { if (group->type_2 == 0x3) {
PrepareVertexFetcher(n * 3 + 2, &group->vertex_fetch_2); if (PrepareVertexFetcher(n * 3 + 2, &group->vertex_fetch_2)) {
return 1;
}
} }
} }
} }
return 0;
} }
void D3D11GraphicsDriver::PrepareVertexFetcher( int D3D11GraphicsDriver::PrepareVertexFetcher(
int slot, xe_gpu_vertex_fetch_t* fetch) { int fetch_slot, xe_gpu_vertex_fetch_t* fetch) {
uint32_t address = (fetch->address << 2) + address_translation_; uint32_t address = (fetch->address << 2) + address_translation_;
uint32_t size_dwords = fetch->size; uint32_t size_dwords = fetch->size;
@ -292,9 +359,18 @@ void D3D11GraphicsDriver::PrepareVertexFetcher(
buffer_desc.Usage = D3D11_USAGE_DYNAMIC; buffer_desc.Usage = D3D11_USAGE_DYNAMIC;
buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; buffer_desc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; buffer_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
device_->CreateBuffer(&buffer_desc, NULL, &buffer); HRESULT hr = device_->CreateBuffer(&buffer_desc, NULL, &buffer);
if (FAILED(hr)) {
XELOGE("D3D11: unable to create vertex fetch buffer");
return 1;
}
D3D11_MAPPED_SUBRESOURCE res; D3D11_MAPPED_SUBRESOURCE res;
context_->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &res); hr = context_->Map(buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &res);
if (FAILED(hr)) {
XELOGE("D3D11: unable to map vertex fetch buffer");
XESAFERELEASE(buffer);
return 1;
}
uint32_t* src = (uint32_t*)xe_memory_addr(memory_, address); uint32_t* src = (uint32_t*)xe_memory_addr(memory_, address);
uint32_t* dest = (uint32_t*)res.pData; uint32_t* dest = (uint32_t*)res.pData;
for (uint32_t n = 0; n < size_dwords; n++) { for (uint32_t n = 0; n < size_dwords; n++) {
@ -308,19 +384,31 @@ void D3D11GraphicsDriver::PrepareVertexFetcher(
} }
context_->Unmap(buffer, 0); context_->Unmap(buffer, 0);
// TODO(benvanik): fetch from VS. D3D11VertexShader* vs = state_.vertex_shader;
/*uint32_t stride = 0; if (!vs) {
return 1;
}
const instr_fetch_vtx_t* vtx = vs->GetFetchVtxBySlot(fetch_slot);
if (!vtx->must_be_one) {
return 1;
}
// TODO(benvanik): always dword aligned?
uint32_t stride = vtx->stride * 4;
uint32_t offset = 0; uint32_t offset = 0;
context_->IASetVertexBuffers(slot, 1, &buffer, &stride, &offset);*/ int vb_slot = 95 - fetch_slot;
context_->IASetVertexBuffers(vb_slot, 1, &buffer, &stride, &offset);
buffer->Release(); buffer->Release();
return 0;
} }
void D3D11GraphicsDriver::PrepareTextureFetcher( int D3D11GraphicsDriver::PrepareTextureFetcher(
int slot, xe_gpu_texture_fetch_t* fetch) { int fetch_slot, xe_gpu_texture_fetch_t* fetch) {
return 0;
} }
void D3D11GraphicsDriver::PrepareIndexBuffer() { int D3D11GraphicsDriver::PrepareIndexBuffer() {
RegisterFile& rf = register_file_; RegisterFile& rf = register_file_;
/* /*
@ -353,4 +441,6 @@ void D3D11GraphicsDriver::PrepareIndexBuffer() {
context_->IASetIndexBuffer(buffer, format, 0); context_->IASetIndexBuffer(buffer, format, 0);
buffer->Release();*/ buffer->Release();*/
return 0;
} }

View File

@ -42,20 +42,20 @@ public:
uint32_t address, uint32_t address,
uint32_t start, uint32_t start,
uint32_t length); uint32_t length);
virtual void DrawAutoIndexed( virtual void DrawIndexAuto(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type, xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count); uint32_t index_count);
private: private:
void UpdateState(); int UpdateState();
void UpdateConstantBuffers(); int UpdateConstantBuffers();
void BindShaders(); int BindShaders();
void PrepareFetchers(); int PrepareFetchers();
void PrepareVertexFetcher( int PrepareVertexFetcher(
int slot, xenos::xe_gpu_vertex_fetch_t* fetch); int fetch_slot, xenos::xe_gpu_vertex_fetch_t* fetch);
void PrepareTextureFetcher( int PrepareTextureFetcher(
int slot, xenos::xe_gpu_texture_fetch_t* fetch); int fetch_slot, xenos::xe_gpu_texture_fetch_t* fetch);
void PrepareIndexBuffer(); int PrepareIndexBuffer();
private: private:
ID3D11Device* device_; ID3D11Device* device_;

View File

@ -9,6 +9,10 @@
#include <xenia/gpu/d3d11/d3d11_shader.h> #include <xenia/gpu/d3d11/d3d11_shader.h>
#include <xenia/gpu/xenos/ucode.h>
#include <d3dx11.h>
using namespace xe; using namespace xe;
using namespace xe::gpu; using namespace xe::gpu;
@ -53,6 +57,11 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
void* byte_code = NULL; void* byte_code = NULL;
size_t byte_code_length = 0; size_t byte_code_length = 0;
if (!byte_code) {
return 1;
}
// Create shader. // Create shader.
HRESULT hr = device_->CreateVertexShader( HRESULT hr = device_->CreateVertexShader(
byte_code, byte_code_length, byte_code, byte_code_length,
@ -60,22 +69,131 @@ int D3D11VertexShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
&handle_); &handle_);
if (FAILED(hr)) { if (FAILED(hr)) {
XELOGE("D3D11: failed to create vertex shader"); XELOGE("D3D11: failed to create vertex shader");
xe_free(byte_code);
return 1; return 1;
} }
// Create input layout. // Create input layout.
uint32_t element_count = 0; size_t element_count = fetch_vtxs_.size();
D3D11_INPUT_ELEMENT_DESC* element_descs = 0; D3D11_INPUT_ELEMENT_DESC* element_descs =
(D3D11_INPUT_ELEMENT_DESC*)xe_alloca(
sizeof(D3D11_INPUT_ELEMENT_DESC) * element_count);
int n = 0;
for (std::vector<instr_fetch_vtx_t>::iterator it = fetch_vtxs_.begin();
it != fetch_vtxs_.end(); ++it, ++n) {
const instr_fetch_vtx_t& vtx = *it;
DXGI_FORMAT vtx_format;
switch (vtx.format) {
case FMT_1_REVERSE:
vtx_format = DXGI_FORMAT_R1_UNORM; // ?
break;
case FMT_8:
if (!vtx.num_format_all) {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R8_SNORM : DXGI_FORMAT_R8_UNORM;
} else {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R8_SINT : DXGI_FORMAT_R8_UINT;
}
break;
case FMT_8_8_8_8:
if (!vtx.num_format_all) {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R8G8B8A8_SNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
} else {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R8G8B8A8_SINT : DXGI_FORMAT_R8G8B8A8_UINT;
}
break;
case FMT_8_8:
if (!vtx.num_format_all) {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R8G8_SNORM : DXGI_FORMAT_R8G8_UNORM;
} else {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R8G8_SINT : DXGI_FORMAT_R8G8_UINT;
}
break;
case FMT_16:
if (!vtx.num_format_all) {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R16_SNORM : DXGI_FORMAT_R16_UNORM;
} else {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R16_SINT : DXGI_FORMAT_R16_UINT;
}
break;
case FMT_16_16:
if (!vtx.num_format_all) {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R16G16_SNORM : DXGI_FORMAT_R16G16_UNORM;
} else {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R16G16_SINT : DXGI_FORMAT_R16G16_UINT;
}
break;
case FMT_16_16_16_16:
if (!vtx.num_format_all) {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R16G16B16A16_SNORM : DXGI_FORMAT_R16G16B16A16_UNORM;
} else {
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R16G16B16A16_SINT : DXGI_FORMAT_R16G16B16A16_UINT;
}
break;
case FMT_32:
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R32_SINT : DXGI_FORMAT_R32_UINT;
break;
case FMT_32_32:
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R32G32_SINT : DXGI_FORMAT_R32G32_UINT;
break;
case FMT_32_32_32_32:
vtx_format = vtx.format_comp_all ?
DXGI_FORMAT_R32G32B32A32_SINT : DXGI_FORMAT_R32G32B32A32_UINT;
break;
case FMT_32_FLOAT:
vtx_format = DXGI_FORMAT_R32_FLOAT;
break;
case FMT_32_32_FLOAT:
vtx_format = DXGI_FORMAT_R32G32_FLOAT;
break;
case FMT_32_32_32_32_FLOAT:
vtx_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
break;
case FMT_32_32_32_FLOAT:
vtx_format = DXGI_FORMAT_R32G32B32_FLOAT;
break;
default:
XEASSERTALWAYS();
break;
}
element_descs[n].SemanticName = "XEVF";
element_descs[n].SemanticIndex = n;
element_descs[n].Format = vtx_format;
// TODO(benvanik): pick slot in same way that driver does.
// CONST(31, 2) = reg 31, index 2 = rf([31] * 6 + [2] * 2)
uint32_t fetch_slot = vtx.const_index * 3 + vtx.const_index_sel;
uint32_t vb_slot = 95 - fetch_slot;
element_descs[n].InputSlot = vb_slot;
element_descs[n].AlignedByteOffset = vtx.offset * 4;
element_descs[n].InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
element_descs[n].InstanceDataStepRate = 0;
}
hr = device_->CreateInputLayout( hr = device_->CreateInputLayout(
element_descs, element_descs,
element_count, (UINT)element_count,
byte_code, byte_code_length, byte_code, byte_code_length,
&input_layout_); &input_layout_);
if (FAILED(hr)) { if (FAILED(hr)) {
XELOGE("D3D11: failed to create vertex shader input layout"); XELOGE("D3D11: failed to create vertex shader input layout");
xe_free(byte_code);
return 1; return 1;
} }
xe_free(byte_code);
is_prepared_ = true; is_prepared_ = true;
return 0; return 0;
} }
@ -102,6 +220,11 @@ int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
void* byte_code = NULL; void* byte_code = NULL;
size_t byte_code_length = 0; size_t byte_code_length = 0;
if (!byte_code) {
return 1;
}
// Create shader. // Create shader.
HRESULT hr = device_->CreatePixelShader( HRESULT hr = device_->CreatePixelShader(
byte_code, byte_code_length, byte_code, byte_code_length,
@ -109,8 +232,12 @@ int D3D11PixelShader::Prepare(xe_gpu_program_cntl_t* program_cntl) {
&handle_); &handle_);
if (FAILED(hr)) { if (FAILED(hr)) {
XELOGE("D3D11: failed to create vertex shader"); XELOGE("D3D11: failed to create vertex shader");
xe_free(byte_code);
return 1; return 1;
} }
xe_free(byte_code);
is_prepared_ = true; is_prepared_ = true;
return 0; return 0;
} }

View File

@ -38,7 +38,7 @@ D3D11Window::D3D11Window(
desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH; desc.Flags = DXGI_SWAP_CHAIN_FLAG_ALLOW_MODE_SWITCH;
// Setup buffers. // Setup buffers.
desc.BufferCount = 2; desc.BufferCount = 1;
desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
desc.BufferDesc.Width = width_; desc.BufferDesc.Width = width_;
desc.BufferDesc.Height = height_; desc.BufferDesc.Height = height_;
@ -93,16 +93,6 @@ D3D11Window::~D3D11Window() {
} }
void D3D11Window::Swap() { void D3D11Window::Swap() {
// Setup the viewport.
//D3D11_VIEWPORT viewport;
//viewport.MinDepth = 0.0f;
//viewport.MaxDepth = 1.0f;
//viewport.TopLeftX = 0;
//viewport.TopLeftY = 0;
//viewport.Width = (FLOAT)width_;
//viewport.Height = (FLOAT)height_;
//context_->RSSetViewports(1, &viewport);
// Swap buffers. // Swap buffers.
// TODO(benvanik): control vsync with flag. // TODO(benvanik): control vsync with flag.
bool vsync = true; bool vsync = true;

View File

@ -38,7 +38,9 @@ public:
uint32_t address, uint32_t address,
uint32_t start, uint32_t start,
uint32_t length) = 0; uint32_t length) = 0;
virtual void DrawAutoIndexed( //virtual void DrawIndex();
//virtual void DrawIndexImmediate();
virtual void DrawIndexAuto(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type, xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count) = 0; uint32_t index_count) = 0;

View File

@ -66,7 +66,7 @@ void NopGraphicsDriver::SetShader(
} }
} }
void NopGraphicsDriver::DrawAutoIndexed( void NopGraphicsDriver::DrawIndexAuto(
XE_GPU_PRIMITIVE_TYPE prim_type, XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count) { uint32_t index_count) {
XELOGGPU("NOP: draw indexed %d (%d indicies)", XELOGGPU("NOP: draw indexed %d (%d indicies)",

View File

@ -39,7 +39,7 @@ public:
uint32_t address, uint32_t address,
uint32_t start, uint32_t start,
uint32_t length); uint32_t length);
virtual void DrawAutoIndexed( virtual void DrawIndexAuto(
xenos::XE_GPU_PRIMITIVE_TYPE prim_type, xenos::XE_GPU_PRIMITIVE_TYPE prim_type,
uint32_t index_count); uint32_t index_count);

View File

@ -258,7 +258,7 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
uint32_t prim_type = d1 & 0x3F; uint32_t prim_type = d1 & 0x3F;
uint32_t src_sel = (d1 >> 6) & 0x3; uint32_t src_sel = (d1 >> 6) & 0x3;
XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex' XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex'
driver_->DrawAutoIndexed( driver_->DrawIndexAuto(
(XE_GPU_PRIMITIVE_TYPE)prim_type, (XE_GPU_PRIMITIVE_TYPE)prim_type,
index_count); index_count);
} }
@ -273,7 +273,7 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
uint32_t prim_type = d0 & 0x3F; uint32_t prim_type = d0 & 0x3F;
uint32_t src_sel = (d0 >> 6) & 0x3; uint32_t src_sel = (d0 >> 6) & 0x3;
XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex' XEASSERT(src_sel == 0x2); // 'SrcSel=AutoIndex'
driver_->DrawAutoIndexed( driver_->DrawIndexAuto(
(XE_GPU_PRIMITIVE_TYPE)prim_type, (XE_GPU_PRIMITIVE_TYPE)prim_type,
index_count); index_count);
} }

View File

@ -22,6 +22,8 @@ Shader::Shader(
const uint8_t* src_ptr, size_t length, const uint8_t* src_ptr, size_t length,
uint64_t hash) : uint64_t hash) :
type_(type), hash_(hash), is_prepared_(false) { type_(type), hash_(hash), is_prepared_(false) {
xe_zero_struct(fetch_vtx_slots_, sizeof(fetch_vtx_slots_));
// Verify. // Verify.
dword_count_ = length / 4; dword_count_ = length / 4;
XEASSERT(dword_count_ <= 512); XEASSERT(dword_count_ <= 512);
@ -32,12 +34,102 @@ Shader::Shader(
for (uint32_t n = 0; n < dword_count_; n++) { for (uint32_t n = 0; n < dword_count_; n++) {
dwords_[n] = XEGETUINT32BE(src_ptr + n * 4); dwords_[n] = XEGETUINT32BE(src_ptr + n * 4);
} }
// Gather input/output registers/etc.
GatherIO();
} }
Shader::~Shader() { Shader::~Shader() {
xe_free(dwords_); xe_free(dwords_);
} }
// Scans the control-flow program at the start of the shader and gathers the
// allocations and exec blocks it contains.
//
// Control-flow instructions are 48 bits wide and are stored two at a time in
// three consecutive dwords; each triple is unpacked into cfa/cfb below.
// Scanning stops after the pair that contains EXEC_END, or as soon as fewer
// than three dwords remain so a truncated program is never read past its end.
void Shader::GatherIO() {
  // Process all execution blocks.
  instr_cf_t cfa;
  instr_cf_t cfb;
  // idx is unsigned to match dword_count_, and idx + 2 must be a valid index
  // because all three dwords of the pair are read.
  for (size_t idx = 0; idx + 2 < dword_count_; idx += 3) {
    const uint32_t dword_0 = dwords_[idx + 0];
    const uint32_t dword_1 = dwords_[idx + 1];
    const uint32_t dword_2 = dwords_[idx + 2];

    // First instruction: all of dword_0 plus the low half of dword_1.
    cfa.dword_0 = dword_0;
    cfa.dword_1 = dword_1 & 0xFFFF;
    // Second instruction: the high half of dword_1 plus all of dword_2.
    cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
    cfb.dword_1 = dword_2 >> 16;

    if (cfa.opc == ALLOC) {
      GatherAlloc(&cfa.alloc);
    } else if (cfa.is_exec()) {
      GatherExec(&cfa.exec);
    }
    if (cfb.opc == ALLOC) {
      GatherAlloc(&cfb.alloc);
    } else if (cfb.is_exec()) {
      GatherExec(&cfb.exec);
    }

    // EXEC_END in either half terminates the control-flow program.
    if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {
      break;
    }
  }
}
void Shader::GatherAlloc(const instr_cf_alloc_t* cf) {
allocs_.push_back(*cf);
}
void Shader::GatherExec(const instr_cf_exec_t* cf) {
uint32_t sequence = cf->serialize;
for (uint32_t i = 0; i < cf->count; i++) {
uint32_t alu_off = (cf->address + i);
int sync = sequence & 0x2;
if (sequence & 0x1) {
const instr_fetch_t* fetch =
(const instr_fetch_t*)(dwords_ + alu_off * 3);
switch (fetch->opc) {
case VTX_FETCH:
GatherVertexFetch(&fetch->vtx);
break;
case TEX_FETCH:
case TEX_GET_BORDER_COLOR_FRAC:
case TEX_GET_COMP_TEX_LOD:
case TEX_GET_GRADIENTS:
case TEX_GET_WEIGHTS:
case TEX_SET_TEX_LOD:
case TEX_SET_GRADIENTS_H:
case TEX_SET_GRADIENTS_V:
default:
XEASSERTALWAYS();
break;
}
} else {
// TODO(benvanik): gather registers used, predicate bits used, etc.
/*const instr_alu_t* alu =
(const instr_alu_t*)(dwords_ + alu_off * 3);*/
}
sequence >>= 2;
}
}
// Records one vertex fetch instruction, both in encounter order (fetch_vtxs_)
// and indexed by the fetch constant slot it reads (fetch_vtx_slots_).
//
// Fields of interest on the instruction:
//   dst_reg/dst_swiz
//   src_reg/src_swiz
//   format = a2xx_sq_surfaceformat
//   format_comp_all ? signed : unsigned
//   num_format_all ? integer : fraction (normalized)
//   stride
//   offset
//   const_index/const_index_sel -- fetch constant register
//   exp_adjust_all - [-32,31] - (2^exp_adjust_all)*fetch - 0 = default
void Shader::GatherVertexFetch(const instr_fetch_vtx_t* vtx) {
  fetch_vtxs_.push_back(*vtx);

  // The slot is computed from fields of the guest shader, so check it against
  // the table size before writing; an out-of-range slot is still kept in
  // fetch_vtxs_ but cannot be looked up by slot.
  const uint32_t fetch_slot = vtx->const_index * 3 + vtx->const_index_sel;
  const size_t slot_count =
      sizeof(fetch_vtx_slots_) / sizeof(fetch_vtx_slots_[0]);
  if (fetch_slot < slot_count) {
    fetch_vtx_slots_[fetch_slot] = *vtx;
  }
}
// Returns the vertex fetch instruction recorded for |fetch_slot|, or NULL if
// |fetch_slot| lies outside the slot table. The table is zeroed in the
// constructor, so a slot that no fetch instruction wrote holds an all-zero
// entry.
const instr_fetch_vtx_t* Shader::GetFetchVtxBySlot(uint32_t fetch_slot) {
  const size_t slot_count =
      sizeof(fetch_vtx_slots_) / sizeof(fetch_vtx_slots_[0]);
  if (fetch_slot >= slot_count) {
    return NULL;
  }
  return &fetch_vtx_slots_[fetch_slot];
}
char* Shader::Disassemble() { char* Shader::Disassemble() {
return DisassembleShader(type_, dwords_, dword_count_); return DisassembleShader(type_, dwords_, dword_count_);
} }

View File

@ -11,6 +11,7 @@
#define XENIA_GPU_SHADER_H_ #define XENIA_GPU_SHADER_H_
#include <xenia/core.h> #include <xenia/core.h>
#include <xenia/gpu/xenos/ucode.h>
#include <xenia/gpu/xenos/xenos.h> #include <xenia/gpu/xenos/xenos.h>
@ -31,19 +32,27 @@ public:
uint64_t hash() const { return hash_; } uint64_t hash() const { return hash_; }
bool is_prepared() const { return is_prepared_; } bool is_prepared() const { return is_prepared_; }
// vfetch formats const xenos::instr_fetch_vtx_t* GetFetchVtxBySlot(uint32_t fetch_slot);
// sampler formats
// constants/registers/etc used
// NOTE: xe_free() the returned string! // NOTE: xe_free() the returned string!
char* Disassemble(); char* Disassemble();
private:
void GatherIO();
void GatherAlloc(const xenos::instr_cf_alloc_t* cf);
void GatherExec(const xenos::instr_cf_exec_t* cf);
void GatherVertexFetch(const xenos::instr_fetch_vtx_t* vtx);
protected: protected:
xenos::XE_GPU_SHADER_TYPE type_; xenos::XE_GPU_SHADER_TYPE type_;
uint32_t* dwords_; uint32_t* dwords_;
size_t dword_count_; size_t dword_count_;
uint64_t hash_; uint64_t hash_;
bool is_prepared_; bool is_prepared_;
std::vector<xenos::instr_cf_alloc_t> allocs_;
std::vector<xenos::instr_fetch_vtx_t> fetch_vtxs_;
xenos::instr_fetch_vtx_t fetch_vtx_slots_[96];
}; };

View File

@ -361,6 +361,25 @@ XEPACKEDUNION(instr_cf_t, {
uint32_t dword_0; uint32_t dword_0;
uint32_t dword_1; uint32_t dword_1;
}); });
// True when this control-flow instruction is any of the exec opcodes,
// conditional or not.
bool is_exec() const {
  switch (this->opc) {
  case EXEC:
  case EXEC_END:
  case COND_EXEC:
  case COND_EXEC_END:
  case COND_PRED_EXEC:
  case COND_PRED_EXEC_END:
  case COND_EXEC_PRED_CLEAN:
  case COND_EXEC_PRED_CLEAN_END:
    return true;
  default:
    return false;
  }
}
// True when this control-flow instruction is one of the conditional exec
// opcodes (plain EXEC and EXEC_END are excluded).
bool is_cond_exec() const {
  switch (this->opc) {
  case COND_EXEC:
  case COND_EXEC_END:
  case COND_PRED_EXEC:
  case COND_PRED_EXEC_END:
  case COND_EXEC_PRED_CLEAN:
  case COND_EXEC_PRED_CLEAN_END:
    return true;
  default:
    return false;
  }
}
}); });

View File

@ -571,26 +571,6 @@ int disasm_fetch(
return 0; return 0;
} }
// Returns 1 when |cf| is any of the exec opcodes (conditional or not),
// otherwise 0.
int cf_exec(const instr_cf_t* cf) {
  switch (cf->opc) {
  case EXEC:
  case EXEC_END:
  case COND_EXEC:
  case COND_EXEC_END:
  case COND_PRED_EXEC:
  case COND_PRED_EXEC_END:
  case COND_EXEC_PRED_CLEAN:
  case COND_EXEC_PRED_CLEAN_END:
    return 1;
  default:
    return 0;
  }
}
// Returns 1 when |cf| is one of the conditional exec opcodes (plain EXEC and
// EXEC_END are excluded), otherwise 0.
int cf_cond_exec(const instr_cf_t* cf) {
  switch (cf->opc) {
  case COND_EXEC:
  case COND_EXEC_END:
  case COND_PRED_EXEC:
  case COND_PRED_EXEC_END:
  case COND_EXEC_PRED_CLEAN:
  case COND_EXEC_PRED_CLEAN_END:
    return 1;
  default:
    return 0;
  }
}
void print_cf_nop(Output* output, const instr_cf_t* cf) { void print_cf_nop(Output* output, const instr_cf_t* cf) {
} }
@ -609,7 +589,7 @@ void print_cf_exec(Output* output, const instr_cf_t* cf) {
if (cf->exec.address_mode == ABSOLUTE_ADDR) { if (cf->exec.address_mode == ABSOLUTE_ADDR) {
output->append(" ABSOLUTE_ADDR"); output->append(" ABSOLUTE_ADDR");
} }
if (cf_cond_exec(cf)) { if (cf->is_cond_exec()) {
output->append(" COND(%d)", cf->exec.condition); output->append(" COND(%d)", cf->exec.condition);
} }
} }
@ -732,11 +712,11 @@ char* xenos::DisassembleShader(
cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16); cfb.dword_0 = (dword_1 >> 16) | (dword_2 << 16);
cfb.dword_1 = dword_2 >> 16; cfb.dword_1 = dword_2 >> 16;
print_cf(output, &cfa, 0); print_cf(output, &cfa, 0);
if (cf_exec(&cfa)) { if (cfa.is_exec()) {
disasm_exec(output, dwords, dword_count, 0, type, &cfa); disasm_exec(output, dwords, dword_count, 0, type, &cfa);
} }
print_cf(output, &cfb, 0); print_cf(output, &cfb, 0);
if (cf_exec(&cfb)) { if (cfb.is_exec()) {
disasm_exec(output, dwords, dword_count, 0, type, &cfb); disasm_exec(output, dwords, dword_count, 0, type, &cfb);
} }
if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) { if (cfa.opc == EXEC_END || cfb.opc == EXEC_END) {

View File

@ -13,6 +13,8 @@
#include <xenia/types.h> #include <xenia/types.h>
#define xe_alloca(size) alloca(size)
void *xe_malloc(const size_t size); void *xe_malloc(const size_t size);
void *xe_calloc(const size_t size); void *xe_calloc(const size_t size);
void *xe_realloc(void *ptr, const size_t old_size, const size_t new_size); void *xe_realloc(void *ptr, const size_t old_size, const size_t new_size);