diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc index 93ee1715d..258be9ed3 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc @@ -101,34 +101,22 @@ void D3D11GraphicsDriver::SetShader( } } -void D3D11GraphicsDriver::DrawIndexBuffer( - XE_GPU_PRIMITIVE_TYPE prim_type, - bool index_32bit, uint32_t index_count, - uint32_t index_base, uint32_t index_size, uint32_t endianness) { - XELOGGPU("D3D11: draw index buffer"); -} - -void D3D11GraphicsDriver::DrawIndexAuto( - XE_GPU_PRIMITIVE_TYPE prim_type, - uint32_t index_count) { +int D3D11GraphicsDriver::SetupDraw(XE_GPU_PRIMITIVE_TYPE prim_type) { RegisterFile& rf = register_file_; - XELOGGPU("D3D11: draw indexed %d (%d indicies)", - prim_type, index_count); - // Misc state. if (UpdateState()) { - return; + return 1; } // Build constant buffers. if (UpdateConstantBuffers()) { - return; + return 1; } // Bind shaders. if (BindShaders()) { - return; + return 1; } // Switch primitive topology. @@ -156,17 +144,56 @@ void D3D11GraphicsDriver::DrawIndexAuto( case XE_GPU_PRIMITIVE_TYPE_RECTANGLE_LIST: case XE_GPU_PRIMITIVE_TYPE_LINE_LOOP: XELOGE("D3D11: unsupported primitive type %d", prim_type); - return; + return 1; } context_->IASetPrimitiveTopology(primitive_topology); // Setup all fetchers (vertices/textures). if (PrepareFetchers()) { + return 1; + } + + // All ready to draw (except index buffer)! + + return 0; +} + +void D3D11GraphicsDriver::DrawIndexBuffer( + XE_GPU_PRIMITIVE_TYPE prim_type, + bool index_32bit, uint32_t index_count, + uint32_t index_base, uint32_t index_size, uint32_t endianness) { + RegisterFile& rf = register_file_; + + XELOGGPU("D3D11: draw indexed %d (%d indicies) from %.8X", + prim_type, index_count, index_base); + + // Setup shaders/etc. + if (SetupDraw(prim_type)) { return; } // Setup index buffer. - if (PrepareIndexBuffer()) { + if (PrepareIndexBuffer( + index_32bit, index_count, index_base, index_size, endianness)) { + return; + } + + // Issue draw. + uint32_t start_index = 0; //rf.values[XE_GPU_REG_VGT_INDX_OFFSET].u32; + uint32_t base_vertex = 0; + context_->DrawIndexed(index_count, start_index, base_vertex); +} + +void D3D11GraphicsDriver::DrawIndexAuto( + XE_GPU_PRIMITIVE_TYPE prim_type, + uint32_t index_count) { + RegisterFile& rf = register_file_; + + XELOGGPU("D3D11: draw indexed %d (%d indicies)", + prim_type, index_count); + + // Setup shaders/etc. + if (SetupDraw(prim_type)) { return; } @@ -309,7 +336,25 @@ int D3D11GraphicsDriver::BindShaders() { sizeof(state_.constant_buffers) / sizeof(ID3D11Buffer*), (ID3D11Buffer**)&state_.constant_buffers); - //context_->PSSetSamplers + // TODO(benvanik): set samplers for all inputs. + D3D11_SAMPLER_DESC sampler_desc; + xe_zero_struct(&sampler_desc, sizeof(sampler_desc)); + //sampler_desc.Filter = ? + sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP; + sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP; + sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP; + sampler_desc.MipLODBias = 0; + sampler_desc.MaxAnisotropy = 1; + sampler_desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; + //sampler_desc.BorderColor = ...; + sampler_desc.MinLOD = 0; + sampler_desc.MaxLOD = 0; + ID3D11SamplerState* sampler_state = NULL; + device_->CreateSamplerState(&sampler_desc, &sampler_state); + ID3D11SamplerState* sampler_states[] = { sampler_state }; + context_->PSSetSamplers(0, XECOUNT(sampler_states), sampler_states); + sampler_state->Release(); + //context_->PSSetShaderResources } else { context_->PSSetShader(NULL, NULL, 0); @@ -409,12 +454,21 @@ int D3D11GraphicsDriver::PrepareVertexFetcher( int D3D11GraphicsDriver::PrepareTextureFetcher( int fetch_slot, xe_gpu_texture_fetch_t* fetch) { + RegisterFile& rf = register_file_; + + // maybe << 2? + uint32_t address = (fetch->address << 4) + address_translation_; return 0; } -int D3D11GraphicsDriver::PrepareIndexBuffer() { +int D3D11GraphicsDriver::PrepareIndexBuffer( + bool index_32bit, uint32_t index_count, + uint32_t index_base, uint32_t index_size, uint32_t endianness) { RegisterFile& rf = register_file_; + uint32_t address = (index_base << 2) + address_translation_; + //uint32_t size_dwords = fetch->size; + /* ID3D11Buffer* buffer = 0; D3D11_BUFFER_DESC buffer_desc; @@ -446,5 +500,5 @@ int D3D11GraphicsDriver::PrepareIndexBuffer() { buffer->Release();*/ - return 0; + return 1; } diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h index cf86a1182..d660220f2 100644 --- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h +++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h @@ -51,6 +51,7 @@ public: uint32_t index_count); private: + int SetupDraw(xenos::XE_GPU_PRIMITIVE_TYPE prim_type); int UpdateState(); int UpdateConstantBuffers(); int BindShaders(); @@ -59,7 +60,9 @@ private: int fetch_slot, xenos::xe_gpu_vertex_fetch_t* fetch); int PrepareTextureFetcher( int fetch_slot, xenos::xe_gpu_texture_fetch_t* fetch); - int PrepareIndexBuffer(); + int PrepareIndexBuffer( + bool index_32bit, uint32_t index_count, + uint32_t index_base, uint32_t index_size, uint32_t endianness); private: ID3D11Device* device_; diff --git a/src/xenia/gpu/d3d11/d3d11_shader.cc b/src/xenia/gpu/d3d11/d3d11_shader.cc index 2ead1c019..7bac8a788 100644 --- a/src/xenia/gpu/d3d11/d3d11_shader.cc +++ b/src/xenia/gpu/d3d11/d3d11_shader.cc @@ -605,10 +605,15 @@ void AppendDestReg( break; } } + // TODO(benvanik): masking! if (mask != 0xf) { - ctx.output->append("."); + // ctx.output->append("."); for (int i = 0; i < 4; i++) { - ctx.output->append("%c", (mask & 0x1) ? chan_names[i] : '_'); + // TODO(benvanik): mask out values? mix in old value as temp? + // ctx.output->append("%c", (mask & 0x1) ? chan_names[i] : 'w'); + if (!(mask & 0x1)) { + XELOGW("D3D11 shader compiler skipping dest write mask!"); + } mask >>= 1; } } @@ -1177,7 +1182,117 @@ int TranslateTextureFetch( xe_gpu_translate_ctx_t& ctx, const instr_fetch_tex_t* tex, int sync) { Output* output = ctx.output; - return 1; + // Disassemble. + static const char *filter[] = { + "POINT", // TEX_FILTER_POINT + "LINEAR", // TEX_FILTER_LINEAR + "BASEMAP", // TEX_FILTER_BASEMAP + }; + static const char *aniso_filter[] = { + "DISABLED", // ANISO_FILTER_DISABLED + "MAX_1_1", // ANISO_FILTER_MAX_1_1 + "MAX_2_1", // ANISO_FILTER_MAX_2_1 + "MAX_4_1", // ANISO_FILTER_MAX_4_1 + "MAX_8_1", // ANISO_FILTER_MAX_8_1 + "MAX_16_1", // ANISO_FILTER_MAX_16_1 + }; + static const char *arbitrary_filter[] = { + "2x4_SYM", // ARBITRARY_FILTER_2X4_SYM + "2x4_ASYM", // ARBITRARY_FILTER_2X4_ASYM + "4x2_SYM", // ARBITRARY_FILTER_4X2_SYM + "4x2_ASYM", // ARBITRARY_FILTER_4X2_ASYM + "4x4_SYM", // ARBITRARY_FILTER_4X4_SYM + "4x4_ASYM", // ARBITRARY_FILTER_4X4_ASYM + }; + static const char *sample_loc[] = { + "CENTROID", // SAMPLE_CENTROID + "CENTER", // SAMPLE_CENTER + }; + uint32_t src_swiz = tex->src_swiz; + output->append(" // %sFETCH:\t", sync ? "(S)" : " "); + if (tex->pred_select) { + output->append(tex->pred_condition ? "EQ" : "NE"); + } + print_fetch_dst(output, tex->dst_reg, tex->dst_swiz); + output->append(" = R%u.", tex->src_reg); + for (int i = 0; i < 3; i++) { + output->append("%c", chan_names[src_swiz & 0x3]); + src_swiz >>= 2; + } + output->append(" CONST(%u)", tex->const_idx); + if (tex->fetch_valid_only) { + output->append(" VALID_ONLY"); + } + if (tex->tx_coord_denorm) { + output->append(" DENORM"); + } + if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST) { + output->append(" MAG(%s)", filter[tex->mag_filter]); + } + if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST) { + output->append(" MIN(%s)", filter[tex->min_filter]); + } + if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST) { + output->append(" MIP(%s)", filter[tex->mip_filter]); + } + if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST) { + output->append(" ANISO(%s)", aniso_filter[tex->aniso_filter]); + } + if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST) { + output->append(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]); + } + if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST) { + output->append(" VOL_MAG(%s)", filter[tex->vol_mag_filter]); + } + if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST) { + output->append(" VOL_MIN(%s)", filter[tex->vol_min_filter]); + } + if (!tex->use_comp_lod) { + output->append(" LOD(%u)", tex->use_comp_lod); + output->append(" LOD_BIAS(%u)", tex->lod_bias); + } + if (tex->use_reg_lod) { + output->append(" REG_LOD(%u)", tex->use_reg_lod); + } + if (tex->use_reg_gradients) { + output->append(" USE_REG_GRADIENTS"); + } + output->append(" LOCATION(%s)", sample_loc[tex->sample_location]); + if (tex->offset_x || tex->offset_y || tex->offset_z) { + output->append(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z); + } + output->append("\n"); + + // Translate. + src_swiz = tex->src_swiz; + output->append(" "); + output->append("r%u.xyzw", tex->dst_reg); + output->append(" = "); + uint32_t fetch_slot = tex->const_idx * 3; + //output->append("i.vf%u_%d.", fetch_slot, vtx->offset); + // Texture2D some_texture; + // SamplerState some_sampler; + // some_texture.Sample(some_sampler, coords) + output->append("float4(1.0, 0.0, 0.0, 1.0)."); + // Pass one over dest does xyzw and fakes the special values. + // TODO(benvanik): detect and set as rN = float4(samp.xyz, 1.0); / etc + uint32_t dst_swiz = tex->dst_swiz; + for (int i = 0; i < 4; i++) { + output->append("%c", chan_names[dst_swiz & 0x3]); + dst_swiz >>= 3; + } + output->append(";\n"); + // Do another pass to set constant values. + dst_swiz = tex->dst_swiz; + for (int i = 0; i < 4; i++) { + if ((dst_swiz & 0x7) == 4) { + output->append(" r%u.%c = 0.0;\n", tex->dst_reg, chan_names[i]); + } else if ((dst_swiz & 0x7) == 5) { + output->append(" r%u.%c = 1.0;\n", tex->dst_reg, chan_names[i]); + } + dst_swiz >>= 3; + } + return 0; } struct { diff --git a/src/xenia/gpu/xenos/ucode.h b/src/xenia/gpu/xenos/ucode.h index 9eec3daf7..48c5a971a 100644 --- a/src/xenia/gpu/xenos/ucode.h +++ b/src/xenia/gpu/xenos/ucode.h @@ -454,11 +454,11 @@ XEPACKEDSTRUCT(instr_fetch_tex_t, { uint32_t fetch_valid_only : 1; uint32_t const_idx : 5; uint32_t tx_coord_denorm : 1; - uint32_t src_swiz : 6; + uint32_t src_swiz : 6; // xyz }); /* dword1: */ XEPACKEDSTRUCTANONYMOUS({ - uint32_t dst_swiz : 12; + uint32_t dst_swiz : 12; // xyzw uint32_t mag_filter : 2; // instr_tex_filter_t uint32_t min_filter : 2; // instr_tex_filter_t uint32_t mip_filter : 2; // instr_tex_filter_t diff --git a/src/xenia/gpu/xenos/xenos.h b/src/xenia/gpu/xenos/xenos.h index dc3793754..cf06d591a 100644 --- a/src/xenia/gpu/xenos/xenos.h +++ b/src/xenia/gpu/xenos/xenos.h @@ -98,12 +98,26 @@ XEPACKEDUNION(xe_gpu_vertex_fetch_t, { // XE_GPU_REG_SHADER_CONSTANT_FETCH_* XEPACKEDUNION(xe_gpu_texture_fetch_t, { XEPACKEDSTRUCTANONYMOUS({ - uint32_t unk0; - uint32_t unk1; - uint32_t unk2; - uint32_t unk3; - uint32_t unk4; - uint32_t unk5; + uint32_t type : 2; // dword_0 + uint32_t unk0 : 20; + uint32_t pitch : 9; + uint32_t tiled : 1; + uint32_t format : 6; // dword_1 + uint32_t endianness : 2; + uint32_t unk1 : 4; + uint32_t address : 20; + union { // dword_2 + struct { + uint32_t width : 13; + uint32_t height : 13; + uint32_t unksize2d : 6; + } size_2d; + }; + uint32_t unk3; // dword_3 + uint32_t unk4; // dword_4 + uint32_t unk5 : 9; // dword_5 + uint32_t dimension : 2; + uint32_t unk5b : 21; }); XEPACKEDSTRUCTANONYMOUS({ uint32_t dword_0;