diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
index 4178ee90c..c31e74dcc 100644
--- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
+++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.cc
@@ -812,25 +812,35 @@ int D3D11GraphicsDriver::BindShaders() {
 }
 
 int D3D11GraphicsDriver::PrepareFetchers() {
+  // Input assembly.
   XEASSERTNOTNULL(state_.vertex_shader);
-  auto inputs = state_.vertex_shader->GetVertexBufferInputs();
-  for (size_t n = 0; n < inputs->count; n++) {
-    auto input = inputs->descs[n];
+  auto vtx_inputs = state_.vertex_shader->GetVertexBufferInputs();
+  for (size_t n = 0; n < vtx_inputs->count; n++) {
+    auto input = vtx_inputs->descs[n];
     if (PrepareVertexBuffer(input)) {
       XELOGE("D3D11: unable to prepare vertex buffer");
       return 1;
     }
   }
 
-  // TODO(benvanik): rewrite by sampler
-  RegisterFile& rf = register_file_;
-  for (int n = 0; n < 32; n++) {
-    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + n * 6;
-    xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
-    if (group->type_0 == 0x2) {
-      if (PrepareTextureFetcher(n, &group->texture_fetch)) {
-        return 1;
-      }
+  // Vertex texture inputs.
+  auto tex_inputs = state_.vertex_shader->GetTextureBufferInputs();
+  for (size_t n = 0; n < tex_inputs->count; n++) {
+    auto input = tex_inputs->descs[n];
+    if (PrepareTextureFetcher(XE_GPU_SHADER_TYPE_VERTEX, input)) {
+      XELOGE("D3D11: unable to prepare texture buffer");
+      return 1;
+    }
+  }
+
+  // Pixel shader texture inputs.
+  XEASSERTNOTNULL(state_.pixel_shader);
+  tex_inputs = state_.pixel_shader->GetTextureBufferInputs();
+  for (size_t n = 0; n < tex_inputs->count; n++) {
+    auto input = tex_inputs->descs[n];
+    if (PrepareTextureFetcher(XE_GPU_SHADER_TYPE_PIXEL, input)) {
+      XELOGE("D3D11: unable to prepare texture buffer");
+      return 1;
     }
   }
 
@@ -912,12 +922,337 @@ int D3D11GraphicsDriver::PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc) {
   return 0;
 }
 
-int D3D11GraphicsDriver::PrepareTextureFetcher(
-    int fetch_slot, xe_gpu_texture_fetch_t* fetch) {
-  RegisterFile& rf = register_file_;
+// Maps the format field of a guest texture fetch constant to the DXGI format
+// used for the host texture. info.format is DXGI_FORMAT_UNKNOWN for every
+// guest format that is not handled yet; only FMT_8_8_8_8 is handled.
+D3D11GraphicsDriver::TextureInfo D3D11GraphicsDriver::GetTextureInfo(
+    xe_gpu_texture_fetch_t& fetch) {
+  // a2xx_sq_surfaceformat
+  TextureInfo info;
+  info.format = DXGI_FORMAT_UNKNOWN;
+  info.block_width = 0;
+  info.block_height = 0;
+  switch (fetch.format) {
+    case FMT_8_8_8_8:
+      info.format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      break;
+    case FMT_1_REVERSE:
+    case FMT_1:
+    case FMT_8:
+    case FMT_1_5_5_5:
+    case FMT_5_6_5:
+    case FMT_6_5_5:
+    case FMT_2_10_10_10:
+    case FMT_8_A:
+    case FMT_8_B:
+    case FMT_8_8:
+    case FMT_Cr_Y1_Cb_Y0:
+    case FMT_Y1_Cr_Y0_Cb:
+    case FMT_5_5_5_1:
+    case FMT_8_8_8_8_A:
+    case FMT_4_4_4_4:
+    case FMT_10_11_11:
+    case FMT_11_11_10:
+    case FMT_DXT1:
+    case FMT_DXT2_3:
+    case FMT_DXT4_5:
+    case FMT_24_8:
+    case FMT_24_8_FLOAT:
+    case FMT_16:
+    case FMT_16_16:
+    case FMT_16_16_16_16:
+    case FMT_16_EXPAND:
+    case FMT_16_16_EXPAND:
+    case FMT_16_16_16_16_EXPAND:
+    case FMT_16_FLOAT:
+    case FMT_16_16_FLOAT:
+    case FMT_16_16_16_16_FLOAT:
+    case FMT_32:
+    case FMT_32_32:
+    case FMT_32_32_32_32:
+    case FMT_32_FLOAT:
+    case FMT_32_32_FLOAT:
+    case FMT_32_32_32_32_FLOAT:
+    case FMT_32_AS_8:
+    case FMT_32_AS_8_8:
+    case FMT_16_MPEG:
+    case FMT_16_16_MPEG:
+    case FMT_8_INTERLACED:
+    case FMT_32_AS_8_INTERLACED:
+    case FMT_32_AS_8_8_INTERLACED:
+    case FMT_16_INTERLACED:
+    case FMT_16_MPEG_INTERLACED:
+    case FMT_16_16_MPEG_INTERLACED:
+    case FMT_DXN:
+    case FMT_8_8_8_8_AS_16_16_16_16:
+    case FMT_DXT1_AS_16_16_16_16:
+    case FMT_DXT2_3_AS_16_16_16_16:
+    case FMT_DXT4_5_AS_16_16_16_16:
+    case FMT_2_10_10_10_AS_16_16_16_16:
+    case FMT_10_11_11_AS_16_16_16_16:
+    case FMT_11_11_10_AS_16_16_16_16:
+    case FMT_32_32_32_FLOAT:
+    case FMT_DXT3A:
+    case FMT_DXT5A:
+    case FMT_CTX1:
+    case FMT_DXT3A_AS_1_1_1_1:
+      info.format = DXGI_FORMAT_UNKNOWN;
+      break;
+  }
+  return info;
+}
+
+// Creates a dynamic, CPU-writable 1D host texture sized from the fetch
+// constant and returns it through out_texture. Returns 0 on success.
+// TODO(review): no texel data is uploaded here yet, unlike FetchTexture2D.
+int D3D11GraphicsDriver::FetchTexture1D(
+    Shader::tex_buffer_desc_t& desc,
+    xe_gpu_texture_fetch_t& fetch,
+    TextureInfo& info,
+    ID3D11Resource** out_texture) {
+  uint32_t address = (fetch.address << 12) + address_translation_;
+
+  uint32_t width = fetch.size_1d.width;
+
+  D3D11_TEXTURE1D_DESC texture_desc;
+  xe_zero_struct(&texture_desc, sizeof(texture_desc));
+  texture_desc.Width = width;
+  texture_desc.MipLevels = 1;
+  texture_desc.ArraySize = 1;
+  texture_desc.Format = info.format;
+  texture_desc.Usage = D3D11_USAGE_DYNAMIC;
+  texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+  texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+  texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
+  HRESULT hr = device_->CreateTexture1D(
+      &texture_desc, NULL, (ID3D11Texture1D**)out_texture);
+  if (FAILED(hr)) {
+    return 1;
+  }
+
+  return 0;
+}
+
+// Creates a dynamic 2D host texture for the given fetch constant and fills
+// mip level 0 with bytes read from guest memory. Returns 0 on success, 1 on
+// failure (a texture created before the failure is released again).
+int D3D11GraphicsDriver::FetchTexture2D(
+    Shader::tex_buffer_desc_t& desc,
+    xe_gpu_texture_fetch_t& fetch,
+    TextureInfo& info,
+    ID3D11Resource** out_texture) {
+  uint32_t address = (fetch.address << 12) + address_translation_;
+
+  uint32_t width = fetch.size_2d.width;
+  uint32_t height = fetch.size_2d.height;
+  uint32_t data_pitch = XEROUNDUP(width, 256);
+  // TODO(benvanik): block height rounding?
+  uint32_t data_height = height;
+  size_t data_size = data_pitch * data_height;
+
+  D3D11_TEXTURE2D_DESC texture_desc;
+  xe_zero_struct(&texture_desc, sizeof(texture_desc));
+  texture_desc.Width = width;
+  texture_desc.Height = height;
+  texture_desc.MipLevels = 1;
+  texture_desc.ArraySize = 1;
+  texture_desc.Format = info.format;
+  texture_desc.SampleDesc.Count = 1;
+  texture_desc.SampleDesc.Quality = 0;
+  texture_desc.Usage = D3D11_USAGE_DYNAMIC;
+  texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
+  texture_desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+  texture_desc.MiscFlags = 0; // D3D11_RESOURCE_MISC_GENERATE_MIPS?
+  HRESULT hr = device_->CreateTexture2D(
+      &texture_desc, NULL, (ID3D11Texture2D**)out_texture);
+  if (FAILED(hr)) {
+    return 1;
+  }
+
+  // TODO(benvanik): all mip levels.
+  D3D11_MAPPED_SUBRESOURCE res;
+  hr = context_->Map(*out_texture, 0,
+                     D3D11_MAP_WRITE_DISCARD, 0, &res);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: failed to map texture");
+    // Don't leak the texture created above; the caller only takes ownership
+    // when we return success.
+    (*out_texture)->Release();
+    *out_texture = NULL;
+    return 1;
+  }
+  const uint8_t* src = memory_->Translate(address);
+  uint8_t* dest = (uint8_t*)res.pData;
+  // Clamp the copy to what was actually mapped: rows of the mapping are
+  // res.RowPitch bytes apart, which need not match the guest pitch estimate.
+  size_t mapped_size = (size_t)res.RowPitch * height;
+  if (data_size > mapped_size) {
+    data_size = mapped_size;
+  }
+  for (size_t n = 0; n < data_size; n++) {
+    dest[n] = src[n];
+  }
+  context_->Unmap(*out_texture, 0);
+
+  return 0;
+}
+
+// Not implemented yet: logs an error, asserts and returns 1.
+int D3D11GraphicsDriver::FetchTexture3D(
+    Shader::tex_buffer_desc_t& desc,
+    xe_gpu_texture_fetch_t& fetch,
+    TextureInfo& info,
+    ID3D11Resource** out_texture) {
+  XELOGE("D3D11: FetchTexture3D not yet implemented");
+  XEASSERTALWAYS();
+  return 1;
+  //D3D11_TEXTURE3D_DESC texture_desc;
+  //xe_zero_struct(&texture_desc, sizeof(texture_desc));
+  //texture_desc.Width;
+  //texture_desc.Height;
+  //texture_desc.Depth;
+  //texture_desc.MipLevels;
+  //texture_desc.Format;
+  //texture_desc.Usage;
+  //texture_desc.BindFlags;
+  //texture_desc.CPUAccessFlags;
+  //texture_desc.MiscFlags;
+  //hr = device_->CreateTexture3D(
+  //    &texture_desc, &initial_data, (ID3D11Texture3D**)&texture);
+}
+
+// Not implemented yet: logs an error, asserts and returns 1.
+int D3D11GraphicsDriver::FetchTextureCube(
+    Shader::tex_buffer_desc_t& desc,
+    xe_gpu_texture_fetch_t& fetch,
+    TextureInfo& info,
+    ID3D11Resource** out_texture) {
+  XELOGE("D3D11: FetchTextureCube not yet implemented");
+  XEASSERTALWAYS();
+  return 1;
+}
+
+// Binds the texture and sampler for one shader texture input: resolves the
+// fetch constant referenced by desc, creates a host texture and shader
+// resource view for it, and sets both the view and a clamp sampler on the
+// vertex or pixel stage at desc.input_index. Returns 0 on success.
+int D3D11GraphicsDriver::PrepareTextureFetcher(
+    xenos::XE_GPU_SHADER_TYPE shader_type, Shader::tex_buffer_desc_t& desc) {
+  RegisterFile& rf = register_file_;
+  // The fetch constants are 32 groups of 6 registers each; reject any slot
+  // outside of that range before using it to index the register file.
+  if (desc.fetch_slot >= 32) {
+    XELOGE("D3D11: texture fetch slot %u out of range", desc.fetch_slot);
+    return 1;
+  }
+  int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6;
+  xe_gpu_fetch_group_t* group = (xe_gpu_fetch_group_t*)&rf.values[r];
+  auto& fetch = group->texture_fetch;
+  // If this assert doesn't hold, maybe we just abort?
+  if (fetch.type != 0x2) {
+    XELOGW("D3D11: texture fetcher pointed at a vertex group?");
+    return 1;
+  }
+
+  TextureInfo info = GetTextureInfo(fetch);
+  if (info.format == DXGI_FORMAT_UNKNOWN) {
+    XELOGE("D3D11: unrecognized texture format %d", fetch.format);
+    return 1;
+  }
+
+  HRESULT hr;
+
+  D3D11_SHADER_RESOURCE_VIEW_DESC texture_view_desc;
+  xe_zero_struct(&texture_view_desc, sizeof(texture_view_desc));
+  // TODO(benvanik): this may need to be typed on the fetch instruction (float/int/etc?)
+  texture_view_desc.Format = info.format;
+
+  ID3D11Resource* texture = NULL;
+  switch (desc.tex_fetch.dimension) {
+    case DIMENSION_1D:
+      texture_view_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
+      texture_view_desc.Texture1D.MipLevels = 1;
+      texture_view_desc.Texture1D.MostDetailedMip = 0;
+      if (FetchTexture1D(desc, fetch, info, &texture)) {
+        XELOGE("D3D11: failed to fetch Texture1D");
+        return 1;
+      }
+      break;
+    case DIMENSION_2D:
+      texture_view_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+      texture_view_desc.Texture2D.MipLevels = 1;
+      texture_view_desc.Texture2D.MostDetailedMip = 0;
+      if (FetchTexture2D(desc, fetch, info, &texture)) {
+        XELOGE("D3D11: failed to fetch Texture2D");
+        return 1;
+      }
+      break;
+    case DIMENSION_3D:
+      texture_view_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
+      texture_view_desc.Texture3D.MipLevels = 1;
+      texture_view_desc.Texture3D.MostDetailedMip = 0;
+      if (FetchTexture3D(desc, fetch, info, &texture)) {
+        XELOGE("D3D11: failed to fetch Texture3D");
+        return 1;
+      }
+      break;
+    case DIMENSION_CUBE:
+      texture_view_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE;
+      texture_view_desc.TextureCube.MipLevels = 1;
+      texture_view_desc.TextureCube.MostDetailedMip = 0;
+      if (FetchTextureCube(desc, fetch, info, &texture)) {
+        XELOGE("D3D11: failed to fetch TextureCube");
+        return 1;
+      }
+      break;
+  }
+
+  XEASSERTNOTNULL(texture);
+
+  ID3D11ShaderResourceView* texture_view = NULL;
+  hr = device_->CreateShaderResourceView(
+      texture, &texture_view_desc, &texture_view);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: unable to create texture resource view");
+    texture->Release();
+    return 1;
+  }
+  if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) {
+    context_->VSSetShaderResources(desc.input_index, 1, &texture_view);
+  } else {
+    context_->PSSetShaderResources(desc.input_index, 1, &texture_view);
+  }
+  texture_view->Release();
+  texture->Release();
+
+  D3D11_SAMPLER_DESC sampler_desc;
+  xe_zero_struct(&sampler_desc, sizeof(sampler_desc));
+  sampler_desc.Filter;
+  sampler_desc.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
+  sampler_desc.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
+  sampler_desc.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
+  sampler_desc.MipLODBias;
+  sampler_desc.MaxAnisotropy = 1;
+  sampler_desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS;
+  sampler_desc.BorderColor[0];
+  sampler_desc.BorderColor[1];
+  sampler_desc.BorderColor[2];
+  sampler_desc.BorderColor[3];
+  sampler_desc.MinLOD;
+  sampler_desc.MaxLOD;
+  ID3D11SamplerState* sampler_state = NULL;
+  hr = device_->CreateSamplerState(&sampler_desc, &sampler_state);
+  if (FAILED(hr)) {
+    XELOGE("D3D11: unable to create sampler state");
+    return 1;
+  }
+  if (shader_type == XE_GPU_SHADER_TYPE_VERTEX) {
+    context_->VSSetSamplers(desc.input_index, 1, &sampler_state);
+  } else {
+    context_->PSSetSamplers(desc.input_index, 1, &sampler_state);
+  }
+  sampler_state->Release();
 
-  // maybe << 2?
-  uint32_t address = (fetch->address << 4) + address_translation_;
   return 0;
 }
 
diff --git a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h
index 16f24204f..8f409f94d 100644
--- a/src/xenia/gpu/d3d11/d3d11_graphics_driver.h
+++ b/src/xenia/gpu/d3d11/d3d11_graphics_driver.h
@@ -63,8 +63,30 @@ private:
   int BindShaders();
   int PrepareFetchers();
   int PrepareVertexBuffer(Shader::vtx_buffer_desc_t& desc);
-  int PrepareTextureFetcher(
-      int fetch_slot, xenos::xe_gpu_texture_fetch_t* fetch);
+  int PrepareTextureFetcher(xenos::XE_GPU_SHADER_TYPE shader_type,
+                            Shader::tex_buffer_desc_t& desc);
+  typedef struct {
+    DXGI_FORMAT format;
+    uint32_t block_width;
+    uint32_t block_height;
+  } TextureInfo;
+  TextureInfo GetTextureInfo(xenos::xe_gpu_texture_fetch_t& fetch);
+  int FetchTexture1D(Shader::tex_buffer_desc_t& desc,
+                     xenos::xe_gpu_texture_fetch_t& fetch,
+                     TextureInfo& info,
+                     ID3D11Resource** out_texture);
+  int FetchTexture2D(Shader::tex_buffer_desc_t& desc,
+                     xenos::xe_gpu_texture_fetch_t& fetch,
+                     TextureInfo& info,
+                     ID3D11Resource** out_texture);
+  int FetchTexture3D(Shader::tex_buffer_desc_t& desc,
+                     xenos::xe_gpu_texture_fetch_t& fetch,
+                     TextureInfo& info,
+                     ID3D11Resource** out_texture);
+  int FetchTextureCube(Shader::tex_buffer_desc_t& desc,
+                       xenos::xe_gpu_texture_fetch_t& fetch,
+                       TextureInfo& info,
+                       ID3D11Resource** out_texture);
   int PrepareIndexBuffer(
       bool index_32bit, uint32_t index_count,
       uint32_t index_base, uint32_t index_size, uint32_t endianness);
diff --git a/src/xenia/gpu/shader.cc b/src/xenia/gpu/shader.cc
index 8774c5fea..31bc6c55d 100644
--- a/src/xenia/gpu/shader.cc
+++ b/src/xenia/gpu/shader.cc
@@ -24,6 +24,7 @@ Shader::Shader(
     type_(type), hash_(hash), is_prepared_(false), disasm_src_(NULL) {
   xe_zero_struct(&alloc_counts_, sizeof(alloc_counts_));
   xe_zero_struct(&vtx_buffer_inputs_, sizeof(vtx_buffer_inputs_));
+  xe_zero_struct(&tex_buffer_inputs_, sizeof(tex_buffer_inputs_));
 
   // Verify.
   dword_count_ = length / 4;
@@ -248,7 +249,22 @@ const Shader::vtx_buffer_inputs_t* Shader::GetVertexBufferInputs() {
 }
 
 void Shader::GatherTextureFetch(const xenos::instr_fetch_tex_t* tex) {
-  fetch_texs_.push_back(*tex);
+  // TODO(benvanik): check dest_swiz to see if we are writing anything.
 
-  // slots
+  auto& inputs = tex_buffer_inputs_;
+  XEASSERT(inputs.count < XECOUNT(inputs.descs));
+  if (inputs.count >= XECOUNT(inputs.descs)) {
+    // Table is full; drop the fetch rather than writing past the array.
+    return;
+  }
+  auto& input = inputs.descs[inputs.count++];
+  input.input_index = inputs.count - 1;
+  input.fetch_slot = tex->const_idx - 16; // ?
+  input.tex_fetch = *tex;
+
+  // Format mangling, size estimation, etc.
+}
+
+const Shader::tex_buffer_inputs_t* Shader::GetTextureBufferInputs() {
+  return &tex_buffer_inputs_;
 }
diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h
index a02cd7f4e..9aae74925 100644
--- a/src/xenia/gpu/shader.h
+++ b/src/xenia/gpu/shader.h
@@ -53,6 +53,18 @@ public:
   } vtx_buffer_inputs_t;
   const vtx_buffer_inputs_t* GetVertexBufferInputs();
 
+  typedef struct {
+    uint32_t input_index;
+    uint32_t fetch_slot;
+    xenos::instr_fetch_tex_t tex_fetch;
+    uint32_t format;
+  } tex_buffer_desc_t;
+  typedef struct {
+    uint32_t count;
+    tex_buffer_desc_t descs[16];
+  } tex_buffer_inputs_t;
+  const tex_buffer_inputs_t* GetTextureBufferInputs();
+
   typedef struct {
     uint32_t positions;
     uint32_t params;
@@ -78,10 +90,10 @@ protected:
   char* disasm_src_;
 
   alloc_counts_t alloc_counts_;
-  std::vector execs_;
+  std::vector execs_;
   std::vector allocs_;
   vtx_buffer_inputs_t vtx_buffer_inputs_;
-  std::vector fetch_texs_;
+  tex_buffer_inputs_t tex_buffer_inputs_;
 };
 
 
diff --git a/src/xenia/gpu/xenos/ucode.h b/src/xenia/gpu/xenos/ucode.h
index 48c5a971a..2727b0328 100644
--- a/src/xenia/gpu/xenos/ucode.h
+++ b/src/xenia/gpu/xenos/ucode.h
@@ -441,6 +441,13 @@ typedef enum {
   SAMPLE_CENTER = 1,
 } instr_sample_loc_t;
 
+typedef enum {
+  DIMENSION_1D = 0,
+  DIMENSION_2D = 1,
+  DIMENSION_3D = 2,
+  DIMENSION_CUBE = 3,
+} instr_dimension_t;
+
 typedef enum a2xx_sq_surfaceformat instr_surf_fmt_t;
 
 XEPACKEDSTRUCT(instr_fetch_tex_t, {
@@ -467,7 +474,8 @@ XEPACKEDSTRUCT(instr_fetch_tex_t, {
     uint32_t vol_mag_filter : 2; // instr_tex_filter_t
     uint32_t vol_min_filter : 2; // instr_tex_filter_t
     uint32_t use_comp_lod : 1;
-    uint32_t use_reg_lod : 2;
+    uint32_t use_reg_lod : 1;
+    uint32_t unk : 1;
     uint32_t pred_select : 1;
   });
   /* dword2: */
@@ -476,7 +484,7 @@ XEPACKEDSTRUCT(instr_fetch_tex_t, {
     uint32_t sample_location : 1; // instr_sample_loc_t
     uint32_t lod_bias : 7;
     uint32_t unused : 5;
-    uint32_t dimension : 2;
+    uint32_t dimension : 2; // instr_dimension_t
     uint32_t offset_x : 5;
     uint32_t offset_y : 5;
     uint32_t offset_z : 5;
diff --git a/src/xenia/gpu/xenos/xenos.h b/src/xenia/gpu/xenos/xenos.h
index 8eafb7a51..c0169c038 100644
--- a/src/xenia/gpu/xenos/xenos.h
+++ b/src/xenia/gpu/xenos/xenos.h
@@ -126,11 +126,25 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, {
     uint32_t unk1 : 4;
     uint32_t address : 20;
     union { // dword_2
+      struct {
+        uint32_t width : 24;
+        uint32_t unused : 8;
+      } size_1d;
       struct {
         uint32_t width : 13;
         uint32_t height : 13;
-        uint32_t unksize2d : 6;
+        uint32_t unused : 6;
       } size_2d;
+      struct {
+        uint32_t width : 13;
+        uint32_t height : 13;
+        uint32_t depth : 6;
+      } size_stack;
+      struct {
+        uint32_t width : 11;
+        uint32_t height : 11;
+        uint32_t depth : 10;
+      } size_3d;
     };
     uint32_t unk3; // dword_3
     uint32_t unk4; // dword_4