From 19c48c7a90be614a8f4461455a2e73c2964caa5e Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 31 May 2014 17:38:32 -0700 Subject: [PATCH] Partial texture cache - doesn't invalidate yet. --- src/xenia/gpu/d3d11/d3d11_texture.cc | 141 +++++++++++---------- src/xenia/gpu/d3d11/d3d11_texture.h | 21 ++- src/xenia/gpu/d3d11/d3d11_texture_cache.cc | 5 +- src/xenia/gpu/d3d11/d3d11_texture_cache.h | 2 +- src/xenia/gpu/texture.cc | 109 +++++++++++++++- src/xenia/gpu/texture.h | 44 ++++++- src/xenia/gpu/texture_cache.cc | 9 +- src/xenia/gpu/texture_cache.h | 3 +- 8 files changed, 250 insertions(+), 84 deletions(-) diff --git a/src/xenia/gpu/d3d11/d3d11_texture.cc b/src/xenia/gpu/d3d11/d3d11_texture.cc index b4beb2ca4..809a971ac 100644 --- a/src/xenia/gpu/d3d11/d3d11_texture.cc +++ b/src/xenia/gpu/d3d11/d3d11_texture.cc @@ -21,19 +21,18 @@ using namespace xe::gpu::d3d11; using namespace xe::gpu::xenos; -D3D11Texture::D3D11Texture(D3D11TextureCache* cache, uint32_t address) - : Texture(address), +D3D11Texture::D3D11Texture(D3D11TextureCache* cache, uint32_t address, + const uint8_t* host_address) + : Texture(address, host_address), cache_(cache) { } D3D11Texture::~D3D11Texture() { - // views } -TextureView* D3D11Texture::Fetch( +TextureView* D3D11Texture::FetchNew( const xenos::xe_gpu_texture_fetch_t& fetch) { D3D11TextureView* view = new D3D11TextureView(); - view->texture = this; if (!FillViewInfo(view, fetch)) { return nullptr; } @@ -49,7 +48,7 @@ TextureView* D3D11Texture::Fetch( srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; srv_desc.Texture1D.MipLevels = 1; srv_desc.Texture1D.MostDetailedMip = 0; - if (!FetchTexture1D(view, fetch)) { + if (!CreateTexture1D(view, fetch)) { XELOGE("D3D11: failed to fetch Texture1D"); return nullptr; } @@ -58,7 +57,7 @@ TextureView* D3D11Texture::Fetch( srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; srv_desc.Texture2D.MipLevels = 1; srv_desc.Texture2D.MostDetailedMip = 0; - if (!FetchTexture2D(view, fetch)) { + if (!CreateTexture2D(view, fetch)) { XELOGE("D3D11: failed to fetch Texture2D"); return nullptr; } @@ -67,7 +66,7 @@ TextureView* D3D11Texture::Fetch( srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; srv_desc.Texture3D.MipLevels = 1; srv_desc.Texture3D.MostDetailedMip = 0; - if (!FetchTexture3D(view, fetch)) { + if (!CreateTexture3D(view, fetch)) { XELOGE("D3D11: failed to fetch Texture3D"); return nullptr; } @@ -76,7 +75,7 @@ TextureView* D3D11Texture::Fetch( srv_desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURECUBE; srv_desc.TextureCube.MipLevels = 1; srv_desc.TextureCube.MostDetailedMip = 0; - if (!FetchTextureCube(view, fetch)) { + if (!CreateTextureCube(view, fetch)) { XELOGE("D3D11: failed to fetch TextureCube"); return nullptr; } @@ -93,10 +92,24 @@ TextureView* D3D11Texture::Fetch( return view; } -bool D3D11Texture::FetchTexture1D( - D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { - SCOPE_profile_cpu_f("gpu"); +bool D3D11Texture::FetchDirty( + TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { + auto d3d_view = static_cast(view); + switch (view->dimensions) { + case DIMENSION_1D: + return FetchTexture1D(d3d_view, fetch); + case DIMENSION_2D: + return FetchTexture2D(d3d_view, fetch); + case DIMENSION_3D: + return FetchTexture3D(d3d_view, fetch); + case DIMENSION_CUBE: + return FetchTextureCube(d3d_view, fetch); + } + return false; +} +bool D3D11Texture::CreateTexture1D( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { uint32_t width = 1 + fetch.size_1d.width; D3D11_TEXTURE1D_DESC texture_desc; @@ -115,55 +128,26 @@ bool D3D11Texture::FetchTexture1D( return false; } + return FetchTexture1D(view, fetch); +} + +bool D3D11Texture::FetchTexture1D( + D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { + SCOPE_profile_cpu_f("gpu"); + // TODO(benvanik): upload! XELOGE("D3D11: FetchTexture1D not yet implemented"); return false; } -bool D3D11Texture::FetchTexture2D( - D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { - SCOPE_profile_cpu_f("gpu"); - +bool D3D11Texture::CreateTexture2D( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { XEASSERTTRUE(fetch.dimension == 1); - uint32_t logical_width = 1 + fetch.size_2d.width; - uint32_t logical_height = 1 + fetch.size_2d.height; - - uint32_t block_width = logical_width / view->block_size; - uint32_t block_height = logical_height / view->block_size; - - uint32_t input_width, input_height; - uint32_t output_width, output_height; - - if (!view->is_compressed) { - // must be 32x32, but also must have a pitch that is a multiple of 256 bytes - uint32_t bytes_per_block = view->block_size * view->block_size * - view->texel_pitch; - uint32_t width_multiple = 32; - if (bytes_per_block) { - uint32_t minimum_multiple = 256 / bytes_per_block; - if (width_multiple < minimum_multiple) { - width_multiple = minimum_multiple; - } - } - - input_width = XEROUNDUP(logical_width, width_multiple); - input_height = XEROUNDUP(logical_height, 32); - output_width = logical_width; - output_height = logical_height; - } - else { - // must be 128x128 - input_width = XEROUNDUP(logical_width, 128); - input_height = XEROUNDUP(logical_height, 128); - output_width = XENEXTPOW2(logical_width); - output_height = XENEXTPOW2(logical_height); - } - D3D11_TEXTURE2D_DESC texture_desc; xe_zero_struct(&texture_desc, sizeof(texture_desc)); - texture_desc.Width = output_width; - texture_desc.Height = output_height; + texture_desc.Width = view->sizes_2d.output_width; + texture_desc.Height = view->sizes_2d.output_height; texture_desc.MipLevels = 1; texture_desc.ArraySize = 1; texture_desc.Format = view->format; @@ -179,39 +163,50 @@ bool D3D11Texture::FetchTexture2D( return false; } + return FetchTexture2D(view, fetch); +} + +bool D3D11Texture::FetchTexture2D( + D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { + SCOPE_profile_cpu_f("gpu"); + + XEASSERTTRUE(fetch.dimension == 1); + + auto sizes = GetTextureSizes2D(view); + // TODO(benvanik): all mip levels. D3D11_MAPPED_SUBRESOURCE res; - hr = cache_->context()->Map(view->resource, 0, - D3D11_MAP_WRITE_DISCARD, 0, &res); + HRESULT hr = cache_->context()->Map(view->resource, 0, + D3D11_MAP_WRITE_DISCARD, 0, &res); if (FAILED(hr)) { XELOGE("D3D11: failed to map texture"); return false; } - auto logical_pitch = (logical_width / view->block_size) * view->texel_pitch; - auto input_pitch = (input_width / view->block_size) * view->texel_pitch; - auto output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch; - const uint8_t* src = cache_->memory()->Translate(address_); uint8_t* dest = (uint8_t*)res.pData; //memset(dest, 0, output_pitch * (output_height / view->block_size)); // TODO(gibbed): remove me later + uint32_t output_pitch = res.RowPitch; // (output_width / info.block_size) * info.texel_pitch; if (!fetch.tiled) { dest = (uint8_t*)res.pData; - for (uint32_t y = 0; y < block_height; y++) { - for (uint32_t x = 0; x < logical_pitch; x += view->texel_pitch) { + for (uint32_t y = 0; y < sizes.block_height; y++) { + for (uint32_t x = 0; x < sizes.logical_pitch; x += view->texel_pitch) { TextureSwap(dest + x, src + x, view->texel_pitch, (XE_GPU_ENDIAN)fetch.endianness); } - src += input_pitch; + src += sizes.input_pitch; dest += output_pitch; } - } - else { + } else { auto bpp = (view->texel_pitch >> 2) + ((view->texel_pitch >> 1) >> (view->texel_pitch >> 2)); - for (uint32_t y = 0, output_base_offset = 0; y < block_height; y++, output_base_offset += output_pitch) { - auto input_base_offset = TiledOffset2DOuter(y, (input_width / view->block_size), bpp); - for (uint32_t x = 0, output_offset = output_base_offset; x < block_width; x++, output_offset += view->texel_pitch) { + for (uint32_t y = 0, output_base_offset = 0; + y < sizes.block_height; + y++, output_base_offset += output_pitch) { + auto input_base_offset = TiledOffset2DOuter(y, (sizes.input_width / view->block_size), bpp); + for (uint32_t x = 0, output_offset = output_base_offset; + x < sizes.block_width; + x++, output_offset += view->texel_pitch) { auto input_offset = TiledOffset2DInner(x, y, bpp, input_base_offset) >> bpp; TextureSwap(dest + output_offset, src + input_offset * view->texel_pitch, @@ -223,6 +218,13 @@ bool D3D11Texture::FetchTexture2D( return true; } +bool D3D11Texture::CreateTexture3D( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { + XELOGE("D3D11: CreateTexture3D not yet implemented"); + XEASSERTALWAYS(); + return false; +} + bool D3D11Texture::FetchTexture3D( D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { SCOPE_profile_cpu_f("gpu"); @@ -245,6 +247,13 @@ bool D3D11Texture::FetchTexture3D( // &texture_desc, &initial_data, (ID3D11Texture3D**)&view->resource); } +bool D3D11Texture::CreateTextureCube( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) { + XELOGE("D3D11: CreateTextureCube not yet implemented"); + XEASSERTALWAYS(); + return false; +} + bool D3D11Texture::FetchTextureCube( D3D11TextureView* view, const xe_gpu_texture_fetch_t& fetch) { SCOPE_profile_cpu_f("gpu"); diff --git a/src/xenia/gpu/d3d11/d3d11_texture.h b/src/xenia/gpu/d3d11/d3d11_texture.h index 06cceb041..a8ee91662 100644 --- a/src/xenia/gpu/d3d11/d3d11_texture.h +++ b/src/xenia/gpu/d3d11/d3d11_texture.h @@ -39,25 +39,34 @@ struct D3D11TextureView : TextureView { class D3D11Texture : public Texture { public: - D3D11Texture(D3D11TextureCache* cache, uint32_t address); + D3D11Texture(D3D11TextureCache* cache, uint32_t address, + const uint8_t* host_address); virtual ~D3D11Texture(); - TextureView* Fetch( - const xenos::xe_gpu_texture_fetch_t& fetch) override; - protected: + TextureView* FetchNew( + const xenos::xe_gpu_texture_fetch_t& fetch) override; + bool FetchDirty( + TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) override; + + bool CreateTexture1D( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); bool FetchTexture1D( D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); + bool CreateTexture2D( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); bool FetchTexture2D( D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); + bool CreateTexture3D( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); bool FetchTexture3D( D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); + bool CreateTextureCube( + D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); bool FetchTextureCube( D3D11TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); D3D11TextureCache* cache_; - - // views }; diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc b/src/xenia/gpu/d3d11/d3d11_texture_cache.cc index ad8e4d09e..eb3442bfc 100644 --- a/src/xenia/gpu/d3d11/d3d11_texture_cache.cc +++ b/src/xenia/gpu/d3d11/d3d11_texture_cache.cc @@ -38,8 +38,9 @@ D3D11TextureCache::~D3D11TextureCache() { } Texture* D3D11TextureCache::CreateTexture( - uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch) { - return new D3D11Texture(this, address); + uint32_t address, const uint8_t* host_address, + const xenos::xe_gpu_texture_fetch_t& fetch) { + return new D3D11Texture(this, address, host_address); } ID3D11SamplerState* D3D11TextureCache::GetSamplerState( diff --git a/src/xenia/gpu/d3d11/d3d11_texture_cache.h b/src/xenia/gpu/d3d11/d3d11_texture_cache.h index ce0fdc310..63f275d02 100644 --- a/src/xenia/gpu/d3d11/d3d11_texture_cache.h +++ b/src/xenia/gpu/d3d11/d3d11_texture_cache.h @@ -38,7 +38,7 @@ public: const Shader::tex_buffer_desc_t& desc); protected: - Texture* CreateTexture(uint32_t address, + Texture* CreateTexture(uint32_t address, const uint8_t* host_address, const xenos::xe_gpu_texture_fetch_t& fetch) override; private: diff --git a/src/xenia/gpu/texture.cc b/src/xenia/gpu/texture.cc index 0b6ef8105..d624d82ce 100644 --- a/src/xenia/gpu/texture.cc +++ b/src/xenia/gpu/texture.cc @@ -21,8 +21,54 @@ using namespace xe::gpu; using namespace xe::gpu::xenos; -Texture::Texture(uint32_t address) - : address_(address) { +Texture::Texture(uint32_t address, const uint8_t* host_address) + : address_(address), host_address_(host_address) { +} + +Texture::~Texture() { + for (auto it = views_.begin(); it != views_.end(); ++it) { + auto view = *it; + delete view; + } + views_.clear(); +} + +TextureView* Texture::Fetch( + const xenos::xe_gpu_texture_fetch_t& fetch) { + // TODO(benvanik): compute length for hash check. + size_t length = 0; + switch (fetch.dimension) { + case DIMENSION_1D: + break; + case DIMENSION_2D: + break; + case DIMENSION_3D: + break; + case DIMENSION_CUBE: + break; + } + uint64_t hash = xe_hash64(host_address_, length); + + for (auto it = views_.begin(); it != views_.end(); ++it) { + auto view = *it; + if (memcmp(&view->fetch, &fetch, sizeof(fetch))) { + continue; + } + bool dirty = hash != view->hash; + if (dirty) { + return FetchDirty(view, fetch) ? view : nullptr; + } else { + return view; + } + } + + auto new_view = FetchNew(fetch); + if (!new_view) { + return nullptr; + } + new_view->hash = hash; + views_.push_back(new_view); + return new_view; } bool Texture::FillViewInfo(TextureView* view, @@ -30,6 +76,9 @@ bool Texture::FillViewInfo(TextureView* view, // http://msdn.microsoft.com/en-us/library/windows/desktop/cc308051(v=vs.85).aspx // a2xx_sq_surfaceformat + view->texture = this; + view->fetch = fetch; + view->dimensions = fetch.dimension; switch (fetch.dimension) { case DIMENSION_1D: @@ -213,9 +262,65 @@ bool Texture::FillViewInfo(TextureView* view, view->format = DXGI_FORMAT_UNKNOWN; break; } + + if (view->format == DXGI_FORMAT_UNKNOWN) { + return false; + } + + switch (fetch.dimension) { + case DIMENSION_1D: + break; + case DIMENSION_2D: + view->sizes_2d = GetTextureSizes2D(view); + break; + case DIMENSION_3D: + break; + case DIMENSION_CUBE: + break; + } return true; } +const TextureSizes2D Texture::GetTextureSizes2D(TextureView* view) { + TextureSizes2D sizes; + + sizes.logical_width = 1 + view->fetch.size_2d.width; + sizes.logical_height = 1 + view->fetch.size_2d.height; + + sizes.block_width = sizes.logical_width / view->block_size; + sizes.block_height = sizes.logical_height / view->block_size; + + if (!view->is_compressed) { + // must be 32x32, but also must have a pitch that is a multiple of 256 bytes + uint32_t bytes_per_block = view->block_size * view->block_size * + view->texel_pitch; + uint32_t width_multiple = 32; + if (bytes_per_block) { + uint32_t minimum_multiple = 256 / bytes_per_block; + if (width_multiple < minimum_multiple) { + width_multiple = minimum_multiple; + } + } + sizes.input_width = XEROUNDUP(sizes.logical_width, width_multiple); + sizes.input_height = XEROUNDUP(sizes.logical_height, 32); + sizes.output_width = sizes.logical_width; + sizes.output_height = sizes.logical_height; + } else { + // must be 128x128 + sizes.input_width = XEROUNDUP(sizes.logical_width, 128); + sizes.input_height = XEROUNDUP(sizes.logical_height, 128); + sizes.output_width = XENEXTPOW2(sizes.logical_width); + sizes.output_height = XENEXTPOW2(sizes.logical_height); + } + + sizes.logical_pitch = + (sizes.logical_width / view->block_size) * view->texel_pitch; + sizes.input_pitch = + (sizes.input_width / view->block_size) * view->texel_pitch; + + return sizes; +} + void Texture::TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, XE_GPU_ENDIAN endianness) { switch (endianness) { diff --git a/src/xenia/gpu/texture.h b/src/xenia/gpu/texture.h index 24d595162..9b919a5d9 100644 --- a/src/xenia/gpu/texture.h +++ b/src/xenia/gpu/texture.h @@ -23,9 +23,34 @@ namespace gpu { class Texture; +struct TextureSizes1D {}; +struct TextureSizes2D { + uint32_t logical_width; + uint32_t logical_height; + uint32_t block_width; + uint32_t block_height; + uint32_t input_width; + uint32_t input_height; + uint32_t output_width; + uint32_t output_height; + uint32_t logical_pitch; + uint32_t input_pitch; +}; +struct TextureSizes3D {}; +struct TextureSizesCube {}; struct TextureView { Texture* texture; + xenos::xe_gpu_texture_fetch_t fetch; + uint64_t hash; + + union { + TextureSizes1D sizes_1d; + TextureSizes2D sizes_2d; + TextureSizes3D sizes_3d; + TextureSizesCube sizes_cube; + }; + int dimensions; uint32_t width; uint32_t height; @@ -46,16 +71,23 @@ struct TextureView { class Texture { public: - Texture(uint32_t address); - virtual ~Texture() = default; + Texture(uint32_t address, const uint8_t* host_address); + virtual ~Texture(); - virtual TextureView* Fetch( - const xenos::xe_gpu_texture_fetch_t& fetch) = 0; + TextureView* Fetch( + const xenos::xe_gpu_texture_fetch_t& fetch); protected: bool FillViewInfo(TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch); + virtual TextureView* FetchNew( + const xenos::xe_gpu_texture_fetch_t& fetch) = 0; + virtual bool FetchDirty( + TextureView* view, const xenos::xe_gpu_texture_fetch_t& fetch) = 0; + + const TextureSizes2D GetTextureSizes2D(TextureView* view); + static void TextureSwap(uint8_t* dest, const uint8_t* src, uint32_t pitch, xenos::XE_GPU_ENDIAN endianness); static uint32_t TiledOffset2DOuter(uint32_t y, uint32_t width, @@ -64,6 +96,10 @@ protected: uint32_t base_offset); uint32_t address_; + const uint8_t* host_address_; + + // TODO(benvanik): replace with LRU keyed list. + std::vector views_; }; diff --git a/src/xenia/gpu/texture_cache.cc b/src/xenia/gpu/texture_cache.cc index f205008c2..1f0a4a5ac 100644 --- a/src/xenia/gpu/texture_cache.cc +++ b/src/xenia/gpu/texture_cache.cc @@ -24,7 +24,11 @@ TextureCache::TextureCache(Memory* memory) } TextureCache::~TextureCache() { - // textures + for (auto it = textures_.begin(); it != textures_.end(); ++it) { + auto texture = it->second; + delete texture; + } + textures_.clear(); } TextureView* TextureCache::FetchTexture( @@ -32,7 +36,8 @@ TextureView* TextureCache::FetchTexture( auto it = textures_.find(address); if (it == textures_.end()) { // Texture not found. - auto texture = CreateTexture(address, fetch); + const uint8_t* host_address = memory_->Translate(address); + auto texture = CreateTexture(address, host_address, fetch); if (!texture) { return nullptr; } diff --git a/src/xenia/gpu/texture_cache.h b/src/xenia/gpu/texture_cache.h index dc796fe50..285ffe1d7 100644 --- a/src/xenia/gpu/texture_cache.h +++ b/src/xenia/gpu/texture_cache.h @@ -33,7 +33,8 @@ public: protected: virtual Texture* CreateTexture( - uint32_t address, const xenos::xe_gpu_texture_fetch_t& fetch) = 0; + uint32_t address, const uint8_t* host_address, + const xenos::xe_gpu_texture_fetch_t& fetch) = 0; Memory* memory_;