[D3D12] 64bpp resolve and clear - GTA IV ingame
This commit is contained in:
parent
5f0df6d1fa
commit
5997ec6668
|
@ -29,6 +29,7 @@ namespace d3d12 {
|
|||
|
||||
// Generated with `xb buildhlsl`.
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_clear_32bpp_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_clear_64bpp_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_clear_depth_float_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_32bpp_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_64bpp_cs.h"
|
||||
|
@ -194,6 +195,16 @@ bool RenderTargetCache::Initialize() {
|
|||
return false;
|
||||
}
|
||||
edram_clear_32bpp_pipeline_->SetName(L"EDRAM Clear 32bpp");
|
||||
// Clear 64-bit color.
|
||||
edram_clear_64bpp_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device, edram_clear_64bpp_cs, sizeof(edram_clear_64bpp_cs),
|
||||
edram_clear_root_signature_);
|
||||
if (edram_clear_64bpp_pipeline_ == nullptr) {
|
||||
XELOGE("Failed to create the EDRAM 64bpp clear pipeline");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
edram_clear_64bpp_pipeline_->SetName(L"EDRAM Clear 64bpp");
|
||||
// Clear float depth.
|
||||
edram_clear_depth_float_pipeline_ = ui::d3d12::util::CreateComputePipeline(
|
||||
device, edram_clear_depth_float_cs, sizeof(edram_clear_depth_float_cs),
|
||||
|
@ -272,6 +283,7 @@ void RenderTargetCache::Shutdown() {
|
|||
ui::d3d12::util::ReleaseAndNull(edram_tile_sample_64bpp_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(edram_tile_sample_32bpp_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(edram_clear_depth_float_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(edram_clear_64bpp_pipeline_);
|
||||
ui::d3d12::util::ReleaseAndNull(edram_clear_32bpp_pipeline_);
|
||||
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) {
|
||||
ui::d3d12::util::ReleaseAndNull(edram_store_pipelines_[i]);
|
||||
|
@ -1473,8 +1485,10 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
|
|||
}
|
||||
command_processor_->SetComputePipeline(edram_clear_depth_float_pipeline_);
|
||||
} else if (is_64bpp) {
|
||||
// TODO(Triang3l): 64bpp color clear.
|
||||
return false;
|
||||
// TODO(Triang3l): Check which 32-bit portion is in which register.
|
||||
root_constants.clear_color_high = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
|
||||
root_constants.clear_color_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
|
||||
command_processor_->SetComputePipeline(edram_clear_64bpp_pipeline_);
|
||||
} else {
|
||||
Register reg =
|
||||
is_depth ? XE_GPU_REG_RB_DEPTH_CLEAR : XE_GPU_REG_RB_COLOR_CLEAR;
|
||||
|
|
|
@ -483,6 +483,7 @@ class RenderTargetCache {
|
|||
ID3D12PipelineState* edram_tile_sample_32bpp_pipeline_ = nullptr;
|
||||
ID3D12PipelineState* edram_tile_sample_64bpp_pipeline_ = nullptr;
|
||||
ID3D12PipelineState* edram_clear_32bpp_pipeline_ = nullptr;
|
||||
ID3D12PipelineState* edram_clear_64bpp_pipeline_ = nullptr;
|
||||
ID3D12PipelineState* edram_clear_depth_float_pipeline_ = nullptr;
|
||||
|
||||
// 48 MB heaps backing used render targets resources, created when needed.
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
#define XE_EDRAM_WRITE_ONLY
|
||||
#include "edram_load_store.hlsli"
|
||||
|
||||
// Load4/Store4 aren't needed here, but 80x16 threads is over the limit.
|
||||
// Compute shader entry point that clears 64bpp samples: it writes
// xe_edram_clear_color64 into xe_edram_load_store_dest for the samples that
// lie inside the clear rectangle. Each thread covers two horizontally adjacent
// samples (its X index is doubled below), so the 40 threads of a group row
// span 80 samples.
[numthreads(40, 16, 1)]
|
||||
void main(uint3 xe_group_id : SV_GroupID,
|
||||
uint3 xe_group_thread_id : SV_GroupThreadID,
|
||||
uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||
// Unpack xe_edram_clear_rect: the low 16 bits of its components go to .xz and
// the high 16 bits to .yw. The bounds test below uses .xy as the inclusive
// lower corner and .zw as the exclusive upper corner.
uint4 clear_rect;
|
||||
clear_rect.xz = xe_edram_clear_rect & 0xFFFFu;
|
||||
clear_rect.yw = xe_edram_clear_rect >> 16u;
|
||||
// Index of the first (even X) sample of this thread's pair within the
// dispatch.
uint2 sample_index = xe_thread_id.xy;
|
||||
sample_index.x *= 2u;
|
||||
// Only the first sample of the pair is tested against the rectangle here.
// NOTE(review): if clear_rect.x is odd, the thread whose first sample is
// clear_rect.x - 1 returns without writing its second sample, although that
// sample is inside the rectangle - confirm the left edge is always even.
[branch] if (any(sample_index < clear_rect.xy) ||
|
||||
any(sample_index >= clear_rect.zw)) {
|
||||
return;
|
||||
}
|
||||
// Position of the pair's first sample within the thread group, doubled in X
// the same way as sample_index.
uint2 tile_sample_index = xe_group_thread_id.xy;
|
||||
tile_sample_index.x *= 2u;
|
||||
// XeEDRAMOffset64bpp is not defined in this file (presumably it comes from
// edram_load_store.hlsli); it receives the group ID and the in-group sample
// position and yields the destination offset of the first sample.
uint edram_offset = XeEDRAMOffset64bpp(xe_group_id.xy, tile_sample_index);
|
||||
xe_edram_load_store_dest.Store2(edram_offset, xe_edram_clear_color64);
|
||||
// The second sample is stored 8 bytes after the first, and only if it is
// still left of the exclusive right edge of the rectangle.
if (sample_index.x + 1u < clear_rect.z) {
|
||||
xe_edram_load_store_dest.Store2(edram_offset + 8u, xe_edram_clear_color64);
|
||||
}
|
||||
}
|
|
@ -14,7 +14,7 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
|||
uint4 texels = xe_texture_tile_source.Load4(
|
||||
xe_texture_tile_host_base + texel_index.y * xe_texture_tile_host_pitch +
|
||||
texel_index.x * 4u);
|
||||
texels = XeByteSwap(texels, xe_texture_tile_endian_guest_pitch & 7u);
|
||||
texels = XeByteSwap(texels, xe_texture_tile_endian_guest_pitch);
|
||||
uint4 texel_addresses = xe_texture_tile_guest_base + XeTextureTiledOffset2D(
|
||||
texel_index, xe_texture_tile_endian_guest_pitch >> 3u, 2u);
|
||||
xe_texture_tile_dest.Store(texel_addresses.x, texels.x);
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
#include "texture_tile.hlsli"
|
||||
|
||||
RWByteAddressBuffer xe_texture_tile_dest : register(u0);
|
||||
|
||||
// Compute shader entry point that copies 64-bit texels from
// xe_texture_tile_source (addressed linearly through xe_texture_tile_host_base
// and xe_texture_tile_host_pitch) into xe_texture_tile_dest at the addresses
// returned by XeTextureTiledOffset2D, passing the data through XeByteSwap64 on
// the way. Each thread handles up to four horizontally adjacent texels.
[numthreads(8, 32, 1)]
|
||||
void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
||||
// 1 thread = 4 texels.
|
||||
// xe_texture_tile_size packs the X extent in its low 16 bits and the Y extent
// in its high 16 bits.
uint2 texture_size = (xe_texture_tile_size >> uint2(0u, 16u)) & 0xFFFFu;
|
||||
// Index of the first of the four texels of this thread (thread X times 4).
uint2 texel_index = xe_thread_id.xy;
|
||||
texel_index.x <<= 2u;
|
||||
// Threads whose first texel is outside the texture have nothing to write.
[branch] if (any(texel_index >= texture_size)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// One destination address per texel of the group of four.
// NOTE(review): xe_texture_tile_endian_guest_pitch >> 3u is presumably the
// guest pitch (with the endian mode in the low 3 bits), and 3u presumably the
// log2 of the 8-byte texel size - confirm against XeTextureTiledOffset2D.
uint4 texel_addresses = xe_texture_tile_guest_base + XeTextureTiledOffset2D(
|
||||
texel_index, xe_texture_tile_endian_guest_pitch >> 3u, 3u);
|
||||
// Whether the second, third and fourth texels are still within the X extent
// (the first one already passed the bounds test above).
bool3 texels_inside = uint3(1u, 2u, 3u) + texel_index.x < texture_size.x;
|
||||
|
||||
// Byte offset of the first texel in the source: 8 bytes per texel along X.
uint texels_source_offset = xe_texture_tile_host_base + texel_index.y *
|
||||
xe_texture_tile_host_pitch + texel_index.x * 8u;
|
||||
// A single Load4 yields four 32-bit values, i.e. two 64-bit texels
// (.xy = first texel, .zw = second texel).
uint4 texels = XeByteSwap64(
|
||||
xe_texture_tile_source.Load4(texels_source_offset),
|
||||
xe_texture_tile_endian_guest_pitch);
|
||||
xe_texture_tile_dest.Store2(texel_addresses.x, texels.xy);
|
||||
// The remaining texels are written in order, stopping at the first one that
// falls outside the X extent.
[branch] if (texels_inside.x) {
|
||||
xe_texture_tile_dest.Store2(texel_addresses.y, texels.zw);
|
||||
[branch] if (texels_inside.y) {
|
||||
// The third and fourth texels start 16 bytes after the first, so they are
// fetched only when at least the third one is needed.
texels = XeByteSwap64(
|
||||
xe_texture_tile_source.Load4(texels_source_offset + 16u),
|
||||
xe_texture_tile_endian_guest_pitch);
|
||||
xe_texture_tile_dest.Store2(texel_addresses.z, texels.xy);
|
||||
[branch] if (texels_inside.z) {
|
||||
xe_texture_tile_dest.Store2(texel_addresses.w, texels.zw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -36,6 +36,7 @@ namespace d3d12 {
|
|||
#include "xenia/gpu/d3d12/shaders/bin/texture_load_depth_unorm_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/texture_load_dxt3a_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/texture_tile_32bpp_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/texture_tile_64bpp_cs.h"
|
||||
|
||||
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
||||
// k_1_REVERSE
|
||||
|
@ -81,7 +82,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
|||
// k_DXT4_5
|
||||
{DXGI_FORMAT_BC3_UNORM, LoadMode::k128bpb, TileMode::kUnknown},
|
||||
// k_16_16_16_16_EDRAM
|
||||
{DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::kUnknown},
|
||||
{DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::k64bpp},
|
||||
// R32_FLOAT for depth because shaders would require an additional SRV to
|
||||
// sample stencil, which we don't provide.
|
||||
// k_24_8
|
||||
|
@ -93,19 +94,19 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
|||
// k_16_16
|
||||
{DXGI_FORMAT_R16G16_UNORM, LoadMode::k32bpb, TileMode::k32bpp},
|
||||
// k_16_16_16_16
|
||||
{DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::kUnknown},
|
||||
{DXGI_FORMAT_R16G16B16A16_UNORM, LoadMode::k64bpb, TileMode::k64bpp},
|
||||
// k_16_EXPAND
|
||||
{DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, TileMode::kUnknown},
|
||||
// k_16_16_EXPAND
|
||||
{DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, TileMode::k32bpp},
|
||||
// k_16_16_16_16_EXPAND
|
||||
{DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::kUnknown},
|
||||
{DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::k64bpp},
|
||||
// k_16_FLOAT
|
||||
{DXGI_FORMAT_R16_FLOAT, LoadMode::k16bpb, TileMode::kUnknown},
|
||||
// k_16_16_FLOAT
|
||||
{DXGI_FORMAT_R16G16_FLOAT, LoadMode::k32bpb, TileMode::k32bpp},
|
||||
// k_16_16_16_16_FLOAT
|
||||
{DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::kUnknown},
|
||||
{DXGI_FORMAT_R16G16B16A16_FLOAT, LoadMode::k64bpb, TileMode::k64bpp},
|
||||
// k_32
|
||||
{DXGI_FORMAT_UNKNOWN, LoadMode::kUnknown, TileMode::kUnknown},
|
||||
// k_32_32
|
||||
|
@ -115,7 +116,7 @@ const TextureCache::HostFormat TextureCache::host_formats_[64] = {
|
|||
// k_32_FLOAT
|
||||
{DXGI_FORMAT_R32_FLOAT, LoadMode::k32bpb, TileMode::k32bpp},
|
||||
// k_32_32_FLOAT
|
||||
{DXGI_FORMAT_R32G32_FLOAT, LoadMode::k64bpb, TileMode::kUnknown},
|
||||
{DXGI_FORMAT_R32G32_FLOAT, LoadMode::k64bpb, TileMode::k64bpp},
|
||||
// k_32_32_32_32_FLOAT
|
||||
{DXGI_FORMAT_R32G32B32A32_FLOAT, LoadMode::k128bpb, TileMode::kUnknown},
|
||||
// k_32_AS_8
|
||||
|
@ -187,6 +188,7 @@ const TextureCache::LoadModeInfo TextureCache::load_mode_info_[] = {
|
|||
|
||||
// Tiling compute shader bytecode and its size in bytes, one entry per shader:
// the 32bpp variant first, then the 64bpp one.
// NOTE(review): presumably indexed by TileMode, so the order must match the
// enumerators (k32bpp, k64bpp) - confirm against the code reading this table.
const TextureCache::TileModeInfo TextureCache::tile_mode_info_[] = {
|
||||
{texture_tile_32bpp_cs, sizeof(texture_tile_32bpp_cs)},
|
||||
{texture_tile_64bpp_cs, sizeof(texture_tile_64bpp_cs)},
|
||||
};
|
||||
|
||||
TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
|
||||
|
|
|
@ -118,6 +118,7 @@ class TextureCache {
|
|||
// formats that can be resolved to.
|
||||
enum class TileMode {
|
||||
k32bpp,
|
||||
k64bpp,
|
||||
|
||||
kCount,
|
||||
|
||||
|
|
Loading…
Reference in New Issue