[D3D12] Add red/blue swap to EDRAM loads and describe resolve modes

This commit is contained in:
Triang3l 2018-08-21 22:04:06 +03:00
parent d628e92ff5
commit da1be211eb
6 changed files with 105 additions and 9 deletions

View File

@ -19,6 +19,7 @@
#include "xenia/base/memory.h" #include "xenia/base/memory.h"
#include "xenia/base/profiling.h" #include "xenia/base/profiling.h"
#include "xenia/gpu/d3d12/d3d12_command_processor.h" #include "xenia/gpu/d3d12/d3d12_command_processor.h"
#include "xenia/gpu/texture_info.h"
namespace xe { namespace xe {
namespace gpu { namespace gpu {
@ -876,7 +877,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory, bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
uint32_t edram_base, uint32_t surface_pitch, uint32_t edram_base, uint32_t surface_pitch,
MsaaSamples msaa_samples, bool is_depth, MsaaSamples msaa_samples, bool is_depth,
uint32_t format, uint32_t src_format,
const D3D12_RECT& src_rect) { const D3D12_RECT& src_rect) {
auto& regs = *register_file_; auto& regs = *register_file_;
@ -909,9 +910,77 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
uint32_t src_height = uint32_t src_height =
std::min(uint32_t(src_rect.bottom - src_rect.top), dest_height); std::min(uint32_t(src_rect.bottom - src_rect.top), dest_height);
XELOGGPU("Copying samples %u to 0x%.8X (%ux%u), info 0x%.8X", // Get format info.
(rb_copy_control >> 4) & 0x7, regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32, uint32_t dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
dest_pitch, dest_height, regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32); TextureFormat src_texture_format;
if (is_depth) {
src_texture_format =
DepthRenderTargetToTextureFormat(DepthRenderTargetFormat(src_format));
} else {
src_texture_format =
ColorRenderTargetToTextureFormat(ColorRenderTargetFormat(src_format));
}
assert_true(src_texture_format != TextureFormat::kUnknown);
src_texture_format = GetBaseFormat(src_texture_format);
TextureFormat dest_format =
GetBaseFormat(TextureFormat((dest_info >> 7) & 0x3F));
// Get the destination location.
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
if (dest_address & 0x3) {
assert_always();
// Not 4-aligning may break UAV access significantly - let's hope games
// don't resolve to 8bpp or 16bpp textures at very odd locations.
return false;
}
int32_t dest_exp_bias = int32_t((dest_info >> 16) << 26) >> 26;
uint32_t dest_swap = (dest_info >> 24) & 0x1;
// TODO(Triang3l): Copy to array slices.
// TODO(Triang3l): Investigate what copy_dest_number is.
XELOGGPU(
"Copying samples %u to 0x%.8X (%ux%u), destination format %s, "
"exponent bias %d, red and blue %sswapped",
(rb_copy_control >> 4) & 0x7, dest_address, dest_pitch, dest_height,
FormatInfo::Get(dest_format)->name, dest_exp_bias,
dest_swap ? "" : "not ");
// There are 3 paths for resolving in this function - they don't necessarily
// have to map directly to kRaw and kConvert CopyCommands.
// - Raw color - when the source is single-sampled and has the same format as
// the destination, and there's no need to apply exponent bias. A regular
// EDRAM load is done to a buffer, and the buffer is then tiled to the
// shared memory. Because swapping red and blue is very common, this path
// supports swapping.
// - Depth to depth - when the source and the destination formats are
// renderable depth-stencil ones (D24S8 or D24FS8). A single sample is
// taken from the EDRAM buffer, converted between D24 and D24F if needed,
// and tiled directly to the shared memory buffer.
// - Conversion - when a simple copy is not enough. The EDRAM region is loaded
// to a render target resource, which is then used as a texture in a shader
// performing the resolve (by sampling the texture on or between pixels with
// bilinear filtering), applying exponent bias and swapping red and blue in
// a format-agnostic way, then the resulting color is written to a temporary
// RTV of the destination format. This also works for converting depth to
// 16-bit or 32-bit.
if (dest_format == TextureFormat::k_24_8 ||
dest_format == TextureFormat::k_24_8_FLOAT) {
// Depth to depth.
XELOGGPU("Resolving to a depth texture");
if (!is_depth) {
return false;
}
// TODO(Triang3l): Depth to depth.
return false;
} else if (src_texture_format == dest_format &&
msaa_samples == MsaaSamples::k1X && dest_exp_bias == 0) {
XELOGGPU("Resolving a single-sampled surface without conversion");
// TODO(Triang3l): Raw resolve.
return false;
} else {
XELOGGPU("Resolving with a pixel shader");
// TODO(Triang3l): Conversion.
return false;
}
return true; return true;
} }
@ -1290,10 +1359,12 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
location_dest.PlacedFootprint = render_target->footprints[1]; location_dest.PlacedFootprint = render_target->footprints[1];
command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source, command_list->CopyTextureRegion(&location_dest, 0, 0, 0, &location_source,
nullptr); nullptr);
root_constants.rt_stencil_offset = root_constants.rt_stencil_offset_or_swap_red_blue =
uint32_t(location_dest.PlacedFootprint.Offset); uint32_t(location_dest.PlacedFootprint.Offset);
root_constants.rt_stencil_pitch = root_constants.rt_stencil_pitch =
location_dest.PlacedFootprint.Footprint.RowPitch; location_dest.PlacedFootprint.Footprint.RowPitch;
} else {
root_constants.rt_stencil_offset_or_swap_red_blue = 0;
} }
// Transition the copy buffer to SRV. // Transition the copy buffer to SRV.
@ -1458,10 +1529,12 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
root_constants.rt_color_depth_pitch = root_constants.rt_color_depth_pitch =
render_target->footprints[0].Footprint.RowPitch; render_target->footprints[0].Footprint.RowPitch;
if (render_target->key.is_depth) { if (render_target->key.is_depth) {
root_constants.rt_stencil_offset = root_constants.rt_stencil_offset_or_swap_red_blue =
uint32_t(render_target->footprints[1].Offset); uint32_t(render_target->footprints[1].Offset);
root_constants.rt_stencil_pitch = root_constants.rt_stencil_pitch =
render_target->footprints[1].Footprint.RowPitch; render_target->footprints[1].Footprint.RowPitch;
} else {
root_constants.rt_stencil_offset_or_swap_red_blue = 0;
} }
// Validate the height in case the resolve is somehow too large (shouldn't // Validate the height in case the resolve is somehow too large (shouldn't

View File

@ -338,7 +338,8 @@ class RenderTargetCache {
// Performs the copying part of a resolve. // Performs the copying part of a resolve.
bool ResolveCopy(SharedMemory* shared_memory, uint32_t edram_base, bool ResolveCopy(SharedMemory* shared_memory, uint32_t edram_base,
uint32_t surface_pitch, MsaaSamples msaa_samples, uint32_t surface_pitch, MsaaSamples msaa_samples,
bool is_depth, uint32_t format, const D3D12_RECT& src_rect); bool is_depth, uint32_t src_format,
const D3D12_RECT& src_rect);
D3D12CommandProcessor* command_processor_; D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_; RegisterFile* register_file_;
@ -354,7 +355,7 @@ class RenderTargetCache {
uint32_t base_tiles; uint32_t base_tiles;
uint32_t pitch_tiles; uint32_t pitch_tiles;
uint32_t rt_color_depth_pitch; uint32_t rt_color_depth_pitch;
uint32_t rt_stencil_offset; uint32_t rt_stencil_offset_or_swap_red_blue;
uint32_t rt_stencil_pitch; uint32_t rt_stencil_pitch;
}; };
// EDRAM buffer load/store pipelines. // EDRAM buffer load/store pipelines.

View File

@ -8,6 +8,15 @@ void main(uint3 xe_group_id : SV_GroupID,
tile_dword_index.x *= 4u; tile_dword_index.x *= 4u;
uint4 pixels = xe_edram_load_store_source.Load4( uint4 pixels = xe_edram_load_store_source.Load4(
XeEDRAMOffset(xe_group_id.xy, tile_dword_index)); XeEDRAMOffset(xe_group_id.xy, tile_dword_index));
if (xe_edram_swap_red_blue != 0u) {
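// The constant packs the red mask in the low 16 bits and the red-to-blue
// shift in the high 16 bits. Not a very long shift, just 16 or 20.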
uint blue_shift = xe_edram_swap_red_blue >> 16u;
uint red_mask = xe_edram_swap_red_blue & 0xFFFFu;
uint blue_mask = red_mask << blue_shift;
pixels = (pixels & ~(red_mask | blue_mask)) |
((pixels & red_mask) << blue_shift) |
((pixels >> blue_shift) & red_mask);
}
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
xe_thread_id.x * 16u; xe_thread_id.x * 16u;
xe_edram_load_store_dest.Store4(rt_offset, pixels); xe_edram_load_store_dest.Store4(rt_offset, pixels);

View File

@ -13,6 +13,11 @@ void main(uint3 xe_group_id : SV_GroupID,
} }
uint4 pixels = xe_edram_load_store_source.Load4( uint4 pixels = xe_edram_load_store_source.Load4(
XeEDRAMOffset(xe_group_id.xy, tile_dword_index)); XeEDRAMOffset(xe_group_id.xy, tile_dword_index));
if (xe_edram_swap_red_blue != 0u) {
// The only 64-bit formats with a blue component are 16_16_16_16 and
// 16_16_16_16_FLOAT.
pixels = (pixels.yxwz & 0xFFFFu) | (pixels & 0xFFFF0000u);
}
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
xe_thread_id.x * 16u; xe_thread_id.x * 16u;
xe_edram_load_store_dest.Store4(rt_offset, pixels); xe_edram_load_store_dest.Store4(rt_offset, pixels);

View File

@ -14,6 +14,10 @@ void main(uint3 xe_group_id : SV_GroupID,
uint4 pixels_f16u32_packed = uint4 pixels_f16u32_packed =
uint4(pixel_0_f16u32.xz, pixel_1_f16u32.xz) | uint4(pixel_0_f16u32.xz, pixel_1_f16u32.xz) |
(uint4(pixel_0_f16u32.yw, pixel_1_f16u32.yw) << 16u); (uint4(pixel_0_f16u32.yw, pixel_1_f16u32.yw) << 16u);
if (xe_edram_swap_red_blue != 0u) {
pixels_f16u32_packed = (pixels_f16u32_packed.yxwz & 0xFFFFu) |
(pixels_f16u32_packed & 0xFFFF0000u);
}
uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch + uint rt_offset = xe_thread_id.y * xe_edram_rt_color_depth_pitch +
xe_thread_id.x * 16u; xe_thread_id.x * 16u;
xe_edram_load_store_dest.Store4(rt_offset, pixels_f16u32_packed); xe_edram_load_store_dest.Store4(rt_offset, pixels_f16u32_packed);

View File

@ -5,9 +5,13 @@ cbuffer XeEDRAMLoadStoreConstants : register(b0) {
uint xe_edram_base_tiles; uint xe_edram_base_tiles;
uint xe_edram_pitch_tiles; uint xe_edram_pitch_tiles;
uint xe_edram_rt_color_depth_pitch; uint xe_edram_rt_color_depth_pitch;
uint xe_edram_rt_stencil_offset; uint xe_edram_rt_stencil_offset_or_swap_red_blue;
uint xe_edram_rt_stencil_pitch; uint xe_edram_rt_stencil_pitch;
}; };
#define xe_edram_rt_stencil_offset xe_edram_rt_stencil_offset_or_swap_red_blue
// For loads only. How exactly it's handled depends on the specific load shader,
// but 0 always means red and blue shouldn't be swapped.
#define xe_edram_swap_red_blue xe_edram_rt_stencil_offset_or_swap_red_blue
ByteAddressBuffer xe_edram_load_store_source : register(t0); ByteAddressBuffer xe_edram_load_store_source : register(t0);
RWByteAddressBuffer xe_edram_load_store_dest : register(u0); RWByteAddressBuffer xe_edram_load_store_dest : register(u0);