[D3D12] 32bpp raw resolve shader

This commit is contained in:
Triang3l 2018-08-22 21:15:02 +03:00
parent 2d8527c9df
commit d204e9ba74
3 changed files with 77 additions and 2 deletions

View File

@ -37,6 +37,7 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/bin/edram_store_color_7e3_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_float_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_unorm_cs.h"
#include "xenia/gpu/d3d12/shaders/bin/edram_tile_sample_32bpp_cs.h"
const RenderTargetCache::EDRAMLoadStoreModeInfo
RenderTargetCache::edram_load_store_mode_info_[size_t(
@ -169,7 +170,7 @@ bool RenderTargetCache::Initialize() {
pipeline_desc.CS.BytecodeLength = mode_info.load_shader_size;
if (FAILED(device->CreateComputePipelineState(
&pipeline_desc, IID_PPV_ARGS(&edram_load_pipelines_[i])))) {
XELOGE("Failed to create EDRAM load pipeline for mode %u", i);
XELOGE("Failed to create the EDRAM load pipeline for mode %u", i);
Shutdown();
return false;
}
@ -179,12 +180,22 @@ bool RenderTargetCache::Initialize() {
pipeline_desc.CS.BytecodeLength = mode_info.store_shader_size;
if (FAILED(device->CreateComputePipelineState(
&pipeline_desc, IID_PPV_ARGS(&edram_store_pipelines_[i])))) {
XELOGE("Failed to create EDRAM store pipeline for mode %u", i);
XELOGE("Failed to create the EDRAM store pipeline for mode %u", i);
Shutdown();
return false;
}
edram_store_pipelines_[i]->SetName(mode_info.store_pipeline_name);
}
// Tile single sample into a texture - 32 bits per pixel.
pipeline_desc.CS.pShaderBytecode = edram_tile_sample_32bpp_cs;
pipeline_desc.CS.BytecodeLength = sizeof(edram_tile_sample_32bpp_cs);
if (FAILED(device->CreateComputePipelineState(
&pipeline_desc, IID_PPV_ARGS(&edram_tile_sample_32bpp_pipeline_)))) {
XELOGE("Failed to create the 32bpp EDRAM raw resolve pipeline");
Shutdown();
return false;
}
edram_tile_sample_32bpp_pipeline_->SetName(L"EDRAM Raw Resolve 32bpp");
return true;
}
@ -192,6 +203,10 @@ bool RenderTargetCache::Initialize() {
void RenderTargetCache::Shutdown() {
ClearCache();
if (edram_tile_sample_32bpp_pipeline_ != nullptr) {
edram_tile_sample_32bpp_pipeline_->Release();
edram_tile_sample_32bpp_pipeline_ = nullptr;
}
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) {
if (edram_load_pipelines_[i] != nullptr) {
edram_load_pipelines_[i]->Release();

View File

@ -404,6 +404,7 @@ class RenderTargetCache {
edram_load_pipelines_[size_t(EDRAMLoadStoreMode::kCount)] = {};
ID3D12PipelineState*
edram_store_pipelines_[size_t(EDRAMLoadStoreMode::kCount)] = {};
ID3D12PipelineState* edram_tile_sample_32bpp_pipeline_ = nullptr;
// 32 MB heaps backing used render targets resources, created when needed.
// 24 MB proved to be not enough to store a single render target occupying the

View File

@ -0,0 +1,59 @@
#include "edram_load_store.hlsli"
#include "texture_address.hlsli"
// Raw 32bpp resolve: reads one selected sample per pixel from the EDRAM buffer
// and writes the 32-bit values to the destination buffer at tiled texture
// addresses. Each thread handles 4 horizontally adjacent texels.
//
// Inputs read from constants declared in the included headers:
// - xe_edram_tile_sample_rect: packed copy rectangle (16 bits per bound).
// - xe_edram_tile_sample_dest_info: bits 0-13 are passed to
//   XeTextureTiledOffset2D, bits 14-17 hold the sample info, bits 20 and up
//   hold the red/blue swap description.
// - xe_edram_tile_sample_dest_base: added to every tiled destination offset.
//
// NOTE(review): xe_group_thread_id is unused, the offset within the tile is
// derived from the dispatch-wide xe_thread_id - confirm this is what
// XeEDRAMOffset expects.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Check if not outside of the destination texture completely.
  // XY - inclusive lower bound (low 16 bits), ZW - exclusive upper bound (high
  // 16 bits), as used by the comparisons below.
  uint4 copy_rect =
      (xe_edram_tile_sample_rect.xyxy >> uint4(0u, 0u, 16u, 16u)) & 0xFFFFu;
  uint2 texel_index = xe_thread_id.xy;
  // One thread covers 4 texels along X.
  texel_index.x *= 4u;
  [branch] if (any(texel_index < copy_rect.xy) ||
               any(texel_index >= copy_rect.zw)) {
    return;
  }

  // Get the samples from the EDRAM buffer.
  // XY - log2(pixel size), ZW - selected sample offset.
  uint4 sample_info =
      (xe_edram_tile_sample_dest_info.xxxx >> uint4(15u, 14u, 17u, 16u)) & 1u;
  // The shift must be parenthesized: `+` binds tighter than `<<`, so without
  // the parentheses the sample offset would be added to the shift amount
  // rather than to the scaled index.
  uint edram_offset = XeEDRAMOffset(
      xe_group_id.xy << sample_info.xy,
      (xe_thread_id.xy << (sample_info.xy + uint2(2u, 0u))) + sample_info.zw);
  // At 1x and 2x, this contains samples of 4 pixels. At 4x, this contains
  // samples of 2, need to load 2 more.
  uint4 pixels = xe_edram_load_store_source.Load4(edram_offset);
  [branch] if (sample_info.x != 0u) {
    // Keep every other dword (pixels are 2 dwords apart horizontally here).
    pixels.xy = pixels.xz;
    pixels.zw = xe_edram_load_store_source.Load3(edram_offset + 16u).xz;
  }

  // Optionally swap the red and blue bit fields of every pixel.
  // Low 5 bits of the swap info - field width, the bits above - blue position.
  uint red_blue_swap = xe_edram_tile_sample_dest_info >> 20u;
  if (red_blue_swap != 0u) {
    uint red_mask = (1u << (red_blue_swap & 31u)) - 1u;
    // The original author notes the shift is only ever 16 or 20, so shifting
    // by 32 or more does not need to be handled.
    uint blue_shift = red_blue_swap >> 5u;
    uint blue_mask = red_mask << blue_shift;
    pixels = (pixels & ~(red_mask | blue_mask)) |
             ((pixels & red_mask) << blue_shift) |
             ((pixels >> blue_shift) & red_mask);
  }

  // Write the pixels to the destination buffer at their tiled addresses,
  // relative to the top-left corner of the copy rectangle.
  uint4 texel_addresses =
      xe_edram_tile_sample_dest_base +
      XeTextureTiledOffset2D(texel_index - copy_rect.xy,
                             xe_edram_tile_sample_dest_info & 16383u, 2u);
  xe_edram_load_store_dest.Store(texel_addresses.x, pixels.x);
  // Only store the remaining texels that are still left of the exclusive right
  // bound of the copy rectangle.
  [branch] if (texel_index.x + 1u < copy_rect.z) {
    xe_edram_load_store_dest.Store(texel_addresses.y, pixels.y);
    [branch] if (texel_index.x + 2u < copy_rect.z) {
      xe_edram_load_store_dest.Store(texel_addresses.z, pixels.z);
      [branch] if (texel_index.x + 3u < copy_rect.z) {
        xe_edram_load_store_dest.Store(texel_addresses.w, pixels.w);
      }
    }
  }
}