[D3D12] 32bpp raw resolve shader
This commit is contained in:
parent
2d8527c9df
commit
d204e9ba74
|
@ -37,6 +37,7 @@ namespace d3d12 {
|
|||
#include "xenia/gpu/d3d12/shaders/bin/edram_store_color_7e3_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_float_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_store_depth_unorm_cs.h"
|
||||
#include "xenia/gpu/d3d12/shaders/bin/edram_tile_sample_32bpp_cs.h"
|
||||
|
||||
const RenderTargetCache::EDRAMLoadStoreModeInfo
|
||||
RenderTargetCache::edram_load_store_mode_info_[size_t(
|
||||
|
@ -169,7 +170,7 @@ bool RenderTargetCache::Initialize() {
|
|||
pipeline_desc.CS.BytecodeLength = mode_info.load_shader_size;
|
||||
if (FAILED(device->CreateComputePipelineState(
|
||||
&pipeline_desc, IID_PPV_ARGS(&edram_load_pipelines_[i])))) {
|
||||
XELOGE("Failed to create EDRAM load pipeline for mode %u", i);
|
||||
XELOGE("Failed to create the EDRAM load pipeline for mode %u", i);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
|
@ -179,12 +180,22 @@ bool RenderTargetCache::Initialize() {
|
|||
pipeline_desc.CS.BytecodeLength = mode_info.store_shader_size;
|
||||
if (FAILED(device->CreateComputePipelineState(
|
||||
&pipeline_desc, IID_PPV_ARGS(&edram_store_pipelines_[i])))) {
|
||||
XELOGE("Failed to create EDRAM store pipeline for mode %u", i);
|
||||
XELOGE("Failed to create the EDRAM store pipeline for mode %u", i);
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
edram_store_pipelines_[i]->SetName(mode_info.store_pipeline_name);
|
||||
}
|
||||
// Tile single sample into a texture - 32 bits per pixel.
|
||||
pipeline_desc.CS.pShaderBytecode = edram_tile_sample_32bpp_cs;
|
||||
pipeline_desc.CS.BytecodeLength = sizeof(edram_tile_sample_32bpp_cs);
|
||||
if (FAILED(device->CreateComputePipelineState(
|
||||
&pipeline_desc, IID_PPV_ARGS(&edram_tile_sample_32bpp_pipeline_)))) {
|
||||
XELOGE("Failed to create the 32bpp EDRAM raw resolve pipeline");
|
||||
Shutdown();
|
||||
return false;
|
||||
}
|
||||
edram_tile_sample_32bpp_pipeline_->SetName(L"EDRAM Raw Resolve 32bpp");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -192,6 +203,10 @@ bool RenderTargetCache::Initialize() {
|
|||
void RenderTargetCache::Shutdown() {
|
||||
ClearCache();
|
||||
|
||||
if (edram_tile_sample_32bpp_pipeline_ != nullptr) {
|
||||
edram_tile_sample_32bpp_pipeline_->Release();
|
||||
edram_tile_sample_32bpp_pipeline_ = nullptr;
|
||||
}
|
||||
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) {
|
||||
if (edram_load_pipelines_[i] != nullptr) {
|
||||
edram_load_pipelines_[i]->Release();
|
||||
|
|
|
@ -404,6 +404,7 @@ class RenderTargetCache {
|
|||
edram_load_pipelines_[size_t(EDRAMLoadStoreMode::kCount)] = {};
|
||||
ID3D12PipelineState*
|
||||
edram_store_pipelines_[size_t(EDRAMLoadStoreMode::kCount)] = {};
|
||||
ID3D12PipelineState* edram_tile_sample_32bpp_pipeline_ = nullptr;
|
||||
|
||||
// 32 MB heaps backing used render targets resources, created when needed.
|
||||
// 24 MB proved to be not enough to store a single render target occupying the
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
#include "edram_load_store.hlsli"
|
||||
#include "texture_address.hlsli"
|
||||
|
||||
// Raw 32bpp resolve: copies one selected sample per pixel from the EDRAM buffer
// into a tiled destination texture, optionally swapping the red and blue
// channels.
//
// Each thread handles 4 horizontally adjacent destination texels (the X texel
// index is the dispatch thread X multiplied by 4).
//
// Inputs read from constants/resources declared in the included headers:
// - xe_edram_tile_sample_rect.xy: low 16 bits of each component are the
//   left/top of the copy rectangle, high 16 bits are the right/bottom
//   (exclusive).
// - xe_edram_tile_sample_dest_info: bits 0-13 are passed to
//   XeTextureTiledOffset2D (presumably the destination pitch - TODO confirm),
//   bits 14-15 are log2 of the pixel size in samples (Y, X), bits 16-17 are the
//   selected sample offset (Y, X), bits 20+ describe the red/blue swap.
// - xe_edram_tile_sample_dest_base: added to the tiled texel offsets.
[numthreads(20, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Check if not outside of the destination texture completely.
  // XY - left/top, ZW - right/bottom (exclusive), 16 bits each.
  uint4 copy_rect =
      (xe_edram_tile_sample_rect.xyxy >> uint4(0u, 0u, 16u, 16u)) & 0xFFFFu;
  uint2 texel_index = xe_thread_id.xy;
  texel_index.x *= 4u;
  // NOTE(review): the test uses only the first of the thread's 4 texels, so if
  // the left edge of the rectangle is not a multiple of 4, the whole group of 4
  // containing it is skipped - confirm that callers align the rectangle.
  [branch] if (any(texel_index < copy_rect.xy) ||
               any(texel_index >= copy_rect.zw)) {
    return;
  }

  // Get the samples from the EDRAM buffer.
  // XY - log2(pixel size), ZW - selected sample offset.
  uint4 sample_info =
      (xe_edram_tile_sample_dest_info.xxxx >> uint4(15u, 14u, 17u, 16u)) & 1u;
  // The sample offset must be added to the scaled coordinates, not to the shift
  // amount: `+` binds tighter than `<<`, so the shift has to be parenthesized
  // explicitly.
  // NOTE(review): xe_group_thread_id is unused while the dispatch-wide
  // xe_thread_id is passed together with the group ID - verify against
  // XeEDRAMOffset that it does not expect group-relative coordinates here.
  uint edram_offset = XeEDRAMOffset(
      xe_group_id.xy << sample_info.xy,
      (xe_thread_id.xy << (sample_info.xy + uint2(2u, 0u))) + sample_info.zw);
  // At 1x and 2x, this contains samples of 4 pixels. At 4x, this contains
  // samples of 2, need to load 2 more.
  uint4 pixels = xe_edram_load_store_source.Load4(edram_offset);
  [branch] if (sample_info.x != 0u) {
    // Pixels are 2 dwords apart horizontally - keep every other dword and
    // fetch the next two pixels from the following 16 bytes.
    pixels.xy = pixels.xz;
    pixels.zw = xe_edram_load_store_source.Load3(edram_offset + 16u).xz;
  }

  // Swap red and blue if requested. The lower 5 bits of the swap field are the
  // width of the red component in bits, the rest is the bit offset of blue.
  uint red_blue_swap = xe_edram_tile_sample_dest_info >> 20u;
  if (red_blue_swap != 0u) {
    uint red_mask = (1u << (red_blue_swap & 31u)) - 1u;
    // The blue shift is only 16 or 20, so a plain shift is enough.
    uint blue_shift = red_blue_swap >> 5u;
    uint blue_mask = red_mask << blue_shift;
    pixels = (pixels & ~(red_mask | blue_mask)) |
             ((pixels & red_mask) << blue_shift) |
             ((pixels >> blue_shift) & red_mask);
  }

  // Tile the pixels to the shared memory.
  uint4 texel_addresses =
      xe_edram_tile_sample_dest_base +
      XeTextureTiledOffset2D(texel_index - copy_rect.xy,
                             xe_edram_tile_sample_dest_info & 16383u, 2u);
  // The first texel is known to be inside the rectangle (checked above); the
  // remaining three are written only while still left of the right edge.
  xe_edram_load_store_dest.Store(texel_addresses.x, pixels.x);
  [branch] if (texel_index.x + 1u < copy_rect.z) {
    xe_edram_load_store_dest.Store(texel_addresses.y, pixels.y);
    [branch] if (texel_index.x + 2u < copy_rect.z) {
      xe_edram_load_store_dest.Store(texel_addresses.z, pixels.z);
      [branch] if (texel_index.x + 3u < copy_rect.z) {
        xe_edram_load_store_dest.Store(texel_addresses.w, pixels.w);
      }
    }
  }
}
|
Loading…
Reference in New Issue