[D3D12] 32bpp and 20e4 clearing in resolves
This commit is contained in:
parent
66510b2e6f
commit
50470d67a8
|
@ -27,6 +27,8 @@ namespace gpu {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
|
||||||
// Generated with `xb buildhlsl`.
|
// Generated with `xb buildhlsl`.
|
||||||
|
#include "xenia/gpu/d3d12/shaders/bin/edram_clear_32bpp_cs.h"
|
||||||
|
#include "xenia/gpu/d3d12/shaders/bin/edram_clear_depth_float_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_32bpp_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_32bpp_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_64bpp_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_64bpp_cs.h"
|
||||||
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_7e3_cs.h"
|
#include "xenia/gpu/d3d12/shaders/bin/edram_load_color_7e3_cs.h"
|
||||||
|
@ -150,6 +152,7 @@ bool RenderTargetCache::Initialize() {
|
||||||
}
|
}
|
||||||
if (load_store_root_error_blob != nullptr) {
|
if (load_store_root_error_blob != nullptr) {
|
||||||
load_store_root_error_blob->Release();
|
load_store_root_error_blob->Release();
|
||||||
|
load_store_root_error_blob = nullptr;
|
||||||
}
|
}
|
||||||
if (FAILED(device->CreateRootSignature(
|
if (FAILED(device->CreateRootSignature(
|
||||||
0, load_store_root_blob->GetBufferPointer(),
|
0, load_store_root_blob->GetBufferPointer(),
|
||||||
|
@ -162,6 +165,36 @@ bool RenderTargetCache::Initialize() {
|
||||||
}
|
}
|
||||||
load_store_root_blob->Release();
|
load_store_root_blob->Release();
|
||||||
|
|
||||||
|
// Create the clear root signature (the same, but with the UAV only).
|
||||||
|
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
|
||||||
|
++load_store_root_parameters[1].DescriptorTable.pDescriptorRanges;
|
||||||
|
if (FAILED(D3D12SerializeRootSignature(
|
||||||
|
&load_store_root_desc, D3D_ROOT_SIGNATURE_VERSION_1,
|
||||||
|
&load_store_root_blob, &load_store_root_error_blob))) {
|
||||||
|
XELOGE("Failed to serialize the EDRAM buffer clear root signature");
|
||||||
|
if (load_store_root_error_blob != nullptr) {
|
||||||
|
XELOGE("%s", reinterpret_cast<const char*>(
|
||||||
|
load_store_root_error_blob->GetBufferPointer()));
|
||||||
|
load_store_root_error_blob->Release();
|
||||||
|
}
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (load_store_root_error_blob != nullptr) {
|
||||||
|
load_store_root_error_blob->Release();
|
||||||
|
load_store_root_error_blob = nullptr;
|
||||||
|
}
|
||||||
|
if (FAILED(device->CreateRootSignature(
|
||||||
|
0, load_store_root_blob->GetBufferPointer(),
|
||||||
|
load_store_root_blob->GetBufferSize(),
|
||||||
|
IID_PPV_ARGS(&edram_clear_root_signature_)))) {
|
||||||
|
XELOGE("Failed to create the EDRAM buffer clear root signature");
|
||||||
|
load_store_root_blob->Release();
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
load_store_root_blob->Release();
|
||||||
|
|
||||||
// Create the load/store pipelines.
|
// Create the load/store pipelines.
|
||||||
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
|
D3D12_COMPUTE_PIPELINE_STATE_DESC pipeline_desc;
|
||||||
pipeline_desc.pRootSignature = edram_load_store_root_signature_;
|
pipeline_desc.pRootSignature = edram_load_store_root_signature_;
|
||||||
|
@ -203,6 +236,29 @@ bool RenderTargetCache::Initialize() {
|
||||||
}
|
}
|
||||||
edram_tile_sample_32bpp_pipeline_->SetName(L"EDRAM Raw Resolve 32bpp");
|
edram_tile_sample_32bpp_pipeline_->SetName(L"EDRAM Raw Resolve 32bpp");
|
||||||
|
|
||||||
|
// Create the clear pipelines.
|
||||||
|
pipeline_desc.pRootSignature = edram_clear_root_signature_;
|
||||||
|
// 32-bit color or unorm depth.
|
||||||
|
pipeline_desc.CS.pShaderBytecode = edram_clear_32bpp_cs;
|
||||||
|
pipeline_desc.CS.BytecodeLength = sizeof(edram_clear_32bpp_cs);
|
||||||
|
if (FAILED(device->CreateComputePipelineState(
|
||||||
|
&pipeline_desc, IID_PPV_ARGS(&edram_clear_32bpp_pipeline_)))) {
|
||||||
|
XELOGE("Failed to create the EDRAM 32bpp clear pipeline");
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
edram_clear_32bpp_pipeline_->SetName(L"EDRAM Clear 32bpp");
|
||||||
|
// Float depth.
|
||||||
|
pipeline_desc.CS.pShaderBytecode = edram_clear_depth_float_cs;
|
||||||
|
pipeline_desc.CS.BytecodeLength = sizeof(edram_clear_depth_float_cs);
|
||||||
|
if (FAILED(device->CreateComputePipelineState(
|
||||||
|
&pipeline_desc, IID_PPV_ARGS(&edram_clear_depth_float_pipeline_)))) {
|
||||||
|
XELOGE("Failed to create the EDRAM float depth clear pipeline");
|
||||||
|
Shutdown();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
edram_clear_depth_float_pipeline_->SetName(L"EDRAM Clear Float Depth");
|
||||||
|
|
||||||
// Create the converting resolve root signature.
|
// Create the converting resolve root signature.
|
||||||
D3D12_ROOT_PARAMETER resolve_root_parameters[2];
|
D3D12_ROOT_PARAMETER resolve_root_parameters[2];
|
||||||
// Parameter 0 is constants.
|
// Parameter 0 is constants.
|
||||||
|
@ -295,6 +351,14 @@ void RenderTargetCache::Shutdown() {
|
||||||
edram_tile_sample_32bpp_pipeline_->Release();
|
edram_tile_sample_32bpp_pipeline_->Release();
|
||||||
edram_tile_sample_32bpp_pipeline_ = nullptr;
|
edram_tile_sample_32bpp_pipeline_ = nullptr;
|
||||||
}
|
}
|
||||||
|
if (edram_clear_depth_float_pipeline_ != nullptr) {
|
||||||
|
edram_clear_depth_float_pipeline_->Release();
|
||||||
|
edram_clear_depth_float_pipeline_ = nullptr;
|
||||||
|
}
|
||||||
|
if (edram_clear_32bpp_pipeline_ != nullptr) {
|
||||||
|
edram_clear_32bpp_pipeline_->Release();
|
||||||
|
edram_clear_32bpp_pipeline_ = nullptr;
|
||||||
|
}
|
||||||
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) {
|
for (uint32_t i = 0; i < uint32_t(EDRAMLoadStoreMode::kCount); ++i) {
|
||||||
if (edram_load_pipelines_[i] != nullptr) {
|
if (edram_load_pipelines_[i] != nullptr) {
|
||||||
edram_load_pipelines_[i]->Release();
|
edram_load_pipelines_[i]->Release();
|
||||||
|
@ -305,6 +369,10 @@ void RenderTargetCache::Shutdown() {
|
||||||
edram_store_pipelines_[i] = nullptr;
|
edram_store_pipelines_[i] = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (edram_clear_root_signature_ != nullptr) {
|
||||||
|
edram_clear_root_signature_->Release();
|
||||||
|
edram_clear_root_signature_ = nullptr;
|
||||||
|
}
|
||||||
if (edram_load_store_root_signature_ != nullptr) {
|
if (edram_load_store_root_signature_ != nullptr) {
|
||||||
edram_load_store_root_signature_->Release();
|
edram_load_store_root_signature_->Release();
|
||||||
edram_load_store_root_signature_ = nullptr;
|
edram_load_store_root_signature_ = nullptr;
|
||||||
|
@ -924,8 +992,9 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory,
|
||||||
bool copied = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
bool copied = ResolveCopy(shared_memory, texture_cache, surface_edram_base,
|
||||||
surface_pitch, msaa_samples, surface_is_depth,
|
surface_pitch, msaa_samples, surface_is_depth,
|
||||||
surface_format, src_rect);
|
surface_format, src_rect);
|
||||||
// TODO(Triang3l): Clear.
|
bool cleared = ResolveClear(surface_edram_base, surface_pitch, msaa_samples,
|
||||||
return copied;
|
surface_is_depth, surface_format, src_rect);
|
||||||
|
return copied || cleared;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
|
@ -1459,6 +1528,106 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Performs the clearing part of a resolve by filling a rectangle of the EDRAM
// buffer with the value from RB_DEPTH_CLEAR (depth) or RB_COLOR_CLEAR (color)
// using a compute shader.
//
// edram_base, surface_pitch, msaa_samples and rect are forwarded to
// GetEDRAMLayout to obtain the tile layout of the region. format is interpreted
// as a DepthRenderTargetFormat when is_depth is true and as a
// ColorRenderTargetFormat otherwise.
//
// Returns true if the clear was recorded or there was nothing to do (clearing
// is disabled in RB_COPY_CONTROL, or the layout is empty). Returns false if the
// format is 64bpp color (not implemented yet), if there is no current command
// list, or if a view descriptor could not be obtained.
bool RenderTargetCache::ResolveClear(uint32_t edram_base,
                                     uint32_t surface_pitch,
                                     MsaaSamples msaa_samples, bool is_depth,
                                     uint32_t format, const D3D12_RECT& rect) {
  auto& regs = *register_file_;

  // Check if clearing is enabled - bit 9 of RB_COPY_CONTROL is tested for
  // depth, bit 8 for color.
  uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
  if (!(rb_copy_control & (is_depth ? (1 << 9) : (1 << 8)))) {
    return true;
  }

  // Calculate the layout.
  bool is_64bpp =
      !is_depth && IsColorFormat64bpp(ColorRenderTargetFormat(format));
  D3D12_RECT clear_rect = rect;
  uint32_t surface_pitch_tiles, row_tiles, rows;
  if (!GetEDRAMLayout(surface_pitch, msaa_samples, is_64bpp, edram_base,
                      clear_rect, surface_pitch_tiles, row_tiles, rows)) {
    // Nothing to clear.
    return true;
  }

  // Reject the unsupported case before a descriptor is requested and the
  // EDRAM buffer is transitioned, so failing doesn't leave behind work that
  // nothing will use.
  if (is_64bpp) {
    // TODO(Triang3l): 64bpp color clear.
    return false;
  }

  bool is_float_depth =
      is_depth &&
      DepthRenderTargetFormat(format) == DepthRenderTargetFormat::kD24FS8;
  uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
  uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0;

  // Get everything needed for clearing.
  auto command_list = command_processor_->GetCurrentCommandList();
  if (command_list == nullptr) {
    return false;
  }
  auto device =
      command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
  D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
  D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
  if (command_processor_->RequestViewDescriptors(0, 1, 1, descriptor_cpu_start,
                                                 descriptor_gpu_start) == 0) {
    return false;
  }

  // Submit the clear.
  command_processor_->PushTransitionBarrier(
      edram_buffer_, edram_buffer_state_,
      D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
  edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
  command_processor_->SubmitBarriers();

  // The rectangle is passed in samples rather than pixels: X in the lower 16
  // bits and Y in the upper 16 bits of each constant. The shifts are done on
  // unsigned values so they are well-defined.
  EDRAMLoadStoreRootConstants root_constants;
  root_constants.clear_rect_lt =
      (uint32_t(clear_rect.left) << samples_x_log2) |
      (uint32_t(clear_rect.top) << (16 + samples_y_log2));
  root_constants.clear_rect_rb =
      (uint32_t(clear_rect.right) << samples_x_log2) |
      (uint32_t(clear_rect.bottom) << (16 + samples_y_log2));
  root_constants.base_pitch_tiles = edram_base | (surface_pitch_tiles << 11);
  if (is_float_depth) {
    root_constants.clear_depth24 = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
    // The float depth clear shader writes both the raw 24-bit value and a
    // 32-bit float version of it, so convert the depth here.
    // 20e4 [0,2), based on CFloat24 from d3dref9.dll and on 6e4 in DirectXTex.
    uint32_t depth24 = root_constants.clear_depth24 >> 8;
    if (depth24 == 0) {
      root_constants.clear_depth32 = 0;
    } else {
      uint32_t mantissa = depth24 & 0xFFFFFu, exponent = depth24 >> 20;
      if (exponent == 0) {
        // Normalize the value in the resulting float.
        // do { Exponent--; Mantissa <<= 1; } while ((Mantissa & 0x100000) == 0)
        uint32_t mantissa_lzcnt = xe::lzcnt(mantissa) - (32u - 21u);
        // May wrap around as unsigned, but adding the bias below brings it
        // back into range.
        exponent = 1u - mantissa_lzcnt;
        mantissa = (mantissa << mantissa_lzcnt) & 0xFFFFFu;
      }
      root_constants.clear_depth32 =
          ((exponent + 112u) << 23) | (mantissa << 3);
    }
    command_processor_->SetComputePipeline(edram_clear_depth_float_pipeline_);
  } else {
    // 32-bit color or unorm depth - the raw register value is written as is.
    Register reg =
        is_depth ? XE_GPU_REG_RB_DEPTH_CLEAR : XE_GPU_REG_RB_COLOR_CLEAR;
    root_constants.clear_color_high = regs[reg].u32;
    // Not needed for a 32bpp clear, but the whole structure is uploaded below,
    // so don't leave an indeterminate value in it.
    root_constants.clear_color_low = 0;
    command_processor_->SetComputePipeline(edram_clear_32bpp_pipeline_);
  }
  command_list->SetComputeRootSignature(edram_clear_root_signature_);
  command_list->SetComputeRoot32BitConstants(
      0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);

  // Raw view of the whole EDRAM buffer (2 * 2048 * 1280 dwords).
  D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
  uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
  uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
  uav_desc.Buffer.FirstElement = 0;
  uav_desc.Buffer.NumElements = 2 * 2048 * 1280;
  uav_desc.Buffer.StructureByteStride = 0;
  uav_desc.Buffer.CounterOffsetInBytes = 0;
  uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
  device->CreateUnorderedAccessView(edram_buffer_, nullptr, &uav_desc,
                                    descriptor_cpu_start);
  command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);

  command_list->Dispatch(row_tiles, rows, 1);
  command_processor_->PushUAVBarrier(edram_buffer_);

  return true;
}
|
||||||
|
|
||||||
ID3D12PipelineState* RenderTargetCache::GetResolvePipeline(
|
ID3D12PipelineState* RenderTargetCache::GetResolvePipeline(
|
||||||
DXGI_FORMAT dest_format) {
|
DXGI_FORMAT dest_format) {
|
||||||
// Try to find an existing pipeline.
|
// Try to find an existing pipeline.
|
||||||
|
|
|
@ -399,6 +399,10 @@ class RenderTargetCache {
|
||||||
uint32_t edram_base, uint32_t surface_pitch,
|
uint32_t edram_base, uint32_t surface_pitch,
|
||||||
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
|
MsaaSamples msaa_samples, bool is_depth, uint32_t src_format,
|
||||||
const D3D12_RECT& src_rect);
|
const D3D12_RECT& src_rect);
|
||||||
|
// Performs the clearing part of a resolve.
|
||||||
|
bool ResolveClear(uint32_t edram_base, uint32_t surface_pitch,
|
||||||
|
MsaaSamples msaa_samples, bool is_depth, uint32_t format,
|
||||||
|
const D3D12_RECT& rect);
|
||||||
|
|
||||||
ID3D12PipelineState* GetResolvePipeline(DXGI_FORMAT dest_format);
|
ID3D12PipelineState* GetResolvePipeline(DXGI_FORMAT dest_format);
|
||||||
// Returns any available resolve target placed at least at
|
// Returns any available resolve target placed at least at
|
||||||
|
@ -416,8 +420,9 @@ class RenderTargetCache {
|
||||||
D3D12_RESOURCE_STATES edram_buffer_state_;
|
D3D12_RESOURCE_STATES edram_buffer_state_;
|
||||||
bool edram_buffer_cleared_;
|
bool edram_buffer_cleared_;
|
||||||
|
|
||||||
// EDRAM buffer load/store root signature.
|
// EDRAM root signatures.
|
||||||
ID3D12RootSignature* edram_load_store_root_signature_ = nullptr;
|
ID3D12RootSignature* edram_load_store_root_signature_ = nullptr;
|
||||||
|
ID3D12RootSignature* edram_clear_root_signature_ = nullptr;
|
||||||
struct EDRAMLoadStoreRootConstants {
|
struct EDRAMLoadStoreRootConstants {
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
|
@ -443,11 +448,26 @@ class RenderTargetCache {
|
||||||
// For 64 bits per pixel, it's 1 if need to swap 0:15 and 32:47.
|
// For 64 bits per pixel, it's 1 if need to swap 0:15 and 32:47.
|
||||||
uint32_t tile_sample_dest_info;
|
uint32_t tile_sample_dest_info;
|
||||||
};
|
};
|
||||||
|
struct {
|
||||||
|
// 16 bits for X, 16 bits for Y.
|
||||||
|
uint32_t clear_rect_lt;
|
||||||
|
uint32_t clear_rect_rb;
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
uint32_t clear_color_high;
|
||||||
|
uint32_t clear_color_low;
|
||||||
|
};
|
||||||
|
struct {
|
||||||
|
uint32_t clear_depth24;
|
||||||
|
uint32_t clear_depth32;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
};
|
||||||
};
|
};
|
||||||
// Base in the lower 11 bits, pitch above.
|
// Base in the lower 11 bits, pitch above.
|
||||||
uint32_t base_pitch_tiles;
|
uint32_t base_pitch_tiles;
|
||||||
};
|
};
|
||||||
// EDRAM buffer load/store pipelines.
|
// EDRAM pipelines.
|
||||||
static const EDRAMLoadStoreModeInfo
|
static const EDRAMLoadStoreModeInfo
|
||||||
edram_load_store_mode_info_[size_t(EDRAMLoadStoreMode::kCount)];
|
edram_load_store_mode_info_[size_t(EDRAMLoadStoreMode::kCount)];
|
||||||
ID3D12PipelineState*
|
ID3D12PipelineState*
|
||||||
|
@ -455,6 +475,8 @@ class RenderTargetCache {
|
||||||
ID3D12PipelineState*
|
ID3D12PipelineState*
|
||||||
edram_store_pipelines_[size_t(EDRAMLoadStoreMode::kCount)] = {};
|
edram_store_pipelines_[size_t(EDRAMLoadStoreMode::kCount)] = {};
|
||||||
ID3D12PipelineState* edram_tile_sample_32bpp_pipeline_ = nullptr;
|
ID3D12PipelineState* edram_tile_sample_32bpp_pipeline_ = nullptr;
|
||||||
|
ID3D12PipelineState* edram_clear_32bpp_pipeline_ = nullptr;
|
||||||
|
ID3D12PipelineState* edram_clear_depth_float_pipeline_ = nullptr;
|
||||||
|
|
||||||
// 48 MB heaps backing used render targets resources, created when needed.
|
// 48 MB heaps backing used render targets resources, created when needed.
|
||||||
// 24 MB proved to be not enough to store a single render target occupying the
|
// 24 MB proved to be not enough to store a single render target occupying the
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
#define XE_EDRAM_WRITE_ONLY
|
||||||
|
#include "edram_load_store.hlsli"
|
||||||
|
|
||||||
|
// Clears a rectangle of 32-bit samples in the EDRAM buffer to
// xe_edram_clear_color32.
// Load4/Store4 aren't needed here, but 80x16 threads is over the limit, so each
// thread handles a horizontal pair of samples.
[numthreads(40, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Unpack the bounds - xy is left/top (inclusive), zw is right/bottom
  // (exclusive).
  uint4 clear_rect;
  clear_rect.xz = xe_edram_clear_rect & 0xFFFFu;
  clear_rect.yw = xe_edram_clear_rect >> 16u;
  uint2 sample_index = xe_thread_id.xy;
  sample_index.x *= 2u;
  // Test the two samples of the pair separately, so that when the left edge is
  // odd, the sample on the edge (the second one in its pair) is still cleared.
  bool row_inside =
      sample_index.y >= clear_rect.y && sample_index.y < clear_rect.w;
  bool first_sample_inside =
      sample_index.x >= clear_rect.x && sample_index.x < clear_rect.z;
  bool second_sample_inside =
      sample_index.x + 1u >= clear_rect.x && sample_index.x + 1u < clear_rect.z;
  [branch] if (!row_inside ||
               !(first_sample_inside || second_sample_inside)) {
    return;
  }
  uint2 tile_dword_index = xe_group_thread_id.xy;
  tile_dword_index.x *= 2u;
  uint edram_offset = XeEDRAMOffset(xe_group_id.xy, tile_dword_index);
  [branch] if (first_sample_inside) {
    xe_edram_load_store_dest.Store(edram_offset, xe_edram_clear_color32);
  }
  [branch] if (second_sample_inside) {
    xe_edram_load_store_dest.Store(edram_offset + 4u, xe_edram_clear_color32);
  }
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
#define XE_EDRAM_WRITE_ONLY
|
||||||
|
#include "edram_load_store.hlsli"
|
||||||
|
|
||||||
|
// Clears a rectangle of floating-point depth samples in the EDRAM buffer,
// writing both the 24-bit value and its 32-bit version.
// Load4/Store4 aren't needed here, but 80x16 threads is over the limit, so each
// thread handles a horizontal pair of samples.
[numthreads(40, 16, 1)]
void main(uint3 xe_group_id : SV_GroupID,
          uint3 xe_group_thread_id : SV_GroupThreadID,
          uint3 xe_thread_id : SV_DispatchThreadID) {
  // Unpack the bounds - xy is left/top (inclusive), zw is right/bottom
  // (exclusive).
  uint4 clear_rect;
  clear_rect.xz = xe_edram_clear_rect & 0xFFFFu;
  clear_rect.yw = xe_edram_clear_rect >> 16u;
  uint2 sample_index = xe_thread_id.xy;
  sample_index.x *= 2u;
  // Test the two samples of the pair separately, so that when the left edge is
  // odd, the sample on the edge (the second one in its pair) is still cleared.
  bool row_inside =
      sample_index.y >= clear_rect.y && sample_index.y < clear_rect.w;
  bool first_sample_inside =
      sample_index.x >= clear_rect.x && sample_index.x < clear_rect.z;
  bool second_sample_inside =
      sample_index.x + 1u >= clear_rect.x && sample_index.x + 1u < clear_rect.z;
  [branch] if (!row_inside ||
               !(first_sample_inside || second_sample_inside)) {
    return;
  }
  uint2 tile_dword_index = xe_group_thread_id.xy;
  tile_dword_index.x *= 2u;
  uint edram_offset = XeEDRAMOffset(xe_group_id.xy, tile_dword_index);
  // The 32-bit depth copy of each sample is 10485760 bytes (2048 * 1280 dwords)
  // after its 24-bit version.
  [branch] if (first_sample_inside) {
    // 24-bit depth.
    xe_edram_load_store_dest.Store(edram_offset, xe_edram_clear_depth24);
    // 32-bit depth (pre-converted on the CPU).
    xe_edram_load_store_dest.Store(edram_offset + 10485760u,
                                   xe_edram_clear_depth32);
  }
  [branch] if (second_sample_inside) {
    // 24-bit depth.
    xe_edram_load_store_dest.Store(edram_offset + 4u, xe_edram_clear_depth24);
    // 32-bit depth (pre-converted on the CPU).
    xe_edram_load_store_dest.Store(edram_offset + 10485764u,
                                   xe_edram_clear_depth32);
  }
}
|
|
@ -31,7 +31,18 @@ cbuffer XeEDRAMLoadStoreConstants : register(b0) {
|
||||||
// For 64 bits per pixel, it's 1 if need to swap 0:15 and 32:47.
|
// For 64 bits per pixel, it's 1 if need to swap 0:15 and 32:47.
|
||||||
#define xe_edram_tile_sample_dest_info (xe_edram_load_store_constants.w)
|
#define xe_edram_tile_sample_dest_info (xe_edram_load_store_constants.w)
|
||||||
|
|
||||||
|
// For clearing.
|
||||||
|
// Bounds of the cleared region (relative to EDRAM base), in samples: .x is the
|
||||||
|
// left/top corner, .y is right/bottom; X is in the lower 16 bits, Y in the upper.
|
||||||
|
#define xe_edram_clear_rect (xe_edram_load_store_constants.xy)
|
||||||
|
#define xe_edram_clear_color32 (xe_edram_load_store_constants.z)
|
||||||
|
#define xe_edram_clear_color64 (xe_edram_load_store_constants.zw)
|
||||||
|
#define xe_edram_clear_depth24 (xe_edram_load_store_constants.z)
|
||||||
|
#define xe_edram_clear_depth32 (xe_edram_load_store_constants.w)
|
||||||
|
|
||||||
|
#ifndef XE_EDRAM_WRITE_ONLY
|
||||||
ByteAddressBuffer xe_edram_load_store_source : register(t0);
|
ByteAddressBuffer xe_edram_load_store_source : register(t0);
|
||||||
|
#endif
|
||||||
RWByteAddressBuffer xe_edram_load_store_dest : register(u0);
|
RWByteAddressBuffer xe_edram_load_store_dest : register(u0);
|
||||||
|
|
||||||
uint XeEDRAMOffset(uint2 tile_index, uint2 tile_dword_index) {
|
uint XeEDRAMOffset(uint2 tile_index, uint2 tile_dword_index) {
|
||||||
|
|
Loading…
Reference in New Issue