[Vulkan] Truncate depth to float24 in EDRAM range ownership transfers and resolves by default

Unlike rounding to the nearest even, truncation doesn't ruin the "greater or equal" depth test in subsequent rendering passes if precision is lost.
This commit is contained in:
Triang3l 2022-06-22 13:25:06 +03:00
parent 0d8bd0e0c6
commit 4514050f55
4 changed files with 33 additions and 20 deletions

View File

@ -208,10 +208,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id ext_inst_glsl_std_450); spv::Id ext_inst_glsl_std_450);
// Converts the depth value externally clamped to the representable [0, 2) // Converts the depth value externally clamped to the representable [0, 2)
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the // range to 20e4 floating point, with zeros in bits 24:31, rounding to the
// nearest even. If remap_from_0_to_0_5 is true, it's assumed that 0...1 is // nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed
// pre-remapped to 0...0.5 in the input. // that 0...1 is pre-remapped to 0...0.5 in the input.
static spv::Id PreClampedDepthTo20e4(spv::Builder& builder, static spv::Id PreClampedDepthTo20e4(spv::Builder& builder,
spv::Id f32_scalar, spv::Id f32_scalar,
bool round_to_nearest_even,
bool remap_from_0_to_0_5, bool remap_from_0_to_0_5,
spv::Id ext_inst_glsl_std_450); spv::Id ext_inst_glsl_std_450);
// Converts the 20e4 number in bits [f24_shift, f24_shift + 10) to a 32-bit // Converts the 20e4 number in bits [f24_shift, f24_shift + 10) to a 32-bit

View File

@ -230,8 +230,8 @@ spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder,
} }
spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4( spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4(
spv::Builder& builder, spv::Id f32_scalar, bool remap_from_0_to_0_5, spv::Builder& builder, spv::Id f32_scalar, bool round_to_nearest_even,
spv::Id ext_inst_glsl_std_450) { bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450) {
// CFloat24 from d3dref9.dll + // CFloat24 from d3dref9.dll +
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// Assuming the value is already clamped to [0, 2) (in all places, the depth // Assuming the value is already clamped to [0, 2) (in all places, the depth
@ -305,18 +305,20 @@ spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4(
builder.makeUintConstant(0x38800000 - (remap_bias << 23))), builder.makeUintConstant(0x38800000 - (remap_bias << 23))),
denormal_biased_f32, normal_biased_f32); denormal_biased_f32, normal_biased_f32);
// Build the 20e4 number rounding to the nearest even. // Build the 20e4 number rounding to the nearest even or towards zero.
// ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF if (round_to_nearest_even) {
return builder.createTriOp( // biased_f32 += 3 + ((biased_f32 >> 3) & 1)
spv::OpBitFieldUExtract, type_uint, biased_f32 = builder.createBinOp(
builder.createBinOp(
spv::OpIAdd, type_uint, spv::OpIAdd, type_uint,
builder.createBinOp(spv::OpIAdd, type_uint, biased_f32, builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
builder.makeUintConstant(3)), builder.makeUintConstant(3)),
builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32, builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(3), builder.makeUintConstant(3),
builder.makeUintConstant(1))), builder.makeUintConstant(1)));
builder.makeUintConstant(3), builder.makeUintConstant(24)); }
return builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(3),
builder.makeUintConstant(24));
} }
spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder, spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder,

View File

@ -416,6 +416,8 @@ bool VulkanRenderTargetCache::Initialize() {
// TODO(Triang3l): All paths (FSI). // TODO(Triang3l): All paths (FSI).
depth_float24_round_ = cvars::depth_float24_round;
// TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in // TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in
// transfers. // transfers.
if (cvars::native_2x_msaa) { if (cvars::native_2x_msaa) {
@ -3037,7 +3039,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
} break; } break;
case xenos::DepthRenderTargetFormat::kD24FS8: { case xenos::DepthRenderTargetFormat::kD24FS8: {
depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, source_depth_float[i], true, ext_inst_glsl_std_450); builder, source_depth_float[i], depth_float24_round(), true,
ext_inst_glsl_std_450);
} break; } break;
} }
// Merge depth and stencil. // Merge depth and stencil.
@ -3353,7 +3356,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
} break; } break;
case xenos::DepthRenderTargetFormat::kD24FS8: { case xenos::DepthRenderTargetFormat::kD24FS8: {
packed = SpirvShaderTranslator::PreClampedDepthTo20e4( packed = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, source_depth_float[0], true, ext_inst_glsl_std_450); builder, source_depth_float[0], depth_float24_round(), true,
ext_inst_glsl_std_450);
} break; } break;
} }
if (mode.output == TransferOutput::kDepth) { if (mode.output == TransferOutput::kDepth) {
@ -3855,7 +3859,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
} break; } break;
case xenos::DepthRenderTargetFormat::kD24FS8: { case xenos::DepthRenderTargetFormat::kD24FS8: {
host_depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4( host_depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, host_depth32, true, ext_inst_glsl_std_450); builder, host_depth32, depth_float24_round(), true,
ext_inst_glsl_std_450);
} break; } break;
} }
assert_true(host_depth24 != spv::NoResult); assert_true(host_depth24 != spv::NoResult);
@ -5548,7 +5553,8 @@ VkPipeline VulkanRenderTargetCache::GetDumpPipeline(DumpPipelineKey key) {
} break; } break;
case xenos::DepthRenderTargetFormat::kD24FS8: { case xenos::DepthRenderTargetFormat::kD24FS8: {
packed[0] = SpirvShaderTranslator::PreClampedDepthTo20e4( packed[0] = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, source_depth32, true, ext_inst_glsl_std_450); builder, source_depth32, depth_float24_round(), true,
ext_inst_glsl_std_450);
} break; } break;
} }
id_vector_temp.clear(); id_vector_temp.clear();

View File

@ -128,6 +128,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
return last_update_framebuffer_; return last_update_framebuffer_;
} }
// Whether 32-bit float depth is rounded to the nearest even when converted to
// 20e4 (float24) in transfer and dump shaders; when false, the conversion
// truncates (rounds towards zero) instead. Latched from
// cvars::depth_float24_round in Initialize().
bool depth_float24_round() const { return depth_float24_round_; }
bool msaa_2x_attachments_supported() const { bool msaa_2x_attachments_supported() const {
return msaa_2x_attachments_supported_; return msaa_2x_attachments_supported_;
} }
@ -824,6 +826,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
bool gamma_render_target_as_srgb_ = false; bool gamma_render_target_as_srgb_ = false;
bool depth_float24_round_ = false;
bool msaa_2x_attachments_supported_ = false; bool msaa_2x_attachments_supported_ = false;
bool msaa_2x_no_attachments_supported_ = false; bool msaa_2x_no_attachments_supported_ = false;