[Vulkan] Truncate depth to float24 in EDRAM range ownership transfers and resolves by default

Truncation doesn't ruin the "greater or equal" depth test in subsequent rendering passes if precision is lost, unlike rounding to the nearest even.
This commit is contained in:
Triang3l 2022-06-22 13:25:06 +03:00
parent 0d8bd0e0c6
commit 4514050f55
4 changed files with 33 additions and 20 deletions

View File

@ -208,10 +208,11 @@ class SpirvShaderTranslator : public ShaderTranslator {
spv::Id ext_inst_glsl_std_450);
// Converts the depth value externally clamped to the representable [0, 2)
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the
// nearest even. If remap_from_0_to_0_5 is true, it's assumed that 0...1 is
// pre-remapped to 0...0.5 in the input.
// nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed
// that 0...1 is pre-remapped to 0...0.5 in the input.
static spv::Id PreClampedDepthTo20e4(spv::Builder& builder,
spv::Id f32_scalar,
bool round_to_nearest_even,
bool remap_from_0_to_0_5,
spv::Id ext_inst_glsl_std_450);
// Converts the 20e4 number in bits [f24_shift, f24_shift + 10) to a 32-bit

View File

@ -230,8 +230,8 @@ spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder,
}
spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4(
spv::Builder& builder, spv::Id f32_scalar, bool remap_from_0_to_0_5,
spv::Id ext_inst_glsl_std_450) {
spv::Builder& builder, spv::Id f32_scalar, bool round_to_nearest_even,
bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450) {
// CFloat24 from d3dref9.dll +
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// Assuming the value is already clamped to [0, 2) (in all places, the depth
@ -305,18 +305,20 @@ spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4(
builder.makeUintConstant(0x38800000 - (remap_bias << 23))),
denormal_biased_f32, normal_biased_f32);
// Build the 20e4 number rounding to the nearest even.
// ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF
return builder.createTriOp(
spv::OpBitFieldUExtract, type_uint,
builder.createBinOp(
spv::OpIAdd, type_uint,
builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
builder.makeUintConstant(3)),
builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(3),
builder.makeUintConstant(1))),
builder.makeUintConstant(3), builder.makeUintConstant(24));
// Build the 20e4 number rounding to the nearest even or towards zero.
if (round_to_nearest_even) {
// biased_f32 += 3 + ((biased_f32 >> 3) & 1)
biased_f32 = builder.createBinOp(
spv::OpIAdd, type_uint,
builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
builder.makeUintConstant(3)),
builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(3),
builder.makeUintConstant(1)));
}
return builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(3),
builder.makeUintConstant(24));
}
spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder,

View File

@ -416,6 +416,8 @@ bool VulkanRenderTargetCache::Initialize() {
// TODO(Triang3l): All paths (FSI).
depth_float24_round_ = cvars::depth_float24_round;
// TODO(Triang3l): Handle sampledImageIntegerSampleCounts 4 not supported in
// transfers.
if (cvars::native_2x_msaa) {
@ -3037,7 +3039,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, source_depth_float[i], true, ext_inst_glsl_std_450);
builder, source_depth_float[i], depth_float24_round(), true,
ext_inst_glsl_std_450);
} break;
}
// Merge depth and stencil.
@ -3353,7 +3356,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
packed = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, source_depth_float[0], true, ext_inst_glsl_std_450);
builder, source_depth_float[0], depth_float24_round(), true,
ext_inst_glsl_std_450);
} break;
}
if (mode.output == TransferOutput::kDepth) {
@ -3855,7 +3859,8 @@ VkShaderModule VulkanRenderTargetCache::GetTransferShader(
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
host_depth24 = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, host_depth32, true, ext_inst_glsl_std_450);
builder, host_depth32, depth_float24_round(), true,
ext_inst_glsl_std_450);
} break;
}
assert_true(host_depth24 != spv::NoResult);
@ -5548,7 +5553,8 @@ VkPipeline VulkanRenderTargetCache::GetDumpPipeline(DumpPipelineKey key) {
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
packed[0] = SpirvShaderTranslator::PreClampedDepthTo20e4(
builder, source_depth32, true, ext_inst_glsl_std_450);
builder, source_depth32, depth_float24_round(), true,
ext_inst_glsl_std_450);
} break;
}
id_vector_temp.clear();

View File

@ -128,6 +128,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
return last_update_framebuffer_;
}
// Whether depth is rounded to the nearest even (true) or truncated towards
// zero (false) when converted to 20e4 float24. The value is cached from
// cvars::depth_float24_round in Initialize() and passed as the
// round_to_nearest_even argument of
// SpirvShaderTranslator::PreClampedDepthTo20e4 when building the transfer and
// dump shaders.
bool depth_float24_round() const { return depth_float24_round_; }
bool msaa_2x_attachments_supported() const {
return msaa_2x_attachments_supported_;
}
@ -824,6 +826,8 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
bool gamma_render_target_as_srgb_ = false;
bool depth_float24_round_ = false;
bool msaa_2x_attachments_supported_ = false;
bool msaa_2x_no_attachments_supported_ = false;