Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2022-03-22 21:54:34 +03:00
commit 7048baaf21
4 changed files with 157 additions and 165 deletions

View File

@ -3516,7 +3516,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
if (source_is_color) {
switch (source_color_format) {
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
color_packed_in_r0x_and_r1x = true;
for (uint32_t i = 0; i < 2; ++i) {
a.OpMAd(dxbc::Dest::R(i), dxbc::Src::R(i), dxbc::Src::LF(255.0f),
@ -3528,9 +3528,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::R(i, dxbc::Src::kXXXX));
}
}
break;
} break;
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
color_packed_in_r0x_and_r1x = true;
for (uint32_t i = 0; i < 2; ++i) {
a.OpMAd(dxbc::Dest::R(i), dxbc::Src::R(i),
@ -3543,9 +3543,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::R(i, dxbc::Src::kXXXX));
}
}
break;
} break;
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
case xenos::ColorRenderTargetFormat::
k_2_10_10_10_FLOAT_AS_16_16_16_16: {
color_packed_in_r0x_and_r1x = true;
for (uint32_t i = 0; i < 2; ++i) {
// Float16 has a wider range for both color and alpha, also NaNs -
@ -3569,12 +3570,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::LU(30), dxbc::Src::R(i, dxbc::Src::kWWWW),
dxbc::Src::R(i, dxbc::Src::kXXXX));
}
break;
} break;
// All 64bpp formats, and all 16 bits per component formats, are
// represented as integers in ownership transfer for safe handling of
// NaNs and -32768 / -32767.
case xenos::ColorRenderTargetFormat::k_16_16:
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: {
if (dest_color_format ==
xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
for (uint32_t i = 0; i < 2; ++i) {
@ -3586,9 +3587,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
a.OpMov(dxbc::Dest::O(0, 0b0011), dxbc::Src::R(0));
a.OpMov(dxbc::Dest::O(0, 0b1100), dxbc::Src::R(1, 0b0100 << 4));
}
break;
} break;
case xenos::ColorRenderTargetFormat::k_16_16_16_16:
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: {
if (dest_color_format ==
xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
a.OpBFI(dxbc::Dest::O(0, 0b0011), dxbc::Src::LU(16),
@ -3597,11 +3598,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
} else {
a.OpMov(dxbc::Dest::O(0), dxbc::Src::R(1));
}
break;
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
} break;
case xenos::ColorRenderTargetFormat::k_32_FLOAT: {
color_packed_in_r0x_and_r1x = true;
break;
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT:
} break;
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: {
if (dest_color_format ==
xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
a.OpMov(dxbc::Dest::O(0, 0b0011), dxbc::Src::R(1));
@ -3609,14 +3610,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
a.OpUBFE(dxbc::Dest::O(0), dxbc::Src::LU(16),
dxbc::Src::LU(0, 16, 0, 16), dxbc::Src::R(1, 0b01010000));
}
break;
} break;
}
} else {
assert_not_zero(rs & kTransferUsedRootParameterDepthSRVBit);
color_packed_in_r0x_and_r1x = true;
for (uint32_t i = 0; i < 2; ++i) {
switch (source_depth_format) {
case xenos::DepthRenderTargetFormat::kD24S8:
case xenos::DepthRenderTargetFormat::kD24S8: {
// Round to the nearest even integer. This seems to be correct;
// adding +0.5 and rounding towards zero results in red instead of
// black in the 4D5307E6 clear shader.
@ -3626,12 +3627,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::R(i, dxbc::Src::kWWWW));
a.OpFToU(dxbc::Dest::R(i, 0b1000),
dxbc::Src::R(i, dxbc::Src::kWWWW));
break;
case xenos::DepthRenderTargetFormat::kD24FS8:
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
// Convert using r1.y as temporary.
DxbcShaderTranslator::PreClampedDepthTo20e4(a, i, 3, i, 3, 1, 1,
true);
break;
} break;
}
// Merge depth and stencil into r0/r1.x.
a.OpBFI(dxbc::Dest::R(i, 0b0001), dxbc::Src::LU(24), dxbc::Src::LU(8),
@ -3652,14 +3653,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
}
}
} else {
// Handle 32bpp color. If color_packed_in_r1x is true, a raw 32bpp color
// value was written, and common handling will be done.
// Handle a 32bpp destination (32bpp color, or depth / stencil). If
// color_packed_in_r1x is true, a raw 32bpp color value was written, and
// common handling will be done.
bool color_packed_in_r1x = false;
bool depth_loaded_in_guest_format = false;
if (source_is_color) {
switch (source_color_format) {
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
if (dest_is_stencil_bit) {
a.OpMAd(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
dxbc::Src::LF(255.0f), dxbc::Src::LF(0.5f));
@ -3706,9 +3708,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::R(1, dxbc::Src::kXXXX));
}
}
break;
} break;
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
if (dest_is_stencil_bit) {
a.OpMAd(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
dxbc::Src::LF(1023.0f), dxbc::Src::LF(0.5f));
@ -3732,9 +3734,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::R(1, dxbc::Src::kXXXX));
}
}
break;
} break;
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
case xenos::ColorRenderTargetFormat::
k_2_10_10_10_FLOAT_AS_16_16_16_16: {
if (dest_is_stencil_bit) {
DxbcShaderTranslator::UnclampedFloat32To7e3(a, 1, 0, 1, 0, 2, 0);
} else if (dest_is_color &&
@ -3767,11 +3770,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::LU(30), dxbc::Src::R(1, dxbc::Src::kWWWW),
dxbc::Src::R(1, dxbc::Src::kXXXX));
}
break;
} break;
case xenos::ColorRenderTargetFormat::k_16_16:
case xenos::ColorRenderTargetFormat::k_16_16_16_16:
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: {
// All 16 bits per component formats are represented as integers in
// ownership transfer for safe handling of NaNs and -32768 / -32767.
if (dest_is_stencil_bit) {
@ -3789,11 +3792,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::LU(16), dxbc::Src::R(1, dxbc::Src::kYYYY),
dxbc::Src::R(1, dxbc::Src::kXXXX));
}
break;
} break;
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT:
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: {
color_packed_in_r1x = true;
break;
} break;
}
} else if (rs & kTransferUsedRootParameterDepthSRVBit) {
if (dest_is_color || dest_depth_format != source_depth_format) {
@ -3801,7 +3804,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// format. Convert the depth within r1.w.
depth_loaded_in_guest_format = true;
switch (source_depth_format) {
case xenos::DepthRenderTargetFormat::kD24S8:
case xenos::DepthRenderTargetFormat::kD24S8: {
// Round to the nearest even integer. This seems to be correct;
// adding +0.5 and rounding towards zero results in red instead of
// black in the 4D5307E6 clear shader.
@ -3811,12 +3814,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::R(1, dxbc::Src::kWWWW));
a.OpFToU(dxbc::Dest::R(1, 0b1000),
dxbc::Src::R(1, dxbc::Src::kWWWW));
break;
case xenos::DepthRenderTargetFormat::kD24FS8:
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
// Convert using r1.y as temporary.
DxbcShaderTranslator::PreClampedDepthTo20e4(a, 1, 3, 1, 3, 1, 1,
true);
break;
} break;
}
if (dest_is_color) {
// Merge depth and stencil into r1.x for reinterpretation as color.
@ -3835,16 +3838,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
if (color_packed_in_r1x) {
switch (dest_color_format) {
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
a.OpUBFE(dxbc::Dest::R(1), dxbc::Src::LU(8),
dxbc::Src::LU(0, 8, 16, 24),
dxbc::Src::R(1, dxbc::Src::kXXXX));
a.OpUToF(dxbc::Dest::R(1), dxbc::Src::R(1));
a.OpMul(dxbc::Dest::O(0), dxbc::Src::R(1),
dxbc::Src::LF(1.0f / 255.0f));
break;
} break;
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
a.OpUBFE(dxbc::Dest::R(1), dxbc::Src::LU(10, 10, 10, 2),
dxbc::Src::LU(0, 10, 20, 30),
dxbc::Src::R(1, dxbc::Src::kXXXX));
@ -3852,10 +3855,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
a.OpMul(dxbc::Dest::O(0), dxbc::Src::R(1),
dxbc::Src::LF(1.0f / 1023.0f, 1.0f / 1023.0f,
1.0f / 1023.0f, 1.0f / 3.0f));
break;
} break;
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
case xenos::ColorRenderTargetFormat::
k_2_10_10_10_FLOAT_AS_16_16_16_16:
k_2_10_10_10_FLOAT_AS_16_16_16_16: {
// Color using r1.yz as temporary.
for (uint32_t i = 0; i < 3; ++i) {
DxbcShaderTranslator::Float7e3To32(a, dxbc::Dest::O(0, 1 << i),
@ -3869,21 +3872,21 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
a.OpMul(dxbc::Dest::O(0, 0b1000),
dxbc::Src::R(1, dxbc::Src::kWWWW),
dxbc::Src::LF(1.0f / 3.0f));
break;
} break;
case xenos::ColorRenderTargetFormat::k_16_16:
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: {
// All 16 bits per component formats are represented as integers
// in ownership transfer for safe handling of NaNs and
// -32768 / -32767.
a.OpUBFE(dxbc::Dest::O(0, 0b0011), dxbc::Src::LU(16),
dxbc::Src::LU(0, 16, 0, 0),
dxbc::Src::R(1, dxbc::Src::kXXXX));
break;
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
} break;
case xenos::ColorRenderTargetFormat::k_32_FLOAT: {
// Already as a 32-bit value.
a.OpMov(dxbc::Dest::O(0, 0b0001),
dxbc::Src::R(1, dxbc::Src::kXXXX));
break;
} break;
default:
// A 64bpp format (handled separately) or an invalid one.
assert_unhandled_case(dest_color_format);
@ -4178,7 +4181,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// using r0.z as a temporary and check if it matches the value in
// the currently owning guest render target.
switch (dest_depth_format) {
case xenos::DepthRenderTargetFormat::kD24S8:
case xenos::DepthRenderTargetFormat::kD24S8: {
// Round to the nearest even integer. This seems to be
// correct; adding +0.5 and rounding towards zero results in red
// instead of black in the 4D5307E6 clear shader.
@ -4189,11 +4192,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::R(0, dxbc::Src::kYYYY));
a.OpFToU(dxbc::Dest::R(0, 0b0010),
dxbc::Src::R(0, dxbc::Src::kYYYY));
break;
case xenos::DepthRenderTargetFormat::kD24FS8:
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
DxbcShaderTranslator::PreClampedDepthTo20e4(a, 0, 1, 0, 0, 0, 2,
true);
break;
} break;
}
a.OpIEq(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::R(1, dxbc::Src::kWWWW));
@ -4206,7 +4209,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
}
// Convert using r0.x as a temporary.
switch (dest_depth_format) {
case xenos::DepthRenderTargetFormat::kD24S8:
case xenos::DepthRenderTargetFormat::kD24S8: {
// Multiplying by 1.0 / 0xFFFFFF produces an incorrect result (for
// 0xC00000, for instance - which is 2_10_10_10 clear to 0001) -
// rescale from 0...0xFFFFFF to 0...0x1000000 doing what true
@ -4223,11 +4226,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
a.OpMul(dxbc::Dest::R(1, 0b1000),
dxbc::Src::R(1, dxbc::Src::kWWWW),
dxbc::Src::LF(1.0f / float(1 << 24)));
break;
case xenos::DepthRenderTargetFormat::kD24FS8:
} break;
case xenos::DepthRenderTargetFormat::kD24FS8: {
DxbcShaderTranslator::Depth20e4To32(a, dxbc::Dest::R(1, 0b1000),
1, 3, 0, 1, 3, 0, 0, true);
break;
} break;
}
// Host depth is different, or not available - convert the guest depth
// to the destination format.
@ -4388,14 +4391,8 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
D3D12_STENCIL_OP_REPLACE;
pipeline_desc.DepthStencilState.FrontFace.StencilFunc =
D3D12_COMPARISON_FUNC_ALWAYS;
pipeline_desc.DepthStencilState.BackFace.StencilFailOp =
D3D12_STENCIL_OP_KEEP;
pipeline_desc.DepthStencilState.BackFace.StencilDepthFailOp =
D3D12_STENCIL_OP_KEEP;
pipeline_desc.DepthStencilState.BackFace.StencilPassOp =
D3D12_STENCIL_OP_REPLACE;
pipeline_desc.DepthStencilState.BackFace.StencilFunc =
D3D12_COMPARISON_FUNC_ALWAYS;
pipeline_desc.DepthStencilState.BackFace =
pipeline_desc.DepthStencilState.FrontFace;
pipeline_desc.DSVFormat = GetDepthDSVDXGIFormat(dest_depth_format);
// Even if creation fails, still store the null pointers not to try to
// create again.
@ -4448,14 +4445,8 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// stencil being different.
pipeline_desc.DepthStencilState.FrontFace.StencilFunc =
D3D12_COMPARISON_FUNC_ALWAYS;
pipeline_desc.DepthStencilState.BackFace.StencilFailOp =
D3D12_STENCIL_OP_KEEP;
pipeline_desc.DepthStencilState.BackFace.StencilDepthFailOp =
D3D12_STENCIL_OP_REPLACE;
pipeline_desc.DepthStencilState.BackFace.StencilPassOp =
D3D12_STENCIL_OP_REPLACE;
pipeline_desc.DepthStencilState.BackFace.StencilFunc =
D3D12_COMPARISON_FUNC_ALWAYS;
pipeline_desc.DepthStencilState.BackFace =
pipeline_desc.DepthStencilState.FrontFace;
}
pipeline_desc.DSVFormat = GetDepthDSVDXGIFormat(dest_depth_format);
}
@ -5285,13 +5276,15 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
if (transfer_root_parameters_used &
kTransferUsedRootParameterHostDepthAddressConstantBit) {
assert_not_null(host_depth_source_d3d12_rt);
RenderTargetKey host_depth_source_rt_key =
host_depth_source_d3d12_rt->key();
TransferAddressConstant host_depth_address_constant;
host_depth_address_constant.dest_pitch = dest_pitch_tiles;
host_depth_address_constant.source_pitch =
host_depth_source_d3d12_rt->key().GetPitchTiles();
host_depth_source_rt_key.GetPitchTiles();
host_depth_address_constant.source_to_dest =
int32_t(dest_rt_key.base_tiles) -
int32_t(host_depth_source_d3d12_rt->key().base_tiles);
int32_t(host_depth_source_rt_key.base_tiles);
if (last_host_depth_address_constant != host_depth_address_constant) {
last_host_depth_address_constant = host_depth_address_constant;
transfer_root_parameters_set &=
@ -5982,7 +5975,8 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
a.OpDclInput(dxbc::Dest::VThreadID(0b0011));
// r0 - addressing before the load, then addressing and conversion scratch
// r1 - addressing scratch before the load, then data
a.OpDclTemps(2);
stat.temp_register_count = 2;
a.OpDclTemps(stat.temp_register_count);
// There's no strict dependency on the group size here, for simplicity of
// calculations especially with resolution scaling, dividing manually (as the
// group size is not unlimited). The only restriction is that an integer
@ -6367,8 +6361,6 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
// Float16 has a wider range for both color and alpha, also NaNs.
// Color - clamp and convert.
stat.temp_register_count =
std::max(stat.temp_register_count, uint32_t(3));
// Convert red in r1.x to the result register r1.x - the same, but
// UnclampedFloat32To7e3 allows that - using r0.x as a temporary.
DxbcShaderTranslator::UnclampedFloat32To7e3(a, 1, 0, 1, 0, 0, 0);

View File

@ -126,95 +126,6 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
xenos::DepthRenderTargetFormat format);
protected:
// Host render target: bundles the D3D12 resource with its CPU descriptors,
// the last recorded resource state, and scratch indices that operations like
// transfers and dumps store on the render target temporarily.
class D3D12RenderTarget final : public RenderTarget {
public:
// descriptor_draw_srgb is only used for k_8_8_8_8 render targets when host
// sRGB (gamma_render_target_as_srgb) is used. descriptor_load_separate is
// present when the DXGI formats are different for drawing and bit-exact
// loading (for NaN pattern preservation across EDRAM tile ownership transfers
// in floating-point formats, and to distinguish between two -1
// representations in snorm formats).
// All descriptors are taken by rvalue reference and moved into the members;
// the resource pointer is stored in a ComPtr, and resource_state becomes the
// initially recorded state.
D3D12RenderTarget(
RenderTargetKey key, ID3D12Resource* resource,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw_srgb,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&&
descriptor_load_separate,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv_stencil,
D3D12_RESOURCE_STATES resource_state)
: RenderTarget(key),
resource_(resource),
descriptor_draw_(std::move(descriptor_draw)),
descriptor_draw_srgb_(std::move(descriptor_draw_srgb)),
descriptor_load_separate_(std::move(descriptor_load_separate)),
descriptor_srv_(std::move(descriptor_srv)),
descriptor_srv_stencil_(std::move(descriptor_srv_stencil)),
resource_state_(resource_state) {}
// Returns the raw resource pointer held by the ComPtr member.
ID3D12Resource* resource() const { return resource_.Get(); }
// Read-only accessors for the descriptors passed to the constructor.
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw()
const {
return descriptor_draw_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw_srgb()
const {
return descriptor_draw_srgb_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_srv()
const {
return descriptor_srv_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
descriptor_srv_stencil() const {
return descriptor_srv_stencil_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
descriptor_load_separate() const {
return descriptor_load_separate_;
}
// Records new_state as the current state and returns the state that was
// recorded before the call. Only the stored value is updated here.
D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) {
D3D12_RESOURCE_STATES old_state = resource_state_;
resource_state_ = new_state;
return old_state;
}
// Getters and setters for the temporary indices. The SRV descriptor indices
// are UINT32_MAX and the sort index is 0 until a setter is called.
uint32_t temporary_srv_descriptor_index() const {
return temporary_srv_descriptor_index_;
}
void SetTemporarySRVDescriptorIndex(uint32_t index) {
temporary_srv_descriptor_index_ = index;
}
uint32_t temporary_srv_descriptor_index_stencil() const {
return temporary_srv_descriptor_index_stencil_;
}
void SetTemporarySRVDescriptorIndexStencil(uint32_t index) {
temporary_srv_descriptor_index_stencil_ = index;
}
uint32_t temporary_sort_index() const { return temporary_sort_index_; }
void SetTemporarySortIndex(uint32_t index) {
temporary_sort_index_ = index;
}
private:
Microsoft::WRL::ComPtr<ID3D12Resource> resource_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_srgb_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_load_separate_;
// Texture SRV non-shader-visible descriptors, to prepare shader-visible
// descriptors faster, by copying rather than by creating every time.
// TODO(Triang3l): With bindless resources, persistently store them in the
// heap.
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_stencil_;
// Last state recorded through the constructor or SetResourceState.
D3D12_RESOURCE_STATES resource_state_;
// Temporary storage for indices in operations like transfers and dumps.
uint32_t temporary_srv_descriptor_index_ = UINT32_MAX;
uint32_t temporary_srv_descriptor_index_stencil_ = UINT32_MAX;
uint32_t temporary_sort_index_ = 0;
};
// Reports the maximum render target width as the Direct3D 12 2D texture
// dimension limit constant.
uint32_t GetMaxRenderTargetWidth() const override {
return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
}
@ -310,6 +221,95 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
// For host render targets.
// Bundles the D3D12 resource with its CPU descriptors, the last recorded
// resource state, and scratch indices that operations like transfers and
// dumps store on the render target temporarily.
class D3D12RenderTarget final : public RenderTarget {
public:
// descriptor_draw_srgb is only used for k_8_8_8_8 render targets when host
// sRGB (gamma_render_target_as_srgb) is used. descriptor_load_separate is
// present when the DXGI formats are different for drawing and bit-exact
// loading (for NaN pattern preservation across EDRAM tile ownership transfers
// in floating-point formats, and to distinguish between two -1
// representations in snorm formats).
// All descriptors are taken by rvalue reference and moved into the members;
// the resource pointer is stored in a ComPtr, and resource_state becomes the
// initially recorded state.
D3D12RenderTarget(
RenderTargetKey key, ID3D12Resource* resource,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw_srgb,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&&
descriptor_load_separate,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv,
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv_stencil,
D3D12_RESOURCE_STATES resource_state)
: RenderTarget(key),
resource_(resource),
descriptor_draw_(std::move(descriptor_draw)),
descriptor_draw_srgb_(std::move(descriptor_draw_srgb)),
descriptor_load_separate_(std::move(descriptor_load_separate)),
descriptor_srv_(std::move(descriptor_srv)),
descriptor_srv_stencil_(std::move(descriptor_srv_stencil)),
resource_state_(resource_state) {}
// Returns the raw resource pointer held by the ComPtr member.
ID3D12Resource* resource() const { return resource_.Get(); }
// Read-only accessors for the descriptors passed to the constructor.
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw()
const {
return descriptor_draw_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw_srgb()
const {
return descriptor_draw_srgb_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_srv()
const {
return descriptor_srv_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
descriptor_srv_stencil() const {
return descriptor_srv_stencil_;
}
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
descriptor_load_separate() const {
return descriptor_load_separate_;
}
// Records new_state as the current state and returns the state that was
// recorded before the call. Only the stored value is updated here.
D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) {
D3D12_RESOURCE_STATES old_state = resource_state_;
resource_state_ = new_state;
return old_state;
}
// Getters and setters for the temporary indices. The SRV descriptor indices
// are UINT32_MAX and the sort index is 0 until a setter is called.
uint32_t temporary_srv_descriptor_index() const {
return temporary_srv_descriptor_index_;
}
void SetTemporarySRVDescriptorIndex(uint32_t index) {
temporary_srv_descriptor_index_ = index;
}
uint32_t temporary_srv_descriptor_index_stencil() const {
return temporary_srv_descriptor_index_stencil_;
}
void SetTemporarySRVDescriptorIndexStencil(uint32_t index) {
temporary_srv_descriptor_index_stencil_ = index;
}
uint32_t temporary_sort_index() const { return temporary_sort_index_; }
void SetTemporarySortIndex(uint32_t index) {
temporary_sort_index_ = index;
}
private:
Microsoft::WRL::ComPtr<ID3D12Resource> resource_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_srgb_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_load_separate_;
// Texture SRV non-shader-visible descriptors, to prepare shader-visible
// descriptors faster, by copying rather than by creating every time.
// TODO(Triang3l): With bindless resources, persistently store them in the
// heap.
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_;
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_stencil_;
// Last state recorded through the constructor or SetResourceState.
D3D12_RESOURCE_STATES resource_state_;
// Temporary storage for indices in operations like transfers and dumps.
uint32_t temporary_srv_descriptor_index_ = UINT32_MAX;
uint32_t temporary_srv_descriptor_index_stencil_ = UINT32_MAX;
uint32_t temporary_sort_index_ = 0;
};
enum TransferCBVRegister : uint32_t {
kTransferCBVRegisterStencilMask,
kTransferCBVRegisterAddress,
@ -438,7 +438,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
// Last bits because this affects the root signature - after sorting, only
// change it as few times as possible. Depth buffers have an additional
// depth SRV.
// stencil SRV.
static_assert(size_t(TransferMode::kCount) <= (size_t(1) << 3));
TransferMode mode : 3;
};

View File

@ -536,7 +536,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the
// nearest even. Source and destination may be the same, temporary must be
// different than both. If remap_from_0_to_0_5 is true, it's assumed that
// 0...1 is pre-remapped to 0...0.5 on the input.
// 0...1 is pre-remapped to 0...0.5 in the input.
static void PreClampedDepthTo20e4(
dxbc::Assembler& a, uint32_t f24_temp, uint32_t f24_temp_component,
uint32_t f32_temp, uint32_t f32_temp_component, uint32_t temp_temp,

View File

@ -3140,7 +3140,7 @@ void DxbcShaderTranslator::PreClampedFloat32To7e3(
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// Assuming the color is already clamped to [0, 31.875].
// Check if the number is too small to be represented as normalized 20e4.
// Check if the number is too small to be represented as normalized 7e3.
// temp = f32 < 2^-2
a.OpULT(temp_dest, f32_src, dxbc::Src::LU(0x3E800000));
// Handle denormalized numbers separately.