Merge branch 'master' into vulkan
This commit is contained in:
commit
7048baaf21
|
@ -3516,7 +3516,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
if (source_is_color) {
|
||||
switch (source_color_format) {
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
|
||||
color_packed_in_r0x_and_r1x = true;
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
a.OpMAd(dxbc::Dest::R(i), dxbc::Src::R(i), dxbc::Src::LF(255.0f),
|
||||
|
@ -3528,9 +3528,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::R(i, dxbc::Src::kXXXX));
|
||||
}
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
|
||||
color_packed_in_r0x_and_r1x = true;
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
a.OpMAd(dxbc::Dest::R(i), dxbc::Src::R(i),
|
||||
|
@ -3543,9 +3543,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::R(i, dxbc::Src::kXXXX));
|
||||
}
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
|
||||
case xenos::ColorRenderTargetFormat::
|
||||
k_2_10_10_10_FLOAT_AS_16_16_16_16: {
|
||||
color_packed_in_r0x_and_r1x = true;
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
// Float16 has a wider range for both color and alpha, also NaNs -
|
||||
|
@ -3569,12 +3570,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::LU(30), dxbc::Src::R(i, dxbc::Src::kWWWW),
|
||||
dxbc::Src::R(i, dxbc::Src::kXXXX));
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
// All 64bpp formats, and all 16 bits per component formats, are
|
||||
// represented as integers in ownership transfer for safe handling of
|
||||
// NaNs and -32768 / -32767.
|
||||
case xenos::ColorRenderTargetFormat::k_16_16:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: {
|
||||
if (dest_color_format ==
|
||||
xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
|
@ -3586,9 +3587,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
a.OpMov(dxbc::Dest::O(0, 0b0011), dxbc::Src::R(0));
|
||||
a.OpMov(dxbc::Dest::O(0, 0b1100), dxbc::Src::R(1, 0b0100 << 4));
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: {
|
||||
if (dest_color_format ==
|
||||
xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
|
||||
a.OpBFI(dxbc::Dest::O(0, 0b0011), dxbc::Src::LU(16),
|
||||
|
@ -3597,11 +3598,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
} else {
|
||||
a.OpMov(dxbc::Dest::O(0), dxbc::Src::R(1));
|
||||
}
|
||||
break;
|
||||
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_32_FLOAT: {
|
||||
color_packed_in_r0x_and_r1x = true;
|
||||
break;
|
||||
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT:
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: {
|
||||
if (dest_color_format ==
|
||||
xenos::ColorRenderTargetFormat::k_32_32_FLOAT) {
|
||||
a.OpMov(dxbc::Dest::O(0, 0b0011), dxbc::Src::R(1));
|
||||
|
@ -3609,14 +3610,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
a.OpUBFE(dxbc::Dest::O(0), dxbc::Src::LU(16),
|
||||
dxbc::Src::LU(0, 16, 0, 16), dxbc::Src::R(1, 0b01010000));
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
}
|
||||
} else {
|
||||
assert_not_zero(rs & kTransferUsedRootParameterDepthSRVBit);
|
||||
color_packed_in_r0x_and_r1x = true;
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
switch (source_depth_format) {
|
||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||
case xenos::DepthRenderTargetFormat::kD24S8: {
|
||||
// Round to the nearest even integer. This seems to be the correct way:
|
||||
// adding +0.5 and rounding towards zero results in red instead of
|
||||
// black in the 4D5307E6 clear shader.
|
||||
|
@ -3626,12 +3627,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::R(i, dxbc::Src::kWWWW));
|
||||
a.OpFToU(dxbc::Dest::R(i, 0b1000),
|
||||
dxbc::Src::R(i, dxbc::Src::kWWWW));
|
||||
break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8:
|
||||
} break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8: {
|
||||
// Convert using r1.y as temporary.
|
||||
DxbcShaderTranslator::PreClampedDepthTo20e4(a, i, 3, i, 3, 1, 1,
|
||||
true);
|
||||
break;
|
||||
} break;
|
||||
}
|
||||
// Merge depth and stencil into r0/r1.x.
|
||||
a.OpBFI(dxbc::Dest::R(i, 0b0001), dxbc::Src::LU(24), dxbc::Src::LU(8),
|
||||
|
@ -3652,14 +3653,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// Handle 32bpp color. If color_packed_in_r1x is true, a raw 32bpp color
|
||||
// value was written, and common handling will be done.
|
||||
// Handle a 32bpp destination (32bpp color, or depth / stencil). If
|
||||
// color_packed_in_r1x is true, a raw 32bpp color value was written, and
|
||||
// common handling will be done.
|
||||
bool color_packed_in_r1x = false;
|
||||
bool depth_loaded_in_guest_format = false;
|
||||
if (source_is_color) {
|
||||
switch (source_color_format) {
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
|
||||
if (dest_is_stencil_bit) {
|
||||
a.OpMAd(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LF(255.0f), dxbc::Src::LF(0.5f));
|
||||
|
@ -3706,9 +3708,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
}
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
|
||||
if (dest_is_stencil_bit) {
|
||||
a.OpMAd(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LF(1023.0f), dxbc::Src::LF(0.5f));
|
||||
|
@ -3732,9 +3734,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
}
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
|
||||
case xenos::ColorRenderTargetFormat::
|
||||
k_2_10_10_10_FLOAT_AS_16_16_16_16: {
|
||||
if (dest_is_stencil_bit) {
|
||||
DxbcShaderTranslator::UnclampedFloat32To7e3(a, 1, 0, 1, 0, 2, 0);
|
||||
} else if (dest_is_color &&
|
||||
|
@ -3767,11 +3770,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::LU(30), dxbc::Src::R(1, dxbc::Src::kWWWW),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_16_16:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT: {
|
||||
// All 16 bits per component formats are represented as integers in
|
||||
// ownership transfer for safe handling of NaNs and -32768 / -32767.
|
||||
if (dest_is_stencil_bit) {
|
||||
|
@ -3789,11 +3792,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::LU(16), dxbc::Src::R(1, dxbc::Src::kYYYY),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
}
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_32_32_FLOAT: {
|
||||
color_packed_in_r1x = true;
|
||||
break;
|
||||
} break;
|
||||
}
|
||||
} else if (rs & kTransferUsedRootParameterDepthSRVBit) {
|
||||
if (dest_is_color || dest_depth_format != source_depth_format) {
|
||||
|
@ -3801,7 +3804,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// format. Convert the depth within r1.w.
|
||||
depth_loaded_in_guest_format = true;
|
||||
switch (source_depth_format) {
|
||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||
case xenos::DepthRenderTargetFormat::kD24S8: {
|
||||
// Round to the nearest even integer. This seems to be the correct way:
|
||||
// adding +0.5 and rounding towards zero results in red instead of
|
||||
// black in the 4D5307E6 clear shader.
|
||||
|
@ -3811,12 +3814,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::R(1, dxbc::Src::kWWWW));
|
||||
a.OpFToU(dxbc::Dest::R(1, 0b1000),
|
||||
dxbc::Src::R(1, dxbc::Src::kWWWW));
|
||||
break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8:
|
||||
} break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8: {
|
||||
// Convert using r1.y as temporary.
|
||||
DxbcShaderTranslator::PreClampedDepthTo20e4(a, 1, 3, 1, 3, 1, 1,
|
||||
true);
|
||||
break;
|
||||
} break;
|
||||
}
|
||||
if (dest_is_color) {
|
||||
// Merge depth and stencil into r1.x for reinterpretation as color.
|
||||
|
@ -3835,16 +3838,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
if (color_packed_in_r1x) {
|
||||
switch (dest_color_format) {
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8:
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
|
||||
case xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA: {
|
||||
a.OpUBFE(dxbc::Dest::R(1), dxbc::Src::LU(8),
|
||||
dxbc::Src::LU(0, 8, 16, 24),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
a.OpUToF(dxbc::Dest::R(1), dxbc::Src::R(1));
|
||||
a.OpMul(dxbc::Dest::O(0), dxbc::Src::R(1),
|
||||
dxbc::Src::LF(1.0f / 255.0f));
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10:
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10: {
|
||||
a.OpUBFE(dxbc::Dest::R(1), dxbc::Src::LU(10, 10, 10, 2),
|
||||
dxbc::Src::LU(0, 10, 20, 30),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
|
@ -3852,10 +3855,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
a.OpMul(dxbc::Dest::O(0), dxbc::Src::R(1),
|
||||
dxbc::Src::LF(1.0f / 1023.0f, 1.0f / 1023.0f,
|
||||
1.0f / 1023.0f, 1.0f / 3.0f));
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::
|
||||
k_2_10_10_10_FLOAT_AS_16_16_16_16:
|
||||
k_2_10_10_10_FLOAT_AS_16_16_16_16: {
|
||||
// Color using r1.yz as temporary.
|
||||
for (uint32_t i = 0; i < 3; ++i) {
|
||||
DxbcShaderTranslator::Float7e3To32(a, dxbc::Dest::O(0, 1 << i),
|
||||
|
@ -3869,21 +3872,21 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
a.OpMul(dxbc::Dest::O(0, 0b1000),
|
||||
dxbc::Src::R(1, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LF(1.0f / 3.0f));
|
||||
break;
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_16_16:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT:
|
||||
case xenos::ColorRenderTargetFormat::k_16_16_FLOAT: {
|
||||
// All 16 bits per component formats are represented as integers
|
||||
// in ownership transfer for safe handling of NaNs and
|
||||
// -32768 / -32767.
|
||||
a.OpUBFE(dxbc::Dest::O(0, 0b0011), dxbc::Src::LU(16),
|
||||
dxbc::Src::LU(0, 16, 0, 0),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
break;
|
||||
case xenos::ColorRenderTargetFormat::k_32_FLOAT:
|
||||
} break;
|
||||
case xenos::ColorRenderTargetFormat::k_32_FLOAT: {
|
||||
// Already as a 32-bit value.
|
||||
a.OpMov(dxbc::Dest::O(0, 0b0001),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX));
|
||||
break;
|
||||
} break;
|
||||
default:
|
||||
// A 64bpp format (handled separately) or an invalid one.
|
||||
assert_unhandled_case(dest_color_format);
|
||||
|
@ -4178,7 +4181,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// using r0.z as a temporary and check if it matches the value in
|
||||
// the currently owning guest render target.
|
||||
switch (dest_depth_format) {
|
||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||
case xenos::DepthRenderTargetFormat::kD24S8: {
|
||||
// Round to the nearest even integer. This seems to be the
|
||||
// correct way: adding +0.5 and rounding towards zero results in red
|
||||
// instead of black in the 4D5307E6 clear shader.
|
||||
|
@ -4189,11 +4192,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
dxbc::Src::R(0, dxbc::Src::kYYYY));
|
||||
a.OpFToU(dxbc::Dest::R(0, 0b0010),
|
||||
dxbc::Src::R(0, dxbc::Src::kYYYY));
|
||||
break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8:
|
||||
} break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8: {
|
||||
DxbcShaderTranslator::PreClampedDepthTo20e4(a, 0, 1, 0, 0, 0, 2,
|
||||
true);
|
||||
break;
|
||||
} break;
|
||||
}
|
||||
a.OpIEq(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(0, dxbc::Src::kYYYY),
|
||||
dxbc::Src::R(1, dxbc::Src::kWWWW));
|
||||
|
@ -4206,7 +4209,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
}
|
||||
// Convert using r0.x as a temporary.
|
||||
switch (dest_depth_format) {
|
||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||
case xenos::DepthRenderTargetFormat::kD24S8: {
|
||||
// Multiplying by 1.0 / 0xFFFFFF produces an incorrect result (for
|
||||
// 0xC00000, for instance - which is 2_10_10_10 clear to 0001) -
|
||||
// rescale from 0...0xFFFFFF to 0...0x1000000 doing what true
|
||||
|
@ -4223,11 +4226,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
a.OpMul(dxbc::Dest::R(1, 0b1000),
|
||||
dxbc::Src::R(1, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LF(1.0f / float(1 << 24)));
|
||||
break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8:
|
||||
} break;
|
||||
case xenos::DepthRenderTargetFormat::kD24FS8: {
|
||||
DxbcShaderTranslator::Depth20e4To32(a, dxbc::Dest::R(1, 0b1000),
|
||||
1, 3, 0, 1, 3, 0, 0, true);
|
||||
break;
|
||||
} break;
|
||||
}
|
||||
// Host depth is different, or not available - convert the guest depth
|
||||
// to the destination format.
|
||||
|
@ -4388,14 +4391,8 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
D3D12_STENCIL_OP_REPLACE;
|
||||
pipeline_desc.DepthStencilState.FrontFace.StencilFunc =
|
||||
D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilFailOp =
|
||||
D3D12_STENCIL_OP_KEEP;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilDepthFailOp =
|
||||
D3D12_STENCIL_OP_KEEP;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilPassOp =
|
||||
D3D12_STENCIL_OP_REPLACE;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilFunc =
|
||||
D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
pipeline_desc.DepthStencilState.BackFace =
|
||||
pipeline_desc.DepthStencilState.FrontFace;
|
||||
pipeline_desc.DSVFormat = GetDepthDSVDXGIFormat(dest_depth_format);
|
||||
// Even if creation fails, still store the null pointers not to try to
|
||||
// create again.
|
||||
|
@ -4448,14 +4445,8 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// stencil being different.
|
||||
pipeline_desc.DepthStencilState.FrontFace.StencilFunc =
|
||||
D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilFailOp =
|
||||
D3D12_STENCIL_OP_KEEP;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilDepthFailOp =
|
||||
D3D12_STENCIL_OP_REPLACE;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilPassOp =
|
||||
D3D12_STENCIL_OP_REPLACE;
|
||||
pipeline_desc.DepthStencilState.BackFace.StencilFunc =
|
||||
D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
pipeline_desc.DepthStencilState.BackFace =
|
||||
pipeline_desc.DepthStencilState.FrontFace;
|
||||
}
|
||||
pipeline_desc.DSVFormat = GetDepthDSVDXGIFormat(dest_depth_format);
|
||||
}
|
||||
|
@ -5285,13 +5276,15 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
|
|||
if (transfer_root_parameters_used &
|
||||
kTransferUsedRootParameterHostDepthAddressConstantBit) {
|
||||
assert_not_null(host_depth_source_d3d12_rt);
|
||||
RenderTargetKey host_depth_source_rt_key =
|
||||
host_depth_source_d3d12_rt->key();
|
||||
TransferAddressConstant host_depth_address_constant;
|
||||
host_depth_address_constant.dest_pitch = dest_pitch_tiles;
|
||||
host_depth_address_constant.source_pitch =
|
||||
host_depth_source_d3d12_rt->key().GetPitchTiles();
|
||||
host_depth_source_rt_key.GetPitchTiles();
|
||||
host_depth_address_constant.source_to_dest =
|
||||
int32_t(dest_rt_key.base_tiles) -
|
||||
int32_t(host_depth_source_d3d12_rt->key().base_tiles);
|
||||
int32_t(host_depth_source_rt_key.base_tiles);
|
||||
if (last_host_depth_address_constant != host_depth_address_constant) {
|
||||
last_host_depth_address_constant = host_depth_address_constant;
|
||||
transfer_root_parameters_set &=
|
||||
|
@ -5982,7 +5975,8 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
a.OpDclInput(dxbc::Dest::VThreadID(0b0011));
|
||||
// r0 - addressing before the load, then addressing and conversion scratch
|
||||
// r1 - addressing scratch before the load, then data
|
||||
a.OpDclTemps(2);
|
||||
stat.temp_register_count = 2;
|
||||
a.OpDclTemps(stat.temp_register_count);
|
||||
// There's no strict dependency on the group size here, for simplicity of
|
||||
// calculations especially with resolution scaling, dividing manually (as the
|
||||
// group size is not unlimited). The only restriction is that an integer
|
||||
|
@ -6367,8 +6361,6 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
case xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16:
|
||||
// Float16 has a wider range for both color and alpha, also NaNs.
|
||||
// Color - clamp and convert.
|
||||
stat.temp_register_count =
|
||||
std::max(stat.temp_register_count, uint32_t(3));
|
||||
// Convert red in r1.x to the result register r1.x - the same, but
|
||||
// UnclampedFloat32To7e3 allows that - using r0.x as a temporary.
|
||||
DxbcShaderTranslator::UnclampedFloat32To7e3(a, 1, 0, 1, 0, 0, 0);
|
||||
|
|
|
@ -126,95 +126,6 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
xenos::DepthRenderTargetFormat format);
|
||||
|
||||
protected:
|
||||
class D3D12RenderTarget final : public RenderTarget {
|
||||
public:
|
||||
// descriptor_draw_srgb is only used for k_8_8_8_8 render targets when host
|
||||
// sRGB (gamma_render_target_as_srgb) is used. descriptor_load_separate is present
|
||||
// when the DXGI formats are different for drawing and bit-exact loading
|
||||
// (for NaN pattern preservation across EDRAM tile ownership transfers in
|
||||
// floating-point formats, and to distinguish between two -1 representations
|
||||
// in snorm formats).
|
||||
D3D12RenderTarget(
|
||||
RenderTargetKey key, ID3D12Resource* resource,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw_srgb,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&&
|
||||
descriptor_load_separate,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv_stencil,
|
||||
D3D12_RESOURCE_STATES resource_state)
|
||||
: RenderTarget(key),
|
||||
resource_(resource),
|
||||
descriptor_draw_(std::move(descriptor_draw)),
|
||||
descriptor_draw_srgb_(std::move(descriptor_draw_srgb)),
|
||||
descriptor_load_separate_(std::move(descriptor_load_separate)),
|
||||
descriptor_srv_(std::move(descriptor_srv)),
|
||||
descriptor_srv_stencil_(std::move(descriptor_srv_stencil)),
|
||||
resource_state_(resource_state) {}
|
||||
|
||||
ID3D12Resource* resource() const { return resource_.Get(); }
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw()
|
||||
const {
|
||||
return descriptor_draw_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw_srgb()
|
||||
const {
|
||||
return descriptor_draw_srgb_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_srv()
|
||||
const {
|
||||
return descriptor_srv_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
|
||||
descriptor_srv_stencil() const {
|
||||
return descriptor_srv_stencil_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
|
||||
descriptor_load_separate() const {
|
||||
return descriptor_load_separate_;
|
||||
}
|
||||
|
||||
D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) {
|
||||
D3D12_RESOURCE_STATES old_state = resource_state_;
|
||||
resource_state_ = new_state;
|
||||
return old_state;
|
||||
}
|
||||
|
||||
uint32_t temporary_srv_descriptor_index() const {
|
||||
return temporary_srv_descriptor_index_;
|
||||
}
|
||||
void SetTemporarySRVDescriptorIndex(uint32_t index) {
|
||||
temporary_srv_descriptor_index_ = index;
|
||||
}
|
||||
uint32_t temporary_srv_descriptor_index_stencil() const {
|
||||
return temporary_srv_descriptor_index_stencil_;
|
||||
}
|
||||
void SetTemporarySRVDescriptorIndexStencil(uint32_t index) {
|
||||
temporary_srv_descriptor_index_stencil_ = index;
|
||||
}
|
||||
uint32_t temporary_sort_index() const { return temporary_sort_index_; }
|
||||
void SetTemporarySortIndex(uint32_t index) {
|
||||
temporary_sort_index_ = index;
|
||||
}
|
||||
|
||||
private:
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> resource_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_srgb_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_load_separate_;
|
||||
// Texture SRV non-shader-visible descriptors, to prepare shader-visible
|
||||
// descriptors faster, by copying rather than by creating every time.
|
||||
// TODO(Triang3l): With bindless resources, persistently store them in the
|
||||
// heap.
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_stencil_;
|
||||
D3D12_RESOURCE_STATES resource_state_;
|
||||
// Temporary storage for indices in operations like transfers and dumps.
|
||||
uint32_t temporary_srv_descriptor_index_ = UINT32_MAX;
|
||||
uint32_t temporary_srv_descriptor_index_stencil_ = UINT32_MAX;
|
||||
uint32_t temporary_sort_index_ = 0;
|
||||
};
|
||||
|
||||
uint32_t GetMaxRenderTargetWidth() const override {
|
||||
return D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION;
|
||||
}
|
||||
|
@ -310,6 +221,95 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
|
||||
// For host render targets.
|
||||
|
||||
class D3D12RenderTarget final : public RenderTarget {
|
||||
public:
|
||||
// descriptor_draw_srgb is only used for k_8_8_8_8 render targets when host
|
||||
// sRGB (gamma_render_target_as_srgb) is used. descriptor_load_separate is present
|
||||
// when the DXGI formats are different for drawing and bit-exact loading
|
||||
// (for NaN pattern preservation across EDRAM tile ownership transfers in
|
||||
// floating-point formats, and to distinguish between two -1 representations
|
||||
// in snorm formats).
|
||||
D3D12RenderTarget(
|
||||
RenderTargetKey key, ID3D12Resource* resource,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_draw_srgb,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&&
|
||||
descriptor_load_separate,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv,
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor&& descriptor_srv_stencil,
|
||||
D3D12_RESOURCE_STATES resource_state)
|
||||
: RenderTarget(key),
|
||||
resource_(resource),
|
||||
descriptor_draw_(std::move(descriptor_draw)),
|
||||
descriptor_draw_srgb_(std::move(descriptor_draw_srgb)),
|
||||
descriptor_load_separate_(std::move(descriptor_load_separate)),
|
||||
descriptor_srv_(std::move(descriptor_srv)),
|
||||
descriptor_srv_stencil_(std::move(descriptor_srv_stencil)),
|
||||
resource_state_(resource_state) {}
|
||||
|
||||
ID3D12Resource* resource() const { return resource_.Get(); }
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw()
|
||||
const {
|
||||
return descriptor_draw_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_draw_srgb()
|
||||
const {
|
||||
return descriptor_draw_srgb_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor& descriptor_srv()
|
||||
const {
|
||||
return descriptor_srv_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
|
||||
descriptor_srv_stencil() const {
|
||||
return descriptor_srv_stencil_;
|
||||
}
|
||||
const ui::d3d12::D3D12CpuDescriptorPool::Descriptor&
|
||||
descriptor_load_separate() const {
|
||||
return descriptor_load_separate_;
|
||||
}
|
||||
|
||||
D3D12_RESOURCE_STATES SetResourceState(D3D12_RESOURCE_STATES new_state) {
|
||||
D3D12_RESOURCE_STATES old_state = resource_state_;
|
||||
resource_state_ = new_state;
|
||||
return old_state;
|
||||
}
|
||||
|
||||
uint32_t temporary_srv_descriptor_index() const {
|
||||
return temporary_srv_descriptor_index_;
|
||||
}
|
||||
void SetTemporarySRVDescriptorIndex(uint32_t index) {
|
||||
temporary_srv_descriptor_index_ = index;
|
||||
}
|
||||
uint32_t temporary_srv_descriptor_index_stencil() const {
|
||||
return temporary_srv_descriptor_index_stencil_;
|
||||
}
|
||||
void SetTemporarySRVDescriptorIndexStencil(uint32_t index) {
|
||||
temporary_srv_descriptor_index_stencil_ = index;
|
||||
}
|
||||
uint32_t temporary_sort_index() const { return temporary_sort_index_; }
|
||||
void SetTemporarySortIndex(uint32_t index) {
|
||||
temporary_sort_index_ = index;
|
||||
}
|
||||
|
||||
private:
|
||||
Microsoft::WRL::ComPtr<ID3D12Resource> resource_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_draw_srgb_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_load_separate_;
|
||||
// Texture SRV non-shader-visible descriptors, to prepare shader-visible
|
||||
// descriptors faster, by copying rather than by creating every time.
|
||||
// TODO(Triang3l): With bindless resources, persistently store them in the
|
||||
// heap.
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_;
|
||||
ui::d3d12::D3D12CpuDescriptorPool::Descriptor descriptor_srv_stencil_;
|
||||
D3D12_RESOURCE_STATES resource_state_;
|
||||
// Temporary storage for indices in operations like transfers and dumps.
|
||||
uint32_t temporary_srv_descriptor_index_ = UINT32_MAX;
|
||||
uint32_t temporary_srv_descriptor_index_stencil_ = UINT32_MAX;
|
||||
uint32_t temporary_sort_index_ = 0;
|
||||
};
|
||||
|
||||
enum TransferCBVRegister : uint32_t {
|
||||
kTransferCBVRegisterStencilMask,
|
||||
kTransferCBVRegisterAddress,
|
||||
|
@ -438,7 +438,7 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
|
||||
// Last bits because this affects the root signature - after sorting, only
|
||||
// change it as few times as possible. Depth buffers have an additional
|
||||
// depth SRV.
|
||||
// stencil SRV.
|
||||
static_assert(size_t(TransferMode::kCount) <= (size_t(1) << 3));
|
||||
TransferMode mode : 3;
|
||||
};
|
||||
|
|
|
@ -536,7 +536,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the
|
||||
// nearest even. Source and destination may be the same, temporary must be
|
||||
// different than both. If remap_from_0_to_0_5 is true, it's assumed that
|
||||
// 0...1 is pre-remapped to 0...0.5 on the input.
|
||||
// 0...1 is pre-remapped to 0...0.5 in the input.
|
||||
static void PreClampedDepthTo20e4(
|
||||
dxbc::Assembler& a, uint32_t f24_temp, uint32_t f24_temp_component,
|
||||
uint32_t f32_temp, uint32_t f32_temp_component, uint32_t temp_temp,
|
||||
|
|
|
@ -3140,7 +3140,7 @@ void DxbcShaderTranslator::PreClampedFloat32To7e3(
|
|||
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
|
||||
// Assuming the color is already clamped to [0, 31.875].
|
||||
|
||||
// Check if the number is too small to be represented as normalized 20e4.
|
||||
// Check if the number is too small to be represented as normalized 7e3.
|
||||
// temp = f32 < 2^-2
|
||||
a.OpULT(temp_dest, f32_src, dxbc::Src::LU(0x3E800000));
|
||||
// Handle denormalized numbers separately.
|
||||
|
|
Loading…
Reference in New Issue