[GPU] Fix host_depth_store_2xmsaa CS after 2x MSAA sample swap
This commit is contained in:
parent
e48a678ac7
commit
8e83042bdf
|
@ -4604,11 +4604,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
|
|||
dest_rt_key.pitch_tiles_at_32bpp;
|
||||
host_depth_store_render_target_constant.resolution_scale =
|
||||
resolution_scale_;
|
||||
host_depth_store_render_target_constant.second_sample_index =
|
||||
(dest_rt_key.msaa_samples == xenos::MsaaSamples::k2X &&
|
||||
!msaa_2x_supported_)
|
||||
? 3
|
||||
: 1;
|
||||
host_depth_store_render_target_constant.msaa_2x_supported =
|
||||
uint32_t(msaa_2x_supported_);
|
||||
command_list.D3DSetComputeRoot32BitConstants(
|
||||
kHostDepthStoreRootParameterRenderTargetConstant,
|
||||
sizeof(host_depth_store_render_target_constant) / sizeof(uint32_t),
|
||||
|
|
|
@ -536,8 +536,8 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
|
|||
uint32_t pitch_tiles : xenos::kEdramPitchTilesBits;
|
||||
// 1 to 3.
|
||||
uint32_t resolution_scale : 2;
|
||||
// For native 2x MSAA vs. 2x over 4x.
|
||||
uint32_t second_sample_index : 2;
|
||||
// Whether 2x MSAA is supported natively rather than through 4x.
|
||||
uint32_t msaa_2x_supported : 1;
|
||||
};
|
||||
uint32_t constant = 0;
|
||||
};
|
||||
|
|
|
@ -77,8 +77,9 @@ imul null, r0.w, r2.z, r2.x
|
|||
imad r0.y, r0.z, r2.x, r0.y
|
||||
imad r0.x, r0.x, r0.w, r0.y
|
||||
ushr r0.x, r0.x, l(2)
|
||||
ubfe r0.y, l(2), l(12), CB1[1][0].x
|
||||
movc r0.y, r2.y, r0.y, l(0)
|
||||
ubfe r0.y, l(1), l(12), CB1[1][0].x
|
||||
movc r0.zw, r2.yyyy, l(0,0,0,3), l(0,0,1,0)
|
||||
movc r0.y, r0.y, r0.z, r0.w
|
||||
mov r1.w, l(0)
|
||||
ldms r2.x, r1.xyww, T0[0].xyzw, r0.y
|
||||
iadd r3.xyzw, r1.xyxy, l(2, 0, 1, 0)
|
||||
|
@ -106,20 +107,20 @@ mov r1.zw, l(0,0,0,0)
|
|||
ldms r2.w, r1.xyzw, T0[0].yzwx, r0.y
|
||||
store_uav_typed U0[0].xyzw, r0.zzzz, r2.xyzw
|
||||
ret
|
||||
// Approximately 55 instruction slots used
|
||||
// Approximately 56 instruction slots used
|
||||
#endif
|
||||
|
||||
const BYTE host_depth_store_2xmsaa_cs[] =
|
||||
{
|
||||
68, 88, 66, 67, 70, 151,
|
||||
47, 41, 106, 214, 147, 230,
|
||||
77, 220, 169, 203, 166, 115,
|
||||
42, 93, 1, 0, 0, 0,
|
||||
248, 10, 0, 0, 5, 0,
|
||||
68, 88, 66, 67, 15, 231,
|
||||
223, 186, 190, 135, 229, 39,
|
||||
211, 185, 26, 121, 39, 17,
|
||||
25, 229, 1, 0, 0, 0,
|
||||
52, 11, 0, 0, 5, 0,
|
||||
0, 0, 52, 0, 0, 0,
|
||||
172, 2, 0, 0, 188, 2,
|
||||
0, 0, 204, 2, 0, 0,
|
||||
92, 10, 0, 0, 82, 68,
|
||||
152, 10, 0, 0, 82, 68,
|
||||
69, 70, 112, 2, 0, 0,
|
||||
2, 0, 0, 0, 92, 1,
|
||||
0, 0, 4, 0, 0, 0,
|
||||
|
@ -231,8 +232,8 @@ const BYTE host_depth_store_2xmsaa_cs[] =
|
|||
71, 78, 8, 0, 0, 0,
|
||||
0, 0, 0, 0, 8, 0,
|
||||
0, 0, 83, 72, 69, 88,
|
||||
136, 7, 0, 0, 81, 0,
|
||||
5, 0, 226, 1, 0, 0,
|
||||
196, 7, 0, 0, 81, 0,
|
||||
5, 0, 241, 1, 0, 0,
|
||||
106, 8, 0, 1, 89, 0,
|
||||
0, 7, 70, 142, 48, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
|
@ -395,17 +396,27 @@ const BYTE host_depth_store_2xmsaa_cs[] =
|
|||
0, 0, 2, 0, 0, 0,
|
||||
138, 0, 0, 11, 34, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
1, 64, 0, 0, 2, 0,
|
||||
1, 64, 0, 0, 1, 0,
|
||||
0, 0, 1, 64, 0, 0,
|
||||
12, 0, 0, 0, 10, 128,
|
||||
48, 0, 1, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0,
|
||||
0, 0, 55, 0, 0, 15,
|
||||
194, 0, 16, 0, 0, 0,
|
||||
0, 0, 86, 5, 16, 0,
|
||||
2, 0, 0, 0, 2, 64,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 3, 0, 0, 0,
|
||||
2, 64, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
1, 0, 0, 0, 0, 0,
|
||||
0, 0, 55, 0, 0, 9,
|
||||
34, 0, 16, 0, 0, 0,
|
||||
0, 0, 26, 0, 16, 0,
|
||||
2, 0, 0, 0, 26, 0,
|
||||
0, 0, 0, 0, 42, 0,
|
||||
16, 0, 0, 0, 0, 0,
|
||||
1, 64, 0, 0, 0, 0,
|
||||
58, 0, 16, 0, 0, 0,
|
||||
0, 0, 54, 0, 0, 5,
|
||||
130, 0, 16, 0, 1, 0,
|
||||
0, 0, 1, 64, 0, 0,
|
||||
|
@ -554,7 +565,7 @@ const BYTE host_depth_store_2xmsaa_cs[] =
|
|||
70, 14, 16, 0, 2, 0,
|
||||
0, 0, 62, 0, 0, 1,
|
||||
83, 84, 65, 84, 148, 0,
|
||||
0, 0, 55, 0, 0, 0,
|
||||
0, 0, 56, 0, 0, 0,
|
||||
5, 0, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, 0, 0,
|
||||
0, 0, 0, 0, 16, 0,
|
||||
|
@ -567,7 +578,7 @@ const BYTE host_depth_store_2xmsaa_cs[] =
|
|||
0, 0, 8, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
13, 0, 0, 0, 1, 0,
|
||||
13, 0, 0, 0, 2, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0,
|
||||
|
|
|
@ -30,8 +30,8 @@ uint XeHostDepthStoreResolutionScale() {
|
|||
return (xe_host_depth_store_render_target >> 10u) & 0x3u;
|
||||
}
|
||||
|
||||
uint XeHostDepthStoreSecondSampleIndex() {
|
||||
return (xe_host_depth_store_render_target >> 12u) & 0x3u;
|
||||
bool XeHostDepthStoreMsaa2xSupported() {
|
||||
return bool((xe_host_depth_store_render_target >> 12u) & 0x1u);
|
||||
}
|
||||
|
||||
// 40-sample columns are not swapped for addressing simplicity (because this is
|
||||
|
|
|
@ -21,8 +21,11 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
|
|||
kXenosMsaaSamples_2X, false, 0u, dest_sample_index,
|
||||
resolution_scale)
|
||||
>> 2u;
|
||||
// Top and bottom to Direct3D 10.1+ top 1 and bottom 0 (for 2x) or top-left 0
|
||||
// and bottom-right 3 (for 4x).
|
||||
int source_sample_index =
|
||||
int(dest_sample_index != 0u ? XeHostDepthStoreSecondSampleIndex() : 0u);
|
||||
XeHostDepthStoreMsaa2xSupported() ? (dest_sample_index ? 0u : 1u)
|
||||
: (dest_sample_index ? 3u : 0u);
|
||||
xe_host_depth_store_dest[edram_address_int4s] = asuint(float4(
|
||||
xe_host_depth_store_source.Load(int2(pixel_index), source_sample_index),
|
||||
xe_host_depth_store_source.Load(int2(pixel_index) + int2(1, 0),
|
||||
|
|
Loading…
Reference in New Issue