Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2021-06-14 19:13:15 +03:00
commit 962f7daeb2
6 changed files with 44 additions and 33 deletions

View File

@ -4604,11 +4604,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
dest_rt_key.pitch_tiles_at_32bpp; dest_rt_key.pitch_tiles_at_32bpp;
host_depth_store_render_target_constant.resolution_scale = host_depth_store_render_target_constant.resolution_scale =
resolution_scale_; resolution_scale_;
host_depth_store_render_target_constant.second_sample_index = host_depth_store_render_target_constant.msaa_2x_supported =
(dest_rt_key.msaa_samples == xenos::MsaaSamples::k2X && uint32_t(msaa_2x_supported_);
!msaa_2x_supported_)
? 3
: 1;
command_list.D3DSetComputeRoot32BitConstants( command_list.D3DSetComputeRoot32BitConstants(
kHostDepthStoreRootParameterRenderTargetConstant, kHostDepthStoreRootParameterRenderTargetConstant,
sizeof(host_depth_store_render_target_constant) / sizeof(uint32_t), sizeof(host_depth_store_render_target_constant) / sizeof(uint32_t),

View File

@ -536,8 +536,8 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
uint32_t pitch_tiles : xenos::kEdramPitchTilesBits; uint32_t pitch_tiles : xenos::kEdramPitchTilesBits;
// 1 to 3. // 1 to 3.
uint32_t resolution_scale : 2; uint32_t resolution_scale : 2;
// For native 2x MSAA vs. 2x over 4x. // Whether 2x MSAA is supported natively rather than through 4x.
uint32_t second_sample_index : 2; uint32_t msaa_2x_supported : 1;
}; };
uint32_t constant = 0; uint32_t constant = 0;
}; };

View File

@ -801,10 +801,10 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets( uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
bool distinguish_gamma_formats, bool distinguish_gamma_formats,
uint32_t* depth_and_color_resource_formats_out) const { uint32_t* depth_and_color_formats_out) const {
if (GetPath() != Path::kHostRenderTargets) { if (GetPath() != Path::kHostRenderTargets) {
if (depth_and_color_resource_formats_out) { if (depth_and_color_formats_out) {
std::memset(depth_and_color_resource_formats_out, 0, std::memset(depth_and_color_formats_out, 0,
sizeof(uint32_t) * (1 + xenos::kMaxColorRenderTargets)); sizeof(uint32_t) * (1 + xenos::kMaxColorRenderTargets));
} }
return 0; return 0;
@ -814,14 +814,14 @@ uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
const RenderTarget* render_target = const RenderTarget* render_target =
last_update_accumulated_render_targets_[i]; last_update_accumulated_render_targets_[i];
if (!render_target) { if (!render_target) {
if (depth_and_color_resource_formats_out) { if (depth_and_color_formats_out) {
depth_and_color_resource_formats_out[i] = 0; depth_and_color_formats_out[i] = 0;
} }
continue; continue;
} }
rts_used |= uint32_t(1) << i; rts_used |= uint32_t(1) << i;
if (depth_and_color_resource_formats_out) { if (depth_and_color_formats_out) {
depth_and_color_resource_formats_out[i] = depth_and_color_formats_out[i] =
(distinguish_gamma_formats && i && (distinguish_gamma_formats && i &&
(last_update_accumulated_color_targets_are_gamma_ & (last_update_accumulated_color_targets_are_gamma_ &
(uint32_t(1) << (i - 1)))) (uint32_t(1) << (i - 1))))

View File

@ -77,8 +77,9 @@ imul null, r0.w, r2.z, r2.x
imad r0.y, r0.z, r2.x, r0.y imad r0.y, r0.z, r2.x, r0.y
imad r0.x, r0.x, r0.w, r0.y imad r0.x, r0.x, r0.w, r0.y
ushr r0.x, r0.x, l(2) ushr r0.x, r0.x, l(2)
ubfe r0.y, l(2), l(12), CB1[1][0].x ubfe r0.y, l(1), l(12), CB1[1][0].x
movc r0.y, r2.y, r0.y, l(0) movc r0.zw, r2.yyyy, l(0,0,0,3), l(0,0,1,0)
movc r0.y, r0.y, r0.z, r0.w
mov r1.w, l(0) mov r1.w, l(0)
ldms r2.x, r1.xyww, T0[0].xyzw, r0.y ldms r2.x, r1.xyww, T0[0].xyzw, r0.y
iadd r3.xyzw, r1.xyxy, l(2, 0, 1, 0) iadd r3.xyzw, r1.xyxy, l(2, 0, 1, 0)
@ -106,20 +107,20 @@ mov r1.zw, l(0,0,0,0)
ldms r2.w, r1.xyzw, T0[0].yzwx, r0.y ldms r2.w, r1.xyzw, T0[0].yzwx, r0.y
store_uav_typed U0[0].xyzw, r0.zzzz, r2.xyzw store_uav_typed U0[0].xyzw, r0.zzzz, r2.xyzw
ret ret
// Approximately 55 instruction slots used // Approximately 56 instruction slots used
#endif #endif
const BYTE host_depth_store_2xmsaa_cs[] = const BYTE host_depth_store_2xmsaa_cs[] =
{ {
68, 88, 66, 67, 70, 151, 68, 88, 66, 67, 15, 231,
47, 41, 106, 214, 147, 230, 223, 186, 190, 135, 229, 39,
77, 220, 169, 203, 166, 115, 211, 185, 26, 121, 39, 17,
42, 93, 1, 0, 0, 0, 25, 229, 1, 0, 0, 0,
248, 10, 0, 0, 5, 0, 52, 11, 0, 0, 5, 0,
0, 0, 52, 0, 0, 0, 0, 0, 52, 0, 0, 0,
172, 2, 0, 0, 188, 2, 172, 2, 0, 0, 188, 2,
0, 0, 204, 2, 0, 0, 0, 0, 204, 2, 0, 0,
92, 10, 0, 0, 82, 68, 152, 10, 0, 0, 82, 68,
69, 70, 112, 2, 0, 0, 69, 70, 112, 2, 0, 0,
2, 0, 0, 0, 92, 1, 2, 0, 0, 0, 92, 1,
0, 0, 4, 0, 0, 0, 0, 0, 4, 0, 0, 0,
@ -231,8 +232,8 @@ const BYTE host_depth_store_2xmsaa_cs[] =
71, 78, 8, 0, 0, 0, 71, 78, 8, 0, 0, 0,
0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 8, 0,
0, 0, 83, 72, 69, 88, 0, 0, 83, 72, 69, 88,
136, 7, 0, 0, 81, 0, 196, 7, 0, 0, 81, 0,
5, 0, 226, 1, 0, 0, 5, 0, 241, 1, 0, 0,
106, 8, 0, 1, 89, 0, 106, 8, 0, 1, 89, 0,
0, 7, 70, 142, 48, 0, 0, 7, 70, 142, 48, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -395,17 +396,27 @@ const BYTE host_depth_store_2xmsaa_cs[] =
0, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0,
138, 0, 0, 11, 34, 0, 138, 0, 0, 11, 34, 0,
16, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0,
1, 64, 0, 0, 2, 0, 1, 64, 0, 0, 1, 0,
0, 0, 1, 64, 0, 0, 0, 0, 1, 64, 0, 0,
12, 0, 0, 0, 10, 128, 12, 0, 0, 0, 10, 128,
48, 0, 1, 0, 0, 0, 48, 0, 1, 0, 0, 0,
1, 0, 0, 0, 0, 0,
0, 0, 55, 0, 0, 15,
194, 0, 16, 0, 0, 0,
0, 0, 86, 5, 16, 0,
2, 0, 0, 0, 2, 64,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 3, 0, 0, 0,
2, 64, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 55, 0, 0, 9, 0, 0, 55, 0, 0, 9,
34, 0, 16, 0, 0, 0, 34, 0, 16, 0, 0, 0,
0, 0, 26, 0, 16, 0, 0, 0, 26, 0, 16, 0,
2, 0, 0, 0, 26, 0, 0, 0, 0, 0, 42, 0,
16, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0,
1, 64, 0, 0, 0, 0, 58, 0, 16, 0, 0, 0,
0, 0, 54, 0, 0, 5, 0, 0, 54, 0, 0, 5,
130, 0, 16, 0, 1, 0, 130, 0, 16, 0, 1, 0,
0, 0, 1, 64, 0, 0, 0, 0, 1, 64, 0, 0,
@ -554,7 +565,7 @@ const BYTE host_depth_store_2xmsaa_cs[] =
70, 14, 16, 0, 2, 0, 70, 14, 16, 0, 2, 0,
0, 0, 62, 0, 0, 1, 0, 0, 62, 0, 0, 1,
83, 84, 65, 84, 148, 0, 83, 84, 65, 84, 148, 0,
0, 0, 55, 0, 0, 0, 0, 0, 56, 0, 0, 0,
5, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 16, 0,
@ -567,7 +578,7 @@ const BYTE host_depth_store_2xmsaa_cs[] =
0, 0, 8, 0, 0, 0, 0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
13, 0, 0, 0, 1, 0, 13, 0, 0, 0, 2, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

View File

@ -30,8 +30,8 @@ uint XeHostDepthStoreResolutionScale() {
return (xe_host_depth_store_render_target >> 10u) & 0x3u; return (xe_host_depth_store_render_target >> 10u) & 0x3u;
} }
uint XeHostDepthStoreSecondSampleIndex() { bool XeHostDepthStoreMsaa2xSupported() {
return (xe_host_depth_store_render_target >> 12u) & 0x3u; return bool((xe_host_depth_store_render_target >> 12u) & 0x1u);
} }
// 40-sample columns are not swapped for addressing simplicity (because this is // 40-sample columns are not swapped for addressing simplicity (because this is

View File

@ -21,8 +21,11 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
kXenosMsaaSamples_2X, false, 0u, dest_sample_index, kXenosMsaaSamples_2X, false, 0u, dest_sample_index,
resolution_scale) resolution_scale)
>> 2u; >> 2u;
// Top and bottom to Direct3D 10.1+ top 1 and bottom 0 (for 2x) or top-left 0
// and bottom-right 3 (for 4x).
int source_sample_index = int source_sample_index =
int(dest_sample_index != 0u ? XeHostDepthStoreSecondSampleIndex() : 0u); XeHostDepthStoreMsaa2xSupported() ? (dest_sample_index ? 0u : 1u)
: (dest_sample_index ? 3u : 0u);
xe_host_depth_store_dest[edram_address_int4s] = asuint(float4( xe_host_depth_store_dest[edram_address_int4s] = asuint(float4(
xe_host_depth_store_source.Load(int2(pixel_index), source_sample_index), xe_host_depth_store_source.Load(int2(pixel_index), source_sample_index),
xe_host_depth_store_source.Load(int2(pixel_index) + int2(1, 0), xe_host_depth_store_source.Load(int2(pixel_index) + int2(1, 0),