Merge branch 'master' into vulkan

This commit is contained in:
Triang3l 2021-06-14 19:13:15 +03:00
commit 962f7daeb2
6 changed files with 44 additions and 33 deletions

View File

@ -4604,11 +4604,8 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
dest_rt_key.pitch_tiles_at_32bpp;
host_depth_store_render_target_constant.resolution_scale =
resolution_scale_;
host_depth_store_render_target_constant.second_sample_index =
(dest_rt_key.msaa_samples == xenos::MsaaSamples::k2X &&
!msaa_2x_supported_)
? 3
: 1;
host_depth_store_render_target_constant.msaa_2x_supported =
uint32_t(msaa_2x_supported_);
command_list.D3DSetComputeRoot32BitConstants(
kHostDepthStoreRootParameterRenderTargetConstant,
sizeof(host_depth_store_render_target_constant) / sizeof(uint32_t),

View File

@ -536,8 +536,8 @@ class D3D12RenderTargetCache final : public RenderTargetCache {
uint32_t pitch_tiles : xenos::kEdramPitchTilesBits;
// 1 to 3.
uint32_t resolution_scale : 2;
// For native 2x MSAA vs. 2x over 4x.
uint32_t second_sample_index : 2;
// Whether 2x MSAA is supported natively rather than through 4x.
uint32_t msaa_2x_supported : 1;
};
uint32_t constant = 0;
};

View File

@ -801,10 +801,10 @@ bool RenderTargetCache::Update(bool is_rasterization_done,
uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
bool distinguish_gamma_formats,
uint32_t* depth_and_color_resource_formats_out) const {
uint32_t* depth_and_color_formats_out) const {
if (GetPath() != Path::kHostRenderTargets) {
if (depth_and_color_resource_formats_out) {
std::memset(depth_and_color_resource_formats_out, 0,
if (depth_and_color_formats_out) {
std::memset(depth_and_color_formats_out, 0,
sizeof(uint32_t) * (1 + xenos::kMaxColorRenderTargets));
}
return 0;
@ -814,14 +814,14 @@ uint32_t RenderTargetCache::GetLastUpdateBoundRenderTargets(
const RenderTarget* render_target =
last_update_accumulated_render_targets_[i];
if (!render_target) {
if (depth_and_color_resource_formats_out) {
depth_and_color_resource_formats_out[i] = 0;
if (depth_and_color_formats_out) {
depth_and_color_formats_out[i] = 0;
}
continue;
}
rts_used |= uint32_t(1) << i;
if (depth_and_color_resource_formats_out) {
depth_and_color_resource_formats_out[i] =
if (depth_and_color_formats_out) {
depth_and_color_formats_out[i] =
(distinguish_gamma_formats && i &&
(last_update_accumulated_color_targets_are_gamma_ &
(uint32_t(1) << (i - 1))))

View File

@ -77,8 +77,9 @@ imul null, r0.w, r2.z, r2.x
imad r0.y, r0.z, r2.x, r0.y
imad r0.x, r0.x, r0.w, r0.y
ushr r0.x, r0.x, l(2)
ubfe r0.y, l(2), l(12), CB1[1][0].x
movc r0.y, r2.y, r0.y, l(0)
ubfe r0.y, l(1), l(12), CB1[1][0].x
movc r0.zw, r2.yyyy, l(0,0,0,3), l(0,0,1,0)
movc r0.y, r0.y, r0.z, r0.w
mov r1.w, l(0)
ldms r2.x, r1.xyww, T0[0].xyzw, r0.y
iadd r3.xyzw, r1.xyxy, l(2, 0, 1, 0)
@ -106,20 +107,20 @@ mov r1.zw, l(0,0,0,0)
ldms r2.w, r1.xyzw, T0[0].yzwx, r0.y
store_uav_typed U0[0].xyzw, r0.zzzz, r2.xyzw
ret
// Approximately 55 instruction slots used
// Approximately 56 instruction slots used
#endif
const BYTE host_depth_store_2xmsaa_cs[] =
{
68, 88, 66, 67, 70, 151,
47, 41, 106, 214, 147, 230,
77, 220, 169, 203, 166, 115,
42, 93, 1, 0, 0, 0,
248, 10, 0, 0, 5, 0,
68, 88, 66, 67, 15, 231,
223, 186, 190, 135, 229, 39,
211, 185, 26, 121, 39, 17,
25, 229, 1, 0, 0, 0,
52, 11, 0, 0, 5, 0,
0, 0, 52, 0, 0, 0,
172, 2, 0, 0, 188, 2,
0, 0, 204, 2, 0, 0,
92, 10, 0, 0, 82, 68,
152, 10, 0, 0, 82, 68,
69, 70, 112, 2, 0, 0,
2, 0, 0, 0, 92, 1,
0, 0, 4, 0, 0, 0,
@ -231,8 +232,8 @@ const BYTE host_depth_store_2xmsaa_cs[] =
71, 78, 8, 0, 0, 0,
0, 0, 0, 0, 8, 0,
0, 0, 83, 72, 69, 88,
136, 7, 0, 0, 81, 0,
5, 0, 226, 1, 0, 0,
196, 7, 0, 0, 81, 0,
5, 0, 241, 1, 0, 0,
106, 8, 0, 1, 89, 0,
0, 7, 70, 142, 48, 0,
0, 0, 0, 0, 0, 0,
@ -395,17 +396,27 @@ const BYTE host_depth_store_2xmsaa_cs[] =
0, 0, 2, 0, 0, 0,
138, 0, 0, 11, 34, 0,
16, 0, 0, 0, 0, 0,
1, 64, 0, 0, 2, 0,
1, 64, 0, 0, 1, 0,
0, 0, 1, 64, 0, 0,
12, 0, 0, 0, 10, 128,
48, 0, 1, 0, 0, 0,
1, 0, 0, 0, 0, 0,
0, 0, 55, 0, 0, 15,
194, 0, 16, 0, 0, 0,
0, 0, 86, 5, 16, 0,
2, 0, 0, 0, 2, 64,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 3, 0, 0, 0,
2, 64, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0,
0, 0, 55, 0, 0, 9,
34, 0, 16, 0, 0, 0,
0, 0, 26, 0, 16, 0,
2, 0, 0, 0, 26, 0,
0, 0, 0, 0, 42, 0,
16, 0, 0, 0, 0, 0,
1, 64, 0, 0, 0, 0,
58, 0, 16, 0, 0, 0,
0, 0, 54, 0, 0, 5,
130, 0, 16, 0, 1, 0,
0, 0, 1, 64, 0, 0,
@ -554,7 +565,7 @@ const BYTE host_depth_store_2xmsaa_cs[] =
70, 14, 16, 0, 2, 0,
0, 0, 62, 0, 0, 1,
83, 84, 65, 84, 148, 0,
0, 0, 55, 0, 0, 0,
0, 0, 56, 0, 0, 0,
5, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 16, 0,
@ -567,7 +578,7 @@ const BYTE host_depth_store_2xmsaa_cs[] =
0, 0, 8, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
13, 0, 0, 0, 1, 0,
13, 0, 0, 0, 2, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,

View File

@ -30,8 +30,8 @@ uint XeHostDepthStoreResolutionScale() {
return (xe_host_depth_store_render_target >> 10u) & 0x3u;
}
uint XeHostDepthStoreSecondSampleIndex() {
return (xe_host_depth_store_render_target >> 12u) & 0x3u;
bool XeHostDepthStoreMsaa2xSupported() {
return bool((xe_host_depth_store_render_target >> 12u) & 0x1u);
}
// 40-sample columns are not swapped for addressing simplicity (because this is

View File

@ -21,8 +21,11 @@ void main(uint3 xe_thread_id : SV_DispatchThreadID) {
kXenosMsaaSamples_2X, false, 0u, dest_sample_index,
resolution_scale)
>> 2u;
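// Map the destination's top (0) and bottom (1) samples to Direct3D 10.1+
// source sample indices: top is 1 and bottom is 0 with native 2x MSAA, or
// top-left is 0 and bottom-right is 3 when 2x is done through 4x.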
int source_sample_index =
int(dest_sample_index != 0u ? XeHostDepthStoreSecondSampleIndex() : 0u);
XeHostDepthStoreMsaa2xSupported() ? (dest_sample_index ? 0u : 1u)
: (dest_sample_index ? 3u : 0u);
xe_host_depth_store_dest[edram_address_int4s] = asuint(float4(
xe_host_depth_store_source.Load(int2(pixel_index), source_sample_index),
xe_host_depth_store_source.Load(int2(pixel_index) + int2(1, 0),