[D3D12] Take 0 being bottom-right sample with 2x MSAA in D3D12 into account

This commit is contained in:
Triang3l 2021-06-06 20:58:50 +03:00
parent c4a5048e62
commit 3f156f26c4
3 changed files with 130 additions and 82 deletions

View File

@ -3081,10 +3081,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
uint32_t source_tile_pixel_x_reg = 0;
uint32_t source_tile_pixel_y_reg = 0;
// First sample bit at 4x - horizontal sample.
// Second sample bit at 4x - vertical sample.
// At 2x, the vertical sample is either the first or the second bit
// depending on whether 2x is emulated as 4x.
// First sample bit at 4x in Direct3D 10.1+ - horizontal sample.
// Second sample bit at 4x in Direct3D 10.1+ - vertical sample.
// At 2x:
// - Native 2x: top is 1 in Direct3D 10.1+, bottom is 0.
// - 2x as 4x: top is 0, bottom is 3.
if (!source_is_64bpp && dest_is_64bpp) {
// 32bpp -> 64bpp, need two samples of the source.
@ -3113,14 +3114,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// D p0,0 s1 = S p0,0 s0,1 | S p0,0 s1,1
// D p1,0 s0 = S p1,0 s0,0 | S p1,0 s1,0
// D p1,0 s1 = S p1,0 s0,1 | S p1,0 s1,1
// Pixel index can be reused. Sample 0 should become samples 01,
// sample 1 or 3 should become samples 23.
// Pixel index can be reused. Sample 1 (for native 2x) or 0 (for 2x as
// 4x) should become samples 01, sample 0 or 3 should become samples 23.
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (msaa_2x_supported_) {
a.OpIShL(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(1));
a.OpXOr(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(1));
a.OpIShL(dxbc::Dest::R(1, 0b0100), source_sample, dxbc::Src::LU(1));
} else {
a.OpAnd(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(0b10));
}
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
} else {
// 32bpp -> 64bpp, 4x -> 1x.
// 1 destination horizontal pixel = 2 source horizontal samples.
@ -3201,11 +3203,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
// 64bpp -> 32bpp, 4x -> 2x.
// Destination vertical samples (first or second bit, depending on
// support) = source vertical samples (second bit).
// Destination vertical samples (1/0 in the first bit for native 2x or
// 0/1 in the second bit for 2x as 4x) = source vertical samples
// (second bit).
if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1),
dxbc::Src::LU(1), dest_sample, source_sample);
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample,
dxbc::Src::LU(1 << 1));
} else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1),
dxbc::Src::LU(0), source_sample, dest_sample);
@ -3244,18 +3249,21 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// Same BPP, 4x -> 1x/2x.
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
// Same BPP, 4x -> 2x.
// Horizontal pixels to samples. Vertical sample (first or second bit,
// depending on support) to second sample bit.
// Horizontal pixels to samples. Vertical sample (1/0 in the first bit
// for native 2x or 0/1 in the second bit for 2x as 4x) to second
// sample bit.
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(31),
dxbc::Src::LU(1), dest_sample,
dxbc::Src::R(0, dxbc::Src::kXXXX));
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample,
dxbc::Src::LU(1 << 1));
} else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1),
dxbc::Src::LU(0), dxbc::Src::R(0, dxbc::Src::kXXXX),
dest_sample);
}
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
a.OpUShR(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(0, dxbc::Src::kXXXX),
dxbc::Src::LU(1));
source_tile_pixel_x_reg = 1;
@ -3292,10 +3300,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
if (key.source_msaa_samples == xenos::MsaaSamples::k2X) {
// 2x -> 4x.
// Vertical samples (second bit) of 4x destination to vertical sample
// (01 or 03, depending on support) of 2x source.
// (1, 0 for native 2x, or 0, 3 for 2x as 4x) of 2x source.
a.OpUShR(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(1));
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (!msaa_2x_supported_) {
if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), dxbc::Src::LU(1),
source_sample, source_sample);
}
@ -3312,12 +3322,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// 1x/2x -> different 1x/2x.
if (key.source_msaa_samples == xenos::MsaaSamples::k2X) {
// 2x -> 1x.
// Vertical pixels of 2x destination to vertical samples (01 or 03,
// depending on support) of 1x source.
// Vertical pixels of 2x destination to vertical samples (1, 0 for
// native 2x, or 0, 3 for 2x as 4x) of 1x source.
a.OpAnd(dxbc::Dest::R(1, 0b0100), dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::LU(1));
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (!msaa_2x_supported_) {
if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), dxbc::Src::LU(1),
source_sample, source_sample);
}
@ -3326,15 +3338,20 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
source_tile_pixel_y_reg = 1;
} else {
// 1x -> 2x.
// Vertical samples (first or second bit, depending on support) of 2x
// destination to vertical pixels of 1x source.
if (!msaa_2x_supported_) {
// Vertical samples (1/0 in the first bit for native 2x or 0/1 in the
// second bit for 2x as 4x) of 2x destination to vertical pixels of 1x
// source.
if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(1, 0b0010), dxbc::Src::LU(31), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY), dest_sample);
a.OpXOr(dxbc::Dest::R(1, 0b0010), dxbc::Src::R(1, dxbc::Src::kYYYY),
dxbc::Src::LU(1));
} else {
a.OpUShR(dxbc::Dest::R(1, 0b0010), dest_sample, dxbc::Src::LU(1));
a.OpBFI(dxbc::Dest::R(1, 0b0010), dxbc::Src::LU(31), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::R(1, dxbc::Src::kYYYY));
}
a.OpBFI(dxbc::Dest::R(1, 0b0010), dxbc::Src::LU(31), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY),
msaa_2x_supported_ ? dest_sample
: dxbc::Src::R(1, dxbc::Src::kYYYY));
source_tile_pixel_y_reg = 1;
}
}
@ -3935,13 +3952,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::LU(1), dxbc::Src::R(0, dxbc::Src::kXXXX),
dest_sample);
}
// Vertical sample index in bit 0 for true 2x or in bit 1 for
// 4x or for 2x emulated as 4x.
// Vertical sample index as 1 or 0 in bit 0 for true 2x or as 0
// or 1 in bit 1 for 4x or for 2x emulated as 4x.
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X &&
msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31),
dxbc::Src::LU(1), dxbc::Src::R(0, dxbc::Src::kYYYY),
dest_sample);
a.OpXOr(dxbc::Dest::R(0, 0b0010),
dxbc::Src::R(0, dxbc::Src::kYYYY), dxbc::Src::LU(1));
} else {
// Using r0.w as a temporary.
a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample,
@ -3993,19 +4012,22 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// 4x -> 1x/2x.
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
// 4x -> 2x.
// Horizontal pixels to samples. Vertical sample (first or
// second bit, depending on support) to second sample bit.
// Horizontal pixels to samples. Vertical sample (1, 0 in
// the first bit for native 2x or 0, 1 in the second bit for
// 2x as 4x) to second sample bit.
host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW);
if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(31),
dxbc::Src::LU(1), dest_sample,
dxbc::Src::R(0, dxbc::Src::kXXXX));
a.OpXOr(dxbc::Dest::R(0, 0b1000),
host_depth_source_sample, dxbc::Src::LU(1 << 1));
} else {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1),
dxbc::Src::LU(0),
dxbc::Src::R(0, dxbc::Src::kXXXX), dest_sample);
}
host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW);
a.OpUShR(dxbc::Dest::R(0, 0b0001),
dxbc::Src::R(0, dxbc::Src::kXXXX),
dxbc::Src::LU(1));
@ -4042,13 +4064,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
xenos::MsaaSamples::k2X) {
// 2x -> 4x.
// Vertical samples (second bit) of 4x destination to
// vertical sample (01 or 03, depending on support) of 2x
// source.
// vertical sample (1, 0 for native 2x, or 0, 3 for 2x as
// 4x) of 2x source.
a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample,
dxbc::Src::LU(1));
host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW);
if (!msaa_2x_supported_) {
if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(0, 0b1000),
host_depth_source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1),
dxbc::Src::LU(1), host_depth_source_sample,
host_depth_source_sample);
@ -4070,13 +4095,17 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
xenos::MsaaSamples::k2X) {
// 2x -> 1x.
// Vertical pixels of 2x destination to vertical samples
// (01 or 03, depending on support) of 1x source.
// (1, 0 for native 2x, or 0, 3 for 2x as 4x) of 1x
// source.
a.OpAnd(dxbc::Dest::R(0, 0b1000),
dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::LU(1));
host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW);
if (!msaa_2x_supported_) {
if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(0, 0b1000),
host_depth_source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1),
dxbc::Src::LU(1), host_depth_source_sample,
host_depth_source_sample);
@ -4086,21 +4115,26 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::LU(1));
} else {
// 1x -> 2x.
// Vertical samples (first or second bit, depending on
// support) of 2x destination to vertical pixels of 1x
// source.
// Vertical samples (1, 0 in the first bit for native 2x
// or 0, 1 in the second bit for 2x as 4x) of 2x
// destination to vertical pixels of 1x source.
// Using r0.w (not needed without source MSAA) as a
// temporary.
if (!msaa_2x_supported_) {
if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31),
dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY), dest_sample);
a.OpXOr(dxbc::Dest::R(0, 0b0010),
dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::LU(1));
} else {
a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample,
dxbc::Src::LU(1));
a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31),
dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::R(0, dxbc::Src::kWWWW));
}
a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31),
dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY),
msaa_2x_supported_
? dest_sample
: dxbc::Src::R(0, dxbc::Src::kWWWW));
}
}
}
@ -6038,7 +6072,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
dxbc::Src::LU(xenos::kEdramTileWidthSamples >>
uint32_t(format_is_64bpp)),
dxbc::Src::VThreadIDInGroup(dxbc::Src::kXXXX));
// r0.w for 4x MSAA = pixel Y in the group
// r0.w for 2x MSAA = pixel Y in the group
a.OpUShR(dxbc::Dest::R(0, 0b1000),
dxbc::Src::VThreadIDInGroup(dxbc::Src::kYYYY), dxbc::Src::LU(1));
// r0.w = free
@ -6067,7 +6101,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
if (key.msaa_samples != xenos::MsaaSamples::k1X) {
// Sample index.
// For 4x, bit 0 for horizontal, bit 1 for vertical.
// For 2x, only vertical - but 0 or 1 for true 2x MSAA or 0 or 3 for 2x MSAA
// For 2x, only vertical - but 1 or 0 for true 2x MSAA or 0 or 3 for 2x MSAA
// via two samples of 4x.
// r0.w = vertical sample index
a.OpAnd(dxbc::Dest::R(0, 0b1000),
@ -6077,11 +6111,17 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(31), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kWWWW),
dxbc::Src::VThreadIDInGroup(dxbc::Src::kXXXX));
} else if (!msaa_2x_supported_) {
// r0.w = source sample 0 or 3 for 2x MSAA emulated via 4x
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kWWWW),
dxbc::Src::R(0, dxbc::Src::kWWWW));
} else {
if (msaa_2x_supported_) {
// r0.w = source sample 1 or 0 for native 2x MSAA
a.OpXOr(dxbc::Dest::R(0, 0b1000), dxbc::Src::R(0, dxbc::Src::kWWWW),
dxbc::Src::LU(1));
} else {
// r0.w = source sample 0 or 3 for 2x MSAA emulated via 4x
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kWWWW),
dxbc::Src::R(0, dxbc::Src::kWWWW));
}
}
}

View File

@ -768,15 +768,16 @@ class DxbcShaderTranslator : public ShaderTranslator {
void ExportToMemory();
void CompleteVertexOrDomainShader();
// For RTV, adds the sample to coverage_temp.coverage_temp_component if it
// passes alpha to mask (except for sample 0, which overwrites the output to
// initialize it).
// passes alpha to mask (or, if initialize == true (for the first sample
// tested), overwrites the output to initialize it).
// For ROV, masks the sample away from coverage_temp.coverage_temp_component
// if it doesn't pass alpha to mask.
// threshold_offset and temp.temp_component can be the same if needed.
void CompletePixelShader_AlphaToMaskSample(
uint32_t sample_index, float threshold_base, dxbc::Src threshold_offset,
float threshold_offset_scale, uint32_t coverage_temp,
uint32_t coverage_temp_component, uint32_t temp, uint32_t temp_component);
bool initialize, uint32_t sample_index, float threshold_base,
dxbc::Src threshold_offset, float threshold_offset_scale,
uint32_t coverage_temp, uint32_t coverage_temp_component, uint32_t temp,
uint32_t temp_component);
// Performs alpha to coverage if necessary, for RTV, writing to oMask, and for
// ROV, updating the low (coverage) bits of system_temp_rov_params_.x. Done
// manually even for RTV to maintain the guest dithering pattern and because

View File

@ -1828,9 +1828,10 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
}
void DxbcShaderTranslator::CompletePixelShader_AlphaToMaskSample(
uint32_t sample_index, float threshold_base, dxbc::Src threshold_offset,
float threshold_offset_scale, uint32_t coverage_temp,
uint32_t coverage_temp_component, uint32_t temp, uint32_t temp_component) {
bool initialize, uint32_t sample_index, float threshold_base,
dxbc::Src threshold_offset, float threshold_offset_scale,
uint32_t coverage_temp, uint32_t coverage_temp_component, uint32_t temp,
uint32_t temp_component) {
dxbc::Dest temp_dest(dxbc::Dest::R(temp, 1 << temp_component));
dxbc::Src temp_src(dxbc::Src::R(temp).Select(temp_component));
// Calculate the threshold.
@ -1858,16 +1859,16 @@ void DxbcShaderTranslator::CompletePixelShader_AlphaToMaskSample(
// Clear the coverage for samples that have failed the test.
a_.OpAnd(coverage_dest, coverage_src, temp_src);
} else {
if (sample_index) {
// Not first sample - add.
a_.OpAnd(temp_dest, temp_src, dxbc::Src::LU(uint32_t(1) << sample_index));
a_.OpOr(coverage_dest, coverage_src, temp_src);
} else {
// First sample - initialize.
if (initialize) {
// First sample tested - initialize.
assert_true(coverage_temp != temp ||
coverage_temp_component != temp_component);
a_.OpAnd(coverage_dest, temp_src,
dxbc::Src::LU(uint32_t(1) << sample_index));
} else {
// Not first sample tested - add.
a_.OpAnd(temp_dest, temp_src, dxbc::Src::LU(uint32_t(1) << sample_index));
a_.OpOr(coverage_dest, coverage_src, temp_src);
}
}
}
@ -1930,32 +1931,38 @@ void DxbcShaderTranslator::CompletePixelShader_AlphaToMask() {
// 4x MSAA.
// Sample 0 must be checked first - CompletePixelShader_AlphaToMaskSample
// initializes the result for sample index 0.
CompletePixelShader_AlphaToMaskSample(0, 0.75f, temp_x_src, 1.0f / 16.0f,
coverage_temp,
CompletePixelShader_AlphaToMaskSample(true, 0, 0.75f, temp_x_src,
1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample(1, 0.25f, temp_x_src, 1.0f / 16.0f,
coverage_temp,
CompletePixelShader_AlphaToMaskSample(false, 1, 0.25f, temp_x_src,
1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample(2, 0.5f, temp_x_src, 1.0f / 16.0f,
coverage_temp,
CompletePixelShader_AlphaToMaskSample(false, 2, 0.5f, temp_x_src,
1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample(3, 1.0f, temp_x_src, 1.0f / 16.0f,
coverage_temp,
CompletePixelShader_AlphaToMaskSample(false, 3, 1.0f, temp_x_src,
1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1);
// 2x MSAA (as 2x or samples 0 and 3 of 4x).
// 2x MSAA.
// With ROV, using guest sample indices.
// Without ROV:
// - Native 2x: top (0 in Xenia) is 1 in D3D10.1+, bottom (1 in Xenia) is 0.
// - 2x as 4x: top is 0, bottom is 3.
a_.OpElse();
CompletePixelShader_AlphaToMaskSample(0, 0.5f, temp_x_src, 1.0f / 8.0f,
coverage_temp,
coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample(
(!edram_rov_used_ && !msaa_2x_supported_) ? 3 : 1, 1.0f, temp_x_src,
1.0f / 8.0f, coverage_temp, coverage_temp_component, temp, 1);
true, (!edram_rov_used_ && msaa_2x_supported_) ? 1 : 0, 0.5f,
temp_x_src, 1.0f / 8.0f, coverage_temp, coverage_temp_component, temp,
1);
CompletePixelShader_AlphaToMaskSample(
false, edram_rov_used_ ? 1 : (msaa_2x_supported_ ? 0 : 3), 1.0f,
temp_x_src, 1.0f / 8.0f, coverage_temp, coverage_temp_component, temp,
1);
// Close the 4x check.
a_.OpEndIf();
}
// MSAA is disabled.
a_.OpElse();
CompletePixelShader_AlphaToMaskSample(0, 1.0f, temp_x_src, 1.0f / 4.0f,
CompletePixelShader_AlphaToMaskSample(true, 0, 1.0f, temp_x_src, 1.0f / 4.0f,
coverage_temp, coverage_temp_component,
temp, 1);
// Close the 2x/4x check.