[D3D12] Take 0 being bottom-right sample with 2x MSAA in D3D12 into account

This commit is contained in:
Triang3l 2021-06-06 20:58:50 +03:00
parent c4a5048e62
commit 3f156f26c4
3 changed files with 130 additions and 82 deletions

View File

@ -3081,10 +3081,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
uint32_t source_tile_pixel_x_reg = 0; uint32_t source_tile_pixel_x_reg = 0;
uint32_t source_tile_pixel_y_reg = 0; uint32_t source_tile_pixel_y_reg = 0;
// First sample bit at 4x - horizontal sample. // First sample bit at 4x in Direct3D 10.1+ - horizontal sample.
// Second sample bit at 4x - vertical sample. // Second sample bit at 4x in Direct3D 10.1+ - vertical sample.
// At 2x, the vertical sample is either the first or the second bit // At 2x:
// depending on whether 2x is emulated as 4x. // - Native 2x: top is 1 in Direct3D 10.1+, bottom is 0.
// - 2x as 4x: top is 0, bottom is 3.
if (!source_is_64bpp && dest_is_64bpp) { if (!source_is_64bpp && dest_is_64bpp) {
// 32bpp -> 64bpp, need two samples of the source. // 32bpp -> 64bpp, need two samples of the source.
@ -3113,14 +3114,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// D p0,0 s1 = S p0,0 s0,1 | S p0,0 s1,1 // D p0,0 s1 = S p0,0 s0,1 | S p0,0 s1,1
// D p1,0 s0 = S p1,0 s0,0 | S p1,0 s1,0 // D p1,0 s0 = S p1,0 s0,0 | S p1,0 s1,0
// D p1,0 s1 = S p1,0 s0,1 | S p1,0 s1,1 // D p1,0 s1 = S p1,0 s0,1 | S p1,0 s1,1
// Pixel index can be reused. Sample 0 should become samples 01, // Pixel index can be reused. Sample 1 (for native 2x) or 0 (for 2x as
// sample 1 or 3 should become samples 23. // 4x) should become samples 01, sample 0 or 3 should become samples 23.
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpIShL(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(1)); a.OpXOr(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(1));
a.OpIShL(dxbc::Dest::R(1, 0b0100), source_sample, dxbc::Src::LU(1));
} else { } else {
a.OpAnd(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(0b10)); a.OpAnd(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(0b10));
} }
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
} else { } else {
// 32bpp -> 64bpp, 4x -> 1x. // 32bpp -> 64bpp, 4x -> 1x.
// 1 destination horizontal pixel = 2 source horizontal samples. // 1 destination horizontal pixel = 2 source horizontal samples.
@ -3201,11 +3203,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ); source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
// 64bpp -> 32bpp, 4x -> 2x. // 64bpp -> 32bpp, 4x -> 2x.
// Destination vertical samples (first or second bit, depending on // Destination vertical samples (1/0 in the first bit for native 2x or
// support) = source vertical samples (second bit). // 0/1 in the second bit for 2x as 4x) = source vertical samples
// (second bit).
if (msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1),
dxbc::Src::LU(1), dest_sample, source_sample); dxbc::Src::LU(1), dest_sample, source_sample);
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample,
dxbc::Src::LU(1 << 1));
} else { } else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1),
dxbc::Src::LU(0), source_sample, dest_sample); dxbc::Src::LU(0), source_sample, dest_sample);
@ -3244,18 +3249,21 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// Same BPP, 4x -> 1x/2x. // Same BPP, 4x -> 1x/2x.
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
// Same BPP, 4x -> 2x. // Same BPP, 4x -> 2x.
// Horizontal pixels to samples. Vertical sample (first or second bit, // Horizontal pixels to samples. Vertical sample (1/0 in the first bit
// depending on support) to second sample bit. // for native 2x or 0/1 in the second bit for 2x as 4x) to second
// sample bit.
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(31), a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(31),
dxbc::Src::LU(1), dest_sample, dxbc::Src::LU(1), dest_sample,
dxbc::Src::R(0, dxbc::Src::kXXXX)); dxbc::Src::R(0, dxbc::Src::kXXXX));
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample,
dxbc::Src::LU(1 << 1));
} else { } else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1),
dxbc::Src::LU(0), dxbc::Src::R(0, dxbc::Src::kXXXX), dxbc::Src::LU(0), dxbc::Src::R(0, dxbc::Src::kXXXX),
dest_sample); dest_sample);
} }
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
a.OpUShR(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(0, dxbc::Src::kXXXX), a.OpUShR(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(0, dxbc::Src::kXXXX),
dxbc::Src::LU(1)); dxbc::Src::LU(1));
source_tile_pixel_x_reg = 1; source_tile_pixel_x_reg = 1;
@ -3292,10 +3300,12 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
if (key.source_msaa_samples == xenos::MsaaSamples::k2X) { if (key.source_msaa_samples == xenos::MsaaSamples::k2X) {
// 2x -> 4x. // 2x -> 4x.
// Vertical samples (second bit) of 4x destination to vertical sample // Vertical samples (second bit) of 4x destination to vertical sample
// (01 or 03, depending on support) of 2x source. // (1, 0 for native 2x, or 0, 3 for 2x as 4x) of 2x source.
a.OpUShR(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(1)); a.OpUShR(dxbc::Dest::R(1, 0b0100), dest_sample, dxbc::Src::LU(1));
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ); source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (!msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), dxbc::Src::LU(1),
source_sample, source_sample); source_sample, source_sample);
} }
@ -3312,12 +3322,14 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// 1x/2x -> different 1x/2x. // 1x/2x -> different 1x/2x.
if (key.source_msaa_samples == xenos::MsaaSamples::k2X) { if (key.source_msaa_samples == xenos::MsaaSamples::k2X) {
// 2x -> 1x. // 2x -> 1x.
// Vertical pixels of 1x destination to vertical samples (01 or 03, // Vertical pixels of 1x destination to vertical samples (1, 0 for
// depending on support) of 2x source. // native 2x, or 0, 3 for 2x as 4x) of 2x source.
a.OpAnd(dxbc::Dest::R(1, 0b0100), dxbc::Src::R(0, dxbc::Src::kYYYY), a.OpAnd(dxbc::Dest::R(1, 0b0100), dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::LU(1)); dxbc::Src::LU(1));
source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ); source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ);
if (!msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(1, 0b0100), source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(1, 0b0100), dxbc::Src::LU(1), dxbc::Src::LU(1),
source_sample, source_sample); source_sample, source_sample);
} }
@ -3326,15 +3338,20 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
source_tile_pixel_y_reg = 1; source_tile_pixel_y_reg = 1;
} else { } else {
// 1x -> 2x. // 1x -> 2x.
// Vertical samples (first or second bit, depending on support) of 2x // Vertical samples (1/0 in the first bit for native 2x or 0/1 in the
// destination to vertical pixels of 1x source. // second bit for 2x as 4x) of 2x destination to vertical pixels of 1x
if (!msaa_2x_supported_) { // source.
if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(1, 0b0010), dxbc::Src::LU(31), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY), dest_sample);
a.OpXOr(dxbc::Dest::R(1, 0b0010), dxbc::Src::R(1, dxbc::Src::kYYYY),
dxbc::Src::LU(1));
} else {
a.OpUShR(dxbc::Dest::R(1, 0b0010), dest_sample, dxbc::Src::LU(1)); a.OpUShR(dxbc::Dest::R(1, 0b0010), dest_sample, dxbc::Src::LU(1));
}
a.OpBFI(dxbc::Dest::R(1, 0b0010), dxbc::Src::LU(31), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(1, 0b0010), dxbc::Src::LU(31), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY), dxbc::Src::R(0, dxbc::Src::kYYYY),
msaa_2x_supported_ ? dest_sample dxbc::Src::R(1, dxbc::Src::kYYYY));
: dxbc::Src::R(1, dxbc::Src::kYYYY)); }
source_tile_pixel_y_reg = 1; source_tile_pixel_y_reg = 1;
} }
} }
@ -3935,13 +3952,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::LU(1), dxbc::Src::R(0, dxbc::Src::kXXXX), dxbc::Src::LU(1), dxbc::Src::R(0, dxbc::Src::kXXXX),
dest_sample); dest_sample);
} }
// Vertical sample index in bit 0 for true 2x or in bit 1 for // Vertical sample index as 1 or 0 in bit 0 for true 2x or as 0
// 4x or for 2x emulated as 4x. // or 1 in bit 1 for 4x or for 2x emulated as 4x.
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X && if (key.dest_msaa_samples == xenos::MsaaSamples::k2X &&
msaa_2x_supported_) { msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31), a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31),
dxbc::Src::LU(1), dxbc::Src::R(0, dxbc::Src::kYYYY), dxbc::Src::LU(1), dxbc::Src::R(0, dxbc::Src::kYYYY),
dest_sample); dest_sample);
a.OpXOr(dxbc::Dest::R(0, 0b0010),
dxbc::Src::R(0, dxbc::Src::kYYYY), dxbc::Src::LU(1));
} else { } else {
// Using r0.w as a temporary. // Using r0.w as a temporary.
a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample, a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample,
@ -3993,19 +4012,22 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
// 4x -> 1x/2x. // 4x -> 1x/2x.
if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) {
// 4x -> 2x. // 4x -> 2x.
// Horizontal pixels to samples. Vertical sample (first or // Horizontal pixels to samples. Vertical sample (1, 0 in
// second bit, depending on support) to second sample bit. // the first bit for native 2x or 0, 1 in the second bit for
// 2x as 4x) to second sample bit.
host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW);
if (msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(31), a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(31),
dxbc::Src::LU(1), dest_sample, dxbc::Src::LU(1), dest_sample,
dxbc::Src::R(0, dxbc::Src::kXXXX)); dxbc::Src::R(0, dxbc::Src::kXXXX));
a.OpXOr(dxbc::Dest::R(0, 0b1000),
host_depth_source_sample, dxbc::Src::LU(1 << 1));
} else { } else {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1),
dxbc::Src::LU(0), dxbc::Src::LU(0),
dxbc::Src::R(0, dxbc::Src::kXXXX), dest_sample); dxbc::Src::R(0, dxbc::Src::kXXXX), dest_sample);
} }
host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW);
a.OpUShR(dxbc::Dest::R(0, 0b0001), a.OpUShR(dxbc::Dest::R(0, 0b0001),
dxbc::Src::R(0, dxbc::Src::kXXXX), dxbc::Src::R(0, dxbc::Src::kXXXX),
dxbc::Src::LU(1)); dxbc::Src::LU(1));
@ -4042,13 +4064,16 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
xenos::MsaaSamples::k2X) { xenos::MsaaSamples::k2X) {
// 2x -> 4x. // 2x -> 4x.
// Vertical samples (second bit) of 4x destination to // Vertical samples (second bit) of 4x destination to
// vertical sample (01 or 03, depending on support) of 2x // vertical sample (1, 0 for native 2x, or 0, 3 for 2x as
// source. // 4x) of 2x source.
a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample, a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample,
dxbc::Src::LU(1)); dxbc::Src::LU(1));
host_depth_source_sample = host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW); dxbc::Src::R(0, dxbc::Src::kWWWW);
if (!msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(0, 0b1000),
host_depth_source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1),
dxbc::Src::LU(1), host_depth_source_sample, dxbc::Src::LU(1), host_depth_source_sample,
host_depth_source_sample); host_depth_source_sample);
@ -4070,13 +4095,17 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
xenos::MsaaSamples::k2X) { xenos::MsaaSamples::k2X) {
// 2x -> 1x. // 2x -> 1x.
// Vertical pixels of 1x destination to vertical samples // Vertical pixels of 1x destination to vertical samples
// (01 or 03, depending on support) of 2x source. // (1, 0 for native 2x, or 0, 3 for 2x as 4x) of 2x
// source.
a.OpAnd(dxbc::Dest::R(0, 0b1000), a.OpAnd(dxbc::Dest::R(0, 0b1000),
dxbc::Src::R(0, dxbc::Src::kYYYY), dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::LU(1)); dxbc::Src::LU(1));
host_depth_source_sample = host_depth_source_sample =
dxbc::Src::R(0, dxbc::Src::kWWWW); dxbc::Src::R(0, dxbc::Src::kWWWW);
if (!msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpXOr(dxbc::Dest::R(0, 0b1000),
host_depth_source_sample, dxbc::Src::LU(1));
} else {
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1),
dxbc::Src::LU(1), host_depth_source_sample, dxbc::Src::LU(1), host_depth_source_sample,
host_depth_source_sample); host_depth_source_sample);
@ -4086,21 +4115,26 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
dxbc::Src::LU(1)); dxbc::Src::LU(1));
} else { } else {
// 1x -> 2x. // 1x -> 2x.
// Vertical samples (first or second bit, depending on // Vertical samples (1, 0 in the first bit for native 2x
// support) of 2x destination to vertical pixels of 1x // or 0, 1 in the second bit for 2x as 4x) of 2x
// source. // destination to vertical pixels of 1x source.
// Using r0.w (not needed without source MSAA) as a // Using r0.w (not needed without source MSAA) as a
// temporary. // temporary.
if (!msaa_2x_supported_) { if (msaa_2x_supported_) {
a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31),
dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY), dest_sample);
a.OpXOr(dxbc::Dest::R(0, 0b0010),
dxbc::Src::R(0, dxbc::Src::kYYYY),
dxbc::Src::LU(1));
} else {
a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample, a.OpUShR(dxbc::Dest::R(0, 0b1000), dest_sample,
dxbc::Src::LU(1)); dxbc::Src::LU(1));
}
a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31), a.OpBFI(dxbc::Dest::R(0, 0b0010), dxbc::Src::LU(31),
dxbc::Src::LU(1), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kYYYY), dxbc::Src::R(0, dxbc::Src::kYYYY),
msaa_2x_supported_ dxbc::Src::R(0, dxbc::Src::kWWWW));
? dest_sample }
: dxbc::Src::R(0, dxbc::Src::kWWWW));
} }
} }
} }
@ -6038,7 +6072,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
dxbc::Src::LU(xenos::kEdramTileWidthSamples >> dxbc::Src::LU(xenos::kEdramTileWidthSamples >>
uint32_t(format_is_64bpp)), uint32_t(format_is_64bpp)),
dxbc::Src::VThreadIDInGroup(dxbc::Src::kXXXX)); dxbc::Src::VThreadIDInGroup(dxbc::Src::kXXXX));
// r0.w for 4x MSAA = pixel Y in the group // r0.w for 2x MSAA = pixel Y in the group
a.OpUShR(dxbc::Dest::R(0, 0b1000), a.OpUShR(dxbc::Dest::R(0, 0b1000),
dxbc::Src::VThreadIDInGroup(dxbc::Src::kYYYY), dxbc::Src::LU(1)); dxbc::Src::VThreadIDInGroup(dxbc::Src::kYYYY), dxbc::Src::LU(1));
// r0.w = free // r0.w = free
@ -6067,7 +6101,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
if (key.msaa_samples != xenos::MsaaSamples::k1X) { if (key.msaa_samples != xenos::MsaaSamples::k1X) {
// Sample index. // Sample index.
// For 4x, bit 0 for horizontal, bit 1 for vertical. // For 4x, bit 0 for horizontal, bit 1 for vertical.
// For 2x, only vertical - but 0 or 1 for true 2x MSAA or 0 or 3 for 2x MSAA // For 2x, only vertical - but 1 or 0 for true 2x MSAA or 0 or 3 for 2x MSAA
// via two samples of 4x. // via two samples of 4x.
// r0.w = vertical sample index // r0.w = vertical sample index
a.OpAnd(dxbc::Dest::R(0, 0b1000), a.OpAnd(dxbc::Dest::R(0, 0b1000),
@ -6077,13 +6111,19 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(31), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(31), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kWWWW), dxbc::Src::R(0, dxbc::Src::kWWWW),
dxbc::Src::VThreadIDInGroup(dxbc::Src::kXXXX)); dxbc::Src::VThreadIDInGroup(dxbc::Src::kXXXX));
} else if (!msaa_2x_supported_) { } else {
if (msaa_2x_supported_) {
// r0.w = source sample 1 or 0 for native 2x MSAA
a.OpXOr(dxbc::Dest::R(0, 0b1000), dxbc::Src::R(0, dxbc::Src::kWWWW),
dxbc::Src::LU(1));
} else {
// r0.w = source sample 0 or 3 for 2x MSAA emulated via 4x // r0.w = source sample 0 or 3 for 2x MSAA emulated via 4x
a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1), dxbc::Src::LU(1), a.OpBFI(dxbc::Dest::R(0, 0b1000), dxbc::Src::LU(1), dxbc::Src::LU(1),
dxbc::Src::R(0, dxbc::Src::kWWWW), dxbc::Src::R(0, dxbc::Src::kWWWW),
dxbc::Src::R(0, dxbc::Src::kWWWW)); dxbc::Src::R(0, dxbc::Src::kWWWW));
} }
} }
}
// Calculate the EDRAM address into r0.x (in buffer texels - uints for 32bpp // Calculate the EDRAM address into r0.x (in buffer texels - uints for 32bpp
// or uint2s for 64bpp). // or uint2s for 64bpp).

View File

@ -768,15 +768,16 @@ class DxbcShaderTranslator : public ShaderTranslator {
void ExportToMemory(); void ExportToMemory();
void CompleteVertexOrDomainShader(); void CompleteVertexOrDomainShader();
// For RTV, adds the sample to coverage_temp.coverage_temp_component if it // For RTV, adds the sample to coverage_temp.coverage_temp_component if it
// passes alpha to mask (except for sample 0, which overwrites the output to // passes alpha to mask (or, if initialize == true (for the first sample
// initialize it). // tested), overwrites the output to initialize it).
// For ROV, masks the sample away from coverage_temp.coverage_temp_component // For ROV, masks the sample away from coverage_temp.coverage_temp_component
// if it doesn't pass alpha to mask. // if it doesn't pass alpha to mask.
// threshold_offset and temp.temp_component can be the same if needed. // threshold_offset and temp.temp_component can be the same if needed.
void CompletePixelShader_AlphaToMaskSample( void CompletePixelShader_AlphaToMaskSample(
uint32_t sample_index, float threshold_base, dxbc::Src threshold_offset, bool initialize, uint32_t sample_index, float threshold_base,
float threshold_offset_scale, uint32_t coverage_temp, dxbc::Src threshold_offset, float threshold_offset_scale,
uint32_t coverage_temp_component, uint32_t temp, uint32_t temp_component); uint32_t coverage_temp, uint32_t coverage_temp_component, uint32_t temp,
uint32_t temp_component);
// Performs alpha to coverage if necessary, for RTV, writing to oMask, and for // Performs alpha to coverage if necessary, for RTV, writing to oMask, and for
// ROV, updating the low (coverage) bits of system_temp_rov_params_.x. Done // ROV, updating the low (coverage) bits of system_temp_rov_params_.x. Done
// manually even for RTV to maintain the guest dithering pattern and because // manually even for RTV to maintain the guest dithering pattern and because

View File

@ -1828,9 +1828,10 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() {
} }
void DxbcShaderTranslator::CompletePixelShader_AlphaToMaskSample( void DxbcShaderTranslator::CompletePixelShader_AlphaToMaskSample(
uint32_t sample_index, float threshold_base, dxbc::Src threshold_offset, bool initialize, uint32_t sample_index, float threshold_base,
float threshold_offset_scale, uint32_t coverage_temp, dxbc::Src threshold_offset, float threshold_offset_scale,
uint32_t coverage_temp_component, uint32_t temp, uint32_t temp_component) { uint32_t coverage_temp, uint32_t coverage_temp_component, uint32_t temp,
uint32_t temp_component) {
dxbc::Dest temp_dest(dxbc::Dest::R(temp, 1 << temp_component)); dxbc::Dest temp_dest(dxbc::Dest::R(temp, 1 << temp_component));
dxbc::Src temp_src(dxbc::Src::R(temp).Select(temp_component)); dxbc::Src temp_src(dxbc::Src::R(temp).Select(temp_component));
// Calculate the threshold. // Calculate the threshold.
@ -1858,16 +1859,16 @@ void DxbcShaderTranslator::CompletePixelShader_AlphaToMaskSample(
// Clear the coverage for samples that have failed the test. // Clear the coverage for samples that have failed the test.
a_.OpAnd(coverage_dest, coverage_src, temp_src); a_.OpAnd(coverage_dest, coverage_src, temp_src);
} else { } else {
if (sample_index) { if (initialize) {
// Not first sample - add. // First sample tested - initialize.
a_.OpAnd(temp_dest, temp_src, dxbc::Src::LU(uint32_t(1) << sample_index));
a_.OpOr(coverage_dest, coverage_src, temp_src);
} else {
// First sample - initialize.
assert_true(coverage_temp != temp || assert_true(coverage_temp != temp ||
coverage_temp_component != temp_component); coverage_temp_component != temp_component);
a_.OpAnd(coverage_dest, temp_src, a_.OpAnd(coverage_dest, temp_src,
dxbc::Src::LU(uint32_t(1) << sample_index)); dxbc::Src::LU(uint32_t(1) << sample_index));
} else {
// Not first sample tested - add.
a_.OpAnd(temp_dest, temp_src, dxbc::Src::LU(uint32_t(1) << sample_index));
a_.OpOr(coverage_dest, coverage_src, temp_src);
} }
} }
} }
@ -1930,32 +1931,38 @@ void DxbcShaderTranslator::CompletePixelShader_AlphaToMask() {
// 4x MSAA. // 4x MSAA.
// Sample 0 must be checked first - CompletePixelShader_AlphaToMaskSample // Sample 0 must be checked first - it is tested with initialize == true,
// initializes the result for sample index 0. // which makes CompletePixelShader_AlphaToMaskSample initialize the result.
CompletePixelShader_AlphaToMaskSample(0, 0.75f, temp_x_src, 1.0f / 16.0f, CompletePixelShader_AlphaToMaskSample(true, 0, 0.75f, temp_x_src,
coverage_temp, 1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1); coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample(1, 0.25f, temp_x_src, 1.0f / 16.0f, CompletePixelShader_AlphaToMaskSample(false, 1, 0.25f, temp_x_src,
coverage_temp, 1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1); coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample(2, 0.5f, temp_x_src, 1.0f / 16.0f, CompletePixelShader_AlphaToMaskSample(false, 2, 0.5f, temp_x_src,
coverage_temp, 1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1); coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample(3, 1.0f, temp_x_src, 1.0f / 16.0f, CompletePixelShader_AlphaToMaskSample(false, 3, 1.0f, temp_x_src,
coverage_temp, 1.0f / 16.0f, coverage_temp,
coverage_temp_component, temp, 1); coverage_temp_component, temp, 1);
// 2x MSAA (as 2x or samples 0 and 3 of 4x). // 2x MSAA.
// With ROV, using guest sample indices.
// Without ROV:
// - Native 2x: top (0 in Xenia) is 1 in D3D10.1+, bottom (1 in Xenia) is 0.
// - 2x as 4x: top is 0, bottom is 3.
a_.OpElse(); a_.OpElse();
CompletePixelShader_AlphaToMaskSample(0, 0.5f, temp_x_src, 1.0f / 8.0f,
coverage_temp,
coverage_temp_component, temp, 1);
CompletePixelShader_AlphaToMaskSample( CompletePixelShader_AlphaToMaskSample(
(!edram_rov_used_ && !msaa_2x_supported_) ? 3 : 1, 1.0f, temp_x_src, true, (!edram_rov_used_ && msaa_2x_supported_) ? 1 : 0, 0.5f,
1.0f / 8.0f, coverage_temp, coverage_temp_component, temp, 1); temp_x_src, 1.0f / 8.0f, coverage_temp, coverage_temp_component, temp,
1);
CompletePixelShader_AlphaToMaskSample(
false, edram_rov_used_ ? 1 : (msaa_2x_supported_ ? 0 : 3), 1.0f,
temp_x_src, 1.0f / 8.0f, coverage_temp, coverage_temp_component, temp,
1);
// Close the 4x check. // Close the 4x check.
a_.OpEndIf(); a_.OpEndIf();
} }
// MSAA is disabled. // MSAA is disabled.
a_.OpElse(); a_.OpElse();
CompletePixelShader_AlphaToMaskSample(0, 1.0f, temp_x_src, 1.0f / 4.0f, CompletePixelShader_AlphaToMaskSample(true, 0, 1.0f, temp_x_src, 1.0f / 4.0f,
coverage_temp, coverage_temp_component, coverage_temp, coverage_temp_component,
temp, 1); temp, 1);
// Close the 2x/4x check. // Close the 2x/4x check.