[D3D12] Use udiv by constant tile size + minor transfer cleanup
Drivers compile that to a multiplication and a shift anyway.
This commit is contained in:
parent
207e11c8d2
commit
e2f632f8fa
|
@ -2918,73 +2918,29 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
uint32_t draw_resolution_scale_x = this->draw_resolution_scale_x();
|
||||
uint32_t draw_resolution_scale_y = this->draw_resolution_scale_y();
|
||||
|
||||
uint32_t tile_width_samples_scaled =
|
||||
uint32_t tile_width_samples =
|
||||
xenos::kEdramTileWidthSamples * draw_resolution_scale_x;
|
||||
uint32_t tile_height_samples_scaled =
|
||||
uint32_t tile_height_samples =
|
||||
xenos::kEdramTileHeightSamples * draw_resolution_scale_y;
|
||||
|
||||
// Split the destination pixel index into 32bpp tile in r0.z and
|
||||
// Split the destination pixel index into 32bpp tile in r0.zw and
|
||||
// 32bpp-tile-relative pixel index in r0.xy.
|
||||
// r0.xy = pixel XY as uint
|
||||
a.OpFToU(dxbc::Dest::R(0, 0b0011), dxbc::Src::V1D(kInputRegisterPosition));
|
||||
uint32_t dest_sample_width_log2 =
|
||||
uint32_t(dest_is_64bpp) +
|
||||
uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k4X);
|
||||
uint32_t dest_sample_height_log2 =
|
||||
uint32_t dest_tile_width_pixels =
|
||||
tile_width_samples >>
|
||||
(uint32_t(dest_is_64bpp) +
|
||||
uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k4X));
|
||||
uint32_t dest_tile_height_pixels =
|
||||
tile_height_samples >>
|
||||
uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k2X);
|
||||
uint32_t dest_tile_width_divide_scale, dest_tile_width_divide_upper_shift;
|
||||
draw_util::GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
draw_resolution_scale_x, dest_tile_width_divide_scale,
|
||||
dest_tile_width_divide_upper_shift);
|
||||
assert_true(dest_tile_width_divide_upper_shift >= dest_sample_width_log2);
|
||||
// Need the host tile size in pixels, not samples.
|
||||
dest_tile_width_divide_upper_shift -= dest_sample_width_log2;
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"D3D12RenderTargetCache EDRAM range ownership transfer shader generation "
|
||||
"supports Y draw resolution scaling factors of only up to 3");
|
||||
if (draw_resolution_scale_y == 3) {
|
||||
// r0.zw = upper 32 bits in the division process of pixel XY by pixel count
|
||||
// in a 32bpp tile
|
||||
a.OpUMul(dxbc::Dest::R(0, 0b1100), dxbc::Dest::Null(),
|
||||
dxbc::Src::R(0, 0b0100 << 4),
|
||||
dxbc::Src::LU(0, 0, dest_tile_width_divide_scale,
|
||||
draw_util::kDivideScale3));
|
||||
// r0.zw = 32bpp tile XY index
|
||||
a.OpUShR(dxbc::Dest::R(0, 0b1100), dxbc::Src::R(0),
|
||||
dxbc::Src::LU(
|
||||
0, 0, dest_tile_width_divide_upper_shift,
|
||||
draw_util::kDivideUpperShift3 + 4 - dest_sample_height_log2));
|
||||
// r0.xy = destination pixel XY index within the 32bpp tile
|
||||
a.OpIMAd(
|
||||
dxbc::Dest::R(0, 0b0011), dxbc::Src::R(0, 0b1110),
|
||||
dxbc::Src::LI(
|
||||
-int32_t((80 * draw_resolution_scale_x) >> dest_sample_width_log2),
|
||||
-int32_t((16 * draw_resolution_scale_y) >> dest_sample_height_log2),
|
||||
0, 0),
|
||||
dxbc::Src::R(0, 0b0100));
|
||||
} else {
|
||||
assert_true(draw_resolution_scale_y <= 2);
|
||||
uint32_t dest_tile_height_pixels_log2 =
|
||||
(draw_resolution_scale_y == 2 ? 5 : 4) - dest_sample_height_log2;
|
||||
// r0.z = upper 32 bits in the division process of pixel X by pixel count in
|
||||
// a 32bpp tile
|
||||
a.OpUMul(dxbc::Dest::R(0, 0b0100), dxbc::Dest::Null(),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(dest_tile_width_divide_scale));
|
||||
// r0.zw = 32bpp tile XY index
|
||||
a.OpUShR(dxbc::Dest::R(0, 0b1100), dxbc::Src::R(0, 0b0110 << 4),
|
||||
dxbc::Src::LU(0, 0, dest_tile_width_divide_upper_shift,
|
||||
dest_tile_height_pixels_log2));
|
||||
// r0.x = destination pixel X index within the 32bpp tile
|
||||
a.OpIMAd(dxbc::Dest::R(0, 0b0001), dxbc::Src::R(0, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LI(-int32_t((80 * draw_resolution_scale_x) >>
|
||||
dest_sample_width_log2)),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX));
|
||||
// r0.y = destination pixel Y index within the 32bpp tile
|
||||
a.OpAnd(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(0, dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU((1 << dest_tile_height_pixels_log2) - 1));
|
||||
}
|
||||
a.OpUDiv(dxbc::Dest::R(0, 0b1100), dxbc::Dest::R(0, 0b0011),
|
||||
dxbc::Src::R(0, 0b01000100),
|
||||
dxbc::Src::LU(dest_tile_width_pixels, dest_tile_height_pixels,
|
||||
dest_tile_width_pixels, dest_tile_height_pixels));
|
||||
|
||||
// r1.x = destination pitch in 32bpp tiles
|
||||
a.OpUBFE(dxbc::Dest::R(1, 0b0001), dxbc::Src::LU(xenos::kEdramPitchTilesBits),
|
||||
dxbc::Src::LU(0),
|
||||
|
@ -3305,7 +3261,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// Copying between color and depth / stencil - swap 40-32bpp-sample columns
|
||||
// in the pixel index within the source 32bpp tile using r1.w as temporary.
|
||||
uint32_t source_32bpp_tile_half_pixels =
|
||||
tile_width_samples_scaled >> (1 + source_pixel_width_dwords_log2);
|
||||
tile_width_samples >> (1 + source_pixel_width_dwords_log2);
|
||||
a.OpULT(dxbc::Dest::R(1, 0b1000),
|
||||
dxbc::Src::R(source_tile_pixel_x_reg, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(source_32bpp_tile_half_pixels));
|
||||
|
@ -3348,16 +3304,15 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// r1.x = pixel X within the source texture
|
||||
// r2.x = free
|
||||
a.OpUMAd(dxbc::Dest::R(1, 0b0001),
|
||||
dxbc::Src::LU(tile_width_samples_scaled >>
|
||||
source_pixel_width_dwords_log2),
|
||||
dxbc::Src::LU(tile_width_samples >> source_pixel_width_dwords_log2),
|
||||
dxbc::Src::R(2, dxbc::Src::kXXXX),
|
||||
dxbc::Src::R(source_tile_pixel_x_reg, dxbc::Src::kXXXX));
|
||||
// r1.y = pixel Y within the source texture
|
||||
// r1.w = free
|
||||
a.OpUMAd(dxbc::Dest::R(1, 0b0010),
|
||||
dxbc::Src::LU(
|
||||
tile_height_samples_scaled >>
|
||||
uint32_t(key.source_msaa_samples >= xenos::MsaaSamples::k2X)),
|
||||
a.OpUMAd(
|
||||
dxbc::Dest::R(1, 0b0010),
|
||||
dxbc::Src::LU(tile_height_samples >> uint32_t(key.source_msaa_samples >=
|
||||
xenos::MsaaSamples::k2X)),
|
||||
dxbc::Src::R(1, dxbc::Src::kWWWW),
|
||||
dxbc::Src::R(source_tile_pixel_y_reg, dxbc::Src::kYYYY));
|
||||
|
||||
|
@ -3575,9 +3530,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
switch (source_depth_format) {
|
||||
case xenos::DepthRenderTargetFormat::kD24S8: {
|
||||
// Round to the nearest even integer. This seems to be the correct,
|
||||
// adding +0.5 and rounding towards zero results in red instead of
|
||||
// black in the 4D5307E6 clear shader.
|
||||
// Round to the nearest even integer. This seems to be the correct
|
||||
// conversion, adding +0.5 and rounding towards zero results in red
|
||||
// instead of black in the 4D5307E6 clear shader.
|
||||
a.OpMul(dxbc::Dest::R(i, 0b1000), dxbc::Src::R(i, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LF(float(0xFFFFFF)));
|
||||
a.OpRoundNE(dxbc::Dest::R(i, 0b1000),
|
||||
|
@ -3762,9 +3717,9 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
depth_loaded_in_guest_format = true;
|
||||
switch (source_depth_format) {
|
||||
case xenos::DepthRenderTargetFormat::kD24S8: {
|
||||
// Round to the nearest even integer. This seems to be the correct,
|
||||
// adding +0.5 and rounding towards zero results in red instead of
|
||||
// black in the 4D5307E6 clear shader.
|
||||
// Round to the nearest even integer. This seems to be the correct
|
||||
// conversion, adding +0.5 and rounding towards zero results in red
|
||||
// instead of black in the 4D5307E6 clear shader.
|
||||
a.OpMul(dxbc::Dest::R(1, 0b1000), dxbc::Src::R(1, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LF(float(0xFFFFFF)));
|
||||
a.OpRoundNE(dxbc::Dest::R(1, 0b1000),
|
||||
|
@ -3920,12 +3875,11 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// Combine the tile sample index and the tile index into buffer
|
||||
// address to r0.x.
|
||||
a.OpUMAd(dxbc::Dest::R(0, 0b0001),
|
||||
dxbc::Src::LU(tile_width_samples_scaled),
|
||||
dxbc::Src::LU(tile_width_samples),
|
||||
dxbc::Src::R(0, dxbc::Src::kYYYY),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX));
|
||||
a.OpUMAd(dxbc::Dest::R(0, 0b0001),
|
||||
dxbc::Src::LU(tile_width_samples_scaled *
|
||||
tile_height_samples_scaled),
|
||||
dxbc::Src::LU(tile_width_samples * tile_height_samples),
|
||||
dxbc::Src::R(0, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX));
|
||||
// Load from the buffer.
|
||||
|
@ -4102,7 +4056,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// r1.x = free
|
||||
a.OpUMAd(
|
||||
dxbc::Dest::R(0, 0b0001),
|
||||
dxbc::Src::LU(tile_width_samples_scaled >>
|
||||
dxbc::Src::LU(tile_width_samples >>
|
||||
uint32_t(key.host_depth_source_msaa_samples >=
|
||||
xenos::MsaaSamples::k4X)),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||
|
@ -4111,7 +4065,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) {
|
|||
// r0.z = free
|
||||
a.OpUMAd(
|
||||
dxbc::Dest::R(0, 0b0010),
|
||||
dxbc::Src::LU(tile_height_samples_scaled >>
|
||||
dxbc::Src::LU(tile_height_samples >>
|
||||
uint32_t(key.host_depth_source_msaa_samples >=
|
||||
xenos::MsaaSamples::k2X)),
|
||||
dxbc::Src::R(0, dxbc::Src::kZZZZ),
|
||||
|
@ -5933,97 +5887,42 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
// 32bpp is unknown, treating 64bpp tiles as storing 40x16 samples rather than
|
||||
// 80x16 for simplicity of addressing into the texture.
|
||||
|
||||
// Get the parts of the address along Y - tile row index within the dispatch
|
||||
// to r0.w, sample Y within the tile to r0.y.
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"D3D12RenderTargetCache render target dump shader generation supports Y "
|
||||
"draw resolution scaling factors of only up to 3");
|
||||
if (draw_resolution_scale_y == 3) {
|
||||
// Multiplication part of the division by the (16 * scale) tile height,
|
||||
// specifically 48 here, or 16 * 3.
|
||||
// r0.w = (Y * kDivideScale3) >> 32
|
||||
a.OpUMul(dxbc::Dest::R(0, 0b1000), dxbc::Dest::Null(),
|
||||
dxbc::Src::VThreadID(dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU(draw_util::kDivideScale3));
|
||||
// Shift part of the division by 16 * scale.
|
||||
// r0.w = Y tile position
|
||||
a.OpUShR(dxbc::Dest::R(0, 0b1000), dxbc::Src::R(0, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LU(draw_util::kDivideUpperShift3 + 4));
|
||||
// Take the remainder of the performed division to r0.y.
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.w = Y tile position
|
||||
a.OpIMAd(dxbc::Dest::R(0, 0b0010), dxbc::Src::R(0, dxbc::Src::kWWWW),
|
||||
dxbc::Src::LI(-16 * draw_resolution_scale_y),
|
||||
dxbc::Src::VThreadID(dxbc::Src::kYYYY));
|
||||
} else {
|
||||
assert_true(draw_resolution_scale_y <= 2);
|
||||
// Tile height is a power of two, can use bit operations.
|
||||
// Get the tile row index into r0.w.
|
||||
// r0.w = Y tile position.
|
||||
a.OpUShR(dxbc::Dest::R(0, 0b1000), dxbc::Src::VThreadID(dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU(draw_resolution_scale_y == 2 ? 5 : 4));
|
||||
// Get the Y sample position within the tile into r0.y.
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.w = Y tile position
|
||||
a.OpAnd(dxbc::Dest::R(0, 0b0010), dxbc::Src::VThreadID(dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU((16 * draw_resolution_scale_y) - 1));
|
||||
}
|
||||
uint32_t tile_width =
|
||||
(xenos::kEdramTileWidthSamples * draw_resolution_scale_x) >>
|
||||
uint32_t(format_is_64bpp);
|
||||
uint32_t tile_height =
|
||||
xenos::kEdramTileHeightSamples * draw_resolution_scale_y;
|
||||
|
||||
// Get the X tile offset within the dispatch to r0.z.
|
||||
uint32_t tile_width = xenos::kEdramTileWidthSamples * draw_resolution_scale_x;
|
||||
uint32_t tile_width_divide_scale;
|
||||
uint32_t tile_width_divide_upper_shift;
|
||||
draw_util::GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
draw_resolution_scale_x, tile_width_divide_scale,
|
||||
tile_width_divide_upper_shift);
|
||||
if (format_is_64bpp) {
|
||||
tile_width >>= 1;
|
||||
assert_not_zero(tile_width_divide_upper_shift);
|
||||
--tile_width_divide_upper_shift;
|
||||
}
|
||||
// Multiplication part of the division by 80|40 * scale.
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.z = (X * tile_width_divide_scale) >> 32
|
||||
// r0.w = Y tile position
|
||||
a.OpUMul(dxbc::Dest::R(0, 0b0100), dxbc::Dest::Null(),
|
||||
dxbc::Src::VThreadID(dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(tile_width_divide_scale));
|
||||
// Shift part of the division by 80|40 * scale.
|
||||
// Get the parts of the address - tile XY position within the dispatch to
|
||||
// r0.zw, sample XY position within the tile to r0.xy.
|
||||
// r0.x = X sample position within the tile
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.z = X tile position
|
||||
// r0.w = Y tile position
|
||||
a.OpUShR(dxbc::Dest::R(0, 0b0100), dxbc::Src::R(0, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LU(tile_width_divide_upper_shift));
|
||||
a.OpUDiv(dxbc::Dest::R(0, 0b1100), dxbc::Dest::R(0, 0b0011),
|
||||
dxbc::Src::VThreadID(0b01000100),
|
||||
dxbc::Src::LU(tile_width, tile_height, tile_width, tile_height));
|
||||
|
||||
// Extract the dump rectangle tile row pitch to r0.x.
|
||||
// r0.x = dump rectangle pitch in tiles
|
||||
// Extract the dump rectangle tile row pitch to r1.x.
|
||||
// r0.x = X sample position within the tile
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.z = X tile position
|
||||
// r0.w = Y tile position
|
||||
a.OpUBFE(dxbc::Dest::R(0, 0b0001), dxbc::Src::LU(xenos::kEdramPitchTilesBits),
|
||||
// r1.x = dump rectangle pitch in tiles
|
||||
a.OpUBFE(dxbc::Dest::R(1, 0b0001), dxbc::Src::LU(xenos::kEdramPitchTilesBits),
|
||||
dxbc::Src::LU(0),
|
||||
dxbc::Src::CB(kDumpCbufferPitches, kDumpCbufferPitches, 0,
|
||||
dxbc::Src::kXXXX));
|
||||
// Get the tile index in the EDRAM relative to the dump rectangle base tile to
|
||||
// r0.w.
|
||||
// r0.x = free
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.z = X tile position
|
||||
// r0.w = tile index relative to the dump rectangle base
|
||||
a.OpUMAd(dxbc::Dest::R(0, 0b1000), dxbc::Src::R(0, dxbc::Src::kWWWW),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX),
|
||||
dxbc::Src::R(0, dxbc::Src::kZZZZ));
|
||||
|
||||
// Take the X sample index within the tile as the remainder of the division of
|
||||
// the thread index by tile width to r0.x.
|
||||
// r0.x = X sample position within the tile
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.z = free
|
||||
// r0.w = tile index relative to the dump rectangle base
|
||||
a.OpIMAd(dxbc::Dest::R(0, 0b0001), dxbc::Src::R(0, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LI(-int32_t(tile_width)),
|
||||
dxbc::Src::VThreadID(dxbc::Src::kXXXX));
|
||||
// r1.x = free
|
||||
a.OpUMAd(dxbc::Dest::R(0, 0b1000), dxbc::Src::R(0, dxbc::Src::kWWWW),
|
||||
dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||
dxbc::Src::R(0, dxbc::Src::kZZZZ));
|
||||
|
||||
// Extract the index of the first tile of the dispatch in the EDRAM to r0.z.
|
||||
// r0.x = X sample position within the tile
|
||||
|
@ -6053,7 +5952,7 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
xenos::kEdramTileHeightSamples),
|
||||
dxbc::Src::R(0, dxbc::Src::kXXXX));
|
||||
// Add the contribution of the Y sample position within the tile to the sample
|
||||
// address in the EDRAM to r0.w.
|
||||
// address in the EDRAM to r0.z.
|
||||
// r0.x = X sample position within the tile
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.z = sample offset in the EDRAM without the depth column swapping
|
||||
|
@ -6119,7 +6018,6 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
dxbc::Src::CB(kDumpCbufferPitches, kDumpCbufferPitches, 0,
|
||||
dxbc::Src::kXXXX));
|
||||
// Split the linear tile index in the source texture into X and Y in tiles.
|
||||
// Get the source texture pitch in tiles to r1.x.
|
||||
// r0.x = X sample position within the tile
|
||||
// r0.y = Y sample position within the tile
|
||||
// r0.z = sample offset in the EDRAM
|
||||
|
@ -6257,9 +6155,9 @@ ID3D12PipelineState* D3D12RenderTargetCache::GetOrCreateDumpPipeline(
|
|||
if (key.is_depth) {
|
||||
switch (key.GetDepthFormat()) {
|
||||
case xenos::DepthRenderTargetFormat::kD24S8:
|
||||
// Round to the nearest even integer. This seems to be the correct,
|
||||
// adding +0.5 and rounding towards zero results in red instead of
|
||||
// black in the 4D5307E6 clear shader.
|
||||
// Round to the nearest even integer. This seems to be the correct
|
||||
// conversion, adding +0.5 and rounding towards zero results in red
|
||||
// instead of black in the 4D5307E6 clear shader.
|
||||
a.OpMul(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(1, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LF(float(0xFFFFFF)));
|
||||
a.OpRoundNE(dxbc::Dest::R(1, 0b0001),
|
||||
|
|
|
@ -649,31 +649,6 @@ uint32_t GetNormalizedColorMask(const RegisterFile& regs,
|
|||
return normalized_color_mask;
|
||||
}
|
||||
|
||||
// Selects the multiply-and-shift constants for dividing an X coordinate by the
// resolution-scaled EDRAM tile width without an integer division instruction.
//
// draw_resolution_scale_x: horizontal draw resolution scale; only 1, 2 and 3
//   are handled.
// divide_scale_out: receives one of the kDivideScale* multipliers.
// divide_upper_shift_out: receives the right shift to apply to the upper 32
//   bits of the 32x32=64-bit product of the dividend and divide_scale_out.
//
// The shift combines the kDivideUpperShift* of the chosen odd factor (5 or 15)
// with the log2 of the remaining power-of-two factor of the tile width.
//
// NOTE(review): the default case only asserts - neither output is written for
// an unhandled scale, so callers would read whatever the variables held before
// the call if the assertion does not stop execution.
void GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
uint32_t draw_resolution_scale_x, uint32_t& divide_scale_out,
|
||||
uint32_t& divide_upper_shift_out) {
|
||||
// Compile-time guard: the switch below has entries only for scales up to 3.
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"GetEdramTileWidthDivideScaleAndUpperShift provides values only for draw "
|
||||
"resolution scaling factors of up to 3");
|
||||
switch (draw_resolution_scale_x) {
|
||||
// Scale 1: divide by 5, then by 2^4 (5 * 16 = 80).
case 1:
|
||||
divide_scale_out = kDivideScale5;
|
||||
divide_upper_shift_out = kDivideUpperShift5 + 4;
|
||||
break;
|
||||
// Scale 2: divide by 5, then by 2^5 (5 * 32 = 160).
case 2:
|
||||
divide_scale_out = kDivideScale5;
|
||||
divide_upper_shift_out = kDivideUpperShift5 + 5;
|
||||
break;
|
||||
// Scale 3: divide by 15, then by 2^4 (15 * 16 = 240).
case 3:
|
||||
divide_scale_out = kDivideScale15;
|
||||
divide_upper_shift_out = kDivideUpperShift15 + 4;
|
||||
break;
|
||||
// Unsupported scale: outputs are left untouched (see the note above).
default:
|
||||
assert_unhandled_case(draw_resolution_scale_x);
|
||||
}
|
||||
}
|
||||
|
||||
xenos::CopySampleSelect SanitizeCopySampleSelect(
|
||||
xenos::CopySampleSelect copy_sample_select, xenos::MsaaSamples msaa_samples,
|
||||
bool is_depth) {
|
||||
|
|
|
@ -226,20 +226,6 @@ void GetScissor(const RegisterFile& regs, Scissor& scissor_out,
|
|||
uint32_t GetNormalizedColorMask(const RegisterFile& regs,
|
||||
uint32_t pixel_shader_writes_color_targets);
|
||||
|
||||
// Scales, and shift amounts of the upper 32 bits of the 32x32=64-bit
|
||||
// multiplication result, for fast division and multiplication by
|
||||
// EDRAM-tile-related amounts.
|
||||
// For a 32-bit unsigned x and a divisor N in {3, 5, 15}:
//   x / N == ((uint64_t(x) * kDivideScaleN) >> 32) >> kDivideUpperShiftN
// Each scale is the reciprocal of N rounded up in fixed point:
//   3 * 0xAAAAAAAB == 2^33 + 1
//   5 * 0xCCCCCCCD == 2^34 + 1
//   15 * 0x88888889 == 2^35 + 7
// To divide by N * 2^k, add k to the shift amount.
constexpr uint32_t kDivideScale3 = 0xAAAAAAABu;
|
||||
constexpr uint32_t kDivideUpperShift3 = 1;
|
||||
constexpr uint32_t kDivideScale5 = 0xCCCCCCCDu;
|
||||
constexpr uint32_t kDivideUpperShift5 = 2;
|
||||
constexpr uint32_t kDivideScale15 = 0x88888889u;
|
||||
constexpr uint32_t kDivideUpperShift15 = 3;
|
||||
|
||||
// Returns, through the two reference parameters, the multiplier and the shift
// of the upper 32 bits of the product that together divide a value by the
// EDRAM tile width at the given horizontal draw resolution scale.
// Callers that need the half-tile width decrement the returned shift by one.
// NOTE(review): only scales 1 to 3 are handled by the definition - confirm
// callers never pass anything else.
void GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
uint32_t draw_resolution_scale_x, uint32_t& divide_scale_out,
|
||||
uint32_t& divide_upper_shift_out);
|
||||
|
||||
// Never an identity conversion - can always write conditional move instructions
|
||||
// to shaders that will be no-ops for conversion from guest to host samples.
|
||||
// While we don't know the exact guest sample pattern, due to the way
|
||||
|
|
|
@ -120,80 +120,49 @@ void DxbcShaderTranslator::ExportToMemory() {
|
|||
a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
|
||||
|
||||
// Check more fine-grained limitations.
|
||||
// The flag in control_temp.x can be 0 or 1 for simplicity, not necessarily
|
||||
// 0 or 0xFFFFFFFF.
|
||||
bool inner_condition_provided = false;
|
||||
if (is_pixel_shader()) {
|
||||
uint32_t resolution_scaled_axes =
|
||||
uint32_t(draw_resolution_scale_x_ > 1) |
|
||||
(uint32_t(draw_resolution_scale_y_ > 1) << 1);
|
||||
if (resolution_scaled_axes) {
|
||||
// Only do memexport for one host pixel in a guest pixel.
|
||||
// For 2x - pixel 1 because it's covered with half-pixel offset that
|
||||
// becomes full-pixel.
|
||||
// For 3x - also pixel 1 because it's still covered with half-pixel
|
||||
// offset, but close to the center.
|
||||
// If X needs resolution scaling, writing 1 or 0 - whether the column is
|
||||
// the one where memexport should be done - to control_temp.y.
|
||||
// For Y, doing that to control_temp.z.
|
||||
// Then, if both axes are resolution-scaled, merging the conditions for
|
||||
// the two.
|
||||
// Only do memexport for one host pixel in a guest pixel - prefer the
|
||||
// host pixel closer to the center of the guest pixel, but one that's
|
||||
// covered with the half-pixel offset according to the top-left rule (1
|
||||
// for 2x because 0 isn't covered with the half-pixel offset, 1 for 3x
|
||||
// because it's the center and is covered with the half-pixel offset too).
|
||||
// Using control_temp.yz as per-axis temporary variables.
|
||||
in_position_used_ |= resolution_scaled_axes;
|
||||
a_.OpFToU(
|
||||
dxbc::Dest::R(control_temp, resolution_scaled_axes << 1),
|
||||
dxbc::Src::V1D(uint32_t(InOutRegister::kPSInPosition), 0b0100 << 2));
|
||||
dxbc::Dest resolution_scaling_temp_dest(
|
||||
dxbc::Dest::R(control_temp, 0b1000));
|
||||
dxbc::Src resolution_scaling_temp_src(
|
||||
dxbc::Src::R(control_temp, dxbc::Src::kWWWW));
|
||||
a_.OpUDiv(dxbc::Dest::Null(),
|
||||
dxbc::Dest::R(control_temp, resolution_scaled_axes << 1),
|
||||
dxbc::Src::R(control_temp, 0b1001 << 2),
|
||||
dxbc::Src::LU(0, draw_resolution_scale_x_,
|
||||
draw_resolution_scale_y_, 0));
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
if (!(resolution_scaled_axes & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
// If there's no inner condition in control_temp.x yet, the condition
|
||||
// for the current axis can go directly to it. Otherwise, need to merge
|
||||
// with the previous condition, using control_temp.w as an intermediate
|
||||
// variable.
|
||||
dxbc::Dest resolution_scaled_axis_result(
|
||||
inner_condition_provided ? resolution_scaling_temp_dest
|
||||
: dxbc::Dest::R(control_temp, 0b0001));
|
||||
// with the previous condition, using control_temp.y or .z as an
|
||||
// intermediate variable.
|
||||
dxbc::Src resolution_scaled_axis_src(
|
||||
dxbc::Src::R(control_temp).Select(1 + i));
|
||||
uint32_t axis_resolution_scale =
|
||||
i ? draw_resolution_scale_y_ : draw_resolution_scale_x_;
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"DxbcShaderTranslator memexport draw resolution scaling "
|
||||
"conditional generation supports draw resolution scaling factors "
|
||||
"of only up to 3");
|
||||
switch (axis_resolution_scale) {
|
||||
case 2:
|
||||
// xy & 1 == 1.
|
||||
a_.OpAnd(resolution_scaled_axis_result, resolution_scaled_axis_src,
|
||||
dxbc::Src::LU(1));
|
||||
// No need to do IEq - already 1 for right / bottom, 0 for left /
|
||||
// top.
|
||||
break;
|
||||
case 3:
|
||||
// xy % 3 == 1.
|
||||
a_.OpUMul(resolution_scaling_temp_dest, dxbc::Dest::Null(),
|
||||
a_.OpIEq(
|
||||
dxbc::Dest::R(control_temp,
|
||||
inner_condition_provided ? 1 << (1 + i) : 0b0001),
|
||||
resolution_scaled_axis_src,
|
||||
dxbc::Src::LU(draw_util::kDivideScale3));
|
||||
a_.OpUShR(resolution_scaling_temp_dest, resolution_scaling_temp_src,
|
||||
dxbc::Src::LU(draw_util::kDivideUpperShift3));
|
||||
a_.OpIMAd(resolution_scaling_temp_dest, resolution_scaling_temp_src,
|
||||
dxbc::Src::LI(-3), resolution_scaled_axis_src);
|
||||
a_.OpIEq(resolution_scaled_axis_result, resolution_scaling_temp_src,
|
||||
dxbc::Src::LU(1));
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(axis_resolution_scale);
|
||||
}
|
||||
dxbc::Src::LU(
|
||||
(i ? draw_resolution_scale_y_ : draw_resolution_scale_x_) >>
|
||||
1));
|
||||
if (inner_condition_provided) {
|
||||
// Merge with the previous condition in control_temp.x.
|
||||
a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001),
|
||||
dxbc::Src::R(control_temp, dxbc::Src::kXXXX),
|
||||
resolution_scaling_temp_src);
|
||||
resolution_scaled_axis_src);
|
||||
}
|
||||
inner_condition_provided = true;
|
||||
}
|
||||
|
|
|
@ -190,91 +190,22 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// dividing by 40, not by 80.
|
||||
// For depth-only:
|
||||
// Same, but for full 80x16 tiles, not 40x16 half-tiles.
|
||||
uint32_t tile_or_half_tile_width = 80 * draw_resolution_scale_x_;
|
||||
uint32_t tile_or_half_tile_width_divide_scale;
|
||||
uint32_t tile_or_half_tile_width_divide_upper_shift;
|
||||
draw_util::GetEdramTileWidthDivideScaleAndUpperShift(
|
||||
draw_resolution_scale_x_, tile_or_half_tile_width_divide_scale,
|
||||
tile_or_half_tile_width_divide_upper_shift);
|
||||
if (any_color_targets_written) {
|
||||
tile_or_half_tile_width >>= 1;
|
||||
assert_not_zero(tile_or_half_tile_width_divide_upper_shift);
|
||||
--tile_or_half_tile_width_divide_upper_shift;
|
||||
}
|
||||
static_assert(
|
||||
TextureCache::kMaxDrawResolutionScaleAlongAxis <= 3,
|
||||
"DxbcShaderTranslator ROV sample address calculation supports Y draw "
|
||||
"resolution scaling factors of only up to 3");
|
||||
if (draw_resolution_scale_y_ == 3) {
|
||||
// Multiplication part of the division by 40|80 x 16 x scale (specifically
|
||||
// 40|80 * scale width here, and 48 height, or 16 * 3 height).
|
||||
// system_temp_rov_params_.x = X sample 0 position
|
||||
// system_temp_rov_params_.y = Y sample 0 position
|
||||
// system_temp_rov_params_.z = (X * tile_or_half_tile_width_divide_scale) >>
|
||||
// 32
|
||||
// system_temp_rov_params_.w = (Y * kDivideScale3) >> 32
|
||||
a_.OpUMul(dxbc::Dest::R(system_temp_rov_params_, 0b1100),
|
||||
dxbc::Dest::Null(),
|
||||
dxbc::Src::R(system_temp_rov_params_, 0b0100 << 4),
|
||||
dxbc::Src::LU(0, 0, tile_or_half_tile_width_divide_scale,
|
||||
draw_util::kDivideScale3));
|
||||
// Shift part of the division by 40|80 x 16 x scale.
|
||||
// system_temp_rov_params_.x = X sample 0 position
|
||||
// system_temp_rov_params_.y = Y sample 0 position
|
||||
// system_temp_rov_params_.z = X half-tile or tile position
|
||||
// system_temp_rov_params_.w = Y tile position
|
||||
a_.OpUShR(dxbc::Dest::R(system_temp_rov_params_, 0b1100),
|
||||
dxbc::Src::R(system_temp_rov_params_),
|
||||
dxbc::Src::LU(0, 0, tile_or_half_tile_width_divide_upper_shift,
|
||||
draw_util::kDivideUpperShift3 + 4));
|
||||
// Take the remainder of the performed division to
|
||||
// system_temp_rov_params_.xy.
|
||||
// system_temp_rov_params_.x = X sample 0 position within the half-tile
|
||||
// system_temp_rov_params_.y = Y sample 0 position within the (half-)tile
|
||||
// system_temp_rov_params_.z = X half-tile or tile position
|
||||
// system_temp_rov_params_.w = Y tile position
|
||||
a_.OpIMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0011),
|
||||
dxbc::Src::R(system_temp_rov_params_, 0b1110),
|
||||
dxbc::Src::LI(-int32_t(tile_or_half_tile_width),
|
||||
-16 * draw_resolution_scale_y_, 0, 0),
|
||||
dxbc::Src::R(system_temp_rov_params_));
|
||||
} else {
|
||||
assert_true(draw_resolution_scale_y_ <= 2);
|
||||
// Multiplication part of the division of X by 40|80 * scale.
|
||||
// system_temp_rov_params_.x = X sample 0 position
|
||||
// system_temp_rov_params_.y = Y sample 0 position
|
||||
// system_temp_rov_params_.z = (X * tile_or_half_tile_width_divide_scale) >>
|
||||
// 32
|
||||
a_.OpUMul(dxbc::Dest::R(system_temp_rov_params_, 0b0100),
|
||||
dxbc::Dest::Null(),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(tile_or_half_tile_width_divide_scale));
|
||||
// Shift part of the division of X by 40 * scale, division of Y by
|
||||
// 16 * scale as it's power of two in this case.
|
||||
// system_temp_rov_params_.x = X sample 0 position
|
||||
// system_temp_rov_params_.y = Y sample 0 position
|
||||
// system_temp_rov_params_.z = X half-tile or tile position
|
||||
// system_temp_rov_params_.w = Y tile position
|
||||
a_.OpUShR(dxbc::Dest::R(system_temp_rov_params_, 0b1100),
|
||||
dxbc::Src::R(system_temp_rov_params_, 0b0110 << 4),
|
||||
dxbc::Src::LU(0, 0, tile_or_half_tile_width_divide_upper_shift,
|
||||
draw_resolution_scale_y_ == 2 ? 5 : 4));
|
||||
// Take the remainder of the performed division (via multiply-subtract for
|
||||
// X, via AND for Y which is power-of-two here) to
|
||||
// system_temp_rov_params_.xy.
|
||||
uint32_t tile_width =
|
||||
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
|
||||
uint32_t tile_or_tile_half_width =
|
||||
tile_width >> uint32_t(any_color_targets_written);
|
||||
uint32_t tile_height =
|
||||
xenos::kEdramTileHeightSamples * draw_resolution_scale_y_;
|
||||
// system_temp_rov_params_.x = X sample 0 position within the half-tile or
|
||||
// tile
|
||||
// system_temp_rov_params_.y = Y sample 0 position within the (half-)tile
|
||||
// system_temp_rov_params_.z = X half-tile or tile position
|
||||
// system_temp_rov_params_.w = Y tile position
|
||||
a_.OpIMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0001),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LI(-int32_t(tile_or_half_tile_width)),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX));
|
||||
a_.OpAnd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU((16 * draw_resolution_scale_y_) - 1));
|
||||
}
|
||||
a_.OpUDiv(dxbc::Dest::R(system_temp_rov_params_, 0b1100),
|
||||
dxbc::Dest::R(system_temp_rov_params_, 0b0011),
|
||||
dxbc::Src::R(system_temp_rov_params_, 0b01000100),
|
||||
dxbc::Src::LU(tile_or_tile_half_width, tile_height,
|
||||
tile_or_tile_half_width, tile_height));
|
||||
|
||||
// Convert the Y sample 0 position within the half-tile or tile to the dword
|
||||
// offset of the row within a 80x16 32bpp tile or a 40x16 64bpp half-tile to
|
||||
|
@ -287,8 +218,10 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// system_temp_rov_params_.w = Y tile position
|
||||
a_.OpUMul(dxbc::Dest::Null(), dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU(80 * draw_resolution_scale_x_));
|
||||
dxbc::Src::LU(tile_width));
|
||||
|
||||
uint32_t tile_size = tile_width * tile_height;
|
||||
uint32_t tile_half_width = tile_width >> 1;
|
||||
if (any_color_targets_written) {
|
||||
// Depth, 32bpp color, 64bpp color are all needed.
|
||||
|
||||
|
@ -336,11 +269,9 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// system_temp_rov_params_.w = Y tile row dword origin in a 32bpp surface
|
||||
// rov_address_temp.x = dword offset of the beginning of the row of samples
|
||||
// within a row of 32bpp tiles
|
||||
a_.OpUMAd(
|
||||
dxbc::Dest::R(rov_address_temp, 0b0001),
|
||||
a_.OpUMAd(dxbc::Dest::R(rov_address_temp, 0b0001),
|
||||
dxbc::Src::R(rov_address_temp, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(80 * 16 *
|
||||
(draw_resolution_scale_x_ * draw_resolution_scale_y_)),
|
||||
dxbc::Src::LU(tile_size),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY));
|
||||
// Get the dword offset of the beginning of the row of samples within a
|
||||
// 32bpp surface to rov_address_temp.x.
|
||||
|
@ -365,11 +296,9 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// system_temp_rov_params_.w = Y tile row dword origin in a 32bpp surface
|
||||
// rov_address_temp.x = dword offset of the beginning of the row of samples
|
||||
// within a 32bpp surface
|
||||
a_.OpUMAd(
|
||||
dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||
a_.OpUMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LU(80 * 16 *
|
||||
(draw_resolution_scale_x_ * draw_resolution_scale_y_)),
|
||||
dxbc::Src::LU(tile_size),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY));
|
||||
// Get the dword offset of the beginning of the row of samples within a
|
||||
// 64bpp surface to system_temp_rov_params_.w (last time the Y tile row
|
||||
|
@ -420,7 +349,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// within a 32bpp surface
|
||||
a_.OpUMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0100),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||
dxbc::Src::LU(40 * draw_resolution_scale_x_),
|
||||
dxbc::Src::LU(tile_half_width),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX));
|
||||
// Get the final offset of the sample 0 within a 32bpp color surface to
|
||||
// system_temp_rov_params_.z (last time the 32bpp row offset is needed).
|
||||
|
@ -439,8 +368,8 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// system_temp_rov_params_.w = dword sample 0 offset within a 64bpp surface
|
||||
a_.OpMovC(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||
dxbc::Src::LI(-40 * draw_resolution_scale_x_),
|
||||
dxbc::Src::LI(40 * draw_resolution_scale_x_));
|
||||
dxbc::Src::LI(-int32_t(tile_half_width)),
|
||||
dxbc::Src::LI(int32_t(tile_half_width)));
|
||||
// Flip the 40x16 half-tiles for depth / stencil as opposed to 32bpp color -
|
||||
// get the final offset of the sample 0 within a 32bpp depth / stencil
|
||||
// surface to system_temp_rov_params_.y.
|
||||
|
@ -466,11 +395,9 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// system_temp_rov_params_.z = dword offset of the beginning of the row of
|
||||
// samples within a row of 32bpp tiles
|
||||
// system_temp_rov_params_.w = Y tile position
|
||||
a_.OpUMAd(
|
||||
dxbc::Dest::R(system_temp_rov_params_, 0b0100),
|
||||
a_.OpUMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0100),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ),
|
||||
dxbc::Src::LU(80 * 16 *
|
||||
(draw_resolution_scale_x_ * draw_resolution_scale_y_)),
|
||||
dxbc::Src::LU(tile_size),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY));
|
||||
// Get the dword offset of the beginning of the row of samples within a
|
||||
// 32bpp surface to system_temp_rov_params_.y (last time anything Y-related
|
||||
|
@ -502,15 +429,15 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() {
|
|||
// otherwise
|
||||
a_.OpUGE(dxbc::Dest::R(system_temp_rov_params_, 0b0001),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(40 * draw_resolution_scale_x_));
|
||||
dxbc::Src::LU(tile_half_width));
|
||||
// Flip the 40x16 half-tiles for depth / stencil as opposed to 32bpp color -
|
||||
// get the dword offset to add for flipping to system_temp_rov_params_.x.
|
||||
// system_temp_rov_params_.x = depth half-tile flipping offset
|
||||
// system_temp_rov_params_.y = dword sample 0 offset within a 32bpp surface
|
||||
a_.OpMovC(dxbc::Dest::R(system_temp_rov_params_, 0b0001),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX),
|
||||
dxbc::Src::LI(-40 * draw_resolution_scale_x_),
|
||||
dxbc::Src::LI(40 * draw_resolution_scale_x_));
|
||||
dxbc::Src::LI(-int32_t(tile_half_width)),
|
||||
dxbc::Src::LI(int32_t(tile_half_width)));
|
||||
// Flip the 40x16 half-tiles for depth / stencil as opposed to 32bpp color -
|
||||
// get the final offset of the sample 0 within a 32bpp depth / stencil
|
||||
// surface to system_temp_rov_params_.y.
|
||||
|
@ -1288,10 +1215,12 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() {
|
|||
// Go to the next sample (samples are at +0, +(80*scale_x), +1,
|
||||
// +(80*scale_x+1), so need to do +(80*scale_x), -(80*scale_x-1),
|
||||
// +(80*scale_x) and -(80*scale_x+1) after each sample).
|
||||
uint32_t tile_width =
|
||||
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
|
||||
a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||
dxbc::Src::LI((i & 1) ? -80 * draw_resolution_scale_x_ + 2 - i
|
||||
: 80 * draw_resolution_scale_x_));
|
||||
dxbc::Src::LI((i & 1) ? -int32_t(tile_width) + 2 - i
|
||||
: int32_t(tile_width)));
|
||||
}
|
||||
|
||||
if (ROV_IsDepthStencilEarly()) {
|
||||
|
@ -2181,6 +2110,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
dxbc::Dest temp_w_dest(dxbc::Dest::R(temp, 0b1000));
|
||||
dxbc::Src temp_w_src(dxbc::Src::R(temp, dxbc::Src::kWWWW));
|
||||
|
||||
uint32_t tile_width =
|
||||
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
|
||||
|
||||
// Do late depth/stencil test (which includes writing) if needed or deferred
|
||||
// depth writing.
|
||||
if (ROV_IsDepthStencilEarly()) {
|
||||
|
@ -2212,8 +2144,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
if (i < 3) {
|
||||
a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0010),
|
||||
dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY),
|
||||
dxbc::Src::LI((i & 1) ? -80 * draw_resolution_scale_x_ + 2 - i
|
||||
: 80 * draw_resolution_scale_x_));
|
||||
dxbc::Src::LI((i & 1) ? -int32_t(tile_width) + 2 - i
|
||||
: int32_t(tile_width)));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -3021,8 +2953,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
// +(80*scale_x+1), so need to do +(80*scale_x), -(80*scale_x-1),
|
||||
// +(80*scale_x) and -(80*scale_x+1) after each sample).
|
||||
int32_t next_sample_distance =
|
||||
(j & 1) ? -80 * draw_resolution_scale_x_ + 2 - j
|
||||
: 80 * draw_resolution_scale_x_;
|
||||
(j & 1) ? -int32_t(tile_width) + 2 - j : int32_t(tile_width);
|
||||
a_.OpIAdd(
|
||||
dxbc::Dest::R(system_temp_rov_params_, 0b1100),
|
||||
dxbc::Src::R(system_temp_rov_params_),
|
||||
|
|
Loading…
Reference in New Issue