[D3D12] EDRAM layout of a rectangle and other resolve stuff
This commit is contained in:
parent
ddc8f17fa5
commit
c4f80aac0d
|
@ -20,6 +20,7 @@
|
||||||
#include "xenia/base/profiling.h"
|
#include "xenia/base/profiling.h"
|
||||||
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
#include "xenia/gpu/d3d12/d3d12_command_processor.h"
|
||||||
#include "xenia/gpu/texture_info.h"
|
#include "xenia/gpu/texture_info.h"
|
||||||
|
#include "xenia/gpu/texture_util.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
@ -744,11 +745,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
|
||||||
|
|
||||||
// Get the render target properties.
|
// Get the render target properties.
|
||||||
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
uint32_t rb_surface_info = regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
|
||||||
uint32_t surface_pitch = std::min(rb_surface_info & 0x3FFF, 2560u);
|
uint32_t surface_pitch = rb_surface_info & 0x3FFF;
|
||||||
if (surface_pitch == 0) {
|
|
||||||
// Nothing to copy or clear.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
MsaaSamples msaa_samples = MsaaSamples((rb_surface_info >> 16) & 0x3);
|
||||||
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
uint32_t rb_copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
|
||||||
uint32_t surface_index = rb_copy_control & 0x7;
|
uint32_t surface_index = rb_copy_control & 0x7;
|
||||||
|
@ -759,12 +756,10 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
|
||||||
bool surface_is_depth = surface_index == 4;
|
bool surface_is_depth = surface_index == 4;
|
||||||
uint32_t surface_edram_base;
|
uint32_t surface_edram_base;
|
||||||
uint32_t surface_format;
|
uint32_t surface_format;
|
||||||
bool surface_format_64bpp;
|
|
||||||
if (surface_is_depth) {
|
if (surface_is_depth) {
|
||||||
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
uint32_t rb_depth_info = regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
|
||||||
surface_edram_base = rb_depth_info & 0xFFF;
|
surface_edram_base = rb_depth_info & 0xFFF;
|
||||||
surface_format = (rb_depth_info >> 16) & 0x1;
|
surface_format = (rb_depth_info >> 16) & 0x1;
|
||||||
surface_format_64bpp = false;
|
|
||||||
} else {
|
} else {
|
||||||
uint32_t rb_color_info;
|
uint32_t rb_color_info;
|
||||||
switch (surface_index) {
|
switch (surface_index) {
|
||||||
|
@ -783,26 +778,7 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
|
||||||
}
|
}
|
||||||
surface_edram_base = rb_color_info & 0xFFF;
|
surface_edram_base = rb_color_info & 0xFFF;
|
||||||
surface_format = (rb_color_info >> 16) & 0xF;
|
surface_format = (rb_color_info >> 16) & 0xF;
|
||||||
surface_format_64bpp =
|
|
||||||
IsColorFormat64bpp(ColorRenderTargetFormat(surface_format));
|
|
||||||
}
|
}
|
||||||
if (surface_edram_base >= 2048) {
|
|
||||||
// The surface is totally outside of EDRAM - shouldn't happen.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Calculate the maximum number of rows to clamp the source rectangle.
|
|
||||||
uint32_t surface_pitch_ss =
|
|
||||||
surface_pitch * (msaa_samples >= MsaaSamples::k4X ? 2 : 1);
|
|
||||||
uint32_t surface_pitch_tiles =
|
|
||||||
(surface_pitch_ss + 79) / 80 * (surface_format_64bpp ? 2 : 1);
|
|
||||||
uint32_t surface_edram_max_rows =
|
|
||||||
(2048 - surface_edram_base) / surface_pitch_tiles;
|
|
||||||
if (surface_edram_max_rows == 0) {
|
|
||||||
// The surface is too close to the end of EDRAM.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
uint32_t surface_max_height =
|
|
||||||
surface_edram_max_rows * (msaa_samples >= MsaaSamples::k2X ? 8 : 16);
|
|
||||||
|
|
||||||
// Get the resolve region since both copying and clearing need it.
|
// Get the resolve region since both copying and clearing need it.
|
||||||
// HACK: Vertices to use are always in vf0.
|
// HACK: Vertices to use are always in vf0.
|
||||||
|
@ -849,15 +825,6 @@ bool RenderTargetCache::Resolve(SharedMemory* shared_memory, Memory* memory) {
|
||||||
src_rect.top += window_offset_y;
|
src_rect.top += window_offset_y;
|
||||||
src_rect.bottom += window_offset_y;
|
src_rect.bottom += window_offset_y;
|
||||||
}
|
}
|
||||||
src_rect.right = std::min(src_rect.right, LONG(surface_pitch));
|
|
||||||
src_rect.bottom = std::min(src_rect.bottom, LONG(surface_max_height));
|
|
||||||
if (src_rect.right <= 0 || src_rect.bottom <= 0 ||
|
|
||||||
src_rect.right <= src_rect.left || src_rect.bottom <= src_rect.top) {
|
|
||||||
// Totally off screen or empty - nothing to copy.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
src_rect.left = std::max(src_rect.left, LONG(0));
|
|
||||||
src_rect.top = std::max(src_rect.top, LONG(0));
|
|
||||||
|
|
||||||
XELOGGPU(
|
XELOGGPU(
|
||||||
"Resolving (%d,%d)->(%d,%d) of RT %u (pitch %u, %u sample%s, format "
|
"Resolving (%d,%d)->(%d,%d) of RT %u (pitch %u, %u sample%s, format "
|
||||||
|
@ -903,22 +870,24 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
// Nothing to copy.
|
// Nothing to copy.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
uint32_t src_x = uint32_t(src_rect.left);
|
D3D12_RECT copy_rect = src_rect;
|
||||||
uint32_t src_y = uint32_t(src_rect.top);
|
copy_rect.right =
|
||||||
uint32_t src_width =
|
std::min(copy_rect.right, LONG(copy_rect.left + dest_pitch));
|
||||||
std::min(uint32_t(src_rect.right - src_rect.left), dest_pitch);
|
copy_rect.bottom =
|
||||||
uint32_t src_height =
|
std::min(copy_rect.bottom, LONG(copy_rect.top + dest_height));
|
||||||
std::min(uint32_t(src_rect.bottom - src_rect.top), dest_height);
|
|
||||||
|
|
||||||
// Get format info.
|
// Get format info.
|
||||||
uint32_t dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
|
uint32_t dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
|
||||||
TextureFormat src_texture_format;
|
TextureFormat src_texture_format;
|
||||||
|
bool src_64bpp;
|
||||||
if (is_depth) {
|
if (is_depth) {
|
||||||
src_texture_format =
|
src_texture_format =
|
||||||
DepthRenderTargetToTextureFormat(DepthRenderTargetFormat(src_format));
|
DepthRenderTargetToTextureFormat(DepthRenderTargetFormat(src_format));
|
||||||
|
src_64bpp = false;
|
||||||
} else {
|
} else {
|
||||||
src_texture_format =
|
src_texture_format =
|
||||||
ColorRenderTargetToTextureFormat(ColorRenderTargetFormat(src_format));
|
ColorRenderTargetToTextureFormat(ColorRenderTargetFormat(src_format));
|
||||||
|
src_64bpp = IsColorFormat64bpp(ColorRenderTargetFormat(src_format));
|
||||||
}
|
}
|
||||||
assert_true(src_texture_format != TextureFormat::kUnknown);
|
assert_true(src_texture_format != TextureFormat::kUnknown);
|
||||||
src_texture_format = GetBaseFormat(src_texture_format);
|
src_texture_format = GetBaseFormat(src_texture_format);
|
||||||
|
@ -928,6 +897,17 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
is_depth ? src_texture_format
|
is_depth ? src_texture_format
|
||||||
: GetBaseFormat(TextureFormat((dest_info >> 7) & 0x3F));
|
: GetBaseFormat(TextureFormat((dest_info >> 7) & 0x3F));
|
||||||
|
|
||||||
|
// See what samples we need and what we should do with them.
|
||||||
|
xenos::CopySampleSelect sample_select =
|
||||||
|
xenos::CopySampleSelect((rb_copy_control >> 4) & 0x7);
|
||||||
|
if (is_depth && sample_select > xenos::CopySampleSelect::k3) {
|
||||||
|
assert_always();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int32_t dest_exp_bias =
|
||||||
|
!is_depth ? (int32_t((dest_info >> 16) << 26) >> 26) : 0;
|
||||||
|
uint32_t dest_swap = (dest_info >> 24) & 0x1;
|
||||||
|
|
||||||
// Get the destination location.
|
// Get the destination location.
|
||||||
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
|
uint32_t dest_address = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32 & 0x1FFFFFFF;
|
||||||
if (dest_address & 0x3) {
|
if (dest_address & 0x3) {
|
||||||
|
@ -936,44 +916,59 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
|
||||||
// resolve to 8bpp or 16bpp textures at very odd locations.
|
// resolve to 8bpp or 16bpp textures at very odd locations.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int32_t dest_exp_bias = int32_t((dest_info >> 16) << 26) >> 26;
|
uint32_t dest_size = texture_util::GetGuestMipStorageSize(
|
||||||
uint32_t dest_swap = (dest_info >> 24) & 0x1;
|
xe::align(dest_pitch, 32u), xe::align(dest_height, 32u), 1, true,
|
||||||
// TODO(Triang3l): Copy to array slices.
|
dest_format, nullptr);
|
||||||
|
if (dest_info & (1 << 3)) {
|
||||||
|
// Copying to an array slice.
|
||||||
|
dest_address += dest_size * ((dest_info >> 4) & 0x7);
|
||||||
|
}
|
||||||
// TODO(Triang3l): Investigate what copy_dest_number is.
|
// TODO(Triang3l): Investigate what copy_dest_number is.
|
||||||
|
|
||||||
XELOGGPU(
|
XELOGGPU(
|
||||||
"Copying samples %u to 0x%.8X (%ux%u), destination format %s, "
|
"Copying samples %u to 0x%.8X (%ux%u), destination format %s, "
|
||||||
"exponent bias %d, red and blue %sswapped",
|
"exponent bias %d, red and blue %sswapped",
|
||||||
(rb_copy_control >> 4) & 0x7, dest_address, dest_pitch, dest_height,
|
uint32_t(sample_select), dest_address, dest_pitch, dest_height,
|
||||||
FormatInfo::Get(dest_format)->name, dest_exp_bias,
|
FormatInfo::Get(dest_format)->name, dest_exp_bias,
|
||||||
dest_swap ? "" : "not ");
|
dest_swap ? "" : "not ");
|
||||||
|
|
||||||
// There are 3 paths for resolving in this function - they don't necessarily
|
// Validate and clamp the source region, skip parts that don't need to be
|
||||||
|
// copied and calculate the number of threads needed for copying/loading.
|
||||||
|
uint32_t surface_pitch_tiles, row_tiles, rows;
|
||||||
|
if (!GetEDRAMLayout(surface_pitch, msaa_samples, src_64bpp, edram_base,
|
||||||
|
copy_rect, surface_pitch_tiles, row_tiles, rows)) {
|
||||||
|
// Nothing to copy.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
XELOGGPU("Pitch is %u tiles, %u rows of %u tiles", surface_pitch_tiles, rows,
|
||||||
|
row_tiles);
|
||||||
|
|
||||||
|
// There are 2 paths for resolving in this function - they don't necessarily
|
||||||
// have to map directly to kRaw and kConvert CopyCommands.
|
// have to map directly to kRaw and kConvert CopyCommands.
|
||||||
// - Depth - tiling raw D24S8 or D24FS8 directly from the EDRAM buffer to the
|
// - Raw - when extracting a single color to a texture of the same format as
|
||||||
// shared memory. Only 1 sample is resolved from a depth buffer, and it
|
// the EDRAM surface and exponent bias is not applied, or when resolving a
|
||||||
// looks like format conversion can't be done when resolving depth buffers
|
// depth buffer (games read only one sample of it - resolving multiple
|
||||||
// since k_8_8_8_8 is specified as the destination format, while the texture
|
// samples of a depth buffer is meaningless anyway - and apparently there's
|
||||||
// is being used as k_24_8 or k_24_8_FLOAT.
|
// no format conversion as well because k_8_8_8_8 is specified in the
|
||||||
// - Raw color - when the source is single-sampled and has the same format as
|
// destination format in the register, which is obviously not true, and the
|
||||||
// the destination, and there's no need to apply exponent bias. A regular
|
// texture is then read as k_24_8 or k_24_8_FLOAT). Swapping red and blue is
|
||||||
// EDRAM load is done to a buffer, and the buffer is then tiled to the
|
// possible in this mode.
|
||||||
// shared memory. Because swapping red and blue is very common, this path
|
|
||||||
// supports swapping.
|
|
||||||
// - Conversion - when a simple copy is not enough. The EDRAM region is loaded
|
// - Conversion - when a simple copy is not enough. The EDRAM region is loaded
|
||||||
// to a render target resource, which is then used as a texture in a shader
|
// to a render target resource, which is then used as a texture in a shader
|
||||||
// performing the resolve (by sampling the texture on or between pixels with
|
// performing the resolve (by sampling the texture on or between pixels with
|
||||||
// bilinear filtering), applying exponent bias and swapping red and blue in
|
// bilinear filtering), applying exponent bias and swapping red and blue in
|
||||||
// a format-agnostic way, then the resulting color is written to a temporary
|
// a format-agnostic way, then the resulting color is written to a temporary
|
||||||
// RTV of the destination format.
|
// RTV of the destination format.
|
||||||
if (is_depth) {
|
if (sample_select <= xenos::CopySampleSelect::k3 &&
|
||||||
// Depth.
|
src_texture_format == dest_format && dest_exp_bias == 0) {
|
||||||
// TODO(Triang3l): Resolve depth.
|
XELOGGPU("Resolving a single sample without conversion");
|
||||||
return false;
|
// Make sure we have the memory to write to.
|
||||||
} else if (src_texture_format == dest_format &&
|
if (!shared_memory->MakeTilesResident(dest_address, dest_size)) {
|
||||||
msaa_samples == MsaaSamples::k1X && dest_exp_bias == 0) {
|
return false;
|
||||||
XELOGGPU("Resolving a single-sampled surface without conversion");
|
}
|
||||||
// TODO(Triang3l): Raw resolve.
|
// TODO(Triang3l): Raw resolve.
|
||||||
return false;
|
// Make the texture cache refresh the data.
|
||||||
|
shared_memory->RangeWrittenByGPU(dest_address, dest_size);
|
||||||
} else {
|
} else {
|
||||||
XELOGGPU("Resolving with a pixel shader");
|
XELOGGPU("Resolving with a pixel shader");
|
||||||
// TODO(Triang3l): Conversion.
|
// TODO(Triang3l): Conversion.
|
||||||
|
@ -1188,6 +1183,65 @@ RenderTargetCache::RenderTarget* RenderTargetCache::FindOrCreateRenderTarget(
|
||||||
return render_target;
|
return render_target;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RenderTargetCache::GetEDRAMLayout(
|
||||||
|
uint32_t pitch_pixels, MsaaSamples msaa_samples, bool is_64bpp,
|
||||||
|
uint32_t& base_in_out, D3D12_RECT& rect_in_out, uint32_t& pitch_tiles_out,
|
||||||
|
uint32_t& row_tiles_out, uint32_t& rows_out) {
|
||||||
|
if (pitch_pixels == 0 || rect_in_out.right <= 0 || rect_in_out.bottom <= 0 ||
|
||||||
|
rect_in_out.top >= rect_in_out.bottom) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
pitch_pixels = std::min(pitch_pixels, 2560u);
|
||||||
|
D3D12_RECT rect = rect_in_out;
|
||||||
|
rect.left = std::max(rect.left, LONG(0));
|
||||||
|
rect.top = std::max(rect.top, LONG(0));
|
||||||
|
rect.right = std::min(rect.right, LONG(pitch_pixels));
|
||||||
|
if (rect.left >= rect.right) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
|
||||||
|
uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0;
|
||||||
|
uint32_t sample_size_log2 = is_64bpp ? 1 : 0;
|
||||||
|
|
||||||
|
uint32_t pitch_tiles = (((pitch_pixels << samples_x_log2) + 79) / 80)
|
||||||
|
<< sample_size_log2;
|
||||||
|
|
||||||
|
// Adjust the base and the rectangle to skip tiles to the left of the left
|
||||||
|
// bound of the rectangle and to the top of the top bound.
|
||||||
|
uint32_t base = base_in_out;
|
||||||
|
uint32_t skip = rect.top << samples_y_log2 >> 4;
|
||||||
|
base += skip * pitch_tiles;
|
||||||
|
skip <<= 4 - samples_y_log2;
|
||||||
|
rect.top -= skip;
|
||||||
|
rect.bottom -= skip;
|
||||||
|
skip = (rect.left << samples_x_log2) / 80;
|
||||||
|
base += skip << sample_size_log2;
|
||||||
|
skip *= 80 >> samples_x_log2;
|
||||||
|
rect.left -= skip;
|
||||||
|
rect.right -= skip;
|
||||||
|
|
||||||
|
// Calculate the number of 16-sample rows this rectangle spans.
|
||||||
|
uint32_t rows = ((rect.bottom << samples_y_log2) + 15) >> 4;
|
||||||
|
uint32_t rows_max = (2048 - base) / pitch_tiles;
|
||||||
|
if (rows_max == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (rows > rows_max) {
|
||||||
|
// Clamp the rectangle if it's partially outside of EDRAM.
|
||||||
|
rows = rows_max;
|
||||||
|
rect.bottom = rows_max << (4 - samples_y_log2);
|
||||||
|
}
|
||||||
|
|
||||||
|
base_in_out = base;
|
||||||
|
rect_in_out = rect;
|
||||||
|
pitch_tiles_out = pitch_tiles;
|
||||||
|
row_tiles_out = (((rect.right << samples_x_log2) + 79) / 80)
|
||||||
|
<< sample_size_log2;
|
||||||
|
rows_out = rows;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
RenderTargetCache::EDRAMLoadStoreMode RenderTargetCache::GetLoadStoreMode(
|
RenderTargetCache::EDRAMLoadStoreMode RenderTargetCache::GetLoadStoreMode(
|
||||||
bool is_depth, uint32_t format) {
|
bool is_depth, uint32_t format) {
|
||||||
if (is_depth) {
|
if (is_depth) {
|
||||||
|
|
|
@ -324,7 +324,24 @@ class RenderTargetCache {
|
||||||
RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key,
|
RenderTarget* FindOrCreateRenderTarget(RenderTargetKey key,
|
||||||
uint32_t heap_page_first);
|
uint32_t heap_page_first);
|
||||||
|
|
||||||
|
// Calculates the tile layout for a rectangle on a render target of the given
|
||||||
|
// configuration. The base is adjusted so it points to the tile containing the
|
||||||
|
// top-left pixel of the rectangle, the rectangle is also adjusted so it's
|
||||||
|
// relative to that tile (because its coordinates don't have to be multiples
|
||||||
|
// of the tile size) and so it's not larger than the pitch and the available
|
||||||
|
// memory space. EDRAM row pitch in tiles (for memory access) and actual width
|
||||||
|
// and height of the region containing the rectangle in tiles (for thread
|
||||||
|
// group count) are also written. This function returns true if the requested
|
||||||
|
// rectangle is within the bounds of EDRAM and is not empty, but if it returns
|
||||||
|
// false, the output values may not be written, so the return value must be
|
||||||
|
// checked.
// Parameters:
// - pitch_pixels: surface pitch in pixels; 0 makes the function return false,
//   values above 2560 are clamped to 2560.
// - msaa_samples: k2X and above double the sample count vertically, k4X and
//   above double it horizontally as well.
// - is_64bpp: when true, twice as many tiles are counted per row.
// - base_in_out: in - base of the surface in tiles; out - base advanced past
//   the tiles above and to the left of the rectangle.
// - rect_in_out: in - rectangle in pixels; out - clamped rectangle relative to
//   the tile at the new base.
// - pitch_tiles_out: row pitch of the surface in tiles.
// - row_tiles_out: width of the region containing the rectangle, in tiles.
// - rows_out: height of the region containing the rectangle, in tile rows.
|
||||||
|
static bool GetEDRAMLayout(uint32_t pitch_pixels, MsaaSamples msaa_samples,
|
||||||
|
bool is_64bpp, uint32_t& base_in_out,
|
||||||
|
D3D12_RECT& rect_in_out, uint32_t& pitch_tiles_out,
|
||||||
|
uint32_t& row_tiles_out, uint32_t& rows_out);
|
||||||
|
|
||||||
static EDRAMLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format);
|
static EDRAMLoadStoreMode GetLoadStoreMode(bool is_depth, uint32_t format);
|
||||||
|
|
||||||
// Must be in a frame to call. Stores the dirty areas of the currently bound
|
// Must be in a frame to call. Stores the dirty areas of the currently bound
|
||||||
// render targets and marks them as clean.
|
// render targets and marks them as clean.
|
||||||
void StoreRenderTargetsToEDRAM();
|
void StoreRenderTargetsToEDRAM();
|
||||||
|
|
|
@ -661,7 +661,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
||||||
height_blocks, depth_blocks);
|
height_blocks, depth_blocks);
|
||||||
texture->base_slice_size = texture_util::GetGuestMipStorageSize(
|
texture->base_slice_size = texture_util::GetGuestMipStorageSize(
|
||||||
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
||||||
texture->mip_pitches[0]);
|
&texture->mip_pitches[0]);
|
||||||
texture->base_in_sync = false;
|
texture->base_in_sync = false;
|
||||||
} else {
|
} else {
|
||||||
texture->base_slice_size = 0;
|
texture->base_slice_size = 0;
|
||||||
|
@ -684,7 +684,7 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
||||||
texture->mip_offsets[i] = texture->mip_slice_size;
|
texture->mip_offsets[i] = texture->mip_slice_size;
|
||||||
texture->mip_slice_size += texture_util::GetGuestMipStorageSize(
|
texture->mip_slice_size += texture_util::GetGuestMipStorageSize(
|
||||||
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
width_blocks, height_blocks, depth_blocks, key.tiled, key.format,
|
||||||
texture->mip_pitches[i]);
|
&texture->mip_pitches[i]);
|
||||||
}
|
}
|
||||||
// The rest are either packed levels or don't exist at all.
|
// The rest are either packed levels or don't exist at all.
|
||||||
for (uint32_t i = mip_max_storage_level + 1;
|
for (uint32_t i = mip_max_storage_level + 1;
|
||||||
|
|
|
@ -253,7 +253,7 @@ union RB_DEPTH_INFO {
|
||||||
|
|
||||||
union RB_COPY_CONTROL {
|
union RB_COPY_CONTROL {
|
||||||
xe::bf<uint32_t, 0, 3> copy_src_select;
|
xe::bf<uint32_t, 0, 3> copy_src_select;
|
||||||
xe::bf<uint32_t, 4, 3> copy_sample_select;
|
xe::bf<xenos::CopySampleSelect, 4, 3> copy_sample_select;
|
||||||
xe::bf<uint32_t, 8, 1> color_clear_enable;
|
xe::bf<uint32_t, 8, 1> color_clear_enable;
|
||||||
xe::bf<uint32_t, 9, 1> depth_clear_enable;
|
xe::bf<uint32_t, 9, 1> depth_clear_enable;
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height,
|
||||||
|
|
||||||
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
||||||
uint32_t depth_blocks, bool is_tiled,
|
uint32_t depth_blocks, bool is_tiled,
|
||||||
TextureFormat format, uint32_t& row_pitch_out) {
|
TextureFormat format, uint32_t* row_pitch_out) {
|
||||||
const FormatInfo* format_info = FormatInfo::Get(format);
|
const FormatInfo* format_info = FormatInfo::Get(format);
|
||||||
uint32_t row_pitch = width_blocks * format_info->block_width *
|
uint32_t row_pitch = width_blocks * format_info->block_width *
|
||||||
format_info->block_height * format_info->bits_per_pixel /
|
format_info->block_height * format_info->bits_per_pixel /
|
||||||
|
@ -64,7 +64,9 @@ uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
||||||
if (!is_tiled) {
|
if (!is_tiled) {
|
||||||
row_pitch = xe::align(row_pitch, 256u);
|
row_pitch = xe::align(row_pitch, 256u);
|
||||||
}
|
}
|
||||||
row_pitch_out = row_pitch;
|
if (row_pitch_out != nullptr) {
|
||||||
|
*row_pitch_out = row_pitch;
|
||||||
|
}
|
||||||
return xe::align(row_pitch * height_blocks * depth_blocks, 4096u);
|
return xe::align(row_pitch * height_blocks * depth_blocks, 4096u);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ void GetGuestMipBlocks(Dimension dimension, uint32_t width, uint32_t height,
|
||||||
// height and depth must be obtained via GetGuestMipExtent.
|
// height and depth must be obtained via GetGuestMipExtent.
|
||||||
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
uint32_t GetGuestMipStorageSize(uint32_t width_blocks, uint32_t height_blocks,
|
||||||
uint32_t depth_blocks, bool is_tiled,
|
uint32_t depth_blocks, bool is_tiled,
|
||||||
TextureFormat format, uint32_t& row_pitch_out);
|
TextureFormat format, uint32_t* row_pitch_out);
|
||||||
|
|
||||||
// Gets the number of the mipmap level where the packed mips are stored.
|
// Gets the number of the mipmap level where the packed mips are stored.
|
||||||
inline uint32_t GetPackedMipLevel(uint32_t width, uint32_t height) {
|
inline uint32_t GetPackedMipLevel(uint32_t width, uint32_t height) {
|
||||||
|
|
|
@ -285,6 +285,17 @@ enum class CopyCommand : uint32_t {
|
||||||
kNull = 3, // ?
|
kNull = 3, // ?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// a2xx_rb_copy_sample_select
// Value of the 3-bit copy_sample_select field (bits 4-6) of RB_COPY_CONTROL,
// choosing which samples of the source surface a resolve copy reads.
enum class CopySampleSelect : uint32_t {
  // Values up to k3 are handled by the resolve code as copying a single
  // sample.
  k0 = 0,
  k1 = 1,
  k2 = 2,
  k3 = 3,
  // Values above k3 refer to several samples at once - presumably the listed
  // samples are combined into one output pixel (TODO confirm against hardware
  // documentation).
  k01 = 4,
  k23 = 5,
  k0123 = 6,
};
|
||||||
|
|
||||||
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
|
||||||
(((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
|
(((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
|
||||||
((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
|
((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
|
||||||
|
|
Loading…
Reference in New Issue