// xemu/hw/xbox/nv2a/pgraph/vk/surface.c

/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024-2025 Matt Borgerson
*
* Based on GL implementation:
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/pgraph/swizzle.h"
#include "qemu/compiler.h"
#include "ui/xemu-settings.h"
#include "renderer.h"

// Number of invalidated surfaces to keep around for image recycling
const int num_invalid_surfaces_to_keep = 10; // FIXME: Make automatic
// Frames of disuse after which a cached surface is evicted
const int max_surface_frame_time_delta = 5;
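
/*
 * Change the rendering scale factor at runtime. This is an expensive,
 * blocking operation; the sequence appears to be: halt PFIFO, download all
 * dirty surfaces to guest RAM, request a full renderer flush (so surfaces
 * are re-created at the new scale on next use), then resume PFIFO. Each
 * step is kicked to the worker via pfifo_kick() and awaited on an event.
 */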
void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale)
{
g_config.display.quality.surface_scale = scale < 1 ? 1 : scale;
qemu_mutex_lock(&d->pfifo.lock);
qatomic_set(&d->pfifo.halt, true);
qemu_mutex_unlock(&d->pfifo.lock);
    // FIXME: The following sequence is effectively just a flush; factor out
    // a shared helper.
qemu_mutex_lock(&d->pgraph.lock);
qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true);
qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock(&d->pfifo.lock);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
qemu_mutex_lock(&d->pgraph.lock);
qemu_event_reset(&d->pgraph.flush_complete);
qatomic_set(&d->pgraph.flush_pending, true);
qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock(&d->pfifo.lock);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
qemu_event_wait(&d->pgraph.flush_complete);
qemu_mutex_lock(&d->pfifo.lock);
qatomic_set(&d->pfifo.halt, false);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
}
unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d)
{
return d->pgraph.surface_scale_factor; // FIXME: Move internal to renderer
}
void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg)
{
int factor = g_config.display.quality.surface_scale;
pg->surface_scale_factor = MAX(factor, 1);
}
// FIXME: Move to common
static void get_surface_dimensions(PGRAPHState const *pg, unsigned int *width,
unsigned int *height)
{
bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
if (swizzle) {
*width = 1 << pg->surface_shape.log_width;
*height = 1 << pg->surface_shape.log_height;
} else {
*width = pg->surface_shape.clip_width;
*height = pg->surface_shape.clip_height;
}
}
// FIXME: Move to common
static bool framebuffer_dirty(PGRAPHState const *pg)
{
bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape,
sizeof(SurfaceShape)) != 0;
if (!shape_changed || (!pg->surface_shape.color_format
&& !pg->surface_shape.zeta_format)) {
return false;
}
return true;
}
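
/*
 * Copy a 2D image between buffers whose row strides may differ, clamping
 * each row copy to the smaller of the two strides.
 */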
static void memcpy_image(void *dst, void const *src, int dst_stride,
int src_stride, int height)
{
if (dst_stride == src_stride) {
memcpy(dst, src, dst_stride * height);
return;
}
uint8_t *dst_ptr = (uint8_t *)dst;
    uint8_t const *src_ptr = (uint8_t const *)src;
size_t copy_stride = MIN(src_stride, dst_stride);
for (int i = 0; i < height; i++) {
memcpy(dst_ptr, src_ptr, copy_stride);
dst_ptr += dst_stride;
src_ptr += src_stride;
}
}
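
/*
 * Download any dirty surfaces overlapping the VRAM range
 * [start, start + size - 1]. Both range ends are inclusive here, so two
 * ranges [a1, e1] and [a2, e2] overlap iff a1 <= e2 && a2 <= e1.
 */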
void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size)
{
PGRAPHVkState *r = pg->vk_renderer_state;
SurfaceBinding *surface;
hwaddr end = start + size - 1;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
hwaddr surf_end = surface->vram_addr + surface->size - 1;
        bool overlapping = !(surface->vram_addr > end || start > surf_end);
if (overlapping) {
pgraph_vk_surface_download_if_dirty(
container_of(pg, NV2AState, pgraph), surface);
}
}
}
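
/*
 * Read a surface back into a host buffer in guest memory layout. The rough
 * flow is:
 *  1. If rendering is scaled (and no compute conversion is needed), blit
 *     the scaled image down to guest dimensions via the scratch image.
 *  2. Copy the image into the staging buffer, or into the compute
 *     destination buffer when a packed depth-stencil format is required.
 *  3. For D24S8/D32S8, run the compute packer and copy the packed result
 *     into the staging buffer.
 *  4. Map the staging buffer and copy rows out, swizzling afterwards if
 *     the guest surface is swizzled.
 */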
static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface,
uint8_t *pixels)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD);
bool use_compute_to_convert_depth_stencil_format =
surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT;
bool no_conversion_necessary =
surface->color || use_compute_to_convert_depth_stencil_format ||
surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM;
assert(no_conversion_necessary);
bool compute_needs_finish = (use_compute_to_convert_depth_stencil_format &&
pgraph_vk_compute_needs_finish(r));
if (r->in_command_buffer &&
surface->draw_time >= r->command_buffer_start_time) {
pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN);
} else if (compute_needs_finish) {
pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
}
bool downscale = (pg->surface_scale_factor != 1);
trace_nv2a_pgraph_surface_download(
surface->color ? "COLOR" : "ZETA",
surface->swizzle ? "sz" : "lin", surface->vram_addr,
surface->width, surface->height, surface->pitch,
surface->fmt.bytes_per_pixel);
// Read surface into memory
uint8_t *gl_read_buf = pixels;
uint8_t *swizzle_buf = pixels;
if (surface->swizzle) {
// FIXME: Swizzle in shader
assert(pg->surface_scale_factor == 1 || downscale);
swizzle_buf = (uint8_t *)g_malloc(surface->size);
gl_read_buf = swizzle_buf;
}
unsigned int scaled_width = surface->width,
scaled_height = surface->height;
pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);
VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__);
pgraph_vk_transition_image_layout(
pg, cmd, surface->image, surface->host_fmt.vk_format,
surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
int num_copy_regions = 1;
VkBufferImageCopy copy_regions[2];
copy_regions[0] = (VkBufferImageCopy){
.imageSubresource.aspectMask = surface->color ?
VK_IMAGE_ASPECT_COLOR_BIT :
VK_IMAGE_ASPECT_DEPTH_BIT,
.imageSubresource.layerCount = 1,
};
VkImage surface_image_loc;
if (downscale && !use_compute_to_convert_depth_stencil_format) {
copy_regions[0].imageExtent =
(VkExtent3D){ surface->width, surface->height, 1 };
if (surface->image_scratch_current_layout !=
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
pgraph_vk_transition_image_layout(
pg, cmd, surface->image_scratch, surface->host_fmt.vk_format,
surface->image_scratch_current_layout,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
surface->image_scratch_current_layout =
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
VkImageBlit blit_region = {
.srcSubresource.aspectMask = surface->host_fmt.aspect,
.srcSubresource.mipLevel = 0,
.srcSubresource.baseArrayLayer = 0,
.srcSubresource.layerCount = 1,
.srcOffsets[0] = (VkOffset3D){0, 0, 0},
.srcOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1},
.dstSubresource.aspectMask = surface->host_fmt.aspect,
.dstSubresource.mipLevel = 0,
.dstSubresource.baseArrayLayer = 0,
.dstSubresource.layerCount = 1,
.dstOffsets[0] = (VkOffset3D){0, 0, 0},
.dstOffsets[1] = (VkOffset3D){surface->width, surface->height, 1},
};
vkCmdBlitImage(cmd, surface->image,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
surface->image_scratch,
surface->image_scratch_current_layout, 1, &blit_region,
surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch,
surface->host_fmt.vk_format,
surface->image_scratch_current_layout,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
surface->image_scratch_current_layout =
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
surface_image_loc = surface->image_scratch;
} else {
copy_regions[0].imageExtent =
(VkExtent3D){ scaled_width, scaled_height, 1 };
surface_image_loc = surface->image;
}
if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
size_t depth_size = scaled_width * scaled_height * 4;
copy_regions[num_copy_regions++] = (VkBufferImageCopy){
.bufferOffset = ROUND_UP(
depth_size,
r->device_props.limits.minStorageBufferOffsetAlignment),
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
.imageSubresource.layerCount = 1,
.imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 },
};
}
//
// Copy image to staging buffer, or to compute_dst if we need to pack it
//
size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel *
surface->width * surface->height;
assert((downloaded_image_size) <=
r->storage_buffers[BUFFER_STAGING_DST].buffer_size);
int copy_buffer_idx = use_compute_to_convert_depth_stencil_format ?
BUFFER_COMPUTE_DST :
BUFFER_STAGING_DST;
VkBuffer copy_buffer = r->storage_buffers[copy_buffer_idx].buffer;
{
VkBufferMemoryBarrier pre_copy_dst_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&pre_copy_dst_barrier, 0, NULL);
}
vkCmdCopyImageToBuffer(cmd, surface_image_loc,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
num_copy_regions, copy_regions);
pgraph_vk_transition_image_layout(
pg, cmd, surface->image, surface->host_fmt.vk_format,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
// FIXME: Verify output of depth stencil conversion
// FIXME: Track current layout and only transition when required
if (use_compute_to_convert_depth_stencil_format) {
size_t bytes_per_pixel = 4;
size_t packed_size =
downscale ? (surface->width * surface->height * bytes_per_pixel) :
(scaled_width * scaled_height * bytes_per_pixel);
//
// Pack the depth-stencil image into compute_src buffer
//
VkBufferMemoryBarrier pre_compute_src_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL,
1, &pre_compute_src_barrier, 0, NULL);
VkBuffer pack_buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer;
VkBufferMemoryBarrier pre_compute_dst_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = pack_buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL,
1, &pre_compute_dst_barrier, 0, NULL);
pgraph_vk_pack_depth_stencil(pg, surface, cmd, copy_buffer, pack_buffer,
downscale);
VkBufferMemoryBarrier post_compute_src_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&post_compute_src_barrier, 0, NULL);
VkBufferMemoryBarrier post_compute_dst_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = pack_buffer,
.size = packed_size
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&post_compute_dst_barrier, 0, NULL);
//
// Copy packed image over to staging buffer for host download
//
copy_buffer = r->storage_buffers[BUFFER_STAGING_DST].buffer;
VkBufferMemoryBarrier pre_copy_dst_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&pre_copy_dst_barrier, 0, NULL);
VkBufferCopy buffer_copy_region = {
.size = packed_size,
};
vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region);
VkBufferMemoryBarrier post_copy_src_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = pack_buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&post_copy_src_barrier, 0, NULL);
}
//
// Download image data to host
//
VkBufferMemoryBarrier post_copy_dst_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_HOST_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
&post_copy_dst_barrier, 0, NULL);
nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1);
pgraph_vk_end_debug_marker(r, cmd);
pgraph_vk_end_single_time_commands(pg, cmd);
void *mapped_memory_ptr = NULL;
VK_CHECK(vmaMapMemory(r->allocator,
r->storage_buffers[BUFFER_STAGING_DST].allocation,
&mapped_memory_ptr));
vmaInvalidateAllocation(r->allocator,
r->storage_buffers[BUFFER_STAGING_DST].allocation,
0, VK_WHOLE_SIZE);
memcpy_image(gl_read_buf, mapped_memory_ptr, surface->pitch,
surface->width * surface->fmt.bytes_per_pixel,
surface->height);
vmaUnmapMemory(r->allocator,
r->storage_buffers[BUFFER_STAGING_DST].allocation);
if (surface->swizzle) {
// FIXME: Swizzle in shader
swizzle_rect(swizzle_buf, surface->width, surface->height, pixels,
surface->pitch, surface->fmt.bytes_per_pixel);
nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE);
g_free(swizzle_buf);
}
}
static void download_surface(NV2AState *d, SurfaceBinding *surface, bool force)
{
if (!(surface->download_pending || force)) {
return;
}
// FIXME: Respect write enable at last TOU?
download_surface_to_buffer(d, surface, d->vram_ptr + surface->vram_addr);
memory_region_set_client_dirty(d->vram, surface->vram_addr,
surface->pitch * surface->height,
DIRTY_MEMORY_VGA);
memory_region_set_client_dirty(d->vram, surface->vram_addr,
surface->pitch * surface->height,
DIRTY_MEMORY_NV2A_TEX);
surface->download_pending = false;
surface->draw_dirty = false;
}
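
/*
 * Block the calling (non-renderer) thread until the surface's pending draws
 * have been downloaded to guest RAM by the renderer.
 */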
void pgraph_vk_wait_for_surface_download(SurfaceBinding *surface)
{
NV2AState *d = g_nv2a;
if (qatomic_read(&surface->draw_dirty)) {
qemu_mutex_lock(&d->pfifo.lock);
qemu_event_reset(&d->pgraph.vk_renderer_state->downloads_complete);
qatomic_set(&surface->download_pending, true);
qatomic_set(&d->pgraph.vk_renderer_state->downloads_pending, true);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
qemu_event_wait(&d->pgraph.vk_renderer_state->downloads_complete);
}
}
void pgraph_vk_process_pending_downloads(NV2AState *d)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
SurfaceBinding *surface;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
download_surface(d, surface, false);
}
qatomic_set(&r->downloads_pending, false);
qemu_event_set(&r->downloads_complete);
}
void pgraph_vk_download_dirty_surfaces(NV2AState *d)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
SurfaceBinding *surface;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
pgraph_vk_surface_download_if_dirty(d, surface);
}
qatomic_set(&r->download_dirty_surfaces_pending, false);
qemu_event_set(&r->dirty_surfaces_download_complete);
}
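
/*
 * TCG memory-access callback covering a surface's VRAM range. Any guest CPU
 * access while the surface has unflushed draws triggers a blocking download;
 * a write additionally marks the surface for re-upload before its next use.
 */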
static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
hwaddr len, bool write)
{
SurfaceBinding *e = opaque;
assert(addr >= e->vram_addr);
hwaddr offset = addr - e->vram_addr;
assert(offset < e->size);
if (qatomic_read(&e->draw_dirty)) {
trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
pgraph_vk_wait_for_surface_download(e);
}
if (write && !qatomic_read(&e->upload_pending)) {
trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
qatomic_set(&e->upload_pending, true);
}
}
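
/*
 * Note: the pgraph lock is dropped before taking the BQL below, presumably
 * to keep lock acquisition ordering consistent with other BQL holders.
 */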
static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface)
{
if (tcg_enabled()) {
qemu_mutex_unlock(&d->pgraph.lock);
bql_lock();
mem_access_callback_insert(qemu_get_cpu(0),
d->vram, surface->vram_addr, surface->size,
&surface->access_cb, &surface_access_callback,
surface);
bql_unlock();
qemu_mutex_lock(&d->pgraph.lock);
}
}
static void unregister_cpu_access_callback(NV2AState *d,
SurfaceBinding const *surface)
{
if (tcg_enabled()) {
qemu_mutex_unlock(&d->pgraph.lock);
bql_lock();
mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
bql_unlock();
qemu_mutex_lock(&d->pgraph.lock);
}
}
static void bind_surface(PGRAPHVkState *r, SurfaceBinding *surface)
{
if (surface->color) {
r->color_binding = surface;
} else {
r->zeta_binding = surface;
}
r->framebuffer_dirty = true;
}
static void unbind_surface(NV2AState *d, bool color)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
if (color) {
if (r->color_binding) {
r->color_binding = NULL;
r->framebuffer_dirty = true;
}
} else {
if (r->zeta_binding) {
r->zeta_binding = NULL;
r->framebuffer_dirty = true;
}
}
}
static void invalidate_surface(NV2AState *d, SurfaceBinding *surface)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
trace_nv2a_pgraph_surface_invalidated(surface->vram_addr);
// FIXME: We may be reading from the surface in the current command buffer!
// Add a detection to handle it. For now, finish to be safe.
pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_SURFACE_DOWN);
assert((!r->in_command_buffer ||
surface->draw_time < r->command_buffer_start_time) &&
"Surface evicted while in use!");
if (surface == r->color_binding) {
assert(d->pgraph.surface_color.buffer_dirty);
unbind_surface(d, true);
}
if (surface == r->zeta_binding) {
assert(d->pgraph.surface_zeta.buffer_dirty);
unbind_surface(d, false);
}
unregister_cpu_access_callback(d, surface);
QTAILQ_REMOVE(&r->surfaces, surface, entry);
QTAILQ_INSERT_HEAD(&r->invalid_surfaces, surface, entry);
}
static void invalidate_overlapping_surfaces(NV2AState *d,
SurfaceBinding const *surface)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
uintptr_t e_end = surface->vram_addr + surface->size - 1;
SurfaceBinding *s, *next;
QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
uintptr_t s_end = s->vram_addr + s->size - 1;
bool overlapping =
!(s->vram_addr > e_end || surface->vram_addr > s_end);
if (overlapping) {
trace_nv2a_pgraph_surface_evict_overlapping(
s->vram_addr, s->width, s->height,
s->pitch);
pgraph_vk_surface_download_if_dirty(d, s);
invalidate_surface(d, s);
}
}
}
static void surface_put(NV2AState *d, SurfaceBinding *surface)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
assert(pgraph_vk_surface_get(d, surface->vram_addr) == NULL);
invalidate_overlapping_surfaces(d, surface);
register_cpu_access_callback(d, surface);
QTAILQ_INSERT_HEAD(&r->surfaces, surface, entry);
}
SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
SurfaceBinding *surface;
QTAILQ_FOREACH (surface, &r->surfaces, entry) {
if (surface->vram_addr == addr) {
return surface;
}
}
return NULL;
}
SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
SurfaceBinding *surface;
QTAILQ_FOREACH (surface, &r->surfaces, entry) {
if (addr >= surface->vram_addr &&
addr < (surface->vram_addr + surface->size)) {
return surface;
}
}
return NULL;
}
static void set_surface_label(PGRAPHState *pg, SurfaceBinding const *surface)
{
PGRAPHVkState *r = pg->vk_renderer_state;
g_autofree gchar *label = g_strdup_printf(
"Surface %" HWADDR_PRIx "h fmt:%s,%02xh %dx%d aa:%d",
surface->vram_addr, surface->color ? "Color" : "Zeta",
surface->color ? surface->shape.color_format :
surface->shape.zeta_format,
surface->width, surface->height, pg->surface_shape.anti_aliasing);
VkDebugUtilsObjectNameInfoEXT name_info = {
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
.objectType = VK_OBJECT_TYPE_IMAGE,
.objectHandle = (uint64_t)surface->image,
.pObjectName = label,
};
if (r->debug_utils_extension_enabled) {
vkSetDebugUtilsObjectNameEXT(r->device, &name_info);
}
vmaSetAllocationName(r->allocator, surface->allocation, label);
if (surface->image_scratch) {
g_autofree gchar *label_scratch =
g_strdup_printf("%s (scratch)", label);
name_info.objectHandle = (uint64_t)surface->image_scratch;
name_info.pObjectName = label_scratch;
if (r->debug_utils_extension_enabled) {
vkSetDebugUtilsObjectNameEXT(r->device, &name_info);
}
vmaSetAllocationName(r->allocator, surface->allocation_scratch,
label_scratch);
}
}
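
/*
 * Create the VkImage backing a surface at host (scaled) dimensions, along
 * with a same-sized scratch image. The scratch image serves as an
 * intermediate for scaling blits and for buffer uploads (see the AMD driver
 * note in pgraph_vk_upload_surface_data).
 */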
static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface)
{
PGRAPHVkState *r = pg->vk_renderer_state;
unsigned int width = surface->width, height = surface->height;
pgraph_apply_scaling_factor(pg, &width, &height);
assert(!surface->image);
assert(!surface->image_scratch);
NV2A_VK_DPRINTF(
"Creating new surface image width=%d height=%d @ %08" HWADDR_PRIx,
width, height, surface->vram_addr);
VkImageCreateInfo image_create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.extent.width = width,
.extent.height = height,
.extent.depth = 1,
.mipLevels = 1,
.arrayLayers = 1,
.format = surface->host_fmt.vk_format,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | surface->host_fmt.usage,
.samples = VK_SAMPLE_COUNT_1_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
VmaAllocationCreateInfo alloc_create_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
};
VK_CHECK(vmaCreateImage(r->allocator, &image_create_info,
&alloc_create_info, &surface->image,
&surface->allocation, NULL));
VK_CHECK(vmaCreateImage(r->allocator, &image_create_info,
&alloc_create_info, &surface->image_scratch,
&surface->allocation_scratch, NULL));
surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
VkImageViewCreateInfo image_view_create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = surface->image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = surface->host_fmt.vk_format,
.subresourceRange.aspectMask = surface->host_fmt.aspect,
.subresourceRange.levelCount = 1,
.subresourceRange.layerCount = 1,
};
VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
&surface->image_view));
// FIXME: Go right into main command buffer
VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__);
pgraph_vk_transition_image_layout(
pg, cmd, surface->image, surface->host_fmt.vk_format,
VK_IMAGE_LAYOUT_UNDEFINED,
surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_3);
pgraph_vk_end_debug_marker(r, cmd);
pgraph_vk_end_single_time_commands(pg, cmd);
nv2a_profile_inc_counter(NV2A_PROF_SURF_CREATE);
}
static void migrate_surface_image(SurfaceBinding *dst, SurfaceBinding *src)
{
dst->image = src->image;
dst->image_view = src->image_view;
dst->allocation = src->allocation;
dst->image_scratch = src->image_scratch;
dst->image_scratch_current_layout = src->image_scratch_current_layout;
dst->allocation_scratch = src->allocation_scratch;
src->image = VK_NULL_HANDLE;
src->image_view = VK_NULL_HANDLE;
src->allocation = VK_NULL_HANDLE;
src->image_scratch = VK_NULL_HANDLE;
src->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
src->allocation_scratch = VK_NULL_HANDLE;
}
static void destroy_surface_image(PGRAPHVkState *r, SurfaceBinding *surface)
{
vkDestroyImageView(r->device, surface->image_view, NULL);
surface->image_view = VK_NULL_HANDLE;
vmaDestroyImage(r->allocator, surface->image, surface->allocation);
surface->image = VK_NULL_HANDLE;
surface->allocation = VK_NULL_HANDLE;
vmaDestroyImage(r->allocator, surface->image_scratch,
surface->allocation_scratch);
surface->image_scratch = VK_NULL_HANDLE;
surface->allocation_scratch = VK_NULL_HANDLE;
}
static bool check_invalid_surface_is_compatible(SurfaceBinding *surface,
                                                SurfaceBinding *target)
{
return surface->host_fmt.vk_format == target->host_fmt.vk_format &&
surface->width == target->width &&
surface->height == target->height &&
surface->host_fmt.usage == target->host_fmt.usage;
}
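
/*
 * Invalidated surfaces are kept on a free list so a compatible new binding
 * can recycle their images (via migrate_surface_image) instead of
 * allocating; prune_invalid_surfaces() bounds the list size.
 */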
static SurfaceBinding *
get_any_compatible_invalid_surface(PGRAPHVkState *r, SurfaceBinding *target)
{
SurfaceBinding *surface, *next;
QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) {
        if (check_invalid_surface_is_compatible(surface, target)) {
QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry);
return surface;
}
}
return NULL;
}
static void prune_invalid_surfaces(PGRAPHVkState *r, int keep)
{
int num_surfaces = 0;
SurfaceBinding *surface, *next;
QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) {
num_surfaces += 1;
if (num_surfaces > keep) {
QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry);
destroy_surface_image(r, surface);
g_free(surface);
}
}
}
static void expire_old_surfaces(NV2AState *d)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
SurfaceBinding *s, *next;
QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
int last_used = d->pgraph.frame_time - s->frame_time;
if (last_used >= max_surface_frame_time_delta) {
trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr);
pgraph_vk_surface_download_if_dirty(d, s);
invalidate_surface(d, s);
}
}
}
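
/*
 * Two surfaces are compatible when their type (color/zeta), Vulkan format
 * and pitch match. Non-strict matching additionally allows reusing a larger
 * surface for a smaller target; strict matching requires exact dimensions.
 */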
static bool check_surface_compatibility(SurfaceBinding const *s1,
SurfaceBinding const *s2, bool strict)
{
bool format_compatible =
(s1->color == s2->color) &&
(s1->host_fmt.vk_format == s2->host_fmt.vk_format) &&
(s1->pitch == s2->pitch);
if (!format_compatible) {
return false;
}
if (!strict) {
return (s1->width >= s2->width) && (s1->height >= s2->height);
} else {
return (s1->width == s2->width) && (s1->height == s2->height);
}
}
void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface)
{
if (surface->draw_dirty) {
download_surface(d, surface, true);
}
}
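
/*
 * Upload surface contents from guest RAM. The rough flow is: unswizzle if
 * needed, copy rows into the staging buffer, optionally unpack D24S8/D32S8
 * via compute (which also applies the scale factor), copy the buffer into
 * the scratch image, then blit (when upscaling) or copy the scratch image
 * into the final image.
 */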
void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
bool force)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
if (!(surface->upload_pending || force)) {
return;
}
nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD);
pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); // FIXME: SURFACE_UP
trace_nv2a_pgraph_surface_upload(
surface->color ? "COLOR" : "ZETA",
surface->swizzle ? "sz" : "lin", surface->vram_addr,
surface->width, surface->height, surface->pitch,
surface->fmt.bytes_per_pixel);
surface->upload_pending = false;
surface->draw_time = pg->draw_time;
uint8_t *data = d->vram_ptr;
uint8_t *buf = data + surface->vram_addr;
g_autofree uint8_t *swizzle_buf = NULL;
uint8_t *gl_read_buf = NULL;
if (surface->swizzle) {
swizzle_buf = (uint8_t*)g_malloc(surface->size);
gl_read_buf = swizzle_buf;
unswizzle_rect(data + surface->vram_addr,
surface->width, surface->height,
swizzle_buf,
surface->pitch,
surface->fmt.bytes_per_pixel);
nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE);
} else {
gl_read_buf = buf;
}
//
// Upload image data from host to staging buffer
//
StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC];
size_t uploaded_image_size = surface->height * surface->width *
surface->fmt.bytes_per_pixel;
assert(uploaded_image_size <= copy_buffer->buffer_size);
void *mapped_memory_ptr = NULL;
VK_CHECK(vmaMapMemory(r->allocator, copy_buffer->allocation,
&mapped_memory_ptr));
bool use_compute_to_convert_depth_stencil_format =
surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT;
bool no_conversion_necessary =
surface->color || surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM ||
use_compute_to_convert_depth_stencil_format;
assert(no_conversion_necessary);
memcpy_image(mapped_memory_ptr, gl_read_buf,
surface->width * surface->fmt.bytes_per_pixel, surface->pitch,
surface->height);
vmaFlushAllocation(r->allocator, copy_buffer->allocation, 0, VK_WHOLE_SIZE);
vmaUnmapMemory(r->allocator, copy_buffer->allocation);
VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__);
VkBufferMemoryBarrier host_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer->buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&host_barrier, 0, NULL);
// Set up image copy regions (which may be modified by compute unpack)
VkBufferImageCopy regions[2];
int num_regions = 0;
regions[num_regions++] = (VkBufferImageCopy){
.imageSubresource.aspectMask = surface->color ?
VK_IMAGE_ASPECT_COLOR_BIT :
VK_IMAGE_ASPECT_DEPTH_BIT,
.imageSubresource.layerCount = 1,
.imageExtent = (VkExtent3D){ surface->width, surface->height, 1 },
};
if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
regions[num_regions++] = (VkBufferImageCopy){
.imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
.imageSubresource.layerCount = 1,
.imageExtent = (VkExtent3D){ surface->width, surface->height, 1 },
};
}
unsigned int scaled_width = surface->width, scaled_height = surface->height;
pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);
if (use_compute_to_convert_depth_stencil_format) {
//
// Copy packed image buffer to compute_dst for unpacking
//
size_t packed_size = uploaded_image_size;
VkBufferCopy buffer_copy_region = {
.size = packed_size,
};
vkCmdCopyBuffer(cmd, copy_buffer->buffer,
r->storage_buffers[BUFFER_COMPUTE_DST].buffer, 1,
&buffer_copy_region);
size_t num_pixels = scaled_width * scaled_height;
size_t unpacked_depth_image_size = num_pixels * 4;
size_t unpacked_stencil_image_size = num_pixels;
size_t unpacked_size =
unpacked_depth_image_size + unpacked_stencil_image_size;
VkBufferMemoryBarrier post_copy_src_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer->buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&post_copy_src_barrier, 0, NULL);
//
// Unpack depth-stencil image into compute_src
//
VkBufferMemoryBarrier pre_unpack_src_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL,
1, &pre_unpack_src_barrier, 0, NULL);
StorageBuffer *unpack_buffer = &r->storage_buffers[BUFFER_COMPUTE_SRC];
VkBufferMemoryBarrier pre_unpack_dst_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = unpack_buffer->buffer,
.size = unpacked_size
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1,
&pre_unpack_dst_barrier, 0, NULL);
pgraph_vk_unpack_depth_stencil(
pg, surface, cmd, r->storage_buffers[BUFFER_COMPUTE_DST].buffer,
unpack_buffer->buffer);
VkBufferMemoryBarrier post_unpack_src_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&post_unpack_src_barrier, 0, NULL);
VkBufferMemoryBarrier post_unpack_dst_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = unpack_buffer->buffer,
.size = unpacked_size
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&post_unpack_dst_barrier, 0, NULL);
// Already scaled during compute. Adjust copy regions.
regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 };
regions[1].imageExtent = regions[0].imageExtent;
regions[1].bufferOffset =
ROUND_UP(unpacked_depth_image_size,
r->device_props.limits.minStorageBufferOffsetAlignment);
copy_buffer = unpack_buffer;
}
//
// Copy image data from buffer to staging image
//
if (surface->image_scratch_current_layout !=
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch,
surface->host_fmt.vk_format,
surface->image_scratch_current_layout,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
surface->image_scratch_current_layout =
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
}
vkCmdCopyBufferToImage(cmd, copy_buffer->buffer, surface->image_scratch,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions,
regions);
VkBufferMemoryBarrier post_copy_src_buffer_barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = copy_buffer->buffer,
.size = VK_WHOLE_SIZE
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
&post_copy_src_buffer_barrier, 0, NULL);
//
// Copy staging image to final image
//
pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch,
surface->host_fmt.vk_format,
surface->image_scratch_current_layout,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
surface->image_scratch_current_layout =
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
pgraph_vk_transition_image_layout(
pg, cmd, surface->image, surface->host_fmt.vk_format,
surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
bool upscale = pg->surface_scale_factor > 1 &&
!use_compute_to_convert_depth_stencil_format;
if (upscale) {
VkImageBlit blitRegion = {
.srcSubresource.aspectMask = surface->host_fmt.aspect,
.srcSubresource.mipLevel = 0,
.srcSubresource.baseArrayLayer = 0,
.srcSubresource.layerCount = 1,
.srcOffsets[0] = (VkOffset3D){0, 0, 0},
.srcOffsets[1] = (VkOffset3D){surface->width, surface->height, 1},
.dstSubresource.aspectMask = surface->host_fmt.aspect,
.dstSubresource.mipLevel = 0,
.dstSubresource.baseArrayLayer = 0,
.dstSubresource.layerCount = 1,
.dstOffsets[0] = (VkOffset3D){0, 0, 0},
.dstOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1},
};
vkCmdBlitImage(cmd, surface->image_scratch,
surface->image_scratch_current_layout, surface->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitRegion,
surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
} else {
// Note: We should be able to vkCmdCopyBufferToImage directly into
// surface->image, but there is an apparent AMD Windows driver
// synchronization bug we'll hit when doing this. For this reason,
// always use a staging image.
for (int i = 0; i < num_regions; i++) {
VkImageAspectFlags aspect = regions[i].imageSubresource.aspectMask;
VkImageCopy copy_region = {
.srcSubresource.aspectMask = aspect,
.srcSubresource.layerCount = 1,
.dstSubresource.aspectMask = aspect,
.dstSubresource.layerCount = 1,
.extent = regions[i].imageExtent,
};
vkCmdCopyImage(cmd, surface->image_scratch,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, surface->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1,
&copy_region);
}
}
pgraph_vk_transition_image_layout(
pg, cmd, surface->image, surface->host_fmt.vk_format,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_2);
pgraph_vk_end_debug_marker(r, cmd);
pgraph_vk_end_single_time_commands(pg, cmd);
surface->initialized = true;
}
static void compare_surfaces(SurfaceBinding const *a, SurfaceBinding const *b)
{
#define DO_CMP(fld) \
if (a->fld != b->fld) \
trace_nv2a_pgraph_surface_compare_mismatch( \
#fld, (long int)a->fld, (long int)b->fld);
DO_CMP(shape.clip_x)
DO_CMP(shape.clip_width)
DO_CMP(shape.clip_y)
DO_CMP(shape.clip_height)
DO_CMP(fmt.bytes_per_pixel)
DO_CMP(host_fmt.vk_format)
DO_CMP(color)
DO_CMP(swizzle)
DO_CMP(vram_addr)
DO_CMP(width)
DO_CMP(height)
DO_CMP(pitch)
DO_CMP(size)
DO_CMP(dma_addr)
DO_CMP(dma_len)
DO_CMP(frame_time)
DO_CMP(draw_time)
#undef DO_CMP
}
static void populate_surface_binding_target_sized(NV2AState *d, bool color,
unsigned int width,
unsigned int height,
SurfaceBinding *target)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
Surface *surface;
hwaddr dma_address;
BasicSurfaceFormatInfo fmt;
SurfaceFormatInfo host_fmt;
if (color) {
surface = &pg->surface_color;
dma_address = pg->dma_color;
assert(pg->surface_shape.color_format != 0);
assert(pg->surface_shape.color_format <
ARRAY_SIZE(kelvin_surface_color_format_vk_map));
fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format];
host_fmt = kelvin_surface_color_format_vk_map[pg->surface_shape.color_format];
if (host_fmt.host_bytes_per_pixel == 0) {
fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n",
pg->surface_shape.color_format);
abort();
}
} else {
surface = &pg->surface_zeta;
dma_address = pg->dma_zeta;
assert(pg->surface_shape.zeta_format != 0);
assert(pg->surface_shape.zeta_format <
ARRAY_SIZE(r->kelvin_surface_zeta_vk_map));
fmt = kelvin_surface_zeta_format_map[pg->surface_shape.zeta_format];
host_fmt = r->kelvin_surface_zeta_vk_map[pg->surface_shape.zeta_format];
        // FIXME: Support 16- and 24-bit float zeta surface formats
}
DMAObject dma = nv_dma_load(d, dma_address);
    // There are several bugs that could cause us to hit this function at the
    // wrong time and get an invalid DMA object. Check that it's sane.
assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS);
// assert(dma.address + surface->offset != 0);
assert(surface->offset <= dma.limit);
assert(surface->offset + surface->pitch * height <= dma.limit + 1);
assert(surface->pitch % fmt.bytes_per_pixel == 0);
assert((dma.address & ~0x07FFFFFF) == 0);
target->shape = (color || !r->color_binding) ? pg->surface_shape :
r->color_binding->shape;
target->fmt = fmt;
target->host_fmt = host_fmt;
target->color = color;
target->swizzle =
(pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
target->vram_addr = dma.address + surface->offset;
target->width = width;
target->height = height;
target->pitch = surface->pitch;
target->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel);
target->upload_pending = true;
target->download_pending = false;
target->draw_dirty = false;
target->dma_addr = dma.address;
target->dma_len = dma.limit;
target->frame_time = pg->frame_time;
target->draw_time = pg->draw_time;
target->cleared = false;
target->initialized = false;
}
static void populate_surface_binding_target(NV2AState *d, bool color,
SurfaceBinding *target)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
unsigned int width, height;
if (color || !r->color_binding) {
get_surface_dimensions(pg, &width, &height);
pgraph_apply_anti_aliasing_factor(pg, &width, &height);
// Since we determine surface dimensions based on the clipping
// rectangle, make sure to include the surface offset as well.
if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
width += pg->surface_shape.clip_x;
height += pg->surface_shape.clip_y;
}
} else {
width = r->color_binding->width;
height = r->color_binding->height;
}
populate_surface_binding_target_sized(d, color, width, height, target);
}
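
/*
 * Resolve the color or zeta binding for the current surface state. A cached
 * surface at the target address is reused when compatible (the binding may
 * adopt its larger dimensions); an incompatible one is downloaded and
 * invalidated, and a fresh binding is created, recycling a compatible
 * invalid surface image when available.
 */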
static void update_surface_part(NV2AState *d, bool upload, bool color)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
SurfaceBinding target;
memset(&target, 0, sizeof(target));
populate_surface_binding_target(d, color, &target);
Surface *pg_surface = color ? &pg->surface_color : &pg->surface_zeta;
bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty(
d->vram, target.vram_addr,
target.size, DIRTY_MEMORY_NV2A);
SurfaceBinding *current_binding = color ? r->color_binding
: r->zeta_binding;
if (!current_binding ||
(upload && (pg_surface->buffer_dirty || mem_dirty))) {
        // FIXME: We don't need to be so aggressive about flushing the
        // command list here
// pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE);
pgraph_vk_ensure_not_in_render_pass(pg);
unbind_surface(d, color);
SurfaceBinding *surface = pgraph_vk_surface_get(d, target.vram_addr);
if (surface != NULL) {
            // FIXME: Support same color/zeta surface target? In the meantime,
            // if the surface we just found is currently bound, just unbind it.
SurfaceBinding *other = (color ? r->zeta_binding
: r->color_binding);
if (surface == other) {
NV2A_UNIMPLEMENTED("Same color & zeta surface offset");
unbind_surface(d, !color);
}
}
trace_nv2a_pgraph_surface_target(
color ? "COLOR" : "ZETA", target.vram_addr,
target.swizzle ? "sz" : "ln",
pg->surface_shape.anti_aliasing,
pg->surface_shape.clip_x,
pg->surface_shape.clip_width, pg->surface_shape.clip_y,
pg->surface_shape.clip_height);
bool should_create = true;
if (surface != NULL) {
bool is_compatible =
check_surface_compatibility(surface, &target, false);
void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height,
const char *layout, uint32_t anti_aliasing,
uint32_t clip_x, uint32_t clip_width,
uint32_t clip_y, uint32_t clip_height,
uint32_t pitch) =
surface->color ? trace_nv2a_pgraph_surface_match_color :
trace_nv2a_pgraph_surface_match_zeta;
trace_fn(surface->vram_addr, surface->width, surface->height,
surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing,
surface->shape.clip_x, surface->shape.clip_width,
surface->shape.clip_y, surface->shape.clip_height,
surface->pitch);
assert(!(target.swizzle && pg->clearing));
#if 0
if (surface->swizzle != target.swizzle) {
// Clears should only be done on linear surfaces. Avoid
// synchronization by allowing (1) a surface marked swizzled to
// be cleared under the assumption the entire surface is
// destined to be cleared and (2) a fully cleared linear surface
// to be marked swizzled. Strictly match size to avoid
// pathological cases.
is_compatible &= (pg->clearing || surface->cleared) &&
check_surface_compatibility(surface, &target, true);
if (is_compatible) {
trace_nv2a_pgraph_surface_migrate_type(
target.swizzle ? "swizzled" : "linear");
}
}
#endif
if (is_compatible && color &&
!check_surface_compatibility(surface, &target, true)) {
SurfaceBinding zeta_entry;
populate_surface_binding_target_sized(
d, !color, surface->width, surface->height, &zeta_entry);
hwaddr color_end = surface->vram_addr + surface->size;
hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size;
is_compatible &= surface->vram_addr >= zeta_end ||
zeta_entry.vram_addr >= color_end;
}
if (is_compatible && !color && r->color_binding) {
is_compatible &= (surface->width == r->color_binding->width) &&
(surface->height == r->color_binding->height);
}
if (is_compatible) {
// FIXME: Refactor
pg->surface_binding_dim.width = surface->width;
pg->surface_binding_dim.clip_x = surface->shape.clip_x;
pg->surface_binding_dim.clip_width = surface->shape.clip_width;
pg->surface_binding_dim.height = surface->height;
pg->surface_binding_dim.clip_y = surface->shape.clip_y;
pg->surface_binding_dim.clip_height = surface->shape.clip_height;
surface->upload_pending |= mem_dirty;
pg->surface_zeta.buffer_dirty |= color;
should_create = false;
} else {
trace_nv2a_pgraph_surface_evict_reason(
"incompatible", surface->vram_addr);
compare_surfaces(surface, &target);
pgraph_vk_surface_download_if_dirty(d, surface);
invalidate_surface(d, surface);
}
}
if (should_create) {
surface = get_any_compatible_invalid_surface(r, &target);
if (surface) {
migrate_surface_image(&target, surface);
} else {
surface = g_malloc(sizeof(SurfaceBinding));
create_surface_image(pg, &target);
}
*surface = target;
set_surface_label(pg, surface);
surface_put(d, surface);
// FIXME: Refactor
pg->surface_binding_dim.width = target.width;
pg->surface_binding_dim.clip_x = target.shape.clip_x;
pg->surface_binding_dim.clip_width = target.shape.clip_width;
pg->surface_binding_dim.height = target.height;
pg->surface_binding_dim.clip_y = target.shape.clip_y;
pg->surface_binding_dim.clip_height = target.shape.clip_height;
if (color && r->zeta_binding &&
(r->zeta_binding->width != target.width ||
r->zeta_binding->height != target.height)) {
pg->surface_zeta.buffer_dirty = true;
}
}
void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height,
const char *layout, uint32_t anti_aliasing,
uint32_t clip_x, uint32_t clip_width, uint32_t clip_y,
uint32_t clip_height, uint32_t pitch) =
color ? (should_create ? trace_nv2a_pgraph_surface_create_color :
trace_nv2a_pgraph_surface_hit_color) :
(should_create ? trace_nv2a_pgraph_surface_create_zeta :
trace_nv2a_pgraph_surface_hit_zeta);
trace_fn(surface->vram_addr, surface->width, surface->height,
surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing,
surface->shape.clip_x, surface->shape.clip_width,
surface->shape.clip_y, surface->shape.clip_height, surface->pitch);
bind_surface(r, surface);
pg_surface->buffer_dirty = false;
}
if (!upload && pg_surface->draw_dirty) {
if (!tcg_enabled()) {
// FIXME: Cannot monitor for reads/writes; flush now
download_surface(d, color ? r->color_binding : r->zeta_binding,
true);
}
pg_surface->write_enabled_cache = false;
pg_surface->draw_dirty = false;
}
}
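
/*
 * Top-level surface state synchronization. On the upload path, (re)bind the
 * color and zeta targets and upload their contents if the guest wrote the
 * backing memory; on the non-upload path, surfaces with pending draws are
 * downloaded to RAM when CPU accesses cannot be monitored (non-TCG).
 */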
// FIXME: Move to common?
void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write,
bool zeta_write)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
pg->surface_shape.z_format =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
NV_PGRAPH_SETUPRASTER_Z_FORMAT);
color_write = color_write &&
(pg->clearing || pgraph_color_write_enabled(pg));
zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg));
if (upload) {
bool fb_dirty = framebuffer_dirty(pg);
if (fb_dirty) {
memcpy(&pg->last_surface_shape, &pg->surface_shape,
sizeof(SurfaceShape));
pg->surface_color.buffer_dirty = true;
pg->surface_zeta.buffer_dirty = true;
}
if (pg->surface_color.buffer_dirty) {
unbind_surface(d, true);
}
if (color_write) {
update_surface_part(d, true, true);
}
if (pg->surface_zeta.buffer_dirty) {
unbind_surface(d, false);
}
if (zeta_write) {
update_surface_part(d, true, false);
}
} else {
if ((color_write || pg->surface_color.write_enabled_cache)
&& pg->surface_color.draw_dirty) {
update_surface_part(d, false, true);
}
if ((zeta_write || pg->surface_zeta.write_enabled_cache)
&& pg->surface_zeta.draw_dirty) {
update_surface_part(d, false, false);
}
}
if (upload) {
pg->draw_time++;
}
bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
if (r->color_binding) {
r->color_binding->frame_time = pg->frame_time;
if (upload) {
pgraph_vk_upload_surface_data(d, r->color_binding, false);
r->color_binding->draw_time = pg->draw_time;
r->color_binding->swizzle = swizzle;
}
}
if (r->zeta_binding) {
r->zeta_binding->frame_time = pg->frame_time;
if (upload) {
pgraph_vk_upload_surface_data(d, r->zeta_binding, false);
r->zeta_binding->draw_time = pg->draw_time;
r->zeta_binding->swizzle = swizzle;
}
}
// Sanity check color and zeta dimensions match
if (r->color_binding && r->zeta_binding) {
assert(r->color_binding->width == r->zeta_binding->width);
assert(r->color_binding->height == r->zeta_binding->height);
}
expire_old_surfaces(d);
prune_invalid_surfaces(r, num_invalid_surfaces_to_keep);
}
static bool check_format_and_usage_supported(PGRAPHVkState *r, VkFormat format,
VkImageUsageFlags usage)
{
VkPhysicalDeviceImageFormatInfo2 pdif2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
.format = format,
.type = VK_IMAGE_TYPE_2D,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = usage,
};
VkImageFormatProperties2 props = {
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
};
VkResult result = vkGetPhysicalDeviceImageFormatProperties2(
r->physical_device, &pdif2, &props);
return result == VK_SUCCESS;
}
static bool check_surface_internal_formats_supported(
PGRAPHVkState *r, const SurfaceFormatInfo *fmts, size_t count)
{
bool all_supported = true;
    for (size_t i = 0; i < count; i++) {
const SurfaceFormatInfo *f = &fmts[i];
if (f->host_bytes_per_pixel) {
all_supported &=
check_format_and_usage_supported(r, f->vk_format, f->usage);
}
}
return all_supported;
}
void pgraph_vk_init_surfaces(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
    // Make sure all surface format types are supported. We don't expect
    // issues with these, and therefore have no fallback mechanism.
bool color_formats_supported = check_surface_internal_formats_supported(
r, kelvin_surface_color_format_vk_map,
ARRAY_SIZE(kelvin_surface_color_format_vk_map));
assert(color_formats_supported);
    // Check if the device supports the preferred VK_FORMAT_D24_UNORM_S8_UINT
    // format, falling back to D32_SFLOAT_S8_UINT otherwise.
r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = zeta_d16;
if (check_surface_internal_formats_supported(r, &zeta_d24_unorm_s8_uint,
1)) {
r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
zeta_d24_unorm_s8_uint;
} else if (check_surface_internal_formats_supported(
r, &zeta_d32_sfloat_s8_uint, 1)) {
r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
zeta_d32_sfloat_s8_uint;
} else {
assert(!"No suitable depth-stencil format supported");
}
QTAILQ_INIT(&r->surfaces);
QTAILQ_INIT(&r->invalid_surfaces);
r->downloads_pending = false;
qemu_event_init(&r->downloads_complete, false);
qemu_event_init(&r->dirty_surfaces_download_complete, false);
r->color_binding = NULL;
r->zeta_binding = NULL;
r->framebuffer_dirty = true;
pgraph_vk_reload_surface_scale_factor(pg); // FIXME: Move internal
}
void pgraph_vk_finalize_surfaces(PGRAPHState *pg)
{
pgraph_vk_surface_flush(container_of(pg, NV2AState, pgraph));
}
void pgraph_vk_surface_flush(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
// Clear last surface shape to force recreation of buffers at next draw
pg->surface_color.draw_dirty = false;
pg->surface_zeta.draw_dirty = false;
memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape));
unbind_surface(d, true);
unbind_surface(d, false);
SurfaceBinding *s, *next;
QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
        // FIXME: We should download all surfaces to RAM, but we need to
        // investigate a corruption issue first
pgraph_vk_surface_download_if_dirty(d, s);
invalidate_surface(d, s);
}
prune_invalid_surfaces(r, 0);
pgraph_vk_reload_surface_scale_factor(pg);
}