/* * Geforce NV2A PGRAPH Vulkan Renderer * * Copyright (c) 2024-2025 Matt Borgerson * * Based on GL implementation: * * Copyright (c) 2012 espes * Copyright (c) 2015 Jannik Vogel * Copyright (c) 2018-2024 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see . */ #include "hw/xbox/nv2a/nv2a_int.h" #include "hw/xbox/nv2a/pgraph/swizzle.h" #include "qemu/compiler.h" #include "ui/xemu-settings.h" #include "renderer.h" const int num_invalid_surfaces_to_keep = 10; // FIXME: Make automatic const int max_surface_frame_time_delta = 5; void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale) { g_config.display.quality.surface_scale = scale < 1 ? 1 : scale; qemu_mutex_lock(&d->pfifo.lock); qatomic_set(&d->pfifo.halt, true); qemu_mutex_unlock(&d->pfifo.lock); // FIXME: It's just flush qemu_mutex_lock(&d->pgraph.lock); qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true); qemu_mutex_unlock(&d->pgraph.lock); qemu_mutex_lock(&d->pfifo.lock); pfifo_kick(d); qemu_mutex_unlock(&d->pfifo.lock); qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); qemu_mutex_lock(&d->pgraph.lock); qemu_event_reset(&d->pgraph.flush_complete); qatomic_set(&d->pgraph.flush_pending, true); qemu_mutex_unlock(&d->pgraph.lock); qemu_mutex_lock(&d->pfifo.lock); pfifo_kick(d); qemu_mutex_unlock(&d->pfifo.lock); qemu_event_wait(&d->pgraph.flush_complete); qemu_mutex_lock(&d->pfifo.lock); qatomic_set(&d->pfifo.halt, false); pfifo_kick(d); qemu_mutex_unlock(&d->pfifo.lock); } unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d) { return d->pgraph.surface_scale_factor; // FIXME: Move internal to renderer } void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg) { int factor = g_config.display.quality.surface_scale; pg->surface_scale_factor = MAX(factor, 1); } // FIXME: Move to common static void get_surface_dimensions(PGRAPHState const *pg, unsigned int *width, unsigned int *height) { bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); if (swizzle) { *width = 1 << pg->surface_shape.log_width; *height = 1 << pg->surface_shape.log_height; } else { *width = pg->surface_shape.clip_width; *height = pg->surface_shape.clip_height; } } // FIXME: Move to common static bool framebuffer_dirty(PGRAPHState const *pg) { bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, sizeof(SurfaceShape)) != 0; if (!shape_changed || (!pg->surface_shape.color_format && !pg->surface_shape.zeta_format)) { return false; } return true; } static void memcpy_image(void *dst, void const *src, int dst_stride, int src_stride, int height) { if (dst_stride == src_stride) { memcpy(dst, src, dst_stride * height); return; } uint8_t *dst_ptr = (uint8_t *)dst; uint8_t const *src_ptr = (uint8_t *)src; size_t copy_stride = MIN(src_stride, dst_stride); for (int i = 0; i < height; i++) { memcpy(dst_ptr, src_ptr, copy_stride); dst_ptr += dst_stride; src_ptr += src_stride; } } void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size) { PGRAPHVkState *r = pg->vk_renderer_state; SurfaceBinding *surface; hwaddr end = start + size - 1; QTAILQ_FOREACH(surface, &r->surfaces, entry) { hwaddr surf_end = surface->vram_addr + surface->size - 1; bool overlapping = !(surface->vram_addr >= end || start >= surf_end); if (overlapping) { pgraph_vk_surface_download_if_dirty( container_of(pg, NV2AState, pgraph), surface); } } } static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, uint8_t *pixels) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); bool use_compute_to_convert_depth_stencil_format = surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; bool no_conversion_necessary = surface->color || use_compute_to_convert_depth_stencil_format || surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM; assert(no_conversion_necessary); bool compute_needs_finish = (use_compute_to_convert_depth_stencil_format && pgraph_vk_compute_needs_finish(r)); if (r->in_command_buffer && surface->draw_time >= r->command_buffer_start_time) { pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN); } else if (compute_needs_finish) { pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); } bool downscale = (pg->surface_scale_factor != 1); trace_nv2a_pgraph_surface_download( surface->color ? "COLOR" : "ZETA", surface->swizzle ? "sz" : "lin", surface->vram_addr, surface->width, surface->height, surface->pitch, surface->fmt.bytes_per_pixel); // Read surface into memory uint8_t *gl_read_buf = pixels; uint8_t *swizzle_buf = pixels; if (surface->swizzle) { // FIXME: Swizzle in shader assert(pg->surface_scale_factor == 1 || downscale); swizzle_buf = (uint8_t *)g_malloc(surface->size); gl_read_buf = swizzle_buf; } unsigned int scaled_width = surface->width, scaled_height = surface->height; pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); int num_copy_regions = 1; VkBufferImageCopy copy_regions[2]; copy_regions[0] = (VkBufferImageCopy){ .imageSubresource.aspectMask = surface->color ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT, .imageSubresource.layerCount = 1, }; VkImage surface_image_loc; if (downscale && !use_compute_to_convert_depth_stencil_format) { copy_regions[0].imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }; if (surface->image_scratch_current_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { pgraph_vk_transition_image_layout( pg, cmd, surface->image_scratch, surface->host_fmt.vk_format, surface->image_scratch_current_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; } VkImageBlit blit_region = { .srcSubresource.aspectMask = surface->host_fmt.aspect, .srcSubresource.mipLevel = 0, .srcSubresource.baseArrayLayer = 0, .srcSubresource.layerCount = 1, .srcOffsets[0] = (VkOffset3D){0, 0, 0}, .srcOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1}, .dstSubresource.aspectMask = surface->host_fmt.aspect, .dstSubresource.mipLevel = 0, .dstSubresource.baseArrayLayer = 0, .dstSubresource.layerCount = 1, .dstOffsets[0] = (VkOffset3D){0, 0, 0}, .dstOffsets[1] = (VkOffset3D){surface->width, surface->height, 1}, }; vkCmdBlitImage(cmd, surface->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, surface->image_scratch, surface->image_scratch_current_layout, 1, &blit_region, surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, surface->host_fmt.vk_format, surface->image_scratch_current_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; surface_image_loc = surface->image_scratch; } else { copy_regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }; surface_image_loc = surface->image; } if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { size_t depth_size = scaled_width * scaled_height * 4; copy_regions[num_copy_regions++] = (VkBufferImageCopy){ .bufferOffset = ROUND_UP( depth_size, r->device_props.limits.minStorageBufferOffsetAlignment), .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, .imageSubresource.layerCount = 1, .imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }, }; } // // Copy image to staging buffer, or to compute_dst if we need to pack it // size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel * surface->width * surface->height; assert((downloaded_image_size) <= r->storage_buffers[BUFFER_STAGING_DST].buffer_size); int copy_buffer_idx = use_compute_to_convert_depth_stencil_format ? BUFFER_COMPUTE_DST : BUFFER_STAGING_DST; VkBuffer copy_buffer = r->storage_buffers[copy_buffer_idx].buffer; { VkBufferMemoryBarrier pre_copy_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &pre_copy_dst_barrier, 0, NULL); } vkCmdCopyImageToBuffer(cmd, surface_image_loc, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, num_copy_regions, copy_regions); pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); // FIXME: Verify output of depth stencil conversion // FIXME: Track current layout and only transition when required if (use_compute_to_convert_depth_stencil_format) { size_t bytes_per_pixel = 4; size_t packed_size = downscale ? (surface->width * surface->height * bytes_per_pixel) : (scaled_width * scaled_height * bytes_per_pixel); // // Pack the depth-stencil image into compute_src buffer // VkBufferMemoryBarrier pre_compute_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, &pre_compute_src_barrier, 0, NULL); VkBuffer pack_buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer; VkBufferMemoryBarrier pre_compute_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = pack_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, &pre_compute_dst_barrier, 0, NULL); pgraph_vk_pack_depth_stencil(pg, surface, cmd, copy_buffer, pack_buffer, downscale); VkBufferMemoryBarrier post_compute_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &post_compute_src_barrier, 0, NULL); VkBufferMemoryBarrier post_compute_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = pack_buffer, .size = packed_size }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &post_compute_dst_barrier, 0, NULL); // // Copy packed image over to staging buffer for host download // copy_buffer = r->storage_buffers[BUFFER_STAGING_DST].buffer; VkBufferMemoryBarrier pre_copy_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &pre_copy_dst_barrier, 0, NULL); VkBufferCopy buffer_copy_region = { .size = packed_size, }; vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region); VkBufferMemoryBarrier post_copy_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = pack_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &post_copy_src_barrier, 0, NULL); } // // Download image data to host // VkBufferMemoryBarrier post_copy_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_HOST_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, &post_copy_dst_barrier, 0, NULL); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1); pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); void *mapped_memory_ptr = NULL; VK_CHECK(vmaMapMemory(r->allocator, r->storage_buffers[BUFFER_STAGING_DST].allocation, &mapped_memory_ptr)); vmaInvalidateAllocation(r->allocator, r->storage_buffers[BUFFER_STAGING_DST].allocation, 0, VK_WHOLE_SIZE); memcpy_image(gl_read_buf, mapped_memory_ptr, surface->pitch, surface->width * surface->fmt.bytes_per_pixel, surface->height); vmaUnmapMemory(r->allocator, r->storage_buffers[BUFFER_STAGING_DST].allocation); if (surface->swizzle) { // FIXME: Swizzle in shader swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, surface->pitch, surface->fmt.bytes_per_pixel); nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); g_free(swizzle_buf); } } static void download_surface(NV2AState *d, SurfaceBinding *surface, bool force) { if (!(surface->download_pending || force)) { return; } // FIXME: Respect write enable at last TOU? download_surface_to_buffer(d, surface, d->vram_ptr + surface->vram_addr); memory_region_set_client_dirty(d->vram, surface->vram_addr, surface->pitch * surface->height, DIRTY_MEMORY_VGA); memory_region_set_client_dirty(d->vram, surface->vram_addr, surface->pitch * surface->height, DIRTY_MEMORY_NV2A_TEX); surface->download_pending = false; surface->draw_dirty = false; } void pgraph_vk_wait_for_surface_download(SurfaceBinding *surface) { NV2AState *d = g_nv2a; if (qatomic_read(&surface->draw_dirty)) { qemu_mutex_lock(&d->pfifo.lock); qemu_event_reset(&d->pgraph.vk_renderer_state->downloads_complete); qatomic_set(&surface->download_pending, true); qatomic_set(&d->pgraph.vk_renderer_state->downloads_pending, true); pfifo_kick(d); qemu_mutex_unlock(&d->pfifo.lock); qemu_event_wait(&d->pgraph.vk_renderer_state->downloads_complete); } } void pgraph_vk_process_pending_downloads(NV2AState *d) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; SurfaceBinding *surface; QTAILQ_FOREACH(surface, &r->surfaces, entry) { download_surface(d, surface, false); } qatomic_set(&r->downloads_pending, false); qemu_event_set(&r->downloads_complete); } void pgraph_vk_download_dirty_surfaces(NV2AState *d) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; SurfaceBinding *surface; QTAILQ_FOREACH(surface, &r->surfaces, entry) { pgraph_vk_surface_download_if_dirty(d, surface); } qatomic_set(&r->download_dirty_surfaces_pending, false); qemu_event_set(&r->dirty_surfaces_download_complete); } static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, hwaddr len, bool write) { SurfaceBinding *e = opaque; assert(addr >= e->vram_addr); hwaddr offset = addr - e->vram_addr; assert(offset < e->size); if (qatomic_read(&e->draw_dirty)) { trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); pgraph_vk_wait_for_surface_download(e); } if (write && !qatomic_read(&e->upload_pending)) { trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); qatomic_set(&e->upload_pending, true); } } static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface) { if (tcg_enabled()) { qemu_mutex_unlock(&d->pgraph.lock); bql_lock(); mem_access_callback_insert(qemu_get_cpu(0), d->vram, surface->vram_addr, surface->size, &surface->access_cb, &surface_access_callback, surface); bql_unlock(); qemu_mutex_lock(&d->pgraph.lock); } } static void unregister_cpu_access_callback(NV2AState *d, SurfaceBinding const *surface) { if (tcg_enabled()) { qemu_mutex_unlock(&d->pgraph.lock); bql_lock(); mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); bql_unlock(); qemu_mutex_lock(&d->pgraph.lock); } } static void bind_surface(PGRAPHVkState *r, SurfaceBinding *surface) { if (surface->color) { r->color_binding = surface; } else { r->zeta_binding = surface; } r->framebuffer_dirty = true; } static void unbind_surface(NV2AState *d, bool color) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; if (color) { if (r->color_binding) { r->color_binding = NULL; r->framebuffer_dirty = true; } } else { if (r->zeta_binding) { r->zeta_binding = NULL; r->framebuffer_dirty = true; } } } static void invalidate_surface(NV2AState *d, SurfaceBinding *surface) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); // FIXME: We may be reading from the surface in the current command buffer! // Add a detection to handle it. For now, finish to be safe. pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_SURFACE_DOWN); assert((!r->in_command_buffer || surface->draw_time < r->command_buffer_start_time) && "Surface evicted while in use!"); if (surface == r->color_binding) { assert(d->pgraph.surface_color.buffer_dirty); unbind_surface(d, true); } if (surface == r->zeta_binding) { assert(d->pgraph.surface_zeta.buffer_dirty); unbind_surface(d, false); } unregister_cpu_access_callback(d, surface); QTAILQ_REMOVE(&r->surfaces, surface, entry); QTAILQ_INSERT_HEAD(&r->invalid_surfaces, surface, entry); } static void invalidate_overlapping_surfaces(NV2AState *d, SurfaceBinding const *surface) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; uintptr_t e_end = surface->vram_addr + surface->size - 1; SurfaceBinding *s, *next; QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { uintptr_t s_end = s->vram_addr + s->size - 1; bool overlapping = !(s->vram_addr > e_end || surface->vram_addr > s_end); if (overlapping) { trace_nv2a_pgraph_surface_evict_overlapping( s->vram_addr, s->width, s->height, s->pitch); pgraph_vk_surface_download_if_dirty(d, s); invalidate_surface(d, s); } } } static void surface_put(NV2AState *d, SurfaceBinding *surface) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; assert(pgraph_vk_surface_get(d, surface->vram_addr) == NULL); invalidate_overlapping_surfaces(d, surface); register_cpu_access_callback(d, surface); QTAILQ_INSERT_HEAD(&r->surfaces, surface, entry); } SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; SurfaceBinding *surface; QTAILQ_FOREACH (surface, &r->surfaces, entry) { if (surface->vram_addr == addr) { return surface; } } return NULL; } SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; SurfaceBinding *surface; QTAILQ_FOREACH (surface, &r->surfaces, entry) { if (addr >= surface->vram_addr && addr < (surface->vram_addr + surface->size)) { return surface; } } return NULL; } static void set_surface_label(PGRAPHState *pg, SurfaceBinding const *surface) { PGRAPHVkState *r = pg->vk_renderer_state; g_autofree gchar *label = g_strdup_printf( "Surface %" HWADDR_PRIx "h fmt:%s,%02xh %dx%d aa:%d", surface->vram_addr, surface->color ? "Color" : "Zeta", surface->color ? surface->shape.color_format : surface->shape.zeta_format, surface->width, surface->height, pg->surface_shape.anti_aliasing); VkDebugUtilsObjectNameInfoEXT name_info = { .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, .objectType = VK_OBJECT_TYPE_IMAGE, .objectHandle = (uint64_t)surface->image, .pObjectName = label, }; if (r->debug_utils_extension_enabled) { vkSetDebugUtilsObjectNameEXT(r->device, &name_info); } vmaSetAllocationName(r->allocator, surface->allocation, label); if (surface->image_scratch) { g_autofree gchar *label_scratch = g_strdup_printf("%s (scratch)", label); name_info.objectHandle = (uint64_t)surface->image_scratch; name_info.pObjectName = label_scratch; if (r->debug_utils_extension_enabled) { vkSetDebugUtilsObjectNameEXT(r->device, &name_info); } vmaSetAllocationName(r->allocator, surface->allocation_scratch, label_scratch); } } static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) { PGRAPHVkState *r = pg->vk_renderer_state; unsigned int width = surface->width, height = surface->height; pgraph_apply_scaling_factor(pg, &width, &height); assert(!surface->image); assert(!surface->image_scratch); NV2A_VK_DPRINTF( "Creating new surface image width=%d height=%d @ %08" HWADDR_PRIx, width, height, surface->vram_addr); VkImageCreateInfo image_create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = VK_IMAGE_TYPE_2D, .extent.width = width, .extent.height = height, .extent.depth = 1, .mipLevels = 1, .arrayLayers = 1, .format = surface->host_fmt.vk_format, .tiling = VK_IMAGE_TILING_OPTIMAL, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | surface->host_fmt.usage, .samples = VK_SAMPLE_COUNT_1_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, }; VmaAllocationCreateInfo alloc_create_info = { .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, }; VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, &alloc_create_info, &surface->image, &surface->allocation, NULL)); VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, &alloc_create_info, &surface->image_scratch, &surface->allocation_scratch, NULL)); surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; VkImageViewCreateInfo image_view_create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = surface->image, .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = surface->host_fmt.vk_format, .subresourceRange.aspectMask = surface->host_fmt.aspect, .subresourceRange.levelCount = 1, .subresourceRange.layerCount = 1, }; VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, &surface->image_view)); // FIXME: Go right into main command buffer VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, VK_IMAGE_LAYOUT_UNDEFINED, surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_3); pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); nv2a_profile_inc_counter(NV2A_PROF_SURF_CREATE); } static void migrate_surface_image(SurfaceBinding *dst, SurfaceBinding *src) { dst->image = src->image; dst->image_view = src->image_view; dst->allocation = src->allocation; dst->image_scratch = src->image_scratch; dst->image_scratch_current_layout = src->image_scratch_current_layout; dst->allocation_scratch = src->allocation_scratch; src->image = VK_NULL_HANDLE; src->image_view = VK_NULL_HANDLE; src->allocation = VK_NULL_HANDLE; src->image_scratch = VK_NULL_HANDLE; src->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; src->allocation_scratch = VK_NULL_HANDLE; } static void destroy_surface_image(PGRAPHVkState *r, SurfaceBinding *surface) { vkDestroyImageView(r->device, surface->image_view, NULL); surface->image_view = VK_NULL_HANDLE; vmaDestroyImage(r->allocator, surface->image, surface->allocation); surface->image = VK_NULL_HANDLE; surface->allocation = VK_NULL_HANDLE; vmaDestroyImage(r->allocator, surface->image_scratch, surface->allocation_scratch); surface->image_scratch = VK_NULL_HANDLE; surface->allocation_scratch = VK_NULL_HANDLE; } static bool check_invalid_surface_is_compatibile(SurfaceBinding *surface, SurfaceBinding *target) { return surface->host_fmt.vk_format == target->host_fmt.vk_format && surface->width == target->width && surface->height == target->height && surface->host_fmt.usage == target->host_fmt.usage; } static SurfaceBinding * get_any_compatible_invalid_surface(PGRAPHVkState *r, SurfaceBinding *target) { SurfaceBinding *surface, *next; QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { if (check_invalid_surface_is_compatibile(surface, target)) { QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); return surface; } } return NULL; } static void prune_invalid_surfaces(PGRAPHVkState *r, int keep) { int num_surfaces = 0; SurfaceBinding *surface, *next; QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { num_surfaces += 1; if (num_surfaces > keep) { QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); destroy_surface_image(r, surface); g_free(surface); } } } static void expire_old_surfaces(NV2AState *d) { PGRAPHVkState *r = d->pgraph.vk_renderer_state; SurfaceBinding *s, *next; QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { int last_used = d->pgraph.frame_time - s->frame_time; if (last_used >= max_surface_frame_time_delta) { trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); pgraph_vk_surface_download_if_dirty(d, s); invalidate_surface(d, s); } } } static bool check_surface_compatibility(SurfaceBinding const *s1, SurfaceBinding const *s2, bool strict) { bool format_compatible = (s1->color == s2->color) && (s1->host_fmt.vk_format == s2->host_fmt.vk_format) && (s1->pitch == s2->pitch); if (!format_compatible) { return false; } if (!strict) { return (s1->width >= s2->width) && (s1->height >= s2->height); } else { return (s1->width == s2->width) && (s1->height == s2->height); } } void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface) { if (surface->draw_dirty) { download_surface(d, surface, true); } } void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; if (!(surface->upload_pending || force)) { return; } nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); // FIXME: SURFACE_UP trace_nv2a_pgraph_surface_upload( surface->color ? "COLOR" : "ZETA", surface->swizzle ? "sz" : "lin", surface->vram_addr, surface->width, surface->height, surface->pitch, surface->fmt.bytes_per_pixel); surface->upload_pending = false; surface->draw_time = pg->draw_time; uint8_t *data = d->vram_ptr; uint8_t *buf = data + surface->vram_addr; g_autofree uint8_t *swizzle_buf = NULL; uint8_t *gl_read_buf = NULL; if (surface->swizzle) { swizzle_buf = (uint8_t*)g_malloc(surface->size); gl_read_buf = swizzle_buf; unswizzle_rect(data + surface->vram_addr, surface->width, surface->height, swizzle_buf, surface->pitch, surface->fmt.bytes_per_pixel); nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); } else { gl_read_buf = buf; } // // Upload image data from host to staging buffer // StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC]; size_t uploaded_image_size = surface->height * surface->width * surface->fmt.bytes_per_pixel; assert(uploaded_image_size <= copy_buffer->buffer_size); void *mapped_memory_ptr = NULL; VK_CHECK(vmaMapMemory(r->allocator, copy_buffer->allocation, &mapped_memory_ptr)); bool use_compute_to_convert_depth_stencil_format = surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; bool no_conversion_necessary = surface->color || surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM || use_compute_to_convert_depth_stencil_format; assert(no_conversion_necessary); memcpy_image(mapped_memory_ptr, gl_read_buf, surface->width * surface->fmt.bytes_per_pixel, surface->pitch, surface->height); vmaFlushAllocation(r->allocator, copy_buffer->allocation, 0, VK_WHOLE_SIZE); vmaUnmapMemory(r->allocator, copy_buffer->allocation); VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); VkBufferMemoryBarrier host_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer->buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &host_barrier, 0, NULL); // Set up image copy regions (which may be modified by compute unpack) VkBufferImageCopy regions[2]; int num_regions = 0; regions[num_regions++] = (VkBufferImageCopy){ .imageSubresource.aspectMask = surface->color ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT, .imageSubresource.layerCount = 1, .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, }; if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { regions[num_regions++] = (VkBufferImageCopy){ .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, .imageSubresource.layerCount = 1, .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, }; } unsigned int scaled_width = surface->width, scaled_height = surface->height; pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); if (use_compute_to_convert_depth_stencil_format) { // // Copy packed image buffer to compute_dst for unpacking // size_t packed_size = uploaded_image_size; VkBufferCopy buffer_copy_region = { .size = packed_size, }; vkCmdCopyBuffer(cmd, copy_buffer->buffer, r->storage_buffers[BUFFER_COMPUTE_DST].buffer, 1, &buffer_copy_region); size_t num_pixels = scaled_width * scaled_height; size_t unpacked_depth_image_size = num_pixels * 4; size_t unpacked_stencil_image_size = num_pixels; size_t unpacked_size = unpacked_depth_image_size + unpacked_stencil_image_size; VkBufferMemoryBarrier post_copy_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer->buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &post_copy_src_barrier, 0, NULL); // // Unpack depth-stencil image into compute_src // VkBufferMemoryBarrier pre_unpack_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, &pre_unpack_src_barrier, 0, NULL); StorageBuffer *unpack_buffer = &r->storage_buffers[BUFFER_COMPUTE_SRC]; VkBufferMemoryBarrier pre_unpack_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = unpack_buffer->buffer, .size = unpacked_size }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, &pre_unpack_dst_barrier, 0, NULL); pgraph_vk_unpack_depth_stencil( pg, surface, cmd, r->storage_buffers[BUFFER_COMPUTE_DST].buffer, unpack_buffer->buffer); VkBufferMemoryBarrier post_unpack_src_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &post_unpack_src_barrier, 0, NULL); VkBufferMemoryBarrier post_unpack_dst_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = unpack_buffer->buffer, .size = unpacked_size }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &post_unpack_dst_barrier, 0, NULL); // Already scaled during compute. Adjust copy regions. regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }; regions[1].imageExtent = regions[0].imageExtent; regions[1].bufferOffset = ROUND_UP(unpacked_depth_image_size, r->device_props.limits.minStorageBufferOffsetAlignment); copy_buffer = unpack_buffer; } // // Copy image data from buffer to staging image // if (surface->image_scratch_current_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, surface->host_fmt.vk_format, surface->image_scratch_current_layout, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; } vkCmdCopyBufferToImage(cmd, copy_buffer->buffer, surface->image_scratch, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions, regions); VkBufferMemoryBarrier post_copy_src_buffer_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = copy_buffer->buffer, .size = VK_WHOLE_SIZE }; vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, &post_copy_src_buffer_barrier, 0, NULL); // // Copy staging image to final image // pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, surface->host_fmt.vk_format, surface->image_scratch_current_layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); bool upscale = pg->surface_scale_factor > 1 && !use_compute_to_convert_depth_stencil_format; if (upscale) { VkImageBlit blitRegion = { .srcSubresource.aspectMask = surface->host_fmt.aspect, .srcSubresource.mipLevel = 0, .srcSubresource.baseArrayLayer = 0, .srcSubresource.layerCount = 1, .srcOffsets[0] = (VkOffset3D){0, 0, 0}, .srcOffsets[1] = (VkOffset3D){surface->width, surface->height, 1}, .dstSubresource.aspectMask = surface->host_fmt.aspect, .dstSubresource.mipLevel = 0, .dstSubresource.baseArrayLayer = 0, .dstSubresource.layerCount = 1, .dstOffsets[0] = (VkOffset3D){0, 0, 0}, .dstOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1}, }; vkCmdBlitImage(cmd, surface->image_scratch, surface->image_scratch_current_layout, surface->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitRegion, surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST); } else { // Note: We should be able to vkCmdCopyBufferToImage directly into // surface->image, but there is an apparent AMD Windows driver // synchronization bug we'll hit when doing this. For this reason, // always use a staging image. for (int i = 0; i < num_regions; i++) { VkImageAspectFlags aspect = regions[i].imageSubresource.aspectMask; VkImageCopy copy_region = { .srcSubresource.aspectMask = aspect, .srcSubresource.layerCount = 1, .dstSubresource.aspectMask = aspect, .dstSubresource.layerCount = 1, .extent = regions[i].imageExtent, }; vkCmdCopyImage(cmd, surface->image_scratch, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, surface->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ©_region); } } pgraph_vk_transition_image_layout( pg, cmd, surface->image, surface->host_fmt.vk_format, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_2); pgraph_vk_end_debug_marker(r, cmd); pgraph_vk_end_single_time_commands(pg, cmd); surface->initialized = true; } static void compare_surfaces(SurfaceBinding const *a, SurfaceBinding const *b) { #define DO_CMP(fld) \ if (a->fld != b->fld) \ trace_nv2a_pgraph_surface_compare_mismatch( \ #fld, (long int)a->fld, (long int)b->fld); DO_CMP(shape.clip_x) DO_CMP(shape.clip_width) DO_CMP(shape.clip_y) DO_CMP(shape.clip_height) DO_CMP(fmt.bytes_per_pixel) DO_CMP(host_fmt.vk_format) DO_CMP(color) DO_CMP(swizzle) DO_CMP(vram_addr) DO_CMP(width) DO_CMP(height) DO_CMP(pitch) DO_CMP(size) DO_CMP(dma_addr) DO_CMP(dma_len) DO_CMP(frame_time) DO_CMP(draw_time) #undef DO_CMP } static void populate_surface_binding_target_sized(NV2AState *d, bool color, unsigned int width, unsigned int height, SurfaceBinding *target) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; Surface *surface; hwaddr dma_address; BasicSurfaceFormatInfo fmt; SurfaceFormatInfo host_fmt; if (color) { surface = &pg->surface_color; dma_address = pg->dma_color; assert(pg->surface_shape.color_format != 0); assert(pg->surface_shape.color_format < ARRAY_SIZE(kelvin_surface_color_format_vk_map)); fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format]; host_fmt = kelvin_surface_color_format_vk_map[pg->surface_shape.color_format]; if (host_fmt.host_bytes_per_pixel == 0) { fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", pg->surface_shape.color_format); abort(); } } else { surface = &pg->surface_zeta; dma_address = pg->dma_zeta; assert(pg->surface_shape.zeta_format != 0); assert(pg->surface_shape.zeta_format < ARRAY_SIZE(r->kelvin_surface_zeta_vk_map)); fmt = kelvin_surface_zeta_format_map[pg->surface_shape.zeta_format]; host_fmt = r->kelvin_surface_zeta_vk_map[pg->surface_shape.zeta_format]; // FIXME: Support float 16,24b float format surface } DMAObject dma = nv_dma_load(d, dma_address); // There's a bunch of bugs that could cause us to hit this function // at the wrong time and get a invalid dma object. // Check that it's sane. assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); // assert(dma.address + surface->offset != 0); assert(surface->offset <= dma.limit); assert(surface->offset + surface->pitch * height <= dma.limit + 1); assert(surface->pitch % fmt.bytes_per_pixel == 0); assert((dma.address & ~0x07FFFFFF) == 0); target->shape = (color || !r->color_binding) ? pg->surface_shape : r->color_binding->shape; target->fmt = fmt; target->host_fmt = host_fmt; target->color = color; target->swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); target->vram_addr = dma.address + surface->offset; target->width = width; target->height = height; target->pitch = surface->pitch; target->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); target->upload_pending = true; target->download_pending = false; target->draw_dirty = false; target->dma_addr = dma.address; target->dma_len = dma.limit; target->frame_time = pg->frame_time; target->draw_time = pg->draw_time; target->cleared = false; target->initialized = false; } static void populate_surface_binding_target(NV2AState *d, bool color, SurfaceBinding *target) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; unsigned int width, height; if (color || !r->color_binding) { get_surface_dimensions(pg, &width, &height); pgraph_apply_anti_aliasing_factor(pg, &width, &height); // Since we determine surface dimensions based on the clipping // rectangle, make sure to include the surface offset as well. if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { width += pg->surface_shape.clip_x; height += pg->surface_shape.clip_y; } } else { width = r->color_binding->width; height = r->color_binding->height; } populate_surface_binding_target_sized(d, color, width, height, target); } static void update_surface_part(NV2AState *d, bool upload, bool color) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; SurfaceBinding target; memset(&target, 0, sizeof(target)); populate_surface_binding_target(d, color, &target); Surface *pg_surface = color ? &pg->surface_color : &pg->surface_zeta; bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( d->vram, target.vram_addr, target.size, DIRTY_MEMORY_NV2A); SurfaceBinding *current_binding = color ? r->color_binding : r->zeta_binding; if (!current_binding || (upload && (pg_surface->buffer_dirty || mem_dirty))) { // FIXME: We don't need to be so aggressive flushing the command list // pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); pgraph_vk_ensure_not_in_render_pass(pg); unbind_surface(d, color); SurfaceBinding *surface = pgraph_vk_surface_get(d, target.vram_addr); if (surface != NULL) { // FIXME: Support same color/zeta surface target? In the mean time, // if the surface we just found is currently bound, just unbind it. SurfaceBinding *other = (color ? r->zeta_binding : r->color_binding); if (surface == other) { NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); unbind_surface(d, !color); } } trace_nv2a_pgraph_surface_target( color ? "COLOR" : "ZETA", target.vram_addr, target.swizzle ? "sz" : "ln", pg->surface_shape.anti_aliasing, pg->surface_shape.clip_x, pg->surface_shape.clip_width, pg->surface_shape.clip_y, pg->surface_shape.clip_height); bool should_create = true; if (surface != NULL) { bool is_compatible = check_surface_compatibility(surface, &target, false); void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, const char *layout, uint32_t anti_aliasing, uint32_t clip_x, uint32_t clip_width, uint32_t clip_y, uint32_t clip_height, uint32_t pitch) = surface->color ? trace_nv2a_pgraph_surface_match_color : trace_nv2a_pgraph_surface_match_zeta; trace_fn(surface->vram_addr, surface->width, surface->height, surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing, surface->shape.clip_x, surface->shape.clip_width, surface->shape.clip_y, surface->shape.clip_height, surface->pitch); assert(!(target.swizzle && pg->clearing)); #if 0 if (surface->swizzle != target.swizzle) { // Clears should only be done on linear surfaces. Avoid // synchronization by allowing (1) a surface marked swizzled to // be cleared under the assumption the entire surface is // destined to be cleared and (2) a fully cleared linear surface // to be marked swizzled. Strictly match size to avoid // pathological cases. is_compatible &= (pg->clearing || surface->cleared) && check_surface_compatibility(surface, &target, true); if (is_compatible) { trace_nv2a_pgraph_surface_migrate_type( target.swizzle ? "swizzled" : "linear"); } } #endif if (is_compatible && color && !check_surface_compatibility(surface, &target, true)) { SurfaceBinding zeta_entry; populate_surface_binding_target_sized( d, !color, surface->width, surface->height, &zeta_entry); hwaddr color_end = surface->vram_addr + surface->size; hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; is_compatible &= surface->vram_addr >= zeta_end || zeta_entry.vram_addr >= color_end; } if (is_compatible && !color && r->color_binding) { is_compatible &= (surface->width == r->color_binding->width) && (surface->height == r->color_binding->height); } if (is_compatible) { // FIXME: Refactor pg->surface_binding_dim.width = surface->width; pg->surface_binding_dim.clip_x = surface->shape.clip_x; pg->surface_binding_dim.clip_width = surface->shape.clip_width; pg->surface_binding_dim.height = surface->height; pg->surface_binding_dim.clip_y = surface->shape.clip_y; pg->surface_binding_dim.clip_height = surface->shape.clip_height; surface->upload_pending |= mem_dirty; pg->surface_zeta.buffer_dirty |= color; should_create = false; } else { trace_nv2a_pgraph_surface_evict_reason( "incompatible", surface->vram_addr); compare_surfaces(surface, &target); pgraph_vk_surface_download_if_dirty(d, surface); invalidate_surface(d, surface); } } if (should_create) { surface = get_any_compatible_invalid_surface(r, &target); if (surface) { migrate_surface_image(&target, surface); } else { surface = g_malloc(sizeof(SurfaceBinding)); create_surface_image(pg, &target); } *surface = target; set_surface_label(pg, surface); surface_put(d, surface); // FIXME: Refactor pg->surface_binding_dim.width = target.width; pg->surface_binding_dim.clip_x = target.shape.clip_x; pg->surface_binding_dim.clip_width = target.shape.clip_width; pg->surface_binding_dim.height = target.height; pg->surface_binding_dim.clip_y = target.shape.clip_y; pg->surface_binding_dim.clip_height = target.shape.clip_height; if (color && r->zeta_binding && (r->zeta_binding->width != target.width || r->zeta_binding->height != target.height)) { pg->surface_zeta.buffer_dirty = true; } } void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, const char *layout, uint32_t anti_aliasing, uint32_t clip_x, uint32_t clip_width, uint32_t clip_y, uint32_t clip_height, uint32_t pitch) = color ? (should_create ? trace_nv2a_pgraph_surface_create_color : trace_nv2a_pgraph_surface_hit_color) : (should_create ? trace_nv2a_pgraph_surface_create_zeta : trace_nv2a_pgraph_surface_hit_zeta); trace_fn(surface->vram_addr, surface->width, surface->height, surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing, surface->shape.clip_x, surface->shape.clip_width, surface->shape.clip_y, surface->shape.clip_height, surface->pitch); bind_surface(r, surface); pg_surface->buffer_dirty = false; } if (!upload && pg_surface->draw_dirty) { if (!tcg_enabled()) { // FIXME: Cannot monitor for reads/writes; flush now download_surface(d, color ? r->color_binding : r->zeta_binding, true); } pg_surface->write_enabled_cache = false; pg_surface->draw_dirty = false; } } // FIXME: Move to common? void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; pg->surface_shape.z_format = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_Z_FORMAT); color_write = color_write && (pg->clearing || pgraph_color_write_enabled(pg)); zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg)); if (upload) { bool fb_dirty = framebuffer_dirty(pg); if (fb_dirty) { memcpy(&pg->last_surface_shape, &pg->surface_shape, sizeof(SurfaceShape)); pg->surface_color.buffer_dirty = true; pg->surface_zeta.buffer_dirty = true; } if (pg->surface_color.buffer_dirty) { unbind_surface(d, true); } if (color_write) { update_surface_part(d, true, true); } if (pg->surface_zeta.buffer_dirty) { unbind_surface(d, false); } if (zeta_write) { update_surface_part(d, true, false); } } else { if ((color_write || pg->surface_color.write_enabled_cache) && pg->surface_color.draw_dirty) { update_surface_part(d, false, true); } if ((zeta_write || pg->surface_zeta.write_enabled_cache) && pg->surface_zeta.draw_dirty) { update_surface_part(d, false, false); } } if (upload) { pg->draw_time++; } bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); if (r->color_binding) { r->color_binding->frame_time = pg->frame_time; if (upload) { pgraph_vk_upload_surface_data(d, r->color_binding, false); r->color_binding->draw_time = pg->draw_time; r->color_binding->swizzle = swizzle; } } if (r->zeta_binding) { r->zeta_binding->frame_time = pg->frame_time; if (upload) { pgraph_vk_upload_surface_data(d, r->zeta_binding, false); r->zeta_binding->draw_time = pg->draw_time; r->zeta_binding->swizzle = swizzle; } } // Sanity check color and zeta dimensions match if (r->color_binding && r->zeta_binding) { assert(r->color_binding->width == r->zeta_binding->width); assert(r->color_binding->height == r->zeta_binding->height); } expire_old_surfaces(d); prune_invalid_surfaces(r, num_invalid_surfaces_to_keep); } static bool check_format_and_usage_supported(PGRAPHVkState *r, VkFormat format, VkImageUsageFlags usage) { VkPhysicalDeviceImageFormatInfo2 pdif2 = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, .format = format, .type = VK_IMAGE_TYPE_2D, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = usage, }; VkImageFormatProperties2 props = { .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, }; VkResult result = vkGetPhysicalDeviceImageFormatProperties2( r->physical_device, &pdif2, &props); return result == VK_SUCCESS; } static bool check_surface_internal_formats_supported( PGRAPHVkState *r, const SurfaceFormatInfo *fmts, size_t count) { bool all_supported = true; for (int i = 0; i < count; i++) { const SurfaceFormatInfo *f = &fmts[i]; if (f->host_bytes_per_pixel) { all_supported &= check_format_and_usage_supported(r, f->vk_format, f->usage); } } return all_supported; } void pgraph_vk_init_surfaces(PGRAPHState *pg) { PGRAPHVkState *r = pg->vk_renderer_state; // Make sure all surface format types are supported. We don't expect issue // with these, and therefore have no fallback mechanism. bool color_formats_supported = check_surface_internal_formats_supported( r, kelvin_surface_color_format_vk_map, ARRAY_SIZE(kelvin_surface_color_format_vk_map)); assert(color_formats_supported); // Check if the device supports preferred VK_FORMAT_D24_UNORM_S8_UINT // format, fall back to D32_SFLOAT_S8_UINT otherwise. r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = zeta_d16; if (check_surface_internal_formats_supported(r, &zeta_d24_unorm_s8_uint, 1)) { r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = zeta_d24_unorm_s8_uint; } else if (check_surface_internal_formats_supported( r, &zeta_d32_sfloat_s8_uint, 1)) { r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = zeta_d32_sfloat_s8_uint; } else { assert(!"No suitable depth-stencil format supported"); } QTAILQ_INIT(&r->surfaces); QTAILQ_INIT(&r->invalid_surfaces); r->downloads_pending = false; qemu_event_init(&r->downloads_complete, false); qemu_event_init(&r->dirty_surfaces_download_complete, false); r->color_binding = NULL; r->zeta_binding = NULL; r->framebuffer_dirty = true; pgraph_vk_reload_surface_scale_factor(pg); // FIXME: Move internal } void pgraph_vk_finalize_surfaces(PGRAPHState *pg) { pgraph_vk_surface_flush(container_of(pg, NV2AState, pgraph)); } void pgraph_vk_surface_flush(NV2AState *d) { PGRAPHState *pg = &d->pgraph; PGRAPHVkState *r = pg->vk_renderer_state; // Clear last surface shape to force recreation of buffers at next draw pg->surface_color.draw_dirty = false; pg->surface_zeta.draw_dirty = false; memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); unbind_surface(d, true); unbind_surface(d, false); SurfaceBinding *s, *next; QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { // FIXME: We should download all surfaces to ram, but need to // investigate corruption issue pgraph_vk_surface_download_if_dirty(d, s); invalidate_surface(d, s); } prune_invalid_surfaces(r, 0); pgraph_vk_reload_surface_scale_factor(pg); }