/* * Geforce NV2A PGRAPH Vulkan Renderer * * Copyright (c) 2024 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see . */ #ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H #define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H #include "qemu/osdep.h" #include "qemu/thread.h" #include "qemu/queue.h" #include "qemu/lru.h" #include "hw/hw.h" #include "hw/xbox/nv2a/nv2a_int.h" #include "hw/xbox/nv2a/nv2a_regs.h" #include "hw/xbox/nv2a/pgraph/surface.h" #include "hw/xbox/nv2a/pgraph/texture.h" #include "hw/xbox/nv2a/pgraph/shaders.h" #include #include #include #include #include #include "debug.h" #include "constants.h" #include "glsl.h" #define HAVE_EXTERNAL_MEMORY 1 typedef struct QueueFamilyIndices { int queue_family; } QueueFamilyIndices; typedef struct MemorySyncRequirement { hwaddr addr, size; } MemorySyncRequirement; typedef struct RenderPassState { VkFormat color_format; VkFormat zeta_format; } RenderPassState; typedef struct RenderPass { RenderPassState state; VkRenderPass render_pass; } RenderPass; typedef struct PipelineKey { bool clear; RenderPassState render_pass_state; ShaderState shader_state; uint32_t regs[9]; VkVertexInputBindingDescription binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; VkVertexInputAttributeDescription attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; } PipelineKey; typedef struct PipelineBinding { LruNode node; PipelineKey key; VkPipelineLayout layout; VkPipeline pipeline; VkRenderPass render_pass; unsigned int draw_time; } PipelineBinding; enum Buffer { BUFFER_STAGING_DST, BUFFER_STAGING_SRC, BUFFER_COMPUTE_DST, BUFFER_COMPUTE_SRC, BUFFER_INDEX, BUFFER_INDEX_STAGING, BUFFER_VERTEX_RAM, BUFFER_VERTEX_INLINE, BUFFER_VERTEX_INLINE_STAGING, BUFFER_UNIFORM, BUFFER_UNIFORM_STAGING, BUFFER_COUNT }; typedef struct StorageBuffer { VkBuffer buffer; VkBufferUsageFlags usage; VmaAllocationCreateInfo alloc_info; VmaAllocation allocation; VkMemoryPropertyFlags properties; size_t buffer_offset; size_t buffer_size; uint8_t *mapped; } StorageBuffer; typedef struct SurfaceBinding { QTAILQ_ENTRY(SurfaceBinding) entry; MemAccessCallback *access_cb; hwaddr vram_addr; SurfaceShape shape; uintptr_t dma_addr; uintptr_t dma_len; bool color; bool swizzle; unsigned int width; unsigned int height; unsigned int pitch; size_t size; bool cleared; int frame_time; int draw_time; bool draw_dirty; bool download_pending; bool upload_pending; BasicSurfaceFormatInfo fmt; SurfaceFormatInfo host_fmt; VkImage image; VkImageView image_view; VmaAllocation allocation; // Used for scaling VkImage image_scratch; VkImageLayout image_scratch_current_layout; VmaAllocation allocation_scratch; bool initialized; } SurfaceBinding; typedef struct ShaderModuleInfo { char *glsl; GByteArray *spirv; VkShaderModule module; SpvReflectShaderModule reflect_module; SpvReflectDescriptorSet **descriptor_sets; ShaderUniformLayout uniforms; ShaderUniformLayout push_constants; } ShaderModuleInfo; typedef struct ShaderBinding { LruNode node; bool initialized; ShaderState state; ShaderModuleInfo *geometry; ShaderModuleInfo *vertex; ShaderModuleInfo *fragment; int psh_constant_loc[9][2]; int alpha_ref_loc; int bump_mat_loc[NV2A_MAX_TEXTURES]; int bump_scale_loc[NV2A_MAX_TEXTURES]; int bump_offset_loc[NV2A_MAX_TEXTURES]; int tex_scale_loc[NV2A_MAX_TEXTURES]; int surface_size_loc; int clip_range_loc; int clip_range_floc; int depth_offset_loc; int vsh_constant_loc; uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; int inv_viewport_loc; int ltctxa_loc; int ltctxb_loc; int ltc1_loc; int fog_color_loc; int fog_param_loc; int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; int light_infinite_direction_loc[NV2A_MAX_LIGHTS]; int light_local_position_loc[NV2A_MAX_LIGHTS]; int light_local_attenuation_loc[NV2A_MAX_LIGHTS]; int specular_power_loc; int clip_region_loc; int material_alpha_loc; int uniform_attrs_loc; } ShaderBinding; typedef struct TextureKey { TextureShape state; hwaddr texture_vram_offset; hwaddr texture_length; hwaddr palette_vram_offset; hwaddr palette_length; float scale; uint32_t filter; uint32_t address; uint32_t border_color; } TextureKey; typedef struct TextureBinding { LruNode node; TextureKey key; VkImage image; VkImageLayout current_layout; VkImageView image_view; VmaAllocation allocation; VkSampler sampler; bool possibly_dirty; uint64_t hash; unsigned int draw_time; uint32_t submit_time; } TextureBinding; typedef struct QueryReport { QSIMPLEQ_ENTRY(QueryReport) entry; bool clear; uint32_t parameter; unsigned int query_count; } QueryReport; typedef struct PvideoState { bool enabled; hwaddr base; hwaddr limit; hwaddr offset; int pitch; int format; int in_width; int in_height; int out_width; int out_height; int in_s; int in_t; int out_x; int out_y; float scale_x; float scale_y; bool color_key_enabled; uint32_t color_key; } PvideoState; typedef struct PGRAPHVkDisplayState { ShaderModuleInfo *display_frag; VkDescriptorPool descriptor_pool; VkDescriptorSetLayout descriptor_set_layout; VkDescriptorSet descriptor_set; VkPipelineLayout pipeline_layout; VkPipeline pipeline; VkRenderPass render_pass; VkFramebuffer framebuffer; VkImage image; VkImageView image_view; VkDeviceMemory memory; VkSampler sampler; struct { PvideoState state; int width, height; VkImage image; VkImageView image_view; VmaAllocation allocation; VkSampler sampler; } pvideo; int width, height; int draw_time; // OpenGL Interop #ifdef WIN32 HANDLE handle; #else int fd; #endif GLuint gl_memory_obj; GLuint gl_texture_id; } PGRAPHVkDisplayState; typedef struct ComputePipelineKey { VkFormat host_fmt; bool pack; int workgroup_size; } ComputePipelineKey; typedef struct ComputePipeline { LruNode node; ComputePipelineKey key; VkPipeline pipeline; } ComputePipeline; typedef struct PGRAPHVkComputeState { VkDescriptorPool descriptor_pool; VkDescriptorSetLayout descriptor_set_layout; VkDescriptorSet descriptor_sets[1024]; int descriptor_set_index; VkPipelineLayout pipeline_layout; Lru pipeline_cache; ComputePipeline *pipeline_cache_entries; } PGRAPHVkComputeState; typedef struct PGRAPHVkState { void *window; VkInstance instance; VkDebugUtilsMessengerEXT debug_messenger; int debug_depth; bool debug_utils_extension_enabled; bool custom_border_color_extension_enabled; bool provoking_vertex_extension_enabled; bool memory_budget_extension_enabled; VkPhysicalDevice physical_device; VkPhysicalDeviceProperties device_props; VkDevice device; VmaAllocator allocator; uint32_t allocator_last_submit_index; VkQueue queue; VkCommandPool command_pool; VkCommandBuffer command_buffers[2]; VkCommandBuffer command_buffer; VkSemaphore command_buffer_semaphore; VkFence command_buffer_fence; unsigned int command_buffer_start_time; bool in_command_buffer; uint32_t submit_count; VkCommandBuffer aux_command_buffer; bool in_aux_command_buffer; VkFramebuffer framebuffers[50]; int framebuffer_index; bool framebuffer_dirty; VkRenderPass render_pass; GArray *render_passes; // RenderPass bool in_render_pass; bool in_draw; Lru pipeline_cache; VkPipelineCache vk_pipeline_cache; PipelineBinding *pipeline_cache_entries; PipelineBinding *pipeline_binding; bool pipeline_binding_changed; VkDescriptorPool descriptor_pool; VkDescriptorSetLayout descriptor_set_layout; VkDescriptorSet descriptor_sets[1024]; int descriptor_set_index; StorageBuffer storage_buffers[BUFFER_COUNT]; MemorySyncRequirement vertex_ram_buffer_syncs[NV2A_VERTEXSHADER_ATTRIBUTES]; size_t num_vertex_ram_buffer_syncs; unsigned long *uploaded_bitmap; size_t bitmap_size; VkVertexInputAttributeDescription vertex_attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; int vertex_attribute_to_description_location[NV2A_VERTEXSHADER_ATTRIBUTES]; int num_active_vertex_attribute_descriptions; VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; int num_active_vertex_binding_descriptions; hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; QTAILQ_HEAD(, SurfaceBinding) surfaces; QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces; SurfaceBinding *color_binding, *zeta_binding; bool downloads_pending; QemuEvent downloads_complete; bool download_dirty_surfaces_pending; QemuEvent dirty_surfaces_download_complete; // common Lru texture_cache; TextureBinding *texture_cache_entries; TextureBinding *texture_bindings[NV2A_MAX_TEXTURES]; TextureBinding dummy_texture; bool texture_bindings_changed; VkFormatProperties *texture_format_properties; Lru shader_cache; ShaderBinding *shader_cache_entries; ShaderBinding *shader_binding; ShaderModuleInfo *quad_vert_module, *solid_frag_module; bool shader_bindings_changed; // FIXME: Merge these into a structure uint64_t uniform_buffer_hashes[2]; size_t uniform_buffer_offsets[2]; bool uniforms_changed; VkQueryPool query_pool; int max_queries_in_flight; // FIXME: Move out to constant int num_queries_in_flight; bool new_query_needed; bool query_in_flight; uint32_t zpass_pixel_count_result; QSIMPLEQ_HEAD(, QueryReport) report_queue; // FIXME: Statically allocate SurfaceFormatInfo kelvin_surface_zeta_vk_map[3]; uint32_t clear_parameter; PGRAPHVkDisplayState display; PGRAPHVkComputeState compute; } PGRAPHVkState; // renderer.c void pgraph_vk_check_memory_budget(PGRAPHState *pg); // debug.c #define RGBA_RED (float[4]){1,0,0,1} #define RGBA_YELLOW (float[4]){1,1,0,1} #define RGBA_GREEN (float[4]){0,1,0,1} #define RGBA_BLUE (float[4]){0,0,1,1} #define RGBA_PINK (float[4]){1,0,1,1} #define RGBA_DEFAULT (float[4]){0,0,0,0} void pgraph_vk_debug_init(void); void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, float color[4], const char *format, ...) __attribute__ ((format (printf, 4, 5))); void pgraph_vk_begin_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, float color[4], const char *format, ...) __attribute__ ((format (printf, 4, 5))); void pgraph_vk_end_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd); // instance.c void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp); void pgraph_vk_finalize_instance(PGRAPHState *pg); QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device); uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, VkMemoryPropertyFlags properties); // glsl.c void pgraph_vk_init_glsl_compiler(void); void pgraph_vk_finalize_glsl_compiler(void); GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, const char *glsl_source); VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, GByteArray *spv); ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl( PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl); void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info); // buffer.c void pgraph_vk_init_buffers(NV2AState *d); void pgraph_vk_finalize_buffers(NV2AState *d); bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index, VkDeviceSize size, VkDeviceAddress alignment); VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data, VkDeviceSize *sizes, size_t count, VkDeviceAddress alignment); // command.c void pgraph_vk_init_command_buffers(PGRAPHState *pg); void pgraph_vk_finalize_command_buffers(PGRAPHState *pg); VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg); void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd); // image.c void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, VkImage image, VkFormat format, VkImageLayout oldLayout, VkImageLayout newLayout); // vertex.c void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d); void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, void *data, VkDeviceSize size); VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data, VkDeviceSize size); VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data, VkDeviceSize *sizes, size_t count); // surface.c void pgraph_vk_init_surfaces(PGRAPHState *pg); void pgraph_vk_finalize_surfaces(PGRAPHState *pg); void pgraph_vk_surface_flush(NV2AState *d); void pgraph_vk_process_pending_downloads(NV2AState *d); void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr); void pgraph_vk_wait_for_surface_download(SurfaceBinding *e); void pgraph_vk_download_dirty_surfaces(NV2AState *d); void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size); void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write); SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr); void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale); unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d); void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg); // surface-compute.c void pgraph_vk_init_compute(PGRAPHState *pg); bool pgraph_vk_compute_needs_finish(PGRAPHVkState *r); void pgraph_vk_compute_finish_complete(PGRAPHVkState *r); void pgraph_vk_finalize_compute(PGRAPHState *pg); void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, VkCommandBuffer cmd, VkBuffer src, VkBuffer dst, bool downscale); void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, VkCommandBuffer cmd, VkBuffer src, VkBuffer dst); // display.c void pgraph_vk_init_display(PGRAPHState *pg); void pgraph_vk_finalize_display(PGRAPHState *pg); void pgraph_vk_render_display(PGRAPHState *pg); // texture.c void pgraph_vk_init_textures(PGRAPHState *pg); void pgraph_vk_finalize_textures(PGRAPHState *pg); void pgraph_vk_bind_textures(NV2AState *d); void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); void pgraph_vk_trim_texture_cache(PGRAPHState *pg); // shaders.c void pgraph_vk_init_shaders(PGRAPHState *pg); void pgraph_vk_finalize_shaders(PGRAPHState *pg); void pgraph_vk_update_descriptor_sets(PGRAPHState *pg); void pgraph_vk_bind_shaders(PGRAPHState *pg); void pgraph_vk_update_shader_uniforms(PGRAPHState *pg); // reports.c void pgraph_vk_init_reports(PGRAPHState *pg); void pgraph_vk_finalize_reports(PGRAPHState *pg); void pgraph_vk_clear_report_value(NV2AState *d); void pgraph_vk_get_report(NV2AState *d, uint32_t parameter); void pgraph_vk_process_pending_reports(NV2AState *d); void pgraph_vk_process_pending_reports_internal(NV2AState *d); typedef enum FinishReason { VK_FINISH_REASON_VERTEX_BUFFER_DIRTY, VK_FINISH_REASON_SURFACE_CREATE, VK_FINISH_REASON_SURFACE_DOWN, VK_FINISH_REASON_NEED_BUFFER_SPACE, VK_FINISH_REASON_FRAMEBUFFER_DIRTY, VK_FINISH_REASON_PRESENTING, VK_FINISH_REASON_FLIP_STALL, VK_FINISH_REASON_FLUSH, VK_FINISH_REASON_STALLED, } FinishReason; // draw.c void pgraph_vk_init_pipelines(PGRAPHState *pg); void pgraph_vk_finalize_pipelines(PGRAPHState *pg); void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter); void pgraph_vk_draw_begin(NV2AState *d); void pgraph_vk_draw_end(NV2AState *d); void pgraph_vk_finish(PGRAPHState *pg, FinishReason why); void pgraph_vk_flush_draw(NV2AState *d); void pgraph_vk_begin_command_buffer(PGRAPHState *pg); void pgraph_vk_ensure_command_buffer(PGRAPHState *pg); void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg); VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg); void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd); // blit.c void pgraph_vk_image_blit(NV2AState *d); #endif