/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "renderer.h"

#include <string.h>

static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkBufferCreateInfo buffer_create_info = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = buffer->buffer_size,
        .usage = buffer->usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    VK_CHECK(vmaCreateBuffer(r->allocator, &buffer_create_info,
                             &buffer->alloc_info, &buffer->buffer,
                             &buffer->allocation, NULL));
}

static void destroy_buffer(PGRAPHState *pg, StorageBuffer *buffer)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vmaDestroyBuffer(r->allocator, buffer->buffer, buffer->allocation);
    buffer->buffer = VK_NULL_HANDLE;
    buffer->allocation = VK_NULL_HANDLE;
}

void pgraph_vk_init_buffers(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    // FIXME: Profile buffer sizes

    VmaAllocationCreateInfo host_alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
                 VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
    };

    VmaAllocationCreateInfo device_alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
        .flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
    };

    r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .buffer_size = 4096 * 4096 * 4,
    };

    r->storage_buffers[BUFFER_STAGING_SRC] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_STAGING_DST].buffer_size,
    };

    r->storage_buffers[BUFFER_COMPUTE_DST] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
        .buffer_size = (1024 * 10) * (1024 * 10) * 8,
    };

    r->storage_buffers[BUFFER_COMPUTE_SRC] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
        .buffer_size = r->storage_buffers[BUFFER_COMPUTE_DST].buffer_size,
    };

    r->storage_buffers[BUFFER_INDEX] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
        .buffer_size = sizeof(pg->inline_elements) * 100,
    };

    r->storage_buffers[BUFFER_INDEX_STAGING] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_INDEX].buffer_size,
    };

    // FIXME: Don't assume that we can render with host mapped buffer
    r->storage_buffers[BUFFER_VERTEX_RAM] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        .buffer_size = memory_region_size(d->vram),
    };

    r->bitmap_size = memory_region_size(d->vram) / 4096;
    r->uploaded_bitmap = bitmap_new(r->bitmap_size);
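    // uploaded_bitmap holds one bit per 4 KiB page of VRAM; clear it so
    // every page starts out marked as not yet uploaded.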
    bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);

    r->storage_buffers[BUFFER_VERTEX_INLINE] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        .buffer_size = NV2A_VERTEXSHADER_ATTRIBUTES * NV2A_MAX_BATCH_LENGTH *
                       4 * sizeof(float) * 10,
    };

    r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_VERTEX_INLINE].buffer_size,
    };

    r->storage_buffers[BUFFER_UNIFORM] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
        .buffer_size = 8 * 1024 * 1024,
    };

    r->storage_buffers[BUFFER_UNIFORM_STAGING] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_UNIFORM].buffer_size,
    };

    for (int i = 0; i < BUFFER_COUNT; i++) {
        create_buffer(pg, &r->storage_buffers[i]);
    }

    // FIXME: Add fallback path for device using host mapped memory
    int buffers_to_map[] = { BUFFER_VERTEX_RAM,
                             BUFFER_INDEX_STAGING,
                             BUFFER_VERTEX_INLINE_STAGING,
                             BUFFER_UNIFORM_STAGING };
    for (int i = 0; i < ARRAY_SIZE(buffers_to_map); i++) {
        VK_CHECK(vmaMapMemory(
            r->allocator, r->storage_buffers[buffers_to_map[i]].allocation,
            (void **)&r->storage_buffers[buffers_to_map[i]].mapped));
    }
}

void pgraph_vk_finalize_buffers(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    for (int i = 0; i < BUFFER_COUNT; i++) {
        if (r->storage_buffers[i].mapped) {
            vmaUnmapMemory(r->allocator, r->storage_buffers[i].allocation);
        }
        destroy_buffer(pg, &r->storage_buffers[i]);
    }

    g_free(r->uploaded_bitmap);
    r->uploaded_bitmap = NULL;
}

// Returns true if `size` bytes, placed at the next `alignment`-aligned
// offset, still fit within the buffer.
bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index,
                                    VkDeviceSize size,
                                    VkDeviceAddress alignment)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    StorageBuffer *b = &r->storage_buffers[index];

    return (ROUND_UP(b->buffer_offset, alignment) + size) <= b->buffer_size;
}

// Copies `count` chunks from `data` into the mapped buffer, aligning each
// chunk to `alignment`, and returns the aligned offset of the first chunk.
VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index,
                                        void **data, VkDeviceSize *sizes,
                                        size_t count,
                                        VkDeviceAddress alignment)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkDeviceSize total_size = 0;
    for (int i = 0; i < count; i++) {
        total_size += sizes[i];
    }
    assert(pgraph_vk_buffer_has_space_for(pg, index, total_size, alignment));

    StorageBuffer *b = &r->storage_buffers[index];
    VkDeviceSize starting_offset = ROUND_UP(b->buffer_offset, alignment);

    assert(b->mapped);

    for (int i = 0; i < count; i++) {
        b->buffer_offset = ROUND_UP(b->buffer_offset, alignment);
        memcpy(b->mapped + b->buffer_offset, data[i], sizes[i]);
        b->buffer_offset += sizes[i];
    }

    return starting_offset;
}