/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/fast-hash.h"
#include "renderer.h"
void pgraph_vk_draw_begin(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
NV2A_VK_DPRINTF("NV097_SET_BEGIN_END: 0x%x", d->pgraph.primitive_mode);
uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
bool stencil_test =
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
bool is_nop_draw = !(color_write || depth_test || stencil_test);
pgraph_vk_surface_update(d, true, true, depth_test || stencil_test);
if (is_nop_draw) {
NV2A_VK_DPRINTF("nop!");
return;
}
}
static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
int polygon_mode = r->shader_binding->state.polygon_front_mode;
int primitive_mode = r->shader_binding->state.primitive_mode;
if (polygon_mode == POLY_MODE_POINT) {
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
}
// FIXME: Replace with LUT
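    // A hypothetical sketch of that LUT (the switch below remains the source
    // of truth; LINE_LOOP and POLYGON would still need special-casing):
    //
    //     static const VkPrimitiveTopology topo_map[] = {
    //         [PRIM_TYPE_POINTS] = VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
    //         [PRIM_TYPE_LINES] = VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
    //         [PRIM_TYPE_TRIANGLES] = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    //         /* ... */
    //     };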
switch (primitive_mode) {
case PRIM_TYPE_POINTS:
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
case PRIM_TYPE_LINES:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
case PRIM_TYPE_LINE_LOOP:
        // FIXME: A line loop is a line strip plus a closing segment from the
        // last vertex back to the first; the closing segment is dropped here.
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case PRIM_TYPE_LINE_STRIP:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
case PRIM_TYPE_TRIANGLES:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case PRIM_TYPE_TRIANGLE_STRIP:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
case PRIM_TYPE_TRIANGLE_FAN:
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
case PRIM_TYPE_QUADS:
return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
case PRIM_TYPE_QUAD_STRIP:
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
case PRIM_TYPE_POLYGON:
if (polygon_mode == POLY_MODE_LINE) {
return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME
} else if (polygon_mode == POLY_MODE_FILL) {
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
}
assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode");
return 0;
default:
assert(!"Invalid primitive_mode");
return 0;
}
}
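/*
 * Pipeline cache: an LRU keyed by fast_hash(PipelineKey). init_node zeroes a
 * freshly allocated entry, compare_nodes disambiguates hash collisions by
 * comparing full keys, and post_node_evict destroys the Vulkan objects once
 * an entry ages out. Evicting a pipeline still referenced by the current
 * command buffer would be a bug, hence the draw_time assertion below.
 */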
static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
PipelineBinding *snode = container_of(node, PipelineBinding, node);
snode->layout = VK_NULL_HANDLE;
snode->pipeline = VK_NULL_HANDLE;
snode->draw_time = 0;
}
static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node)
{
PGRAPHVkState *r = container_of(lru, PGRAPHVkState, pipeline_cache);
PipelineBinding *snode = container_of(node, PipelineBinding, node);
assert((!r->in_command_buffer ||
snode->draw_time < r->command_buffer_start_time) &&
"Pipeline evicted while in use!");
vkDestroyPipeline(r->device, snode->pipeline, NULL);
snode->pipeline = VK_NULL_HANDLE;
vkDestroyPipelineLayout(r->device, snode->layout, NULL);
snode->layout = VK_NULL_HANDLE;
}
static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
PipelineBinding *snode = container_of(node, PipelineBinding, node);
    return memcmp(&snode->key, key, sizeof(PipelineKey)) != 0;
}
static void init_pipeline_cache(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkPipelineCacheCreateInfo cache_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
.flags = 0,
.initialDataSize = 0,
.pInitialData = NULL,
.pNext = NULL,
};
VK_CHECK(vkCreatePipelineCache(r->device, &cache_info, NULL,
&r->vk_pipeline_cache));
const size_t pipeline_cache_size = 2048;
lru_init(&r->pipeline_cache);
r->pipeline_cache_entries =
g_malloc_n(pipeline_cache_size, sizeof(PipelineBinding));
assert(r->pipeline_cache_entries != NULL);
for (int i = 0; i < pipeline_cache_size; i++) {
lru_add_free(&r->pipeline_cache, &r->pipeline_cache_entries[i].node);
}
r->pipeline_cache.init_node = pipeline_cache_entry_init;
r->pipeline_cache.compare_nodes = pipeline_cache_entry_compare;
r->pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict;
}
static void finalize_pipeline_cache(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
lru_flush(&r->pipeline_cache);
g_free(r->pipeline_cache_entries);
r->pipeline_cache_entries = NULL;
vkDestroyPipelineCache(r->device, r->vk_pipeline_cache, NULL);
}
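/*
 * Vertex shader for clears: emits a single triangle covering the whole
 * viewport without any vertex buffer. For gl_VertexIndex 0, 1, 2 the
 * positions work out to (-1,-1), (3,-1) and (-1,3), so the full clip-space
 * [-1,1] x [-1,1] square is covered by one 3-vertex vkCmdDraw.
 */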
static char const *const quad_glsl =
"#version 450\n"
"void main()\n"
"{\n"
" float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n"
" float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n"
" gl_Position = vec4(x, y, 0, 1);\n"
"}\n";
static char const *const solid_frag_glsl =
"#version 450\n"
"layout(location = 0) out vec4 fragColor;\n"
"void main()\n"
"{\n"
" fragColor = vec4(1.0);"
"}\n";
static void init_clear_shaders(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
r->quad_vert_module = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_VERTEX_BIT, quad_glsl);
r->solid_frag_module = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_FRAGMENT_BIT, solid_frag_glsl);
}
static void finalize_clear_shaders(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
pgraph_vk_destroy_shader_module(r, r->quad_vert_module);
pgraph_vk_destroy_shader_module(r, r->solid_frag_module);
}
static void init_render_passes(PGRAPHVkState *r)
{
r->render_passes = g_array_new(false, false, sizeof(RenderPass));
}
static void finalize_render_passes(PGRAPHVkState *r)
{
for (int i = 0; i < r->render_passes->len; i++) {
RenderPass *p = &g_array_index(r->render_passes, RenderPass, i);
vkDestroyRenderPass(r->device, p->render_pass, NULL);
}
g_array_free(r->render_passes, true);
r->render_passes = NULL;
}
void pgraph_vk_init_pipelines(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
init_pipeline_cache(pg);
init_clear_shaders(pg);
init_render_passes(r);
VkSemaphoreCreateInfo semaphore_info = {
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO
};
VK_CHECK(vkCreateSemaphore(r->device, &semaphore_info, NULL,
&r->command_buffer_semaphore));
VkFenceCreateInfo fence_info = {
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
};
VK_CHECK(
vkCreateFence(r->device, &fence_info, NULL, &r->command_buffer_fence));
}
void pgraph_vk_finalize_pipelines(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
finalize_clear_shaders(pg);
finalize_pipeline_cache(pg);
finalize_render_passes(r);
vkDestroyFence(r->device, r->command_buffer_fence, NULL);
vkDestroySemaphore(r->device, r->command_buffer_semaphore, NULL);
}
static void init_render_pass_state(PGRAPHState *pg, RenderPassState *state)
{
PGRAPHVkState *r = pg->vk_renderer_state;
state->color_format = r->color_binding ?
r->color_binding->host_fmt.vk_format :
VK_FORMAT_UNDEFINED;
state->zeta_format = r->zeta_binding ? r->zeta_binding->host_fmt.vk_format :
VK_FORMAT_UNDEFINED;
}
static VkRenderPass create_render_pass(PGRAPHVkState *r, RenderPassState *state)
{
NV2A_VK_DPRINTF("Creating render pass");
VkAttachmentDescription attachments[2];
int num_attachments = 0;
bool color = state->color_format != VK_FORMAT_UNDEFINED;
bool zeta = state->zeta_format != VK_FORMAT_UNDEFINED;
VkAttachmentReference color_reference;
if (color) {
attachments[num_attachments] = (VkAttachmentDescription){
.format = state->color_format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
};
color_reference = (VkAttachmentReference){
num_attachments, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
};
num_attachments++;
}
VkAttachmentReference depth_reference;
if (zeta) {
attachments[num_attachments] = (VkAttachmentDescription){
.format = state->zeta_format,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
};
depth_reference = (VkAttachmentReference){
num_attachments, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
};
num_attachments++;
}
VkSubpassDependency dependency = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
};
if (color) {
dependency.srcStageMask |=
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
dependency.dstStageMask |=
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
}
if (zeta) {
dependency.srcStageMask |=
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
dependency.srcAccessMask |=
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
dependency.dstStageMask |=
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
dependency.dstAccessMask |=
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
}
VkSubpassDescription subpass = {
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.colorAttachmentCount = color ? 1 : 0,
.pColorAttachments = color ? &color_reference : NULL,
.pDepthStencilAttachment = zeta ? &depth_reference : NULL,
};
VkRenderPassCreateInfo renderpass_create_info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = num_attachments,
.pAttachments = attachments,
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 1,
.pDependencies = &dependency,
};
VkRenderPass render_pass;
VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL,
&render_pass));
return render_pass;
}
static VkRenderPass add_new_render_pass(PGRAPHVkState *r, RenderPassState *state)
{
RenderPass new_pass;
memcpy(&new_pass.state, state, sizeof(*state));
new_pass.render_pass = create_render_pass(r, state);
g_array_append_vals(r->render_passes, &new_pass, 1);
return new_pass.render_pass;
}
static VkRenderPass get_render_pass(PGRAPHVkState *r, RenderPassState *state)
{
for (int i = 0; i < r->render_passes->len; i++) {
RenderPass *p = &g_array_index(r->render_passes, RenderPass, i);
if (!memcmp(&p->state, state, sizeof(*state))) {
return p->render_pass;
}
}
return add_new_render_pass(r, state);
}
static void create_frame_buffer(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
NV2A_VK_DPRINTF("Creating framebuffer");
assert(r->color_binding || r->zeta_binding);
if (r->framebuffer_index >= ARRAY_SIZE(r->framebuffers)) {
pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
}
VkImageView attachments[2];
int attachment_count = 0;
if (r->color_binding) {
attachments[attachment_count++] = r->color_binding->image_view;
}
if (r->zeta_binding) {
attachments[attachment_count++] = r->zeta_binding->image_view;
}
SurfaceBinding *binding = r->color_binding ? : r->zeta_binding;
VkFramebufferCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.renderPass = r->render_pass,
.attachmentCount = attachment_count,
.pAttachments = attachments,
.width = binding->width,
.height = binding->height,
.layers = 1,
};
pgraph_apply_scaling_factor(pg, &create_info.width, &create_info.height);
VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL,
&r->framebuffers[r->framebuffer_index++]));
}
static void destroy_framebuffers(PGRAPHState *pg)
{
NV2A_VK_DPRINTF("Destroying framebuffer");
PGRAPHVkState *r = pg->vk_renderer_state;
for (int i = 0; i < r->framebuffer_index; i++) {
vkDestroyFramebuffer(r->device, r->framebuffers[i], NULL);
r->framebuffers[i] = VK_NULL_HANDLE;
}
r->framebuffer_index = 0;
}
static void create_clear_pipeline(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
NV2A_VK_DGROUP_BEGIN("Creating clear pipeline");
PipelineKey key;
memset(&key, 0, sizeof(key));
key.clear = true;
init_render_pass_state(pg, &key.render_pass_state);
key.regs[0] = r->clear_parameter;
uint64_t hash = fast_hash((void *)&key, sizeof(key));
LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
PipelineBinding *snode = container_of(node, PipelineBinding, node);
if (snode->pipeline != VK_NULL_HANDLE) {
NV2A_VK_DPRINTF("Cache hit");
r->pipeline_binding_changed = r->pipeline_binding != snode;
r->pipeline_binding = snode;
NV2A_VK_DGROUP_END();
return;
}
NV2A_VK_DPRINTF("Cache miss");
nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);
memcpy(&snode->key, &key, sizeof(key));
bool clear_any_color_channels =
r->clear_parameter & NV097_CLEAR_SURFACE_COLOR;
bool clear_all_color_channels =
(r->clear_parameter & NV097_CLEAR_SURFACE_COLOR) ==
(NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | NV097_CLEAR_SURFACE_B |
NV097_CLEAR_SURFACE_A);
bool partial_color_clear =
clear_any_color_channels && !clear_all_color_channels;
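    // Partial color clears (only some of R/G/B/A) are implemented by drawing
    // a solid white quad with blending enabled: src factor CONSTANT_COLOR and
    // dst factor ZERO make the output exactly the blend constant, which is
    // set to the clear color via vkCmdSetBlendConstants at draw time, while
    // the color write mask limits which channels are touched.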
int num_active_shader_stages = 0;
VkPipelineShaderStageCreateInfo shader_stages[2];
shader_stages[num_active_shader_stages++] =
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = r->quad_vert_module->module,
.pName = "main",
};
if (partial_color_clear) {
shader_stages[num_active_shader_stages++] =
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = r->solid_frag_module->module,
.pName = "main",
};
}
VkPipelineVertexInputStateCreateInfo vertex_input = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
};
VkPipelineInputAssemblyStateCreateInfo input_assembly = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
.primitiveRestartEnable = VK_FALSE,
};
VkPipelineViewportStateCreateInfo viewport_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
};
VkPipelineRasterizationStateCreateInfo rasterizer = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = VK_POLYGON_MODE_FILL,
.lineWidth = 1.0f,
.cullMode = VK_CULL_MODE_BACK_BIT,
.frontFace = VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
};
VkPipelineMultisampleStateCreateInfo multisampling = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.sampleShadingEnable = VK_FALSE,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
};
VkPipelineDepthStencilStateCreateInfo depth_stencil = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.depthTestEnable = VK_TRUE,
.depthWriteEnable =
(r->clear_parameter & NV097_CLEAR_SURFACE_Z) ? VK_TRUE : VK_FALSE,
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = VK_FALSE,
};
if (r->clear_parameter & NV097_CLEAR_SURFACE_STENCIL) {
depth_stencil.stencilTestEnable = VK_TRUE;
depth_stencil.front.failOp = VK_STENCIL_OP_REPLACE;
depth_stencil.front.passOp = VK_STENCIL_OP_REPLACE;
depth_stencil.front.depthFailOp = VK_STENCIL_OP_REPLACE;
depth_stencil.front.compareOp = VK_COMPARE_OP_ALWAYS;
depth_stencil.front.compareMask = 0xff;
depth_stencil.front.writeMask = 0xff;
depth_stencil.front.reference = 0xff;
depth_stencil.back = depth_stencil.front;
}
VkColorComponentFlags write_mask = 0;
if (r->clear_parameter & NV097_CLEAR_SURFACE_R)
write_mask |= VK_COLOR_COMPONENT_R_BIT;
if (r->clear_parameter & NV097_CLEAR_SURFACE_G)
write_mask |= VK_COLOR_COMPONENT_G_BIT;
if (r->clear_parameter & NV097_CLEAR_SURFACE_B)
write_mask |= VK_COLOR_COMPONENT_B_BIT;
if (r->clear_parameter & NV097_CLEAR_SURFACE_A)
write_mask |= VK_COLOR_COMPONENT_A_BIT;
VkPipelineColorBlendAttachmentState color_blend_attachment = {
.colorWriteMask = write_mask,
.blendEnable = VK_TRUE,
.colorBlendOp = VK_BLEND_OP_ADD,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR,
.alphaBlendOp = VK_BLEND_OP_ADD,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA,
};
VkPipelineColorBlendStateCreateInfo color_blending = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = r->color_binding ? 1 : 0,
.pAttachments = r->color_binding ? &color_blend_attachment : NULL,
};
VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_BLEND_CONSTANTS };
VkPipelineDynamicStateCreateInfo dynamic_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = partial_color_clear ? 3 : 2,
.pDynamicStates = dynamic_states,
};
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
};
VkPipelineLayout layout;
VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
&layout));
VkGraphicsPipelineCreateInfo pipeline_info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = num_active_shader_stages,
.pStages = shader_stages,
.pVertexInputState = &vertex_input,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_state,
.pRasterizationState = &rasterizer,
.pMultisampleState = &multisampling,
.pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_state,
.layout = layout,
.renderPass = get_render_pass(r, &key.render_pass_state),
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
};
VkPipeline pipeline;
VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
&pipeline_info, NULL, &pipeline));
snode->pipeline = pipeline;
snode->layout = layout;
snode->render_pass = pipeline_info.renderPass;
snode->draw_time = pg->draw_time;
r->pipeline_binding = snode;
r->pipeline_binding_changed = true;
NV2A_VK_DGROUP_END();
}
static bool check_render_pass_dirty(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->pipeline_binding);
RenderPassState state;
init_render_pass_state(pg, &state);
return memcmp(&state, &r->pipeline_binding->key.render_pass_state,
sizeof(state)) != 0;
}
// Quickly check for any state changes that would require more analysis
static bool check_pipeline_dirty(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->pipeline_binding);
if (r->shader_bindings_changed || r->texture_bindings_changed ||
check_render_pass_dirty(pg)) {
return true;
}
const unsigned int regs[] = {
NV_PGRAPH_BLEND, NV_PGRAPH_BLENDCOLOR,
NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_1,
NV_PGRAPH_CONTROL_2, NV_PGRAPH_CONTROL_3,
NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE,
NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR,
};
for (int i = 0; i < ARRAY_SIZE(regs); i++) {
if (pgraph_is_reg_dirty(pg, regs[i])) {
return true;
}
}
// FIXME: Use dirty bits instead
if (memcmp(r->vertex_attribute_descriptions,
r->pipeline_binding->key.attribute_descriptions,
r->num_active_vertex_attribute_descriptions *
sizeof(r->vertex_attribute_descriptions[0])) ||
memcmp(r->vertex_binding_descriptions,
r->pipeline_binding->key.binding_descriptions,
r->num_active_vertex_binding_descriptions *
sizeof(r->vertex_binding_descriptions[0]))) {
return true;
}
nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_NOTDIRTY);
return false;
}
static void init_pipeline_key(PGRAPHState *pg, PipelineKey *key)
{
PGRAPHVkState *r = pg->vk_renderer_state;
memset(key, 0, sizeof(*key));
init_render_pass_state(pg, &key->render_pass_state);
memcpy(&key->shader_state, &r->shader_binding->state, sizeof(ShaderState));
memcpy(key->binding_descriptions, r->vertex_binding_descriptions,
sizeof(key->binding_descriptions[0]) *
r->num_active_vertex_binding_descriptions);
memcpy(key->attribute_descriptions, r->vertex_attribute_descriptions,
sizeof(key->attribute_descriptions[0]) *
r->num_active_vertex_attribute_descriptions);
// FIXME: Register masking
// FIXME: Use more dynamic state updates
const int regs[] = {
NV_PGRAPH_BLEND, NV_PGRAPH_BLENDCOLOR,
NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_1,
NV_PGRAPH_CONTROL_2, NV_PGRAPH_CONTROL_3,
NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE,
NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR,
};
assert(ARRAY_SIZE(regs) == ARRAY_SIZE(key->regs));
for (int i = 0; i < ARRAY_SIZE(regs); i++) {
key->regs[i] = pgraph_reg_r(pg, regs[i]);
}
}
static void create_pipeline(PGRAPHState *pg)
{
NV2A_VK_DGROUP_BEGIN("Creating pipeline");
NV2AState *d = container_of(pg, NV2AState, pgraph);
PGRAPHVkState *r = pg->vk_renderer_state;
pgraph_vk_bind_textures(d);
pgraph_vk_bind_shaders(pg);
// FIXME: If nothing was dirty, don't even try creating the key or hashing.
// Just use the same pipeline.
if (r->pipeline_binding && !check_pipeline_dirty(pg)) {
NV2A_VK_DPRINTF("Cache hit");
NV2A_VK_DGROUP_END();
return;
}
PipelineKey key;
init_pipeline_key(pg, &key);
uint64_t hash = fast_hash((void *)&key, sizeof(key));
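    // Count consecutive draws that produce an identical pipeline key; these
    // are candidates for being merged into one draw (profiling only).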
static uint64_t last_hash;
if (hash == last_hash) {
nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_MERGE);
}
last_hash = hash;
LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
PipelineBinding *snode = container_of(node, PipelineBinding, node);
if (snode->pipeline != VK_NULL_HANDLE) {
NV2A_VK_DPRINTF("Cache hit");
r->pipeline_binding_changed = r->pipeline_binding != snode;
r->pipeline_binding = snode;
NV2A_VK_DGROUP_END();
return;
}
NV2A_VK_DPRINTF("Cache miss");
nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);
memcpy(&snode->key, &key, sizeof(key));
uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
bool depth_write = !!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE);
bool stencil_test =
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
int num_active_shader_stages = 0;
VkPipelineShaderStageCreateInfo shader_stages[3];
shader_stages[num_active_shader_stages++] =
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = r->shader_binding->vertex->module,
.pName = "main",
};
if (r->shader_binding->geometry) {
shader_stages[num_active_shader_stages++] =
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_GEOMETRY_BIT,
.module = r->shader_binding->geometry->module,
.pName = "main",
};
}
shader_stages[num_active_shader_stages++] =
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = r->shader_binding->fragment->module,
.pName = "main",
};
VkPipelineVertexInputStateCreateInfo vertex_input = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount =
r->num_active_vertex_binding_descriptions,
.pVertexBindingDescriptions = r->vertex_binding_descriptions,
.vertexAttributeDescriptionCount =
r->num_active_vertex_attribute_descriptions,
.pVertexAttributeDescriptions = r->vertex_attribute_descriptions,
};
VkPipelineInputAssemblyStateCreateInfo input_assembly = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = get_primitive_topology(pg),
.primitiveRestartEnable = VK_FALSE,
};
VkPipelineViewportStateCreateInfo viewport_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
};
void *rasterizer_next_struct = NULL;
VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state;
if (r->provoking_vertex_extension_enabled) {
VkProvokingVertexModeEXT provoking_mode =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
NV_PGRAPH_CONTROL_3_SHADEMODE) ==
NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ?
VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT :
VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
provoking_state =
(VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){
.sType =
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
.provokingVertexMode = provoking_mode,
};
rasterizer_next_struct = &provoking_state;
} else {
// FIXME: Handle in shader?
}
VkPipelineRasterizationStateCreateInfo rasterizer = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state
.polygon_front_mode],
.lineWidth = 1.0f,
.frontFace = (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_FRONTFACE) ?
VK_FRONT_FACE_COUNTER_CLOCKWISE :
VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
.pNext = rasterizer_next_struct,
};
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_CULLENABLE) {
uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
NV_PGRAPH_SETUPRASTER_CULLCTRL);
assert(cull_face < ARRAY_SIZE(pgraph_cull_face_vk_map));
rasterizer.cullMode = pgraph_cull_face_vk_map[cull_face];
} else {
rasterizer.cullMode = VK_CULL_MODE_NONE;
}
VkPipelineMultisampleStateCreateInfo multisampling = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.sampleShadingEnable = VK_FALSE,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
};
VkPipelineDepthStencilStateCreateInfo depth_stencil = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.depthWriteEnable = depth_write ? VK_TRUE : VK_FALSE,
};
if (depth_test) {
depth_stencil.depthTestEnable = VK_TRUE;
uint32_t depth_func =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ZFUNC);
assert(depth_func < ARRAY_SIZE(pgraph_depth_func_vk_map));
depth_stencil.depthCompareOp = pgraph_depth_func_vk_map[depth_func];
}
if (stencil_test) {
depth_stencil.stencilTestEnable = VK_TRUE;
uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_REF);
uint32_t mask_read = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
uint32_t mask_write = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE);
uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_vk_map));
assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_vk_map));
depth_stencil.front.failOp = pgraph_stencil_op_vk_map[op_fail];
depth_stencil.front.passOp = pgraph_stencil_op_vk_map[op_zpass];
depth_stencil.front.depthFailOp = pgraph_stencil_op_vk_map[op_zfail];
depth_stencil.front.compareOp =
pgraph_stencil_func_vk_map[stencil_func];
depth_stencil.front.compareMask = mask_read;
depth_stencil.front.writeMask = mask_write;
depth_stencil.front.reference = stencil_ref;
depth_stencil.back = depth_stencil.front;
}
VkColorComponentFlags write_mask = 0;
if (control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE)
write_mask |= VK_COLOR_COMPONENT_R_BIT;
if (control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE)
write_mask |= VK_COLOR_COMPONENT_G_BIT;
if (control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE)
write_mask |= VK_COLOR_COMPONENT_B_BIT;
if (control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE)
write_mask |= VK_COLOR_COMPONENT_A_BIT;
VkPipelineColorBlendAttachmentState color_blend_attachment = {
.colorWriteMask = write_mask,
};
float blend_constant[4] = { 0, 0, 0, 0 };
if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
color_blend_attachment.blendEnable = VK_TRUE;
uint32_t sfactor =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_SFACTOR);
uint32_t dfactor =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_DFACTOR);
assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
color_blend_attachment.srcColorBlendFactor =
pgraph_blend_factor_vk_map[sfactor];
color_blend_attachment.dstColorBlendFactor =
pgraph_blend_factor_vk_map[dfactor];
color_blend_attachment.srcAlphaBlendFactor =
pgraph_blend_factor_vk_map[sfactor];
color_blend_attachment.dstAlphaBlendFactor =
pgraph_blend_factor_vk_map[dfactor];
uint32_t equation =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_EQN);
assert(equation < ARRAY_SIZE(pgraph_blend_equation_vk_map));
color_blend_attachment.colorBlendOp =
pgraph_blend_equation_vk_map[equation];
color_blend_attachment.alphaBlendOp =
pgraph_blend_equation_vk_map[equation];
uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
pgraph_argb_pack32_to_rgba_float(blend_color, blend_constant);
}
VkPipelineColorBlendStateCreateInfo color_blending = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = r->color_binding ? 1 : 0,
.pAttachments = r->color_binding ? &color_blend_attachment : NULL,
.blendConstants[0] = blend_constant[0],
.blendConstants[1] = blend_constant[1],
.blendConstants[2] = blend_constant[2],
.blendConstants[3] = blend_constant[3],
};
VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR };
VkPipelineDynamicStateCreateInfo dynamic_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = ARRAY_SIZE(dynamic_states),
.pDynamicStates = dynamic_states,
};
// /* Clipping */
// glEnable(GL_CLIP_DISTANCE0);
// glEnable(GL_CLIP_DISTANCE1);
// /* Polygon offset */
// /* FIXME: GL implementation-specific, maybe do this in VS? */
// if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
// NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE)
// if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
// NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE)
// if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
// NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
(NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
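        // NV_PGRAPH_ZOFFSET{FACTOR,BIAS} hold raw IEEE-754 single-precision
        // bits, so reinterpret the register value rather than convert it.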
uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
float zfactor = *(float *)&zfactor_u32;
uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
float zbias = *(float *)&zbias_u32;
rasterizer.depthBiasEnable = VK_TRUE;
rasterizer.depthBiasSlopeFactor = zfactor;
rasterizer.depthBiasConstantFactor = zbias;
}
if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
rasterizer.depthClampEnable = VK_TRUE;
}
// FIXME: Dither
// if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
// NV_PGRAPH_CONTROL_0_DITHERENABLE))
// FIXME: point size
// FIXME: Edge Antialiasing
// bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING),
// NV_PGRAPH_ANTIALIASING_ENABLE);
// if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
// NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
// FIXME: VK_EXT_line_rasterization
// }
// if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
// NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
// FIXME: No direct analog. Just do it with MSAA.
// }
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &r->descriptor_set_layout,
};
VkPushConstantRange push_constant_range;
if (r->shader_binding->state.use_push_constants_for_uniform_attrs) {
int num_uniform_attributes =
__builtin_popcount(r->shader_binding->state.uniform_attrs);
if (num_uniform_attributes) {
push_constant_range = (VkPushConstantRange){
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
.offset = 0,
// FIXME: Minimize push constants
.size = num_uniform_attributes * 4 * sizeof(float),
};
pipeline_layout_info.pushConstantRangeCount = 1;
pipeline_layout_info.pPushConstantRanges = &push_constant_range;
}
}
VkPipelineLayout layout;
VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
&layout));
VkGraphicsPipelineCreateInfo pipeline_create_info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = num_active_shader_stages,
.pStages = shader_stages,
.pVertexInputState = &vertex_input,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_state,
.pRasterizationState = &rasterizer,
.pMultisampleState = &multisampling,
.pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_state,
.layout = layout,
.renderPass = get_render_pass(r, &key.render_pass_state),
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
};
VkPipeline pipeline;
VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
&pipeline_create_info, NULL, &pipeline));
snode->pipeline = pipeline;
snode->layout = layout;
snode->render_pass = pipeline_create_info.renderPass;
snode->draw_time = pg->draw_time;
r->pipeline_binding = snode;
r->pipeline_binding_changed = true;
NV2A_VK_DGROUP_END();
}
static void push_vertex_attr_values(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
if (!r->shader_binding->state.use_push_constants_for_uniform_attrs) {
return;
}
// FIXME: Partial updates
float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
int num_uniform_attrs = 0;
pgraph_get_inline_values(pg, r->shader_binding->state.uniform_attrs, values,
&num_uniform_attrs);
if (num_uniform_attrs > 0) {
vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout,
VK_SHADER_STAGE_VERTEX_BIT, 0,
num_uniform_attrs * 4 * sizeof(float),
&values);
}
}
static void bind_descriptor_sets(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->descriptor_set_index >= 1);
vkCmdBindDescriptorSets(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
r->pipeline_binding->layout, 0, 1,
&r->descriptor_sets[r->descriptor_set_index - 1], 0,
NULL);
}
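/*
 * Occlusion (zpass pixel count) queries. This renderer begins and ends
 * queries outside of render passes (see the asserts), and
 * VK_QUERY_CONTROL_PRECISE_BIT requests an exact passed-sample count rather
 * than a boolean any-samples-passed result.
 */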
static void begin_query(PGRAPHVkState *r)
{
assert(r->in_command_buffer);
assert(!r->in_render_pass);
assert(!r->query_in_flight);
    // FIXME: Handle this gracefully: grow the query buffer, or at the very
    // least flush the in-flight queries first.
assert(r->num_queries_in_flight < r->max_queries_in_flight);
nv2a_profile_inc_counter(NV2A_PROF_QUERY);
vkCmdResetQueryPool(r->command_buffer, r->query_pool,
r->num_queries_in_flight, 1);
vkCmdBeginQuery(r->command_buffer, r->query_pool, r->num_queries_in_flight,
VK_QUERY_CONTROL_PRECISE_BIT);
r->query_in_flight = true;
r->new_query_needed = false;
r->num_queries_in_flight++;
}
static void end_query(PGRAPHVkState *r)
{
assert(r->in_command_buffer);
assert(!r->in_render_pass);
assert(r->query_in_flight);
vkCmdEndQuery(r->command_buffer, r->query_pool,
r->num_queries_in_flight - 1);
r->query_in_flight = false;
}
static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd,
int index_src, int index_dst)
{
PGRAPHVkState *r = pg->vk_renderer_state;
StorageBuffer *b_src = &r->storage_buffers[index_src];
StorageBuffer *b_dst = &r->storage_buffers[index_dst];
if (!b_src->buffer_offset) {
return;
}
VkBufferCopy copy_region = { .size = b_src->buffer_offset };
vkCmdCopyBuffer(cmd, b_src->buffer, b_dst->buffer, 1, &copy_region);
VkAccessFlags dst_access_mask;
VkPipelineStageFlags dst_stage_mask;
switch (index_dst) {
case BUFFER_INDEX:
dst_access_mask = VK_ACCESS_INDEX_READ_BIT;
dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
break;
case BUFFER_VERTEX_INLINE:
dst_access_mask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
break;
case BUFFER_UNIFORM:
dst_access_mask = VK_ACCESS_UNIFORM_READ_BIT;
dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
break;
default:
assert(0);
break;
}
VkBufferMemoryBarrier barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = dst_access_mask,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = b_dst->buffer,
.size = b_src->buffer_offset
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, 0,
0, NULL, 1, &barrier, 0, NULL);
b_src->buffer_offset = 0;
}
static void flush_memory_buffer(PGRAPHState *pg, VkCommandBuffer cmd)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VK_CHECK(vmaFlushAllocation(
r->allocator, r->storage_buffers[BUFFER_VERTEX_RAM].allocation, 0,
VK_WHOLE_SIZE));
VkBufferMemoryBarrier barrier = {
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT,
.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = r->storage_buffers[BUFFER_VERTEX_RAM].buffer,
.offset = 0,
.size = VK_WHOLE_SIZE,
};
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, NULL, 1,
&barrier, 0, NULL);
}
static void begin_render_pass(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->in_command_buffer);
assert(!r->in_render_pass);
nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_RENDERPASSES);
unsigned int vp_width = pg->surface_binding_dim.width,
vp_height = pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
assert(r->framebuffer_index > 0);
VkRenderPassBeginInfo render_pass_begin_info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = r->render_pass,
.framebuffer = r->framebuffers[r->framebuffer_index - 1],
.renderArea.extent.width = vp_width,
.renderArea.extent.height = vp_height,
.clearValueCount = 0,
.pClearValues = NULL,
};
vkCmdBeginRenderPass(r->command_buffer, &render_pass_begin_info,
VK_SUBPASS_CONTENTS_INLINE);
r->in_render_pass = true;
}
static void end_render_pass(PGRAPHVkState *r)
{
if (r->in_render_pass) {
vkCmdEndRenderPass(r->command_buffer);
r->in_render_pass = false;
}
}
const enum NV2A_PROF_COUNTERS_ENUM finish_reason_to_counter_enum[] = {
[VK_FINISH_REASON_VERTEX_BUFFER_DIRTY] = NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY,
[VK_FINISH_REASON_SURFACE_CREATE] = NV2A_PROF_FINISH_SURFACE_CREATE,
[VK_FINISH_REASON_SURFACE_DOWN] = NV2A_PROF_FINISH_SURFACE_DOWN,
[VK_FINISH_REASON_NEED_BUFFER_SPACE] = NV2A_PROF_FINISH_NEED_BUFFER_SPACE,
[VK_FINISH_REASON_FRAMEBUFFER_DIRTY] = NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY,
[VK_FINISH_REASON_PRESENTING] = NV2A_PROF_FINISH_PRESENTING,
[VK_FINISH_REASON_FLIP_STALL] = NV2A_PROF_FINISH_FLIP_STALL,
[VK_FINISH_REASON_FLUSH] = NV2A_PROF_FINISH_FLUSH,
[VK_FINISH_REASON_STALLED] = NV2A_PROF_FINISH_STALLED,
};
void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(!r->in_draw);
if (r->in_command_buffer) {
nv2a_profile_inc_counter(finish_reason_to_counter_enum[finish_reason]);
if (r->in_render_pass) {
end_render_pass(r);
}
if (r->query_in_flight) {
end_query(r);
}
VK_CHECK(vkEndCommandBuffer(r->command_buffer));
VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); // FIXME: Cleanup
sync_staging_buffer(pg, cmd, BUFFER_INDEX_STAGING, BUFFER_INDEX);
sync_staging_buffer(pg, cmd, BUFFER_VERTEX_INLINE_STAGING,
BUFFER_VERTEX_INLINE);
sync_staging_buffer(pg, cmd, BUFFER_UNIFORM_STAGING, BUFFER_UNIFORM);
bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);
flush_memory_buffer(pg, cmd);
VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer));
r->in_aux_command_buffer = false;
VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
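        /*
         * Two submissions: the aux command buffer (which carries the staging
         * copies recorded above) signals command_buffer_semaphore, and the
         * main command buffer waits on it, so uploaded index/vertex/uniform
         * data is resident before the draws that consume it execute.
         */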
VkSubmitInfo submit_infos[] = {
{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &r->aux_command_buffer,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &r->command_buffer_semaphore,
},
{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &r->command_buffer,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &r->command_buffer_semaphore,
.pWaitDstStageMask = &wait_stage,
}
};
nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT);
vkResetFences(r->device, 1, &r->command_buffer_fence);
VK_CHECK(vkQueueSubmit(r->queue, ARRAY_SIZE(submit_infos), submit_infos,
r->command_buffer_fence));
r->submit_count += 1;
bool check_budget = false;
// Periodically check memory budget
const int max_num_submits_before_budget_update = 5;
if (finish_reason == VK_FINISH_REASON_FLIP_STALL ||
(r->submit_count - r->allocator_last_submit_index) >
max_num_submits_before_budget_update) {
            // Advancing the frame index prompts VMA to refresh its cached
            // memory budget data
vmaSetCurrentFrameIndex(r->allocator, r->submit_count);
r->allocator_last_submit_index = r->submit_count;
check_budget = true;
}
VK_CHECK(vkWaitForFences(r->device, 1, &r->command_buffer_fence,
VK_TRUE, UINT64_MAX));
r->descriptor_set_index = 0;
r->in_command_buffer = false;
destroy_framebuffers(pg);
if (check_budget) {
pgraph_vk_check_memory_budget(pg);
}
}
NV2AState *d = container_of(pg, NV2AState, pgraph);
pgraph_vk_process_pending_reports_internal(d);
pgraph_vk_compute_finish_complete(r);
}
void pgraph_vk_begin_command_buffer(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(!r->in_command_buffer);
VkCommandBufferBeginInfo command_buffer_begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
VK_CHECK(vkBeginCommandBuffer(r->command_buffer,
&command_buffer_begin_info));
r->command_buffer_start_time = pg->draw_time;
r->in_command_buffer = true;
}
// FIXME: Refactor below
void pgraph_vk_ensure_command_buffer(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
if (!r->in_command_buffer) {
pgraph_vk_begin_command_buffer(pg);
}
}
void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
end_render_pass(r);
if (r->query_in_flight) {
end_query(r);
}
}
VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
pgraph_vk_ensure_command_buffer(pg);
pgraph_vk_ensure_not_in_render_pass(pg);
return r->command_buffer;
}
void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(cmd == r->command_buffer);
}
// FIXME: Add more metrics for determining command buffer 'fullness' and
// conservatively flush. Unfortunately there doesn't appear to be a good way
// to determine the actual maximum capacity of a command buffer, but we are
// obviously not supposed to append to one endlessly. In practice we already
// flush often for other reasons (descriptor set exhaustion, surface changes,
// etc.).
static void begin_pre_draw(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->color_binding || r->zeta_binding);
assert(!r->color_binding || r->color_binding->initialized);
assert(!r->zeta_binding || r->zeta_binding->initialized);
if (pg->clearing) {
create_clear_pipeline(pg);
} else {
create_pipeline(pg);
}
bool render_pass_dirty = r->pipeline_binding->render_pass != r->render_pass;
if (r->framebuffer_dirty || render_pass_dirty) {
pgraph_vk_ensure_not_in_render_pass(pg);
}
if (render_pass_dirty) {
r->render_pass = r->pipeline_binding->render_pass;
}
if (r->framebuffer_dirty) {
create_frame_buffer(pg);
r->framebuffer_dirty = false;
}
if (!pg->clearing) {
pgraph_vk_update_descriptor_sets(pg);
}
if (r->framebuffer_index == 0) {
create_frame_buffer(pg);
}
pgraph_vk_ensure_command_buffer(pg);
}
static void begin_draw(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->in_command_buffer);
// Visibility testing
if (!pg->clearing && pg->zpass_pixel_count_enable) {
if (r->new_query_needed && r->query_in_flight) {
end_render_pass(r);
end_query(r);
}
if (!r->query_in_flight) {
end_render_pass(r);
begin_query(r);
}
} else if (r->query_in_flight) {
end_render_pass(r);
end_query(r);
}
if (pg->clearing) {
end_render_pass(r);
}
bool must_bind_pipeline = r->pipeline_binding_changed;
if (!r->in_render_pass) {
begin_render_pass(pg);
must_bind_pipeline = true;
}
if (must_bind_pipeline) {
nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_BIND);
vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
r->pipeline_binding->pipeline);
r->pipeline_binding->draw_time = pg->draw_time;
unsigned int vp_width = pg->surface_binding_dim.width,
vp_height = pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
VkViewport viewport = {
.width = vp_width,
.height = vp_height,
.minDepth = 0.0,
.maxDepth = 1.0,
};
vkCmdSetViewport(r->command_buffer, 0, 1, &viewport);
/* Surface clip */
/* FIXME: Consider moving to PSH w/ window clip */
unsigned int xmin = pg->surface_shape.clip_x -
pg->surface_binding_dim.clip_x,
ymin = pg->surface_shape.clip_y -
pg->surface_binding_dim.clip_y;
unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
ymax = ymin + pg->surface_shape.clip_height - 1;
unsigned int scissor_width = xmax - xmin + 1,
scissor_height = ymax - ymin + 1;
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
pgraph_apply_scaling_factor(pg, &xmin, &ymin);
pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
VkRect2D scissor = {
.offset.x = xmin,
.offset.y = ymin,
.extent.width = scissor_width,
.extent.height = scissor_height,
};
vkCmdSetScissor(r->command_buffer, 0, 1, &scissor);
}
if (!pg->clearing) {
bind_descriptor_sets(pg);
push_vertex_attr_values(pg);
}
r->in_draw = true;
}
static void end_draw(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->in_command_buffer);
assert(r->in_render_pass);
if (pg->clearing) {
end_render_pass(r);
}
r->in_draw = false;
// FIXME: We could clear less
pgraph_clear_dirty_reg_map(pg);
}
void pgraph_vk_draw_end(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
bool stencil_test =
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
bool is_nop_draw = !(color_write || depth_test || stencil_test);
if (is_nop_draw) {
        // FIXME: Check PGRAPH register 0x880.
        // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
        // check that will raise an exception when a draw would modify the
        // color and/or zeta buffer while the target(s) are masked off. The
        // check only seems to trigger during fragment processing; it is
        // legal to attempt a draw that is entirely clipped, regardless of
        // 0x880. See xemu#635 for context.
        NV2A_VK_DPRINTF("nop draw!");
return;
}
pgraph_vk_flush_draw(d);
pg->draw_time++;
if (r->color_binding && pgraph_color_write_enabled(pg)) {
r->color_binding->draw_time = pg->draw_time;
}
if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
r->zeta_binding->draw_time = pg->draw_time;
}
pgraph_vk_set_surface_dirty(pg, color_write, depth_test || stencil_test);
}
static int compare_memory_sync_requirement_by_addr(const void *p1,
const void *p2)
{
const MemorySyncRequirement *l = p1, *r = p2;
if (l->addr < r->addr)
return -1;
if (l->addr > r->addr)
return 1;
return 0;
}
static void sync_vertex_ram_buffer(PGRAPHState *pg)
{
NV2AState *d = container_of(pg, NV2AState, pgraph);
PGRAPHVkState *r = pg->vk_renderer_state;
if (r->num_vertex_ram_buffer_syncs == 0) {
return;
}
// Align sync requirements to page boundaries
NV2A_VK_DGROUP_BEGIN("Sync vertex RAM buffer");
for (int i = 0; i < r->num_vertex_ram_buffer_syncs; i++) {
NV2A_VK_DPRINTF("Need to sync vertex memory @%" HWADDR_PRIx
", %" HWADDR_PRIx " bytes",
r->vertex_ram_buffer_syncs[i].addr,
r->vertex_ram_buffer_syncs[i].size);
hwaddr start_addr =
r->vertex_ram_buffer_syncs[i].addr & TARGET_PAGE_MASK;
hwaddr end_addr = r->vertex_ram_buffer_syncs[i].addr +
r->vertex_ram_buffer_syncs[i].size;
end_addr = ROUND_UP(end_addr, TARGET_PAGE_SIZE);
NV2A_VK_DPRINTF("- %d: %08" HWADDR_PRIx " %zd bytes"
" -> %08" HWADDR_PRIx " %zd bytes", i,
r->vertex_ram_buffer_syncs[i].addr,
r->vertex_ram_buffer_syncs[i].size, start_addr,
end_addr - start_addr);
r->vertex_ram_buffer_syncs[i].addr = start_addr;
r->vertex_ram_buffer_syncs[i].size = end_addr - start_addr;
}
// Sort the requirements in increasing order of addresses
qsort(r->vertex_ram_buffer_syncs, r->num_vertex_ram_buffer_syncs,
sizeof(MemorySyncRequirement),
compare_memory_sync_requirement_by_addr);
// Merge overlapping/adjacent requests to minimize number of tests
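    // e.g. page-aligned requests [0x1000, +0x1000) and [0x1800, +0x1000)
    // overlap and collapse into a single [0x1000, +0x1800) dirty check.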
MemorySyncRequirement merged[16];
int num_syncs = 1;
merged[0] = r->vertex_ram_buffer_syncs[0];
for (int i = 1; i < r->num_vertex_ram_buffer_syncs; i++) {
MemorySyncRequirement *p = &merged[num_syncs - 1];
MemorySyncRequirement *t = &r->vertex_ram_buffer_syncs[i];
if (t->addr <= (p->addr + p->size)) {
// Merge with previous
hwaddr p_end_addr = p->addr + p->size;
hwaddr t_end_addr = t->addr + t->size;
hwaddr new_end_addr = MAX(p_end_addr, t_end_addr);
p->size = new_end_addr - p->addr;
} else {
merged[num_syncs++] = *t;
}
}
if (num_syncs < r->num_vertex_ram_buffer_syncs) {
NV2A_VK_DPRINTF("Reduced to %d sync checks", num_syncs);
}
for (int i = 0; i < num_syncs; i++) {
hwaddr addr = merged[i].addr;
VkDeviceSize size = merged[i].size;
NV2A_VK_DPRINTF("- %d: %08"HWADDR_PRIx" %zd bytes", i, addr, size);
if (memory_region_test_and_clear_dirty(d->vram, addr, size,
DIRTY_MEMORY_NV2A)) {
NV2A_VK_DPRINTF("Memory dirty. Synchronizing...");
pgraph_vk_update_vertex_ram_buffer(pg, addr, d->vram_ptr + addr,
size);
}
}
r->num_vertex_ram_buffer_syncs = 0;
NV2A_VK_DGROUP_END();
}
void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
nv2a_profile_inc_counter(NV2A_PROF_CLEAR);
bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
bool write_zeta =
(parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
pg->clearing = true;
    // FIXME: If doing a full surface clear, just mark the surface for full
    // clear and perform the clear as part of the surface load.
pgraph_vk_surface_update(d, true, write_color, write_zeta);
SurfaceBinding *binding = r->color_binding ?: r->zeta_binding;
if (!binding) {
/* Nothing bound to clear */
pg->clearing = false;
return;
}
r->clear_parameter = parameter;
uint32_t clearrectx = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX);
uint32_t clearrecty = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY);
int xmin = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMIN);
int xmax = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMAX);
int ymin = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMIN);
int ymax = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMAX);
NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax,
ymax, write_color ? " color" : "",
write_zeta ? " zeta" : "");
begin_pre_draw(pg);
begin_draw(pg);
    // FIXME: What does hardware do when min > max?
xmin = MIN(xmin, binding->width - 1);
ymin = MIN(ymin, binding->height - 1);
xmax = MIN(xmax, binding->width - 1);
ymax = MIN(ymax, binding->height - 1);
int scissor_width = MAX(0, xmax - xmin + 1),
scissor_height = MAX(0, ymax - ymin + 1);
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
pgraph_apply_scaling_factor(pg, &xmin, &ymin);
pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
VkClearRect clear_rect = {
.rect = {
.offset = { .x = xmin, .y = ymin },
.extent = { .width = scissor_width, .height = scissor_height },
},
.baseArrayLayer = 0,
.layerCount = 1,
};
int num_attachments = 0;
VkClearAttachment attachments[2];
if (write_color && r->color_binding) {
const bool clear_all_color_channels =
(parameter & NV097_CLEAR_SURFACE_COLOR) ==
(NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G |
NV097_CLEAR_SURFACE_B | NV097_CLEAR_SURFACE_A);
if (clear_all_color_channels) {
attachments[num_attachments] = (VkClearAttachment){
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.colorAttachment = 0,
};
pgraph_get_clear_color(
pg, attachments[num_attachments].clearValue.color.float32);
num_attachments++;
} else {
float blend_constants[4];
pgraph_get_clear_color(pg, blend_constants);
vkCmdSetScissor(r->command_buffer, 0, 1, &clear_rect.rect);
vkCmdSetBlendConstants(r->command_buffer, blend_constants);
vkCmdDraw(r->command_buffer, 3, 1, 0, 0);
}
}
if (write_zeta && r->zeta_binding) {
int stencil_value = 0;
float depth_value = 1.0;
pgraph_get_clear_depth_stencil_value(pg, &depth_value, &stencil_value);
VkImageAspectFlags aspect = 0;
if (parameter & NV097_CLEAR_SURFACE_Z) {
aspect |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
if ((parameter & NV097_CLEAR_SURFACE_STENCIL) &&
(r->zeta_binding->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT)) {
aspect |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
attachments[num_attachments++] = (VkClearAttachment){
.aspectMask = aspect,
.clearValue.depthStencil.depth = depth_value,
.clearValue.depthStencil.stencil = stencil_value,
};
}
if (num_attachments) {
vkCmdClearAttachments(r->command_buffer, num_attachments, attachments,
1, &clear_rect);
}
end_draw(pg);
pg->clearing = false;
pgraph_vk_set_surface_dirty(pg, write_color, write_zeta);
NV2A_VK_DGROUP_END();
}
#if 0
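/*
 * Disabled debug helper, excerpted from a draw-arrays loop: it assumes pg,
 * r and the loop index i from its original surrounding context. Kept for
 * reference only.
 */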
static void pgraph_vk_debug_attrs(NV2AState *d)
{
for (int vertex_idx = 0; vertex_idx < pg->draw_arrays_count[i]; vertex_idx++) {
NV2A_VK_DGROUP_BEGIN("Vertex %d+%d", pg->draw_arrays_start[i], vertex_idx);
for (int attr_idx = 0; attr_idx < NV2A_VERTEXSHADER_ATTRIBUTES; attr_idx++) {
VertexAttribute *attr = &pg->vertex_attributes[attr_idx];
if (attr->count) {
char *p = (char *)d->vram_ptr + r->attribute_offsets[attr_idx] + (pg->draw_arrays_start[i] + vertex_idx) * attr->stride;
NV2A_VK_DGROUP_BEGIN("Attribute %d data at %tx", attr_idx, (ptrdiff_t)(p - (char*)d->vram_ptr));
for (int count_idx = 0; count_idx < attr->count; count_idx++) {
switch (attr->format) {
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
NV2A_VK_DPRINTF("[%d] %f", count_idx, *(float*)p);
p += sizeof(float);
break;
default:
assert(0);
break;
}
}
NV2A_VK_DGROUP_END();
}
}
NV2A_VK_DGROUP_END();
}
}
#endif
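/*
 * Bind one buffer per active vertex binding: bits set in inline_map select
 * the inline vertex buffer for that attribute, otherwise the vertex RAM
 * mirror buffer is used, with the per-attribute offset applied on top of
 * the given base offset.
 */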
static void bind_vertex_buffer(PGRAPHState *pg, uint16_t inline_map,
VkDeviceSize offset)
{
PGRAPHVkState *r = pg->vk_renderer_state;
if (r->num_active_vertex_binding_descriptions == 0) {
return;
}
VkBuffer buffers[NV2A_VERTEXSHADER_ATTRIBUTES];
VkDeviceSize offsets[NV2A_VERTEXSHADER_ATTRIBUTES];
for (int i = 0; i < r->num_active_vertex_binding_descriptions; i++) {
int attr_idx = r->vertex_attribute_descriptions[i].location;
int buffer_idx = (inline_map & (1 << attr_idx)) ? BUFFER_VERTEX_INLINE :
BUFFER_VERTEX_RAM;
buffers[i] = r->storage_buffers[buffer_idx].buffer;
offsets[i] = offset + r->vertex_attribute_offsets[attr_idx];
}
vkCmdBindVertexBuffers(r->command_buffer, 0,
r->num_active_vertex_binding_descriptions, buffers,
offsets);
}
static void bind_inline_vertex_buffer(PGRAPHState *pg, VkDeviceSize offset)
{
bind_vertex_buffer(pg, 0xffff, offset);
}
void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta)
{
NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", color, zeta,
pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg));
PGRAPHVkState *r = pg->vk_renderer_state;
/* FIXME: Does this apply to CLEARs too? */
color = color && pgraph_color_write_enabled(pg);
zeta = zeta && pgraph_zeta_write_enabled(pg);
pg->surface_color.draw_dirty |= color;
pg->surface_zeta.draw_dirty |= zeta;
if (r->color_binding) {
r->color_binding->draw_dirty |= color;
r->color_binding->frame_time = pg->frame_time;
r->color_binding->cleared = false;
}
if (r->zeta_binding) {
r->zeta_binding->draw_dirty |= zeta;
r->zeta_binding->frame_time = pg->frame_time;
r->zeta_binding->cleared = false;
}
}
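/*
 * Make sure `size` bytes can be staged into the given storage buffer,
 * flushing all pending work to free space if necessary. Returns true
 * when a flush occurred.
 */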
static bool ensure_buffer_space(PGRAPHState *pg, int index, VkDeviceSize size)
{
if (!pgraph_vk_buffer_has_space_for(pg, index, size, 1)) {
pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
return true;
}
return false;
}
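/*
 * Component size and count for each vertex format the attribute
 * translation can produce, e.g. VK_FORMAT_R16G16_SNORM -> size 2,
 * count 2. The size assert below catches formats missing from the table.
 */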
static void get_size_and_count_for_format(VkFormat fmt, size_t *size, size_t *count)
{
static const struct {
size_t size;
size_t count;
} table[] = {
[VK_FORMAT_R8_UNORM] = { 1, 1 },
[VK_FORMAT_R8G8_UNORM] = { 1, 2 },
[VK_FORMAT_R8G8B8_UNORM] = { 1, 3 },
[VK_FORMAT_R8G8B8A8_UNORM] = { 1, 4 },
[VK_FORMAT_R16_SNORM] = { 2, 1 },
[VK_FORMAT_R16G16_SNORM] = { 2, 2 },
[VK_FORMAT_R16G16B16_SNORM] = { 2, 3 },
[VK_FORMAT_R16G16B16A16_SNORM] = { 2, 4 },
[VK_FORMAT_R16_SSCALED] = { 2, 1 },
[VK_FORMAT_R16G16_SSCALED] = { 2, 2 },
[VK_FORMAT_R16G16B16_SSCALED] = { 2, 3 },
[VK_FORMAT_R16G16B16A16_SSCALED] = { 2, 4 },
[VK_FORMAT_R32_SFLOAT] = { 4, 1 },
[VK_FORMAT_R32G32_SFLOAT] = { 4, 2 },
[VK_FORMAT_R32G32B32_SFLOAT] = { 4, 3 },
[VK_FORMAT_R32G32B32A32_SFLOAT] = { 4, 4 },
[VK_FORMAT_R32_SINT] = { 4, 1 },
};
assert(fmt < ARRAY_SIZE(table));
assert(table[fmt].size);
*size = table[fmt].size;
*count = table[fmt].count;
}
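/*
 * The NV2A accepts vertex attribute offsets and strides with arbitrary
 * byte alignment, but Vulkan requires attribute fetch addresses to be
 * aligned to the format's component size. VertexBufferRemap describes
 * which attributes violate that requirement and where an aligned,
 * tightly packed copy of each will live in the inline vertex buffer.
 */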
typedef struct VertexBufferRemap {
uint16_t attributes;
size_t buffer_space_required;
struct {
VkDeviceAddress offset;
VkDeviceSize stride;
} map[NV2A_VERTEXSHADER_ATTRIBUTES];
} VertexBufferRemap;
static VertexBufferRemap remap_unaligned_attributes(PGRAPHState *pg,
uint32_t num_vertices)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VertexBufferRemap remap = {0};
VkDeviceAddress output_offset = 0;
for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) {
int desc_loc = r->vertex_attribute_to_description_location[attr_id];
if (desc_loc < 0) {
continue;
}
VkVertexInputBindingDescription *desc =
&r->vertex_binding_descriptions[desc_loc];
VkVertexInputAttributeDescription *attr =
&r->vertex_attribute_descriptions[desc_loc];
size_t element_size, element_count;
get_size_and_count_for_format(attr->format, &element_size, &element_count);
bool offset_valid =
(r->vertex_attribute_offsets[attr_id] % element_size == 0);
bool stride_valid = (desc->stride % element_size == 0);
if (offset_valid && stride_valid) {
continue;
}
remap.attributes |= 1 << attr_id;
remap.map[attr_id].offset = ROUND_UP(output_offset, element_size);
remap.map[attr_id].stride = element_size * element_count;
// fprintf(stderr,
// "attr %02d remapped: "
// "%08" HWADDR_PRIx "->%08" HWADDR_PRIx " "
// "stride=%d->%zd\n",
// attr_id, r->vertex_attribute_offsets[attr_id],
// remap.map[attr_id].offset, desc->stride,
// remap.map[attr_id].stride);
output_offset =
remap.map[attr_id].offset + remap.map[attr_id].stride * num_vertices;
}
remap.buffer_space_required = output_offset;
return remap;
}
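/*
 * Execute the remap computed above: de-interleave each affected attribute
 * out of guest RAM into the inline staging buffer, one packed element per
 * vertex, then retarget the attribute's offset and binding stride at the
 * staged copy.
 */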
static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg,
VertexBufferRemap remap,
uint32_t start_vertex,
uint32_t num_vertices)
{
NV2AState *d = container_of(pg, NV2AState, pgraph);
PGRAPHVkState *r = pg->vk_renderer_state;
StorageBuffer *buffer = &r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING];
r->vertex_buffer_inline = remap.attributes;
if (!remap.attributes) {
return;
}
VkDeviceSize starting_offset = ROUND_UP(buffer->buffer_offset, 16);
size_t total_space_required =
(starting_offset - buffer->buffer_offset) + remap.buffer_space_required;
ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, total_space_required);
assert(pgraph_vk_buffer_has_space_for(pg, BUFFER_VERTEX_INLINE_STAGING,
total_space_required, 1));
buffer->buffer_offset = starting_offset; // Aligned
// FIXME: SIMD memcpy
// FIXME: Caching
// FIXME: Account for only what is drawn
assert(start_vertex == 0);
assert(buffer->mapped);
// Copy vertex data
for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) {
if (!(remap.attributes & (1 << attr_id))) {
continue;
}
int bind_desc_loc =
r->vertex_attribute_to_description_location[attr_id];
assert(bind_desc_loc >= 0);
VkVertexInputBindingDescription *bind_desc =
&r->vertex_binding_descriptions[bind_desc_loc];
VkDeviceSize attr_buffer_offset =
buffer->buffer_offset + remap.map[attr_id].offset;
uint8_t *out_ptr = buffer->mapped + attr_buffer_offset;
uint8_t *in_ptr = d->vram_ptr + r->vertex_attribute_offsets[attr_id];
for (int vertex_id = 0; vertex_id < num_vertices; vertex_id++) {
memcpy(out_ptr, in_ptr, remap.map[attr_id].stride);
out_ptr += remap.map[attr_id].stride;
in_ptr += bind_desc->stride;
}
r->vertex_attribute_offsets[attr_id] = attr_buffer_offset;
bind_desc->stride = remap.map[attr_id].stride;
}
buffer->buffer_offset += remap.buffer_space_required;
}
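/*
 * Emit Vulkan draw commands for the geometry accumulated since
 * NV097_SET_BEGIN_END. Exactly one of four mutually exclusive submission
 * paths applies:
 *  - draw arrays:     ranges of vertices read directly from guest RAM
 *  - inline elements: an index list referencing guest RAM vertex arrays
 *  - inline buffer:   immediate-mode per-attribute data, staged as vec4s
 *  - inline array:    interleaved vertex data pushed through the FIFO
 */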
void pgraph_vk_flush_draw(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
if (!(r->color_binding || r->zeta_binding)) {
NV2A_VK_DPRINTF("No binding present!!!\n");
return;
}
r->num_vertex_ram_buffer_syncs = 0;
if (pg->draw_arrays_length) {
NV2A_VK_DGROUP_BEGIN("Draw Arrays");
nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);
assert(pg->inline_elements_length == 0);
assert(pg->inline_buffer_length == 0);
assert(pg->inline_array_length == 0);
pgraph_vk_bind_vertex_attributes(d, pg->draw_arrays_min_start,
pg->draw_arrays_max_count - 1, false,
0, pg->draw_arrays_max_count - 1);
        uint32_t min_element = (uint32_t)-1;
uint32_t max_element = 0;
for (int i = 0; i < pg->draw_arrays_length; i++) {
min_element = MIN(pg->draw_arrays_start[i], min_element);
max_element = MAX(max_element, pg->draw_arrays_start[i] + pg->draw_arrays_count[i]);
}
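        /*
         * The queued ranges may be disjoint and unordered; max_element
         * bounds how many vertices the unaligned-attribute repack below
         * must cover.
         */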
sync_vertex_ram_buffer(pg);
VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element);
copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element);
begin_pre_draw(pg);
begin_draw(pg);
bind_vertex_buffer(pg, remap.attributes, 0);
for (int i = 0; i < pg->draw_arrays_length; i++) {
uint32_t start = pg->draw_arrays_start[i],
count = pg->draw_arrays_count[i];
NV2A_VK_DPRINTF("- [%d] Start:%d Count:%d", i, start, count);
vkCmdDraw(r->command_buffer, count, 1, start, 0);
}
end_draw(pg);
NV2A_VK_DGROUP_END();
} else if (pg->inline_elements_length) {
NV2A_VK_DGROUP_BEGIN("Inline Elements");
assert(pg->inline_buffer_length == 0);
assert(pg->inline_array_length == 0);
nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);
size_t index_data_size =
pg->inline_elements_length * sizeof(pg->inline_elements[0]);
ensure_buffer_space(pg, BUFFER_INDEX_STAGING, index_data_size);
uint32_t min_element = (uint32_t)-1;
uint32_t max_element = 0;
for (int i = 0; i < pg->inline_elements_length; i++) {
max_element = MAX(pg->inline_elements[i], max_element);
min_element = MIN(pg->inline_elements[i], min_element);
}
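        /*
         * min_element/max_element bound the vertex range that attribute
         * conversion and the unaligned-attribute repack must cover for
         * this index list.
         */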
pgraph_vk_bind_vertex_attributes(
d, min_element, max_element, false, 0,
pg->inline_elements[pg->inline_elements_length - 1]);
sync_vertex_ram_buffer(pg);
VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element + 1);
copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element + 1);
begin_pre_draw(pg);
VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer(
pg, pg->inline_elements, index_data_size);
begin_draw(pg);
bind_vertex_buffer(pg, remap.attributes, 0);
vkCmdBindIndexBuffer(r->command_buffer,
r->storage_buffers[BUFFER_INDEX].buffer,
buffer_offset, VK_INDEX_TYPE_UINT32);
vkCmdDrawIndexed(r->command_buffer, pg->inline_elements_length, 1, 0, 0,
0);
end_draw(pg);
NV2A_VK_DGROUP_END();
} else if (pg->inline_buffer_length) {
NV2A_VK_DGROUP_BEGIN("Inline Buffer");
nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
assert(pg->inline_array_length == 0);
size_t vertex_data_size = pg->inline_buffer_length * sizeof(float) * 4;
void *data[NV2A_VERTEXSHADER_ATTRIBUTES];
size_t sizes[NV2A_VERTEXSHADER_ATTRIBUTES];
size_t offset = 0;
pgraph_vk_bind_vertex_attributes_inline(d);
for (int i = 0; i < r->num_active_vertex_attribute_descriptions; i++) {
int attr_index = r->vertex_attribute_descriptions[i].location;
VertexAttribute *attr = &pg->vertex_attributes[attr_index];
r->vertex_attribute_offsets[attr_index] = offset;
data[i] = attr->inline_buffer;
sizes[i] = vertex_data_size;
attr->inline_buffer_populated = false;
offset += vertex_data_size;
}
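        /*
         * Each active attribute has been accumulated as a packed vec4
         * stream of inline_buffer_length elements; stage the streams back
         * to back, with vertex_attribute_offsets pointing at each slot.
         */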
ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, offset);
begin_pre_draw(pg);
VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
pg, data, sizes, r->num_active_vertex_attribute_descriptions);
begin_draw(pg);
bind_inline_vertex_buffer(pg, buffer_offset);
vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0);
end_draw(pg);
NV2A_VK_DGROUP_END();
} else if (pg->inline_array_length) {
NV2A_VK_DGROUP_BEGIN("Inline Array");
nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);
        /* inline_array_length counts 32-bit words pushed through the FIFO */
        VkDeviceSize inline_array_data_size = pg->inline_array_length * 4;
ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING,
inline_array_data_size);
unsigned int offset = 0;
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attr = &pg->vertex_attributes[i];
if (attr->count == 0) {
continue;
}
/* FIXME: Double check */
offset = ROUND_UP(offset, attr->size);
attr->inline_array_offset = offset;
NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", i,
attr->size, attr->count);
offset += attr->size * attr->count;
offset = ROUND_UP(offset, attr->size);
}
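        /*
         * `offset` now equals the aligned size of one interleaved vertex;
         * dividing the pushed data size (inline_array_length dwords) by it
         * gives the vertex count.
         */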
unsigned int vertex_size = offset;
unsigned int index_count = pg->inline_array_length * 4 / vertex_size;
NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
pgraph_vk_bind_vertex_attributes(d, 0, index_count - 1, true,
vertex_size, index_count - 1);
begin_pre_draw(pg);
void *inline_array_data = pg->inline_array;
VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
pg, &inline_array_data, &inline_array_data_size, 1);
begin_draw(pg);
bind_inline_vertex_buffer(pg, buffer_offset);
vkCmdDraw(r->command_buffer, index_count, 1, 0, 0);
end_draw(pg);
NV2A_VK_DGROUP_END();
} else {
NV2A_VK_DPRINTF("EMPTY NV097_SET_BEGIN_END");
NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
}
}