/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/fast-hash.h"
#include "renderer.h"

void pgraph_vk_draw_begin(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;

    NV2A_VK_DPRINTF("NV097_SET_BEGIN_END: 0x%x", d->pgraph.primitive_mode);

    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
    bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
    bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
    bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
    bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
    bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
    bool stencil_test =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) &
        NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
    bool is_nop_draw = !(color_write || depth_test || stencil_test);

    pgraph_vk_surface_update(d, true, true, depth_test || stencil_test);

    if (is_nop_draw) {
        NV2A_VK_DPRINTF("nop!");
        return;
    }
}

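/*
 * Map the NV2A primitive type (plus the current polygon mode) to the nearest
 * Vulkan topology. Line loops, quads, quad strips and polygons have no direct
 * Vulkan equivalent; adjacency/strip topologies are used as stand-ins here,
 * presumably reconstructed downstream (e.g. by a geometry shader), as the
 * FIXMEs below note.
 */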
static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    int polygon_mode = r->shader_binding->state.polygon_front_mode;
    int primitive_mode = r->shader_binding->state.primitive_mode;

    if (polygon_mode == POLY_MODE_POINT) {
        return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
    }

    // FIXME: Replace with LUT
    switch (primitive_mode) {
    case PRIM_TYPE_POINTS:
        return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
    case PRIM_TYPE_LINES:
        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
    case PRIM_TYPE_LINE_LOOP:
        // FIXME: A line loop is a line strip whose last vertex also connects
        // back to the first; LINE_STRIP drops that closing segment.
        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
    case PRIM_TYPE_LINE_STRIP:
        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
    case PRIM_TYPE_TRIANGLES:
        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
    case PRIM_TYPE_TRIANGLE_STRIP:
        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
    case PRIM_TYPE_TRIANGLE_FAN:
        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
    case PRIM_TYPE_QUADS:
        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
    case PRIM_TYPE_QUAD_STRIP:
        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
    case PRIM_TYPE_POLYGON:
        if (polygon_mode == POLY_MODE_LINE) {
            return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME
        } else if (polygon_mode == POLY_MODE_FILL) {
            return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
        }
        assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode");
        return 0;
    default:
        assert(!"Invalid primitive_mode");
        return 0;
    }
}

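/*
 * Compiled pipelines are kept in an LRU cache keyed by a hash of PipelineKey.
 * The callbacks below initialize entries, compare keys on lookup, and destroy
 * the Vulkan objects when an entry is evicted; the draw_time check guards
 * against evicting a pipeline that the in-flight command buffer may still
 * reference.
 */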
static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
    PipelineBinding *snode = container_of(node, PipelineBinding, node);
    snode->layout = VK_NULL_HANDLE;
    snode->pipeline = VK_NULL_HANDLE;
    snode->draw_time = 0;
}

static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node)
{
    PGRAPHVkState *r = container_of(lru, PGRAPHVkState, pipeline_cache);
    PipelineBinding *snode = container_of(node, PipelineBinding, node);

    assert((!r->in_command_buffer ||
            snode->draw_time < r->command_buffer_start_time) &&
           "Pipeline evicted while in use!");

    vkDestroyPipeline(r->device, snode->pipeline, NULL);
    snode->pipeline = VK_NULL_HANDLE;

    vkDestroyPipelineLayout(r->device, snode->layout, NULL);
    snode->layout = VK_NULL_HANDLE;
}

static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
    PipelineBinding *snode = container_of(node, PipelineBinding, node);
    return memcmp(&snode->key, key, sizeof(PipelineKey));
}

static void init_pipeline_cache(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkPipelineCacheCreateInfo cache_info = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
        .flags = 0,
        .initialDataSize = 0,
        .pInitialData = NULL,
        .pNext = NULL,
    };
    VK_CHECK(vkCreatePipelineCache(r->device, &cache_info, NULL,
                                   &r->vk_pipeline_cache));

    const size_t pipeline_cache_size = 2048;
    lru_init(&r->pipeline_cache);
    r->pipeline_cache_entries =
        g_malloc_n(pipeline_cache_size, sizeof(PipelineBinding));
    assert(r->pipeline_cache_entries != NULL);
    for (int i = 0; i < pipeline_cache_size; i++) {
        lru_add_free(&r->pipeline_cache, &r->pipeline_cache_entries[i].node);
    }

    r->pipeline_cache.init_node = pipeline_cache_entry_init;
    r->pipeline_cache.compare_nodes = pipeline_cache_entry_compare;
    r->pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict;
}

static void finalize_pipeline_cache(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    lru_flush(&r->pipeline_cache);
    g_free(r->pipeline_cache_entries);
    r->pipeline_cache_entries = NULL;

    vkDestroyPipelineCache(r->device, r->vk_pipeline_cache, NULL);
}

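/*
 * Shaders for the clear pipeline. The vertex shader derives a position
 * directly from gl_VertexIndex (indices 0..2 produce the oversized triangle
 * (-1,-1), (3,-1), (-1,3), which covers all of clip space), so no vertex
 * buffer is required. The fragment shader just writes opaque white; the
 * actual clear color is applied via the blend constants (dst factor ZERO,
 * src factor CONSTANT_COLOR/CONSTANT_ALPHA).
 */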
static char const *const quad_glsl =
    "#version 450\n"
    "void main()\n"
    "{\n"
    "    float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n"
    "    float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n"
    "    gl_Position = vec4(x, y, 0, 1);\n"
    "}\n";

static char const *const solid_frag_glsl =
    "#version 450\n"
    "layout(location = 0) out vec4 fragColor;\n"
    "void main()\n"
    "{\n"
    "    fragColor = vec4(1.0);\n"
    "}\n";

static void init_clear_shaders(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    r->quad_vert_module = pgraph_vk_create_shader_module_from_glsl(
        r, VK_SHADER_STAGE_VERTEX_BIT, quad_glsl);
    r->solid_frag_module = pgraph_vk_create_shader_module_from_glsl(
        r, VK_SHADER_STAGE_FRAGMENT_BIT, solid_frag_glsl);
}

static void finalize_clear_shaders(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    pgraph_vk_destroy_shader_module(r, r->quad_vert_module);
    pgraph_vk_destroy_shader_module(r, r->solid_frag_module);
}

static void init_render_passes(PGRAPHVkState *r)
{
    r->render_passes = g_array_new(false, false, sizeof(RenderPass));
}

static void finalize_render_passes(PGRAPHVkState *r)
{
    for (int i = 0; i < r->render_passes->len; i++) {
        RenderPass *p = &g_array_index(r->render_passes, RenderPass, i);
        vkDestroyRenderPass(r->device, p->render_pass, NULL);
    }
    g_array_free(r->render_passes, true);
    r->render_passes = NULL;
}

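/*
 * A single semaphore/fence pair suffices because submissions are fully
 * serialized in pgraph_vk_finish(): the aux command buffer signals the
 * semaphore, the main command buffer waits on it, and the host then blocks
 * on the fence.
 */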
void pgraph_vk_init_pipelines(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    init_pipeline_cache(pg);
    init_clear_shaders(pg);
    init_render_passes(r);

    VkSemaphoreCreateInfo semaphore_info = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO
    };
    VK_CHECK(vkCreateSemaphore(r->device, &semaphore_info, NULL,
                               &r->command_buffer_semaphore));

    VkFenceCreateInfo fence_info = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    };
    VK_CHECK(
        vkCreateFence(r->device, &fence_info, NULL, &r->command_buffer_fence));
}

void pgraph_vk_finalize_pipelines(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    finalize_clear_shaders(pg);
    finalize_pipeline_cache(pg);
    finalize_render_passes(r);

    vkDestroyFence(r->device, r->command_buffer_fence, NULL);
    vkDestroySemaphore(r->device, r->command_buffer_semaphore, NULL);
}

static void init_render_pass_state(PGRAPHState *pg, RenderPassState *state)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    state->color_format = r->color_binding ?
                              r->color_binding->host_fmt.vk_format :
                              VK_FORMAT_UNDEFINED;
    state->zeta_format = r->zeta_binding ? r->zeta_binding->host_fmt.vk_format :
                                           VK_FORMAT_UNDEFINED;
}

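/*
 * Render passes are created lazily, one per (color format, zeta format)
 * combination, and cached in r->render_passes. All attachments use LOAD and
 * STORE ops since guest surface contents must survive across passes.
 */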
static VkRenderPass create_render_pass(PGRAPHVkState *r, RenderPassState *state)
{
    NV2A_VK_DPRINTF("Creating render pass");

    VkAttachmentDescription attachments[2];
    int num_attachments = 0;

    bool color = state->color_format != VK_FORMAT_UNDEFINED;
    bool zeta = state->zeta_format != VK_FORMAT_UNDEFINED;

    VkAttachmentReference color_reference;
    if (color) {
        attachments[num_attachments] = (VkAttachmentDescription){
            .format = state->color_format,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
            .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        };
        color_reference = (VkAttachmentReference){
            num_attachments, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
        };
        num_attachments++;
    }

    VkAttachmentReference depth_reference;
    if (zeta) {
        attachments[num_attachments] = (VkAttachmentDescription){
            .format = state->zeta_format,
            .samples = VK_SAMPLE_COUNT_1_BIT,
            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
            .initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
            .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
        };
        depth_reference = (VkAttachmentReference){
            num_attachments, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
        };
        num_attachments++;
    }

    VkSubpassDependency dependency = {
        .srcSubpass = VK_SUBPASS_EXTERNAL,
    };

    if (color) {
        dependency.srcStageMask |=
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        dependency.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                    VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        dependency.dstStageMask |=
            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                    VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    }

    if (zeta) {
        dependency.srcStageMask |=
            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        dependency.srcAccessMask |=
            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        dependency.dstStageMask |=
            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        dependency.dstAccessMask |=
            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
            VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
    }

    VkSubpassDescription subpass = {
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
        .colorAttachmentCount = color ? 1 : 0,
        .pColorAttachments = color ? &color_reference : NULL,
        .pDepthStencilAttachment = zeta ? &depth_reference : NULL,
    };

    VkRenderPassCreateInfo renderpass_create_info = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        .attachmentCount = num_attachments,
        .pAttachments = attachments,
        .subpassCount = 1,
        .pSubpasses = &subpass,
        .dependencyCount = 1,
        .pDependencies = &dependency,
    };
    VkRenderPass render_pass;
    VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL,
                                &render_pass));
    return render_pass;
}

static VkRenderPass add_new_render_pass(PGRAPHVkState *r, RenderPassState *state)
{
    RenderPass new_pass;
    memcpy(&new_pass.state, state, sizeof(*state));
    new_pass.render_pass = create_render_pass(r, state);
    g_array_append_vals(r->render_passes, &new_pass, 1);
    return new_pass.render_pass;
}

static VkRenderPass get_render_pass(PGRAPHVkState *r, RenderPassState *state)
{
    for (int i = 0; i < r->render_passes->len; i++) {
        RenderPass *p = &g_array_index(r->render_passes, RenderPass, i);
        if (!memcmp(&p->state, state, sizeof(*state))) {
            return p->render_pass;
        }
    }
    return add_new_render_pass(r, state);
}

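/*
 * Framebuffers are created against the current render pass and surface
 * bindings, and accumulate in r->framebuffers[] until the command buffer is
 * finished; if the array fills up, work is flushed early to free slots.
 */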
static void create_frame_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    NV2A_VK_DPRINTF("Creating framebuffer");

    assert(r->color_binding || r->zeta_binding);

    if (r->framebuffer_index >= ARRAY_SIZE(r->framebuffers)) {
        pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
    }

    VkImageView attachments[2];
    int attachment_count = 0;

    if (r->color_binding) {
        attachments[attachment_count++] = r->color_binding->image_view;
    }
    if (r->zeta_binding) {
        attachments[attachment_count++] = r->zeta_binding->image_view;
    }

    SurfaceBinding *binding = r->color_binding ?: r->zeta_binding;

    VkFramebufferCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .renderPass = r->render_pass,
        .attachmentCount = attachment_count,
        .pAttachments = attachments,
        .width = binding->width,
        .height = binding->height,
        .layers = 1,
    };
    pgraph_apply_scaling_factor(pg, &create_info.width, &create_info.height);
    VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL,
                                 &r->framebuffers[r->framebuffer_index++]));
}

static void destroy_framebuffers(PGRAPHState *pg)
{
    NV2A_VK_DPRINTF("Destroying framebuffer");
    PGRAPHVkState *r = pg->vk_renderer_state;

    for (int i = 0; i < r->framebuffer_index; i++) {
        vkDestroyFramebuffer(r->device, r->framebuffers[i], NULL);
        r->framebuffers[i] = VK_NULL_HANDLE;
    }
    r->framebuffer_index = 0;
}

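/*
 * Clears are performed as draws. This pipeline rasterizes a full-screen quad
 * with color write masks, depth write and stencil state derived from the
 * clear parameter. The solid fragment shader stage is only needed for
 * partial color clears, where the clear color enters via dynamic blend
 * constants; full-channel color clears are handled with VkClearAttachment in
 * pgraph_vk_clear_surface() instead.
 */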
static void create_clear_pipeline(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    NV2A_VK_DGROUP_BEGIN("Creating clear pipeline");

    PipelineKey key;
    memset(&key, 0, sizeof(key));
    key.clear = true;
    init_render_pass_state(pg, &key.render_pass_state);

    key.regs[0] = r->clear_parameter;

    uint64_t hash = fast_hash((void *)&key, sizeof(key));
    LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
    PipelineBinding *snode = container_of(node, PipelineBinding, node);

    if (snode->pipeline != VK_NULL_HANDLE) {
        NV2A_VK_DPRINTF("Cache hit");
        r->pipeline_binding_changed = r->pipeline_binding != snode;
        r->pipeline_binding = snode;
        NV2A_VK_DGROUP_END();
        return;
    }

    NV2A_VK_DPRINTF("Cache miss");
    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);
    memcpy(&snode->key, &key, sizeof(key));

    bool clear_any_color_channels =
        r->clear_parameter & NV097_CLEAR_SURFACE_COLOR;
    bool clear_all_color_channels =
        (r->clear_parameter & NV097_CLEAR_SURFACE_COLOR) ==
        (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | NV097_CLEAR_SURFACE_B |
         NV097_CLEAR_SURFACE_A);
    bool partial_color_clear =
        clear_any_color_channels && !clear_all_color_channels;

    int num_active_shader_stages = 0;
    VkPipelineShaderStageCreateInfo shader_stages[2];
    shader_stages[num_active_shader_stages++] =
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_VERTEX_BIT,
            .module = r->quad_vert_module->module,
            .pName = "main",
        };
    if (partial_color_clear) {
        shader_stages[num_active_shader_stages++] =
            (VkPipelineShaderStageCreateInfo){
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
                .module = r->solid_frag_module->module,
                .pName = "main",
            };
    }

    VkPipelineVertexInputStateCreateInfo vertex_input = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
    };

    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
        .primitiveRestartEnable = VK_FALSE,
    };

    VkPipelineViewportStateCreateInfo viewport_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .viewportCount = 1,
        .scissorCount = 1,
    };

    VkPipelineRasterizationStateCreateInfo rasterizer = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .depthClampEnable = VK_FALSE,
        .rasterizerDiscardEnable = VK_FALSE,
        .polygonMode = VK_POLYGON_MODE_FILL,
        .lineWidth = 1.0f,
        .cullMode = VK_CULL_MODE_BACK_BIT,
        .frontFace = VK_FRONT_FACE_CLOCKWISE,
        .depthBiasEnable = VK_FALSE,
    };

    VkPipelineMultisampleStateCreateInfo multisampling = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .sampleShadingEnable = VK_FALSE,
        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
    };

    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .depthTestEnable = VK_TRUE,
        .depthWriteEnable =
            (r->clear_parameter & NV097_CLEAR_SURFACE_Z) ? VK_TRUE : VK_FALSE,
        .depthCompareOp = VK_COMPARE_OP_ALWAYS,
        .depthBoundsTestEnable = VK_FALSE,
    };

    if (r->clear_parameter & NV097_CLEAR_SURFACE_STENCIL) {
        depth_stencil.stencilTestEnable = VK_TRUE;
        depth_stencil.front.failOp = VK_STENCIL_OP_REPLACE;
        depth_stencil.front.passOp = VK_STENCIL_OP_REPLACE;
        depth_stencil.front.depthFailOp = VK_STENCIL_OP_REPLACE;
        depth_stencil.front.compareOp = VK_COMPARE_OP_ALWAYS;
        depth_stencil.front.compareMask = 0xff;
        depth_stencil.front.writeMask = 0xff;
        depth_stencil.front.reference = 0xff;
        depth_stencil.back = depth_stencil.front;
    }

    VkColorComponentFlags write_mask = 0;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_R)
        write_mask |= VK_COLOR_COMPONENT_R_BIT;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_G)
        write_mask |= VK_COLOR_COMPONENT_G_BIT;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_B)
        write_mask |= VK_COLOR_COMPONENT_B_BIT;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_A)
        write_mask |= VK_COLOR_COMPONENT_A_BIT;

    VkPipelineColorBlendAttachmentState color_blend_attachment = {
        .colorWriteMask = write_mask,
        .blendEnable = VK_TRUE,
        .colorBlendOp = VK_BLEND_OP_ADD,
        .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
        .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR,
        .alphaBlendOp = VK_BLEND_OP_ADD,
        .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
        .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA,
    };

    VkPipelineColorBlendStateCreateInfo color_blending = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .logicOpEnable = VK_FALSE,
        .logicOp = VK_LOGIC_OP_COPY,
        .attachmentCount = r->color_binding ? 1 : 0,
        .pAttachments = r->color_binding ? &color_blend_attachment : NULL,
    };

    VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
                                        VK_DYNAMIC_STATE_SCISSOR,
                                        VK_DYNAMIC_STATE_BLEND_CONSTANTS };
    VkPipelineDynamicStateCreateInfo dynamic_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .dynamicStateCount = partial_color_clear ? 3 : 2,
        .pDynamicStates = dynamic_states,
    };

    VkPipelineLayoutCreateInfo pipeline_layout_info = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    };

    VkPipelineLayout layout;
    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
                                    &layout));

    VkGraphicsPipelineCreateInfo pipeline_info = {
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .stageCount = num_active_shader_stages,
        .pStages = shader_stages,
        .pVertexInputState = &vertex_input,
        .pInputAssemblyState = &input_assembly,
        .pViewportState = &viewport_state,
        .pRasterizationState = &rasterizer,
        .pMultisampleState = &multisampling,
        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
        .pColorBlendState = &color_blending,
        .pDynamicState = &dynamic_state,
        .layout = layout,
        .renderPass = get_render_pass(r, &key.render_pass_state),
        .subpass = 0,
        .basePipelineHandle = VK_NULL_HANDLE,
    };

    VkPipeline pipeline;
    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
                                       &pipeline_info, NULL, &pipeline));

    snode->pipeline = pipeline;
    snode->layout = layout;
    snode->render_pass = pipeline_info.renderPass;
    snode->draw_time = pg->draw_time;

    r->pipeline_binding = snode;
    r->pipeline_binding_changed = true;

    NV2A_VK_DGROUP_END();
}

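/*
 * The dirty checks below give draws a cheap early-out: if the render pass
 * state, shader/texture bindings, tracked PGRAPH registers and active vertex
 * layout are all unchanged, the currently bound pipeline can be reused
 * without building and hashing a fresh PipelineKey.
 */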
static bool check_render_pass_dirty(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(r->pipeline_binding);

    RenderPassState state;
    init_render_pass_state(pg, &state);

    return memcmp(&state, &r->pipeline_binding->key.render_pass_state,
                  sizeof(state)) != 0;
}

// Quickly check for any state changes that would require more analysis
static bool check_pipeline_dirty(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(r->pipeline_binding);

    if (r->shader_bindings_changed || r->texture_bindings_changed ||
        check_render_pass_dirty(pg)) {
        return true;
    }

    const unsigned int regs[] = {
        NV_PGRAPH_BLEND,       NV_PGRAPH_BLENDCOLOR,
        NV_PGRAPH_CONTROL_0,   NV_PGRAPH_CONTROL_1,
        NV_PGRAPH_CONTROL_2,   NV_PGRAPH_CONTROL_3,
        NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE,
        NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR,
    };

    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
        if (pgraph_is_reg_dirty(pg, regs[i])) {
            return true;
        }
    }

    // FIXME: Use dirty bits instead
    if (memcmp(r->vertex_attribute_descriptions,
               r->pipeline_binding->key.attribute_descriptions,
               r->num_active_vertex_attribute_descriptions *
                   sizeof(r->vertex_attribute_descriptions[0])) ||
        memcmp(r->vertex_binding_descriptions,
               r->pipeline_binding->key.binding_descriptions,
               r->num_active_vertex_binding_descriptions *
                   sizeof(r->vertex_binding_descriptions[0]))) {
        return true;
    }

    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_NOTDIRTY);

    return false;
}

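/*
 * Build the lookup key for the pipeline cache. The key is zeroed first so
 * that struct padding bytes are deterministic when the key is hashed with
 * fast_hash() and compared with memcmp().
 */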
static void init_pipeline_key(PGRAPHState *pg, PipelineKey *key)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    memset(key, 0, sizeof(*key));
    init_render_pass_state(pg, &key->render_pass_state);
    memcpy(&key->shader_state, &r->shader_binding->state, sizeof(ShaderState));
    memcpy(key->binding_descriptions, r->vertex_binding_descriptions,
           sizeof(key->binding_descriptions[0]) *
               r->num_active_vertex_binding_descriptions);
    memcpy(key->attribute_descriptions, r->vertex_attribute_descriptions,
           sizeof(key->attribute_descriptions[0]) *
               r->num_active_vertex_attribute_descriptions);

    // FIXME: Register masking
    // FIXME: Use more dynamic state updates
    const int regs[] = {
        NV_PGRAPH_BLEND,       NV_PGRAPH_BLENDCOLOR,
        NV_PGRAPH_CONTROL_0,   NV_PGRAPH_CONTROL_1,
        NV_PGRAPH_CONTROL_2,   NV_PGRAPH_CONTROL_3,
        NV_PGRAPH_SETUPRASTER, NV_PGRAPH_ZCOMPRESSOCCLUDE,
        NV_PGRAPH_ZOFFSETBIAS, NV_PGRAPH_ZOFFSETFACTOR,
    };
    assert(ARRAY_SIZE(regs) == ARRAY_SIZE(key->regs));
    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
        key->regs[i] = pgraph_reg_r(pg, regs[i]);
    }
}

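/*
 * Create (or fetch from the cache) the pipeline for a regular draw. Textures
 * and shaders are bound first because the resulting shader state feeds into
 * the key; most draws should hit one of the two cache paths below.
 */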
static void create_pipeline(PGRAPHState *pg)
{
    NV2A_VK_DGROUP_BEGIN("Creating pipeline");

    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;

    pgraph_vk_bind_textures(d);
    pgraph_vk_bind_shaders(pg);

    // FIXME: If nothing was dirty, don't even try creating the key or
    // hashing. Just use the same pipeline.
    if (r->pipeline_binding && !check_pipeline_dirty(pg)) {
        NV2A_VK_DPRINTF("Cache hit");
        NV2A_VK_DGROUP_END();
        return;
    }

    PipelineKey key;
    init_pipeline_key(pg, &key);
    uint64_t hash = fast_hash((void *)&key, sizeof(key));

    static uint64_t last_hash;
    if (hash == last_hash) {
        nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_MERGE);
    }
    last_hash = hash;

    LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
    PipelineBinding *snode = container_of(node, PipelineBinding, node);
    if (snode->pipeline != VK_NULL_HANDLE) {
        NV2A_VK_DPRINTF("Cache hit");
        r->pipeline_binding_changed = r->pipeline_binding != snode;
        r->pipeline_binding = snode;
        NV2A_VK_DGROUP_END();
        return;
    }

    NV2A_VK_DPRINTF("Cache miss");
    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);

    memcpy(&snode->key, &key, sizeof(key));

    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
    bool depth_write = !!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE);
    bool stencil_test =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) &
        NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;

    int num_active_shader_stages = 0;
    VkPipelineShaderStageCreateInfo shader_stages[3];

    shader_stages[num_active_shader_stages++] =
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_VERTEX_BIT,
            .module = r->shader_binding->vertex->module,
            .pName = "main",
        };
    if (r->shader_binding->geometry) {
        shader_stages[num_active_shader_stages++] =
            (VkPipelineShaderStageCreateInfo){
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
                .module = r->shader_binding->geometry->module,
                .pName = "main",
            };
    }
    shader_stages[num_active_shader_stages++] =
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
            .module = r->shader_binding->fragment->module,
            .pName = "main",
        };

    VkPipelineVertexInputStateCreateInfo vertex_input = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .vertexBindingDescriptionCount =
            r->num_active_vertex_binding_descriptions,
        .pVertexBindingDescriptions = r->vertex_binding_descriptions,
        .vertexAttributeDescriptionCount =
            r->num_active_vertex_attribute_descriptions,
        .pVertexAttributeDescriptions = r->vertex_attribute_descriptions,
    };

    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .topology = get_primitive_topology(pg),
        .primitiveRestartEnable = VK_FALSE,
    };

    VkPipelineViewportStateCreateInfo viewport_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .viewportCount = 1,
        .scissorCount = 1,
    };

    void *rasterizer_next_struct = NULL;

    VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state;

    if (r->provoking_vertex_extension_enabled) {
        VkProvokingVertexModeEXT provoking_mode =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
                     NV_PGRAPH_CONTROL_3_SHADEMODE) ==
                    NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ?
                VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT :
                VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;

        provoking_state =
            (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){
                .sType =
                    VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
                .provokingVertexMode = provoking_mode,
            };
        rasterizer_next_struct = &provoking_state;
    } else {
        // FIXME: Handle in shader?
    }

    VkPipelineRasterizationStateCreateInfo rasterizer = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .depthClampEnable = VK_FALSE,
        .rasterizerDiscardEnable = VK_FALSE,
        .polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state
                                                      .polygon_front_mode],
        .lineWidth = 1.0f,
        .frontFace = (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
                      NV_PGRAPH_SETUPRASTER_FRONTFACE) ?
                         VK_FRONT_FACE_COUNTER_CLOCKWISE :
                         VK_FRONT_FACE_CLOCKWISE,
        .depthBiasEnable = VK_FALSE,
        .pNext = rasterizer_next_struct,
    };

    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
        NV_PGRAPH_SETUPRASTER_CULLENABLE) {
        uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
                                      NV_PGRAPH_SETUPRASTER_CULLCTRL);
        assert(cull_face < ARRAY_SIZE(pgraph_cull_face_vk_map));
        rasterizer.cullMode = pgraph_cull_face_vk_map[cull_face];
    } else {
        rasterizer.cullMode = VK_CULL_MODE_NONE;
    }

    VkPipelineMultisampleStateCreateInfo multisampling = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .sampleShadingEnable = VK_FALSE,
        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
    };

    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .depthWriteEnable = depth_write ? VK_TRUE : VK_FALSE,
    };

    if (depth_test) {
        depth_stencil.depthTestEnable = VK_TRUE;
        uint32_t depth_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
                                       NV_PGRAPH_CONTROL_0_ZFUNC);
        assert(depth_func < ARRAY_SIZE(pgraph_depth_func_vk_map));
        depth_stencil.depthCompareOp = pgraph_depth_func_vk_map[depth_func];
    }

    if (stencil_test) {
        depth_stencil.stencilTestEnable = VK_TRUE;
        uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                         NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
        uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                        NV_PGRAPH_CONTROL_1_STENCIL_REF);
        uint32_t mask_read = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                      NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
        uint32_t mask_write = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                       NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE);
        uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
                                    NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
        uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
                                     NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
        uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
                                     NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);

        assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_vk_map));
        assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
        assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
        assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_vk_map));

        depth_stencil.front.failOp = pgraph_stencil_op_vk_map[op_fail];
        depth_stencil.front.passOp = pgraph_stencil_op_vk_map[op_zpass];
        depth_stencil.front.depthFailOp = pgraph_stencil_op_vk_map[op_zfail];
        depth_stencil.front.compareOp =
            pgraph_stencil_func_vk_map[stencil_func];
        depth_stencil.front.compareMask = mask_read;
        depth_stencil.front.writeMask = mask_write;
        depth_stencil.front.reference = stencil_ref;
        depth_stencil.back = depth_stencil.front;
    }

    VkColorComponentFlags write_mask = 0;
    if (control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_R_BIT;
    if (control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_G_BIT;
    if (control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_B_BIT;
    if (control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_A_BIT;

    VkPipelineColorBlendAttachmentState color_blend_attachment = {
        .colorWriteMask = write_mask,
    };

    float blend_constant[4] = { 0, 0, 0, 0 };

    if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
        color_blend_attachment.blendEnable = VK_TRUE;

        uint32_t sfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
                                    NV_PGRAPH_BLEND_SFACTOR);
        uint32_t dfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
                                    NV_PGRAPH_BLEND_DFACTOR);
        assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
        assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
        color_blend_attachment.srcColorBlendFactor =
            pgraph_blend_factor_vk_map[sfactor];
        color_blend_attachment.dstColorBlendFactor =
            pgraph_blend_factor_vk_map[dfactor];
        color_blend_attachment.srcAlphaBlendFactor =
            pgraph_blend_factor_vk_map[sfactor];
        color_blend_attachment.dstAlphaBlendFactor =
            pgraph_blend_factor_vk_map[dfactor];

        uint32_t equation =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_EQN);
        assert(equation < ARRAY_SIZE(pgraph_blend_equation_vk_map));

        color_blend_attachment.colorBlendOp =
            pgraph_blend_equation_vk_map[equation];
        color_blend_attachment.alphaBlendOp =
            pgraph_blend_equation_vk_map[equation];

        uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
        pgraph_argb_pack32_to_rgba_float(blend_color, blend_constant);
    }

    VkPipelineColorBlendStateCreateInfo color_blending = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .logicOpEnable = VK_FALSE,
        .logicOp = VK_LOGIC_OP_COPY,
        .attachmentCount = r->color_binding ? 1 : 0,
        .pAttachments = r->color_binding ? &color_blend_attachment : NULL,
        .blendConstants[0] = blend_constant[0],
        .blendConstants[1] = blend_constant[1],
        .blendConstants[2] = blend_constant[2],
        .blendConstants[3] = blend_constant[3],
    };

    VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT,
                                         VK_DYNAMIC_STATE_SCISSOR };

    VkPipelineDynamicStateCreateInfo dynamic_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .dynamicStateCount = ARRAY_SIZE(dynamic_states),
        .pDynamicStates = dynamic_states,
    };

    // /* Clipping */
    // glEnable(GL_CLIP_DISTANCE0);
    // glEnable(GL_CLIP_DISTANCE1);

    // /* Polygon offset */
    // /* FIXME: GL implementation-specific, maybe do this in VS? */
    // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //     NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE)
    // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //     NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE)
    // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //     NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)
    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
        (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
         NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
         NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
        uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
        float zfactor = *(float *)&zfactor_u32;
        uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
        float zbias = *(float *)&zbias_u32;
        rasterizer.depthBiasEnable = VK_TRUE;
        rasterizer.depthBiasSlopeFactor = zfactor;
        rasterizer.depthBiasConstantFactor = zbias;
    }

    if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
                 NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
        NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
        rasterizer.depthClampEnable = VK_TRUE;
    }

    // FIXME: Dither
    // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
    //     NV_PGRAPH_CONTROL_0_DITHERENABLE)

    // FIXME: Point size

    // FIXME: Edge antialiasing
    // bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING),
    //                               NV_PGRAPH_ANTIALIASING_ENABLE);
    // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //     NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
    //     // FIXME: VK_EXT_line_rasterization
    // }

    // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //     NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
    //     // FIXME: No direct analog. Just do it with MSAA.
    // }

    VkPipelineLayoutCreateInfo pipeline_layout_info = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount = 1,
        .pSetLayouts = &r->descriptor_set_layout,
    };

    VkPushConstantRange push_constant_range;
    if (r->shader_binding->state.use_push_constants_for_uniform_attrs) {
        int num_uniform_attributes =
            __builtin_popcount(r->shader_binding->state.uniform_attrs);
        if (num_uniform_attributes) {
            push_constant_range = (VkPushConstantRange){
                .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
                .offset = 0,
                // FIXME: Minimize push constants
                .size = num_uniform_attributes * 4 * sizeof(float),
            };
            pipeline_layout_info.pushConstantRangeCount = 1;
            pipeline_layout_info.pPushConstantRanges = &push_constant_range;
        }
    }

    VkPipelineLayout layout;
    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
                                    &layout));

    VkGraphicsPipelineCreateInfo pipeline_create_info = {
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .stageCount = num_active_shader_stages,
        .pStages = shader_stages,
        .pVertexInputState = &vertex_input,
        .pInputAssemblyState = &input_assembly,
        .pViewportState = &viewport_state,
        .pRasterizationState = &rasterizer,
        .pMultisampleState = &multisampling,
        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
        .pColorBlendState = &color_blending,
        .pDynamicState = &dynamic_state,
        .layout = layout,
        .renderPass = get_render_pass(r, &key.render_pass_state),
        .subpass = 0,
        .basePipelineHandle = VK_NULL_HANDLE,
    };
    VkPipeline pipeline;
    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
                                       &pipeline_create_info, NULL,
                                       &pipeline));

    snode->pipeline = pipeline;
    snode->layout = layout;
    snode->render_pass = pipeline_create_info.renderPass;
    snode->draw_time = pg->draw_time;

    r->pipeline_binding = snode;
    r->pipeline_binding_changed = true;

    NV2A_VK_DGROUP_END();
}

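/*
 * Uniform vertex attributes (attributes without per-vertex data) are
 * delivered as push constants when the shader was built to consume them that
 * way; each attribute occupies one vec4 in the range.
 */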
static void push_vertex_attr_values(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!r->shader_binding->state.use_push_constants_for_uniform_attrs) {
        return;
    }

    // FIXME: Partial updates

    float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
    int num_uniform_attrs = 0;

    pgraph_get_inline_values(pg, r->shader_binding->state.uniform_attrs, values,
                             &num_uniform_attrs);

    if (num_uniform_attrs > 0) {
        vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout,
                           VK_SHADER_STAGE_VERTEX_BIT, 0,
                           num_uniform_attrs * 4 * sizeof(float), &values);
    }
}

static void bind_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(r->descriptor_set_index >= 1);

    vkCmdBindDescriptorSets(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                            r->pipeline_binding->layout, 0, 1,
                            &r->descriptor_sets[r->descriptor_set_index - 1], 0,
                            NULL);
}

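/*
 * Occlusion queries implement NV2A zpass pixel counting. This code begins
 * and ends queries only outside of a render pass (asserted below), so
 * callers end the current render pass first; see begin_draw().
 */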
static void begin_query(PGRAPHVkState *r)
{
    assert(r->in_command_buffer);
    assert(!r->in_render_pass);
    assert(!r->query_in_flight);

    // FIXME: We should handle this. Make the query buffer bigger, but at
    // least flush current queries.
    assert(r->num_queries_in_flight < r->max_queries_in_flight);

    nv2a_profile_inc_counter(NV2A_PROF_QUERY);
    vkCmdResetQueryPool(r->command_buffer, r->query_pool,
                        r->num_queries_in_flight, 1);
    vkCmdBeginQuery(r->command_buffer, r->query_pool, r->num_queries_in_flight,
                    VK_QUERY_CONTROL_PRECISE_BIT);

    r->query_in_flight = true;
    r->new_query_needed = false;
    r->num_queries_in_flight++;
}

static void end_query(PGRAPHVkState *r)
{
    assert(r->in_command_buffer);
    assert(!r->in_render_pass);
    assert(r->query_in_flight);

    vkCmdEndQuery(r->command_buffer, r->query_pool,
                  r->num_queries_in_flight - 1);
    r->query_in_flight = false;
}

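/*
 * Copy newly written staging data to its device-local buffer and insert a
 * barrier so later commands observe it. The destination stage/access masks
 * depend on the consumer: index fetch, vertex attribute fetch, or uniform
 * reads.
 */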
static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd,
                                int index_src, int index_dst)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    StorageBuffer *b_src = &r->storage_buffers[index_src];
    StorageBuffer *b_dst = &r->storage_buffers[index_dst];

    if (!b_src->buffer_offset) {
        return;
    }

    VkBufferCopy copy_region = { .size = b_src->buffer_offset };
    vkCmdCopyBuffer(cmd, b_src->buffer, b_dst->buffer, 1, &copy_region);

    VkAccessFlags dst_access_mask;
    VkPipelineStageFlags dst_stage_mask;

    switch (index_dst) {
    case BUFFER_INDEX:
        dst_access_mask = VK_ACCESS_INDEX_READ_BIT;
        dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
        break;
    case BUFFER_VERTEX_INLINE:
        dst_access_mask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
        dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
        break;
    case BUFFER_UNIFORM:
        dst_access_mask = VK_ACCESS_UNIFORM_READ_BIT;
        dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT;
        break;
    default:
        assert(0);
        break;
    }

    VkBufferMemoryBarrier barrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = dst_access_mask,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = b_dst->buffer,
        .size = b_src->buffer_offset,
    };
    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, 0,
                         0, NULL, 1, &barrier, 0, NULL);

    b_src->buffer_offset = 0;
}

static void flush_memory_buffer(PGRAPHState *pg, VkCommandBuffer cmd)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VK_CHECK(vmaFlushAllocation(
        r->allocator, r->storage_buffers[BUFFER_VERTEX_RAM].allocation, 0,
        VK_WHOLE_SIZE));

    VkBufferMemoryBarrier barrier = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .buffer = r->storage_buffers[BUFFER_VERTEX_RAM].buffer,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
    };

    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT,
                         VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, NULL, 1,
                         &barrier, 0, NULL);
}

static void begin_render_pass(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->in_command_buffer);
    assert(!r->in_render_pass);

    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_RENDERPASSES);

    unsigned int vp_width = pg->surface_binding_dim.width,
                 vp_height = pg->surface_binding_dim.height;
    pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);

    assert(r->framebuffer_index > 0);

    VkRenderPassBeginInfo render_pass_begin_info = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
        .renderPass = r->render_pass,
        .framebuffer = r->framebuffers[r->framebuffer_index - 1],
        .renderArea.extent.width = vp_width,
        .renderArea.extent.height = vp_height,
        .clearValueCount = 0,
        .pClearValues = NULL,
    };
    vkCmdBeginRenderPass(r->command_buffer, &render_pass_begin_info,
                         VK_SUBPASS_CONTENTS_INLINE);
    r->in_render_pass = true;
}

static void end_render_pass(PGRAPHVkState *r)
{
    if (r->in_render_pass) {
        vkCmdEndRenderPass(r->command_buffer);
        r->in_render_pass = false;
    }
}

const enum NV2A_PROF_COUNTERS_ENUM finish_reason_to_counter_enum[] = {
    [VK_FINISH_REASON_VERTEX_BUFFER_DIRTY] = NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY,
    [VK_FINISH_REASON_SURFACE_CREATE] = NV2A_PROF_FINISH_SURFACE_CREATE,
    [VK_FINISH_REASON_SURFACE_DOWN] = NV2A_PROF_FINISH_SURFACE_DOWN,
    [VK_FINISH_REASON_NEED_BUFFER_SPACE] = NV2A_PROF_FINISH_NEED_BUFFER_SPACE,
    [VK_FINISH_REASON_FRAMEBUFFER_DIRTY] = NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY,
    [VK_FINISH_REASON_PRESENTING] = NV2A_PROF_FINISH_PRESENTING,
    [VK_FINISH_REASON_FLIP_STALL] = NV2A_PROF_FINISH_FLIP_STALL,
    [VK_FINISH_REASON_FLUSH] = NV2A_PROF_FINISH_FLUSH,
    [VK_FINISH_REASON_STALLED] = NV2A_PROF_FINISH_STALLED,
};

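/*
 * Flush all recorded work and wait for idle. The aux command buffer (staging
 * copies) is submitted first, signaling a semaphore that the main command
 * buffer waits on; the host then blocks on the fence before recycling
 * descriptor sets and framebuffers.
 */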
void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(!r->in_draw);

    if (r->in_command_buffer) {
        nv2a_profile_inc_counter(finish_reason_to_counter_enum[finish_reason]);

        if (r->in_render_pass) {
            end_render_pass(r);
        }
        if (r->query_in_flight) {
            end_query(r);
        }
        VK_CHECK(vkEndCommandBuffer(r->command_buffer));

        VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); // FIXME: Cleanup
        sync_staging_buffer(pg, cmd, BUFFER_INDEX_STAGING, BUFFER_INDEX);
        sync_staging_buffer(pg, cmd, BUFFER_VERTEX_INLINE_STAGING,
                            BUFFER_VERTEX_INLINE);
        sync_staging_buffer(pg, cmd, BUFFER_UNIFORM_STAGING, BUFFER_UNIFORM);
        bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);
        flush_memory_buffer(pg, cmd);
        VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer));
        r->in_aux_command_buffer = false;

        VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
        VkSubmitInfo submit_infos[] = {
            {
                .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
                .commandBufferCount = 1,
                .pCommandBuffers = &r->aux_command_buffer,
                .signalSemaphoreCount = 1,
                .pSignalSemaphores = &r->command_buffer_semaphore,
            },
            {
                .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
                .commandBufferCount = 1,
                .pCommandBuffers = &r->command_buffer,
                .waitSemaphoreCount = 1,
                .pWaitSemaphores = &r->command_buffer_semaphore,
                .pWaitDstStageMask = &wait_stage,
            },
        };
        nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT);
        vkResetFences(r->device, 1, &r->command_buffer_fence);
        VK_CHECK(vkQueueSubmit(r->queue, ARRAY_SIZE(submit_infos), submit_infos,
                               r->command_buffer_fence));
        r->submit_count += 1;

        bool check_budget = false;

        // Periodically check memory budget
        const int max_num_submits_before_budget_update = 5;
        if (finish_reason == VK_FINISH_REASON_FLIP_STALL ||
            (r->submit_count - r->allocator_last_submit_index) >
                max_num_submits_before_budget_update) {
            // VMA queries budget via vmaSetCurrentFrameIndex
            vmaSetCurrentFrameIndex(r->allocator, r->submit_count);
            r->allocator_last_submit_index = r->submit_count;
            check_budget = true;
        }

        VK_CHECK(vkWaitForFences(r->device, 1, &r->command_buffer_fence,
                                 VK_TRUE, UINT64_MAX));

        r->descriptor_set_index = 0;
        r->in_command_buffer = false;
        destroy_framebuffers(pg);

        if (check_budget) {
            pgraph_vk_check_memory_budget(pg);
        }
    }

    NV2AState *d = container_of(pg, NV2AState, pgraph);
    pgraph_vk_process_pending_reports_internal(d);

    pgraph_vk_compute_finish_complete(r);
}

void pgraph_vk_begin_command_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(!r->in_command_buffer);

    VkCommandBufferBeginInfo command_buffer_begin_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
    VK_CHECK(vkBeginCommandBuffer(r->command_buffer,
                                  &command_buffer_begin_info));
    r->command_buffer_start_time = pg->draw_time;
    r->in_command_buffer = true;
}

// FIXME: Refactor below

void pgraph_vk_ensure_command_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!r->in_command_buffer) {
        pgraph_vk_begin_command_buffer(pg);
    }
}

void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    end_render_pass(r);
    if (r->query_in_flight) {
        end_query(r);
    }
}

VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    pgraph_vk_ensure_command_buffer(pg);
    pgraph_vk_ensure_not_in_render_pass(pg);
    return r->command_buffer;
}

void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(cmd == r->command_buffer);
}

// FIXME: Add more metrics for determining command buffer 'fullness' and
// conservatively flush. Unfortunately there doesn't appear to be a good
// way to determine what the actual maximum capacity of a command buffer
// is, but we are obviously not supposed to endlessly append to one command
// buffer. For other reasons though (like descriptor set amount, surface
// changes, etc) we do flush often.

static void begin_pre_draw(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->color_binding || r->zeta_binding);
    assert(!r->color_binding || r->color_binding->initialized);
    assert(!r->zeta_binding || r->zeta_binding->initialized);

    if (pg->clearing) {
        create_clear_pipeline(pg);
    } else {
        create_pipeline(pg);
    }

    bool render_pass_dirty = r->pipeline_binding->render_pass != r->render_pass;

    if (r->framebuffer_dirty || render_pass_dirty) {
        pgraph_vk_ensure_not_in_render_pass(pg);
    }
    if (render_pass_dirty) {
        r->render_pass = r->pipeline_binding->render_pass;
    }
    if (r->framebuffer_dirty) {
        create_frame_buffer(pg);
        r->framebuffer_dirty = false;
    }
    if (!pg->clearing) {
        pgraph_vk_update_descriptor_sets(pg);
    }
    if (r->framebuffer_index == 0) {
        create_frame_buffer(pg);
    }

    pgraph_vk_ensure_command_buffer(pg);
}

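/*
 * Transition into the draw: manage query begin/end (queries must start and
 * end outside of a render pass), open a render pass if needed, and rebind
 * the pipeline plus dynamic viewport/scissor state whenever the binding
 * changed or a new render pass was started.
 */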
static void begin_draw(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->in_command_buffer);

    // Visibility testing
    if (!pg->clearing && pg->zpass_pixel_count_enable) {
        if (r->new_query_needed && r->query_in_flight) {
            end_render_pass(r);
            end_query(r);
        }
        if (!r->query_in_flight) {
            end_render_pass(r);
            begin_query(r);
        }
    } else if (r->query_in_flight) {
        end_render_pass(r);
        end_query(r);
    }

    if (pg->clearing) {
        end_render_pass(r);
    }

    bool must_bind_pipeline = r->pipeline_binding_changed;

    if (!r->in_render_pass) {
        begin_render_pass(pg);
        must_bind_pipeline = true;
    }

    if (must_bind_pipeline) {
        nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_BIND);
        vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                          r->pipeline_binding->pipeline);
        r->pipeline_binding->draw_time = pg->draw_time;

        unsigned int vp_width = pg->surface_binding_dim.width,
                     vp_height = pg->surface_binding_dim.height;
        pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);

        VkViewport viewport = {
            .width = vp_width,
            .height = vp_height,
            .minDepth = 0.0,
            .maxDepth = 1.0,
        };
        vkCmdSetViewport(r->command_buffer, 0, 1, &viewport);

        /* Surface clip */
        /* FIXME: Consider moving to PSH w/ window clip */
        unsigned int xmin = pg->surface_shape.clip_x -
                            pg->surface_binding_dim.clip_x,
                     ymin = pg->surface_shape.clip_y -
                            pg->surface_binding_dim.clip_y;

        unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
                     ymax = ymin + pg->surface_shape.clip_height - 1;

        unsigned int scissor_width = xmax - xmin + 1,
                     scissor_height = ymax - ymin + 1;

        pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
        pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);

        pgraph_apply_scaling_factor(pg, &xmin, &ymin);
        pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);

        VkRect2D scissor = {
            .offset.x = xmin,
            .offset.y = ymin,
            .extent.width = scissor_width,
            .extent.height = scissor_height,
        };
        vkCmdSetScissor(r->command_buffer, 0, 1, &scissor);
    }

    if (!pg->clearing) {
        bind_descriptor_sets(pg);
        push_vertex_attr_values(pg);
    }

    r->in_draw = true;
}

static void end_draw(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->in_command_buffer);
    assert(r->in_render_pass);

    if (pg->clearing) {
        end_render_pass(r);
    }

    r->in_draw = false;

    // FIXME: We could clear less
    pgraph_clear_dirty_reg_map(pg);
}

void pgraph_vk_draw_end(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
    bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
    bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
    bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
    bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
    bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
    bool stencil_test =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) &
        NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
    bool is_nop_draw = !(color_write || depth_test || stencil_test);

    if (is_nop_draw) {
        // FIXME: Check PGRAPH register 0x880.
        // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
        // check that will raise an exception in the case that a draw should
        // modify the color and/or zeta buffer but the target(s) are masked
        // off. This check only seems to trigger during the fragment
        // processing; it is legal to attempt a draw that is entirely
        // clipped regardless of 0x880. See xemu#635 for context.
        NV2A_VK_DPRINTF("nop draw!\n");
        return;
    }

    pgraph_vk_flush_draw(d);

    pg->draw_time++;
    if (r->color_binding && pgraph_color_write_enabled(pg)) {
        r->color_binding->draw_time = pg->draw_time;
    }
    if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
        r->zeta_binding->draw_time = pg->draw_time;
    }

    pgraph_vk_set_surface_dirty(pg, color_write, depth_test || stencil_test);
}

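/*
 * Vertex data read directly from guest RAM is tracked as a list of
 * (addr, size) sync requirements. Before use, the ranges are page-aligned,
 * sorted and merged, and only ranges whose pages were dirtied since the last
 * sync are re-uploaded.
 */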
static int compare_memory_sync_requirement_by_addr(const void *p1,
|
|
const void *p2)
|
|
{
|
|
const MemorySyncRequirement *l = p1, *r = p2;
|
|
if (l->addr < r->addr)
|
|
return -1;
|
|
if (l->addr > r->addr)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
static void sync_vertex_ram_buffer(PGRAPHState *pg)
|
|
{
|
|
NV2AState *d = container_of(pg, NV2AState, pgraph);
|
|
PGRAPHVkState *r = pg->vk_renderer_state;
|
|
|
|
if (r->num_vertex_ram_buffer_syncs == 0) {
|
|
return;
|
|
}
|
|
|
|
// Align sync requirements to page boundaries
|
|
NV2A_VK_DGROUP_BEGIN("Sync vertex RAM buffer");
|
|
|
|
for (int i = 0; i < r->num_vertex_ram_buffer_syncs; i++) {
|
|
NV2A_VK_DPRINTF("Need to sync vertex memory @%" HWADDR_PRIx
|
|
", %" HWADDR_PRIx " bytes",
|
|
r->vertex_ram_buffer_syncs[i].addr,
|
|
r->vertex_ram_buffer_syncs[i].size);
|
|
|
|
hwaddr start_addr =
|
|
r->vertex_ram_buffer_syncs[i].addr & TARGET_PAGE_MASK;
|
|
hwaddr end_addr = r->vertex_ram_buffer_syncs[i].addr +
|
|
r->vertex_ram_buffer_syncs[i].size;
|
|
end_addr = ROUND_UP(end_addr, TARGET_PAGE_SIZE);
|
|
|
|
NV2A_VK_DPRINTF("- %d: %08" HWADDR_PRIx " %zd bytes"
|
|
" -> %08" HWADDR_PRIx " %zd bytes", i,
|
|
r->vertex_ram_buffer_syncs[i].addr,
|
|
r->vertex_ram_buffer_syncs[i].size, start_addr,
|
|
end_addr - start_addr);
|
|
|
|
r->vertex_ram_buffer_syncs[i].addr = start_addr;
|
|
r->vertex_ram_buffer_syncs[i].size = end_addr - start_addr;
|
|
}
|
|
|
|
// Sort the requirements in increasing order of addresses
|
|
qsort(r->vertex_ram_buffer_syncs, r->num_vertex_ram_buffer_syncs,
|
|
sizeof(MemorySyncRequirement),
|
|
compare_memory_sync_requirement_by_addr);
|
|
|
|
// Merge overlapping/adjacent requests to minimize number of tests
|
|
MemorySyncRequirement merged[16];
|
|
int num_syncs = 1;
|
|
|
|
merged[0] = r->vertex_ram_buffer_syncs[0];
|
|
|
    for (int i = 1; i < r->num_vertex_ram_buffer_syncs; i++) {
        MemorySyncRequirement *p = &merged[num_syncs - 1];
        MemorySyncRequirement *t = &r->vertex_ram_buffer_syncs[i];

        if (t->addr <= (p->addr + p->size)) {
            // Merge with previous
            hwaddr p_end_addr = p->addr + p->size;
            hwaddr t_end_addr = t->addr + t->size;
            hwaddr new_end_addr = MAX(p_end_addr, t_end_addr);
            p->size = new_end_addr - p->addr;
        } else {
            merged[num_syncs++] = *t;
        }
    }

    if (num_syncs < r->num_vertex_ram_buffer_syncs) {
        NV2A_VK_DPRINTF("Reduced to %d sync checks", num_syncs);
    }

    for (int i = 0; i < num_syncs; i++) {
        hwaddr addr = merged[i].addr;
        VkDeviceSize size = merged[i].size;

        NV2A_VK_DPRINTF("- %d: %08" HWADDR_PRIx " %zd bytes", i, addr, size);

        if (memory_region_test_and_clear_dirty(d->vram, addr, size,
                                               DIRTY_MEMORY_NV2A)) {
            NV2A_VK_DPRINTF("Memory dirty. Synchronizing...");
            pgraph_vk_update_vertex_ram_buffer(pg, addr, d->vram_ptr + addr,
                                               size);
        }
    }

    r->num_vertex_ram_buffer_syncs = 0;

    NV2A_VK_DGROUP_END();
}
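
/*
 * Emulate NV097_CLEAR_SURFACE. The clear rectangle is clamped to the
 * bound surface and scaled for anti-aliasing and surface up-scaling.
 * Full-channel clears are issued with vkCmdClearAttachments inside the
 * active render pass; partial color-channel clears fall back to a
 * masked draw.
 */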
void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    nv2a_profile_inc_counter(NV2A_PROF_CLEAR);

    bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
    bool write_zeta =
        (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));

    pg->clearing = true;

    // FIXME: If doing a full surface clear, mark the surface for full clear
    // and we can just do the clear as part of the surface load.
    pgraph_vk_surface_update(d, true, write_color, write_zeta);

    SurfaceBinding *binding = r->color_binding ?: r->zeta_binding;
    if (!binding) {
        /* Nothing bound to clear */
        pg->clearing = false;
        return;
    }

    r->clear_parameter = parameter;

    uint32_t clearrectx = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX);
    uint32_t clearrecty = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY);

    int xmin = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMIN);
    int xmax = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMAX);
    int ymin = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMIN);
    int ymax = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMAX);

    NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax,
                         ymax, write_color ? " color" : "",
                         write_zeta ? " zeta" : "");

    begin_pre_draw(pg);
    begin_draw(pg);

    // FIXME: What does hardware do when min > max?
    xmin = MIN(xmin, binding->width - 1);
    ymin = MIN(ymin, binding->height - 1);
    xmax = MIN(xmax, binding->width - 1);
    ymax = MIN(ymax, binding->height - 1);

    int scissor_width = MAX(0, xmax - xmin + 1),
        scissor_height = MAX(0, ymax - ymin + 1);

    pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
    pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);

    pgraph_apply_scaling_factor(pg, &xmin, &ymin);
    pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);

    VkClearRect clear_rect = {
        .rect = {
            .offset = { .x = xmin, .y = ymin },
            .extent = { .width = scissor_width, .height = scissor_height },
        },
        .baseArrayLayer = 0,
        .layerCount = 1,
    };

    int num_attachments = 0;
    VkClearAttachment attachments[2];

    if (write_color && r->color_binding) {
        const bool clear_all_color_channels =
            (parameter & NV097_CLEAR_SURFACE_COLOR) ==
            (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G |
             NV097_CLEAR_SURFACE_B | NV097_CLEAR_SURFACE_A);

        if (clear_all_color_channels) {
            attachments[num_attachments] = (VkClearAttachment){
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .colorAttachment = 0,
            };
            pgraph_get_clear_color(
                pg, attachments[num_attachments].clearValue.color.float32);
            num_attachments++;
        } else {
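            // vkCmdClearAttachments cannot honor a partial color write
            // mask, so the clear color is drawn through a clear pipeline
            // instead (presumably configured by begin_draw while
            // pg->clearing is set), with the color supplied via blend
            // constants.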
            float blend_constants[4];
            pgraph_get_clear_color(pg, blend_constants);
            vkCmdSetScissor(r->command_buffer, 0, 1, &clear_rect.rect);
            vkCmdSetBlendConstants(r->command_buffer, blend_constants);
            vkCmdDraw(r->command_buffer, 3, 1, 0, 0);
        }
    }

    if (write_zeta && r->zeta_binding) {
        int stencil_value = 0;
        float depth_value = 1.0f;
        pgraph_get_clear_depth_stencil_value(pg, &depth_value, &stencil_value);

        VkImageAspectFlags aspect = 0;
        if (parameter & NV097_CLEAR_SURFACE_Z) {
            aspect |= VK_IMAGE_ASPECT_DEPTH_BIT;
        }
        if ((parameter & NV097_CLEAR_SURFACE_STENCIL) &&
            (r->zeta_binding->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT)) {
            aspect |= VK_IMAGE_ASPECT_STENCIL_BIT;
        }

        // A stencil-only clear against a stencil-less zeta surface leaves
        // aspect empty; skip the attachment in that case, as a zero
        // aspectMask is invalid for vkCmdClearAttachments.
        if (aspect) {
            attachments[num_attachments++] = (VkClearAttachment){
                .aspectMask = aspect,
                .clearValue.depthStencil.depth = depth_value,
                .clearValue.depthStencil.stencil = stencil_value,
            };
        }
    }

    if (num_attachments) {
        vkCmdClearAttachments(r->command_buffer, num_attachments, attachments,
                              1, &clear_rect);
    }
    end_draw(pg);

    pg->clearing = false;

    pgraph_vk_set_surface_dirty(pg, write_color, write_zeta);

    NV2A_VK_DGROUP_END();
}

#if 0
/* Disabled debug helper: dumps the attribute data consumed by
 * draw-arrays batch i. */
static void pgraph_vk_debug_attrs(NV2AState *d, int i)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    for (int vertex_idx = 0; vertex_idx < pg->draw_arrays_count[i];
         vertex_idx++) {
        NV2A_VK_DGROUP_BEGIN("Vertex %d+%d", pg->draw_arrays_start[i],
                             vertex_idx);
        for (int attr_idx = 0; attr_idx < NV2A_VERTEXSHADER_ATTRIBUTES;
             attr_idx++) {
            VertexAttribute *attr = &pg->vertex_attributes[attr_idx];
            if (attr->count) {
                char *p = (char *)d->vram_ptr +
                          r->vertex_attribute_offsets[attr_idx] +
                          (pg->draw_arrays_start[i] + vertex_idx) * attr->stride;
                NV2A_VK_DGROUP_BEGIN("Attribute %d data at %tx", attr_idx,
                                     (ptrdiff_t)(p - (char *)d->vram_ptr));
                for (int count_idx = 0; count_idx < attr->count; count_idx++) {
                    switch (attr->format) {
                    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
                        NV2A_VK_DPRINTF("[%d] %f", count_idx, *(float *)p);
                        p += sizeof(float);
                        break;
                    default:
                        assert(0);
                        break;
                    }
                }
                NV2A_VK_DGROUP_END();
            }
        }
        NV2A_VK_DGROUP_END();
    }
}
#endif
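
/*
 * Bind the Vulkan vertex buffers backing every active binding
 * description. inline_map is a per-attribute bitmask: a set bit sources
 * that attribute from the inline vertex buffer (repacked/uploaded for
 * this draw), a clear bit sources it straight from the persistent
 * vertex RAM buffer.
 */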
static void bind_vertex_buffer(PGRAPHState *pg, uint16_t inline_map,
                               VkDeviceSize offset)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (r->num_active_vertex_binding_descriptions == 0) {
        return;
    }

    VkBuffer buffers[NV2A_VERTEXSHADER_ATTRIBUTES];
    VkDeviceSize offsets[NV2A_VERTEXSHADER_ATTRIBUTES];

    for (int i = 0; i < r->num_active_vertex_binding_descriptions; i++) {
        int attr_idx = r->vertex_attribute_descriptions[i].location;
        int buffer_idx = (inline_map & (1 << attr_idx)) ? BUFFER_VERTEX_INLINE :
                                                          BUFFER_VERTEX_RAM;
        buffers[i] = r->storage_buffers[buffer_idx].buffer;
        offsets[i] = offset + r->vertex_attribute_offsets[attr_idx];
    }

    vkCmdBindVertexBuffers(r->command_buffer, 0,
                           r->num_active_vertex_binding_descriptions, buffers,
                           offsets);
}

static void bind_inline_vertex_buffer(PGRAPHState *pg, VkDeviceSize offset)
{
    bind_vertex_buffer(pg, 0xffff, offset);
}

void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta)
{
    NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", color, zeta,
                 pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg));

    PGRAPHVkState *r = pg->vk_renderer_state;

    /* FIXME: Does this apply to CLEARs too? */
    color = color && pgraph_color_write_enabled(pg);
    zeta = zeta && pgraph_zeta_write_enabled(pg);
    pg->surface_color.draw_dirty |= color;
    pg->surface_zeta.draw_dirty |= zeta;

    if (r->color_binding) {
        r->color_binding->draw_dirty |= color;
        r->color_binding->frame_time = pg->frame_time;
        r->color_binding->cleared = false;
    }

    if (r->zeta_binding) {
        r->zeta_binding->draw_dirty |= zeta;
        r->zeta_binding->frame_time = pg->frame_time;
        r->zeta_binding->cleared = false;
    }
}
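
/*
 * Ensure storage buffer `index` can accept `size` more bytes, forcing a
 * finish (which flushes pending work and reclaims buffer space) when it
 * cannot. Returns true if a finish was forced.
 */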
static bool ensure_buffer_space(PGRAPHState *pg, int index, VkDeviceSize size)
{
    if (!pgraph_vk_buffer_has_space_for(pg, index, size, 1)) {
        pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
        return true;
    }

    return false;
}
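
/*
 * Per-component byte size and component count for each vertex format
 * the NV2A attribute binder can emit. Used below to detect attribute
 * offsets/strides that violate Vulkan's alignment rules.
 */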
static void get_size_and_count_for_format(VkFormat fmt, size_t *size,
                                          size_t *count)
{
    static const struct {
        size_t size;
        size_t count;
    } table[] = {
        [VK_FORMAT_R8_UNORM] = { 1, 1 },
        [VK_FORMAT_R8G8_UNORM] = { 1, 2 },
        [VK_FORMAT_R8G8B8_UNORM] = { 1, 3 },
        [VK_FORMAT_R8G8B8A8_UNORM] = { 1, 4 },
        [VK_FORMAT_R16_SNORM] = { 2, 1 },
        [VK_FORMAT_R16G16_SNORM] = { 2, 2 },
        [VK_FORMAT_R16G16B16_SNORM] = { 2, 3 },
        [VK_FORMAT_R16G16B16A16_SNORM] = { 2, 4 },
        [VK_FORMAT_R16_SSCALED] = { 2, 1 },
        [VK_FORMAT_R16G16_SSCALED] = { 2, 2 },
        [VK_FORMAT_R16G16B16_SSCALED] = { 2, 3 },
        [VK_FORMAT_R16G16B16A16_SSCALED] = { 2, 4 },
        [VK_FORMAT_R32_SFLOAT] = { 4, 1 },
        [VK_FORMAT_R32G32_SFLOAT] = { 4, 2 },
        [VK_FORMAT_R32G32B32_SFLOAT] = { 4, 3 },
        [VK_FORMAT_R32G32B32A32_SFLOAT] = { 4, 4 },
        [VK_FORMAT_R32_SINT] = { 4, 1 },
    };

    assert(fmt < ARRAY_SIZE(table));
    assert(table[fmt].size);

    *size = table[fmt].size;
    *count = table[fmt].count;
}
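
/*
 * Vulkan requires each vertex attribute's offset and stride to be a
 * multiple of its component size, while NV2A vertex layouts may be
 * packed arbitrarily. VertexBufferRemap records which attributes must
 * be repacked into the inline buffer, and at which offset/stride, to
 * satisfy that rule.
 */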
typedef struct VertexBufferRemap {
    uint16_t attributes;
    size_t buffer_space_required;
    struct {
        VkDeviceAddress offset;
        VkDeviceSize stride;
    } map[NV2A_VERTEXSHADER_ATTRIBUTES];
} VertexBufferRemap;

static VertexBufferRemap remap_unaligned_attributes(PGRAPHState *pg,
                                                    uint32_t num_vertices)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VertexBufferRemap remap = {0};

    VkDeviceAddress output_offset = 0;

    for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) {
        int desc_loc = r->vertex_attribute_to_description_location[attr_id];
        if (desc_loc < 0) {
            continue;
        }

        VkVertexInputBindingDescription *desc =
            &r->vertex_binding_descriptions[desc_loc];
        VkVertexInputAttributeDescription *attr =
            &r->vertex_attribute_descriptions[desc_loc];

        size_t element_size, element_count;
        get_size_and_count_for_format(attr->format, &element_size,
                                      &element_count);

        bool offset_valid =
            (r->vertex_attribute_offsets[attr_id] % element_size == 0);
        bool stride_valid = (desc->stride % element_size == 0);

        if (offset_valid && stride_valid) {
            continue;
        }

        remap.attributes |= 1 << attr_id;
        remap.map[attr_id].offset = ROUND_UP(output_offset, element_size);
        remap.map[attr_id].stride = element_size * element_count;

        // fprintf(stderr,
        //         "attr %02d remapped: "
        //         "%08" HWADDR_PRIx "->%08" HWADDR_PRIx " "
        //         "stride=%d->%zd\n",
        //         attr_id, r->vertex_attribute_offsets[attr_id],
        //         remap.map[attr_id].offset, desc->stride,
        //         remap.map[attr_id].stride);

        output_offset =
            remap.map[attr_id].offset + remap.map[attr_id].stride * num_vertices;
    }

    remap.buffer_space_required = output_offset;
    return remap;
}
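
/*
 * Repack the attributes flagged by remap into the inline vertex staging
 * buffer with tight strides, then redirect the attribute offsets and
 * binding strides at the repacked copy.
 */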
static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg,
                                                      VertexBufferRemap remap,
                                                      uint32_t start_vertex,
                                                      uint32_t num_vertices)
{
    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;
    StorageBuffer *buffer = &r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING];

    r->vertex_buffer_inline = remap.attributes;

    if (!remap.attributes) {
        return;
    }

    VkDeviceSize starting_offset = ROUND_UP(buffer->buffer_offset, 16);
    size_t total_space_required =
        (starting_offset - buffer->buffer_offset) + remap.buffer_space_required;
    ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, total_space_required);
    assert(pgraph_vk_buffer_has_space_for(pg, BUFFER_VERTEX_INLINE_STAGING,
                                          total_space_required, 1));

    buffer->buffer_offset = starting_offset; // Aligned

    // FIXME: SIMD memcpy
    // FIXME: Caching
    // FIXME: Account for only what is drawn
    assert(start_vertex == 0);
    assert(buffer->mapped);

    // Copy vertex data
    for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) {
        if (!(remap.attributes & (1 << attr_id))) {
            continue;
        }

        int bind_desc_loc =
            r->vertex_attribute_to_description_location[attr_id];
        assert(bind_desc_loc >= 0);

        VkVertexInputBindingDescription *bind_desc =
            &r->vertex_binding_descriptions[bind_desc_loc];

        VkDeviceSize attr_buffer_offset =
            buffer->buffer_offset + remap.map[attr_id].offset;

        uint8_t *out_ptr = buffer->mapped + attr_buffer_offset;
        uint8_t *in_ptr = d->vram_ptr + r->vertex_attribute_offsets[attr_id];

        for (int vertex_id = 0; vertex_id < num_vertices; vertex_id++) {
            memcpy(out_ptr, in_ptr, remap.map[attr_id].stride);
            out_ptr += remap.map[attr_id].stride;
            in_ptr += bind_desc->stride;
        }

        r->vertex_attribute_offsets[attr_id] = attr_buffer_offset;
        bind_desc->stride = remap.map[attr_id].stride;
    }

    buffer->buffer_offset += remap.buffer_space_required;
}
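
/*
 * Submit the geometry accumulated for the current begin/end pair. Four
 * mutually exclusive submission paths exist: draw arrays, inline element
 * indices, the inline (immediate-mode) vertex buffer, and the raw inline
 * array.
 */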
void pgraph_vk_flush_draw(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!(r->color_binding || r->zeta_binding)) {
        NV2A_VK_DPRINTF("No binding present!");
        return;
    }

    r->num_vertex_ram_buffer_syncs = 0;

    if (pg->draw_arrays_length) {
        NV2A_VK_DGROUP_BEGIN("Draw Arrays");
        nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);

        assert(pg->inline_elements_length == 0);
        assert(pg->inline_buffer_length == 0);
        assert(pg->inline_array_length == 0);

        pgraph_vk_bind_vertex_attributes(d, pg->draw_arrays_min_start,
                                         pg->draw_arrays_max_count - 1, false,
                                         0, pg->draw_arrays_max_count - 1);
        uint32_t min_element = (uint32_t)-1;
        uint32_t max_element = 0;
        for (int i = 0; i < pg->draw_arrays_length; i++) {
            min_element = MIN(pg->draw_arrays_start[i], min_element);
            max_element = MAX(max_element, pg->draw_arrays_start[i] +
                                               pg->draw_arrays_count[i]);
        }
        sync_vertex_ram_buffer(pg);
        VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element);
        copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element);

        begin_pre_draw(pg);
        begin_draw(pg);
        bind_vertex_buffer(pg, remap.attributes, 0);
        for (int i = 0; i < pg->draw_arrays_length; i++) {
            uint32_t start = pg->draw_arrays_start[i],
                     count = pg->draw_arrays_count[i];
            NV2A_VK_DPRINTF("- [%d] Start:%d Count:%d", i, start, count);
            vkCmdDraw(r->command_buffer, count, 1, start, 0);
        }
        end_draw(pg);

        NV2A_VK_DGROUP_END();
    } else if (pg->inline_elements_length) {
        NV2A_VK_DGROUP_BEGIN("Inline Elements");
        assert(pg->inline_buffer_length == 0);
        assert(pg->inline_array_length == 0);

        nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);

        size_t index_data_size =
            pg->inline_elements_length * sizeof(pg->inline_elements[0]);

        ensure_buffer_space(pg, BUFFER_INDEX_STAGING, index_data_size);

        uint32_t min_element = (uint32_t)-1;
        uint32_t max_element = 0;
        for (int i = 0; i < pg->inline_elements_length; i++) {
            max_element = MAX(pg->inline_elements[i], max_element);
            min_element = MIN(pg->inline_elements[i], min_element);
        }
        pgraph_vk_bind_vertex_attributes(
            d, min_element, max_element, false, 0,
            pg->inline_elements[pg->inline_elements_length - 1]);
        sync_vertex_ram_buffer(pg);
        VertexBufferRemap remap =
            remap_unaligned_attributes(pg, max_element + 1);
        copy_remapped_attributes_to_inline_buffer(pg, remap, 0,
                                                  max_element + 1);

        begin_pre_draw(pg);
        VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer(
            pg, pg->inline_elements, index_data_size);
        begin_draw(pg);
        bind_vertex_buffer(pg, remap.attributes, 0);
        vkCmdBindIndexBuffer(r->command_buffer,
                             r->storage_buffers[BUFFER_INDEX].buffer,
                             buffer_offset, VK_INDEX_TYPE_UINT32);
        vkCmdDrawIndexed(r->command_buffer, pg->inline_elements_length, 1, 0,
                         0, 0);
        end_draw(pg);

        NV2A_VK_DGROUP_END();
    } else if (pg->inline_buffer_length) {
        NV2A_VK_DGROUP_BEGIN("Inline Buffer");
        nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
        assert(pg->inline_array_length == 0);

        size_t vertex_data_size = pg->inline_buffer_length * sizeof(float) * 4;
        void *data[NV2A_VERTEXSHADER_ATTRIBUTES];
        size_t sizes[NV2A_VERTEXSHADER_ATTRIBUTES];
        size_t offset = 0;

        pgraph_vk_bind_vertex_attributes_inline(d);
        for (int i = 0; i < r->num_active_vertex_attribute_descriptions; i++) {
            int attr_index = r->vertex_attribute_descriptions[i].location;

            VertexAttribute *attr = &pg->vertex_attributes[attr_index];
            r->vertex_attribute_offsets[attr_index] = offset;

            data[i] = attr->inline_buffer;
            sizes[i] = vertex_data_size;

            attr->inline_buffer_populated = false;
            offset += vertex_data_size;
        }
        ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, offset);

        begin_pre_draw(pg);
        VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
            pg, data, sizes, r->num_active_vertex_attribute_descriptions);
        begin_draw(pg);
        bind_inline_vertex_buffer(pg, buffer_offset);
        vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0);
        end_draw(pg);

        NV2A_VK_DGROUP_END();
    } else if (pg->inline_array_length) {
        NV2A_VK_DGROUP_BEGIN("Inline Array");
        nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);

        VkDeviceSize inline_array_data_size = pg->inline_array_length * 4;
        ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING,
                            inline_array_data_size);

        unsigned int offset = 0;
        for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
            VertexAttribute *attr = &pg->vertex_attributes[i];
            if (attr->count == 0) {
                continue;
            }

            /* FIXME: Double check */
            offset = ROUND_UP(offset, attr->size);
            attr->inline_array_offset = offset;
            NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", i,
                         attr->size, attr->count);
            offset += attr->size * attr->count;
            offset = ROUND_UP(offset, attr->size);
        }
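
        // offset now equals the packed per-vertex size in bytes. The
        // inline array length is counted in 32-bit words, so the vertex
        // count follows directly from the total byte size.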
        unsigned int vertex_size = offset;
        unsigned int index_count = pg->inline_array_length * 4 / vertex_size;

        NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
        pgraph_vk_bind_vertex_attributes(d, 0, index_count - 1, true,
                                         vertex_size, index_count - 1);

        begin_pre_draw(pg);
        void *inline_array_data = pg->inline_array;
        VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
            pg, &inline_array_data, &inline_array_data_size, 1);
        begin_draw(pg);
        bind_inline_vertex_buffer(pg, buffer_offset);
        vkCmdDraw(r->command_buffer, index_count, 1, 0, 0);
        end_draw(pg);
        NV2A_VK_DGROUP_END();
    } else {
        NV2A_VK_DPRINTF("EMPTY NV097_SET_BEGIN_END");
        NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
    }
}