mirror of https://github.com/xemu-project/xemu.git
922 lines
34 KiB
C
922 lines
34 KiB
C
/*
|
|
* Geforce NV2A PGRAPH Vulkan Renderer
|
|
*
|
|
* Copyright (c) 2024 Matt Borgerson
|
|
*
|
|
* Based on GL implementation:
|
|
*
|
|
* Copyright (c) 2015 espes
|
|
* Copyright (c) 2015 Jannik Vogel
|
|
* Copyright (c) 2018-2024 Matt Borgerson
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "hw/xbox/nv2a/pgraph/shaders.h"
|
|
#include "hw/xbox/nv2a/pgraph/util.h"
|
|
#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
|
|
#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
|
|
#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
|
|
#include "qemu/fast-hash.h"
|
|
#include "qemu/mstring.h"
|
|
#include "renderer.h"
|
|
#include <locale.h>
|
|
|
|
#define VSH_UBO_BINDING 0
|
|
#define PSH_UBO_BINDING 1
|
|
#define PSH_TEX_BINDING 2
|
|
|
|
/* Worst-case byte size of all inline uniform vertex attribute values (one
 * vec4 of floats per vertex attribute). Compared against
 * maxPushConstantsSize in pgraph_vk_init_shaders() to decide whether push
 * constants can carry these values. */
const size_t MAX_UNIFORM_ATTR_VALUES_SIZE = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
|
|
|
|
/* Create the descriptor pool backing the renderer's ring of descriptor
 * sets. Each set needs two uniform buffers (vertex + fragment) and one
 * combined image sampler per texture stage. */
static void create_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    const size_t set_count = ARRAY_SIZE(r->descriptor_sets);

    VkDescriptorPoolSize sizes[2];
    sizes[0] = (VkDescriptorPoolSize){
        .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .descriptorCount = 2 * set_count,
    };
    sizes[1] = (VkDescriptorPoolSize){
        .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
        .descriptorCount = NV2A_MAX_TEXTURES * set_count,
    };

    VkDescriptorPoolCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        /* Sets are explicitly freed in destroy_descriptor_sets(). */
        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        .maxSets = set_count,
        .poolSizeCount = ARRAY_SIZE(sizes),
        .pPoolSizes = sizes,
    };
    VK_CHECK(vkCreateDescriptorPool(r->device, &create_info, NULL,
                                    &r->descriptor_pool));
}
|
|
|
|
/* Tear down the descriptor pool and clear the stored handle. */
static void destroy_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *vk = pg->vk_renderer_state;

    vkDestroyDescriptorPool(vk->device, vk->descriptor_pool, NULL);
    vk->descriptor_pool = VK_NULL_HANDLE;
}
|
|
|
|
/* Build the single descriptor set layout shared by all sets in the ring:
 * vertex UBO, fragment UBO, then one combined image sampler per texture
 * stage at consecutive bindings starting at PSH_TEX_BINDING. */
static void create_descriptor_set_layout(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES] = {
        [0] = {
            .binding = VSH_UBO_BINDING,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
        },
        [1] = {
            .binding = PSH_UBO_BINDING,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
        },
    };
    for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
        bindings[2 + i] = (VkDescriptorSetLayoutBinding){
            .binding = PSH_TEX_BINDING + i,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
        };
    }

    VkDescriptorSetLayoutCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .bindingCount = ARRAY_SIZE(bindings),
        .pBindings = bindings,
    };
    VK_CHECK(vkCreateDescriptorSetLayout(r->device, &create_info, NULL,
                                         &r->descriptor_set_layout));
}
|
|
|
|
/* Tear down the shared descriptor set layout and clear the handle. */
static void destroy_descriptor_set_layout(PGRAPHState *pg)
{
    PGRAPHVkState *vk = pg->vk_renderer_state;

    vkDestroyDescriptorSetLayout(vk->device, vk->descriptor_set_layout, NULL);
    vk->descriptor_set_layout = VK_NULL_HANDLE;
}
|
|
|
|
/* Allocate the full ring of descriptor sets from the pool. Every set uses
 * the same layout created by create_descriptor_set_layout(). */
static void create_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)];
    for (size_t i = 0; i < ARRAY_SIZE(layouts); i++) {
        layouts[i] = r->descriptor_set_layout;
    }

    VkDescriptorSetAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = r->descriptor_pool,
        .descriptorSetCount = ARRAY_SIZE(r->descriptor_sets),
        .pSetLayouts = layouts,
    };
    VK_CHECK(
        vkAllocateDescriptorSets(r->device, &alloc_info, r->descriptor_sets));
}
|
|
|
|
/* Return all descriptor sets to the pool and clear the stored handles. */
static void destroy_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *vk = pg->vk_renderer_state;

    vkFreeDescriptorSets(vk->device, vk->descriptor_pool,
                         ARRAY_SIZE(vk->descriptor_sets),
                         vk->descriptor_sets);
    for (size_t i = 0; i < ARRAY_SIZE(vk->descriptor_sets); i++) {
        vk->descriptor_sets[i] = VK_NULL_HANDLE;
    }
}
|
|
|
|
/* Write the next descriptor set in the ring with the current shader's UBO
 * regions and texture bindings. Stages uniform data into the staging buffer
 * first when it changed. Advances r->descriptor_set_index; when the ring or
 * the staging buffer is exhausted, forces a pgraph_vk_finish() to recycle.
 *
 * Fix: the descriptor write count passed to vkUpdateDescriptorSets was the
 * magic number 6; it must track the descriptor_writes array size
 * (2 UBOs + NV2A_MAX_TEXTURES samplers) or it silently drops writes if the
 * texture count ever changes. */
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    /* First use of the staging buffer also forces a uniform upload. */
    bool need_uniform_write =
        r->uniforms_changed ||
        !r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset;

    if (!(r->shader_bindings_changed || r->texture_bindings_changed ||
          (r->descriptor_set_index == 0) || need_uniform_write)) {
        return; // Nothing changed
    }

    ShaderBinding *binding = r->shader_binding;
    ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
                                       &binding->fragment->uniforms };
    VkDeviceSize ubo_buffer_total_size = 0;
    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
        ubo_buffer_total_size += layouts[i]->total_size;
    }

    /* Does the staging buffer still have room for both UBO regions? */
    bool need_ubo_staging_buffer_reset =
        r->uniforms_changed &&
        !pgraph_vk_buffer_has_space_for(pg, BUFFER_UNIFORM_STAGING,
                                        ubo_buffer_total_size,
                                        r->device_props.limits.minUniformBufferOffsetAlignment);

    bool need_descriptor_write_reset =
        (r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets));

    if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) {
        /* Drain the GPU so the ring and staging buffer can be reused. */
        pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
        need_uniform_write = true;
    }

    VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES];

    assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets));

    if (need_uniform_write) {
        /* Append each stage's uniform block to the staging buffer and
         * remember its offset for the descriptor writes below. */
        for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
            void *data = layouts[i]->allocation;
            VkDeviceSize size = layouts[i]->total_size;
            r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer(
                pg, BUFFER_UNIFORM_STAGING, &data, &size, 1,
                r->device_props.limits.minUniformBufferOffsetAlignment);
        }

        r->uniforms_changed = false;
    }

    /* Descriptor writes 0..1: vertex and fragment UBO regions. */
    VkDescriptorBufferInfo ubo_buffer_infos[2];
    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
        ubo_buffer_infos[i] = (VkDescriptorBufferInfo){
            .buffer = r->storage_buffers[BUFFER_UNIFORM].buffer,
            .offset = r->uniform_buffer_offsets[i],
            .range = layouts[i]->total_size,
        };
        descriptor_writes[i] = (VkWriteDescriptorSet){
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = r->descriptor_sets[r->descriptor_set_index],
            .dstBinding = i == 0 ? VSH_UBO_BINDING : PSH_UBO_BINDING,
            .dstArrayElement = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .descriptorCount = 1,
            .pBufferInfo = &ubo_buffer_infos[i],
        };
    }

    /* Descriptor writes 2..: one combined image sampler per texture. */
    VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES];
    for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
        image_infos[i] = (VkDescriptorImageInfo){
            .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            .imageView = r->texture_bindings[i]->image_view,
            .sampler = r->texture_bindings[i]->sampler,
        };
        descriptor_writes[2 + i] = (VkWriteDescriptorSet){
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = r->descriptor_sets[r->descriptor_set_index],
            .dstBinding = PSH_TEX_BINDING + i,
            .dstArrayElement = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = 1,
            .pImageInfo = &image_infos[i],
        };
    }

    vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
                           descriptor_writes, 0, NULL);

    r->descriptor_set_index++;
}
|
|
|
|
/* Resolve the uniform index of every named constant this shader binding
 * uses, caching them in binding->uniform_locs so per-draw updates avoid
 * string lookups. uniform_index() presumably returns -1 for names absent
 * from the generated GLSL — callers below test against -1. */
static void update_shader_constant_locations(ShaderBinding *binding)
{
    char tmp[64];

    /* lookup fragment shader uniforms */
    /* Combiner factor constants: name is "c<j>_<i>" for stage i, slot j. */
    for (int i = 0; i < 9; i++) {
        for (int j = 0; j < 2; j++) {
            snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
            binding->uniform_locs.psh.psh_constant[i][j] =
                uniform_index(&binding->fragment->uniforms, tmp);
        }
    }
    binding->uniform_locs.psh.alpha_ref =
        uniform_index(&binding->fragment->uniforms, "alphaRef");
    binding->uniform_locs.psh.fog_color =
        uniform_index(&binding->fragment->uniforms, "fogColor");
    /* Bump-env uniforms exist only for stages 1..3 (i starts at 1). */
    for (int i = 1; i < NV2A_MAX_TEXTURES; i++) {
        snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
        binding->uniform_locs.psh.bump_mat[i] =
            uniform_index(&binding->fragment->uniforms, tmp);
        snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
        binding->uniform_locs.psh.bump_scale[i] =
            uniform_index(&binding->fragment->uniforms, tmp);
        snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
        binding->uniform_locs.psh.bump_offset[i] =
            uniform_index(&binding->fragment->uniforms, tmp);
    }

    for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
        snprintf(tmp, sizeof(tmp), "texScale%d", i);
        binding->uniform_locs.psh.tex_scale[i] =
            uniform_index(&binding->fragment->uniforms, tmp);
    }

    /* lookup vertex shader uniforms */
    binding->uniform_locs.vsh.vsh_constant =
        uniform_index(&binding->vertex->uniforms, "c");
    /* NOTE(review): surface_size lives under psh in uniform_locs but is
     * looked up (and later written) in the *vertex* uniforms — looks
     * intentional but the field placement is surprising; confirm. */
    binding->uniform_locs.psh.surface_size =
        uniform_index(&binding->vertex->uniforms, "surfaceSize");
    /* clipRange is present in both stages; track each separately. */
    binding->uniform_locs.vsh.clip_range =
        uniform_index(&binding->vertex->uniforms, "clipRange");
    binding->uniform_locs.psh.clip_range =
        uniform_index(&binding->fragment->uniforms, "clipRange");
    binding->uniform_locs.psh.depth_offset =
        uniform_index(&binding->fragment->uniforms, "depthOffset");
    binding->uniform_locs.vsh.fog_param =
        uniform_index(&binding->vertex->uniforms, "fogParam");

    /* Fixed-function lighting context arrays. */
    binding->uniform_locs.vsh.ltctxa =
        uniform_index(&binding->vertex->uniforms, "ltctxa");
    binding->uniform_locs.vsh.ltctxb =
        uniform_index(&binding->vertex->uniforms, "ltctxb");
    binding->uniform_locs.vsh.ltc1 =
        uniform_index(&binding->vertex->uniforms, "ltc1");

    /* Per-light parameters (infinite and local light models). */
    for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
        snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
        binding->uniform_locs.vsh.light_infinite_half_vector[i] =
            uniform_index(&binding->vertex->uniforms, tmp);
        snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
        binding->uniform_locs.vsh.light_infinite_direction[i] =
            uniform_index(&binding->vertex->uniforms, tmp);

        snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
        binding->uniform_locs.vsh.light_local_position[i] =
            uniform_index(&binding->vertex->uniforms, tmp);
        snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
        binding->uniform_locs.vsh.light_local_attenuation[i] =
            uniform_index(&binding->vertex->uniforms, tmp);
    }

    binding->uniform_locs.psh.clip_region =
        uniform_index(&binding->fragment->uniforms, "clipRegion");

    binding->uniform_locs.vsh.point_params =
        uniform_index(&binding->vertex->uniforms, "pointParams");

    binding->uniform_locs.vsh.material_alpha =
        uniform_index(&binding->vertex->uniforms, "material_alpha");

    binding->uniform_locs.psh.color_key =
        uniform_index(&binding->fragment->uniforms, "colorKey");

    binding->uniform_locs.psh.color_key_mask =
        uniform_index(&binding->fragment->uniforms, "colorKeyMask");

    /* Used only when push constants can't hold inline attribute values. */
    binding->uniform_locs.vsh.uniform_attrs =
        uniform_index(&binding->vertex->uniforms, "inlineValue");

    binding->uniform_locs.vsh.specular_power =
        uniform_index(&binding->vertex->uniforms, "specularPower");
}
|
|
|
|
/* LRU callback: initialize a freshly-claimed cache slot. Copies the key
 * state in; actual shader compilation is deferred until first use. */
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
    ShaderBinding *binding = container_of(node, ShaderBinding, node);

    memcpy(&binding->state, state, sizeof(ShaderState));
    binding->initialized = false;
}
|
|
|
|
/* LRU callback: release the compiled shader modules of an evicted entry.
 * The geometry stage may be NULL (not every state needs one). */
static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
{
    PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache);
    ShaderBinding *binding = container_of(node, ShaderBinding, node);

    ShaderModuleInfo *stages[3];
    stages[0] = binding->geometry;
    stages[1] = binding->vertex;
    stages[2] = binding->fragment;

    for (size_t i = 0; i < ARRAY_SIZE(stages); i++) {
        if (stages[i] != NULL) {
            pgraph_vk_destroy_shader_module(r, stages[i]);
        }
    }

    binding->initialized = false;
}
|
|
|
|
/* LRU callback: returns true when the node's cached state differs from the
 * lookup key (memcmp semantics: nonzero means mismatch). */
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
    ShaderBinding *binding = container_of(node, ShaderBinding, node);

    return memcmp(&binding->state, key, sizeof(ShaderState)) != 0;
}
|
|
|
|
/* Initialize the fixed-capacity LRU cache of compiled shader bindings.
 *
 * Fix: the free-list loop used an `int` index compared against a `size_t`
 * capacity, a signed/unsigned comparison (-Wsign-compare); use size_t. */
static void shader_cache_init(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    const size_t shader_cache_size = 1024;
    lru_init(&r->shader_cache);
    /* g_malloc_n aborts on OOM; the assert documents the invariant. */
    r->shader_cache_entries = g_malloc_n(shader_cache_size, sizeof(ShaderBinding));
    assert(r->shader_cache_entries != NULL);
    for (size_t i = 0; i < shader_cache_size; i++) {
        lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node);
    }

    r->shader_cache.init_node = shader_cache_entry_init;
    r->shader_cache.compare_nodes = shader_cache_entry_compare;
    r->shader_cache.post_node_evict = shader_cache_entry_post_evict;
}
|
|
|
|
/* Flush the shader cache (evicting, and thus destroying, every entry via
 * shader_cache_entry_post_evict) and free the backing entry array. */
static void shader_cache_finalize(PGRAPHState *pg)
{
    PGRAPHVkState *vk = pg->vk_renderer_state;

    lru_flush(&vk->shader_cache);
    g_free(vk->shader_cache_entries);
    vk->shader_cache_entries = NULL;
}
|
|
|
|
/* Look up (or generate and compile) the shader binding for the given state.
 * The LRU cache is keyed by a hash of the complete ShaderState; on a miss,
 * GLSL is generated for the geometry (optional), vertex, and fragment
 * stages and compiled into Vulkan shader modules. Returns a cache-owned
 * binding — callers must not free it. */
static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint64_t hash = fast_hash((void *)state, sizeof(*state));
    LruNode *node = lru_lookup(&r->shader_cache, hash, state);
    ShaderBinding *snode = container_of(node, ShaderBinding, node);

    NV2A_VK_DPRINTF("shader state hash: %016" PRIx64 " %p", hash, snode);

    if (!snode->initialized) {
        NV2A_VK_DPRINTF("cache miss");
        nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);

        /* setlocale(..., NULL) returns implementation-owned storage that a
         * later setlocale call may clobber — copy it before switching. */
        char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
        if (previous_numeric_locale) {
            previous_numeric_locale = g_strdup(previous_numeric_locale);
        }

        /* Ensure numeric values are printed with '.' radix, no grouping */
        setlocale(LC_NUMERIC, "C");

        /* Geometry stage is optional; NULL code means it isn't needed for
         * this primitive/polygon-mode combination. */
        MString *geometry_shader_code = pgraph_gen_geom_glsl(
            state->vsh.polygon_front_mode, state->vsh.polygon_back_mode,
            state->vsh.primitive_mode, state->vsh.smooth_shading, true);
        if (geometry_shader_code) {
            NV2A_VK_DPRINTF("geometry shader: \n%s",
                            mstring_get_str(geometry_shader_code));
            snode->geometry = pgraph_vk_create_shader_module_from_glsl(
                r, VK_SHADER_STAGE_GEOMETRY_BIT,
                mstring_get_str(geometry_shader_code));
            mstring_unref(geometry_shader_code);
        } else {
            snode->geometry = NULL;
        }

        /* Vertex outputs are prefixed only when a geometry stage sits
         * between the vertex and fragment stages. */
        MString *vertex_shader_code = pgraph_gen_vsh_glsl(
            &state->vsh, (GenVshGlslOptions){
                .vulkan = true,
                .prefix_outputs = geometry_shader_code != NULL,
                .use_push_constants_for_uniform_attrs =
                    r->use_push_constants_for_uniform_attrs,
                .ubo_binding = VSH_UBO_BINDING,
            });
        NV2A_VK_DPRINTF("vertex shader: \n%s",
                        mstring_get_str(vertex_shader_code));
        snode->vertex = pgraph_vk_create_shader_module_from_glsl(
            r, VK_SHADER_STAGE_VERTEX_BIT,
            mstring_get_str(vertex_shader_code));
        mstring_unref(vertex_shader_code);

        MString *fragment_shader_code = pgraph_gen_psh_glsl(
            state->psh, (GenPshGlslOptions){
                .vulkan = true,
                .ubo_binding = PSH_UBO_BINDING,
                .tex_binding = PSH_TEX_BINDING,
            });
        NV2A_VK_DPRINTF("fragment shader: \n%s",
                        mstring_get_str(fragment_shader_code));
        snode->fragment = pgraph_vk_create_shader_module_from_glsl(
            r, VK_SHADER_STAGE_FRAGMENT_BIT,
            mstring_get_str(fragment_shader_code));
        mstring_unref(fragment_shader_code);

        /* Restore the caller's numeric locale. */
        if (previous_numeric_locale) {
            setlocale(LC_NUMERIC, previous_numeric_locale);
            g_free(previous_numeric_locale);
        }

        update_shader_constant_locations(snode);

        snode->initialized = true;
    }

    return snode;
}
|
|
|
|
/* Gather the current inline vertex attribute values and, if any are
 * active, upload them to the vertex stage's "inlineValue" uniform. Used
 * only when push constants cannot carry these values. */
static void update_uniform_attr_values(PGRAPHState *pg, ShaderBinding *binding)
{
    float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
    int count = 0;

    pgraph_get_inline_values(pg, binding->state.vsh.uniform_attrs, values,
                             &count);

    if (count <= 0) {
        return;
    }

    /* Each active attribute contributes one vec4 (4 floats). */
    uniform1fv(&binding->vertex->uniforms,
               binding->uniform_locs.vsh.uniform_attrs, count * 4,
               &values[0][0]);
}
|
|
|
|
// FIXME: Move to common
/* Push all PGRAPH register state into the binding's CPU-side uniform
 * buffers (vertex and fragment stages). Each uniform is written only when
 * its cached location is valid (!= -1). The buffers are hashed afterwards
 * by pgraph_vk_update_shader_uniforms() to detect actual changes.
 *
 * NOTE(review): several places reinterpret raw register bits as float via
 * `*(float *)&u32` — a strict-aliasing type pun; matches the file's
 * existing convention but memcpy would be the conforming form. */
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    ShaderState *state = &binding->state;

    /* update combiner constants */
    for (int i = 0; i < 9; i++) {
        uint32_t constant[2];
        if (i == 8) {
            /* final combiner */
            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
        } else {
            /* Stage combiner factors are packed ARGB, 4 bytes apart. */
            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
        }

        for (int j = 0; j < 2; j++) {
            GLint loc = binding->uniform_locs.psh.psh_constant[i][j];
            if (loc != -1) {
                float value[4];
                pgraph_argb_pack32_to_rgba_float(constant[j], value);
                uniform1fv(&binding->fragment->uniforms, loc, 4, value);
            }
        }
    }
    if (binding->uniform_locs.psh.alpha_ref != -1) {
        int alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
                                 NV_PGRAPH_CONTROL_0_ALPHAREF);
        uniform1i(&binding->fragment->uniforms,
                  binding->uniform_locs.psh.alpha_ref, alpha_ref);
    }
    if (binding->uniform_locs.psh.color_key != -1) {
        uint32_t color_key_colors[4] = {
            pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR0),
            pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR1),
            pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR2),
            pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR3),
        };
        uniform1uiv(&binding->fragment->uniforms,
                    binding->uniform_locs.psh.color_key, 4, color_key_colors);
    }
    uint32_t color_key_mask[4] = { 0 };

    /* For each texture stage */
    for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
        int loc;

        /* Bump luminance only during stages 1 - 3 */
        if (i > 0) {
            loc = binding->uniform_locs.psh.bump_mat[i];
            if (loc != -1) {
                /* Bump env 2x2 matrix: registers are per-stage, indexed
                 * from stage 1 (hence i - 1). */
                uint32_t m_u32[4];
                m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
                m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
                m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
                m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
                float m[4];
                m[0] = *(float *)&m_u32[0];
                m[1] = *(float *)&m_u32[1];
                m[2] = *(float *)&m_u32[2];
                m[3] = *(float *)&m_u32[3];
                uniformMatrix2fv(&binding->fragment->uniforms, loc, m);
            }
            loc = binding->uniform_locs.psh.bump_scale[i];
            if (loc != -1) {
                uint32_t v =
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
                uniform1f(&binding->fragment->uniforms, loc, *(float *)&v);
            }
            loc = binding->uniform_locs.psh.bump_offset[i];
            if (loc != -1) {
                uint32_t v =
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
                uniform1f(&binding->fragment->uniforms, loc, *(float *)&v);
            }
        }

        loc = binding->uniform_locs.psh.tex_scale[i];
        if (loc != -1) {
            assert(pg->vk_renderer_state->texture_bindings[i] != NULL);
            float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale;
            /* Only linear formats keep the texture's scale factor. */
            BasicColorFormatInfo f_basic =
                kelvin_color_format_info_map[pg->vk_renderer_state
                                                 ->texture_bindings[i]
                                                 ->key.state.color_format];
            if (!f_basic.linear) {
                scale = 1.0;
            }
            uniform1f(&binding->fragment->uniforms, loc, scale);
        }

        color_key_mask[i] = pgraph_get_color_key_mask_for_texture(pg, i);
    }

    if (binding->uniform_locs.psh.color_key_mask != -1) {
        uniform1uiv(&binding->fragment->uniforms,
                    binding->uniform_locs.psh.color_key_mask, 4,
                    color_key_mask);
    }

    if (binding->uniform_locs.psh.fog_color != -1) {
        /* Unpack 8-bit channels to normalized floats. */
        uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
        uniform4f(&binding->fragment->uniforms,
                  binding->uniform_locs.psh.fog_color,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
    }
    if (binding->uniform_locs.vsh.fog_param != -1) {
        uint32_t v[2];
        v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
        v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
        uniform2f(&binding->vertex->uniforms,
                  binding->uniform_locs.vsh.fog_param, *(float *)&v[0],
                  *(float *)&v[1]);
    }

    /* Maximum depth value for the bound zeta surface format; used below
     * for the clip-range uniforms. Unknown formats abort. */
    float zmax;
    switch (pg->surface_shape.zeta_format) {
    case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
        zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
        break;
    case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
        zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
        break;
    default:
        assert(0);
    }

    if (binding->state.vsh.is_fixed_function) {
        /* update lighting constants */
        struct {
            uint32_t *v;
            int locs;
            size_t len;
        } lighting_arrays[] = {
            { &pg->ltctxa[0][0], binding->uniform_locs.vsh.ltctxa,
              NV2A_LTCTXA_COUNT },
            { &pg->ltctxb[0][0], binding->uniform_locs.vsh.ltctxb,
              NV2A_LTCTXB_COUNT },
            { &pg->ltc1[0][0], binding->uniform_locs.vsh.ltc1,
              NV2A_LTC1_COUNT },
        };

        /* Raw bits are uploaded via the integer path (uniform1iv); the
         * shader reinterprets them. */
        for (int i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
            uniform1iv(&binding->vertex->uniforms, lighting_arrays[i].locs,
                       lighting_arrays[i].len * 4,
                       (void *)lighting_arrays[i].v);
        }

        for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
            int loc = binding->uniform_locs.vsh.light_infinite_half_vector[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_infinite_half_vector[i]);
            }
            loc = binding->uniform_locs.vsh.light_infinite_direction[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_infinite_direction[i]);
            }

            loc = binding->uniform_locs.vsh.light_local_position[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_local_position[i]);
            }
            loc = binding->uniform_locs.vsh.light_local_attenuation[i];
            if (loc != -1) {
                uniform1fv(&binding->vertex->uniforms, loc, 3,
                           pg->light_local_attenuation[i]);
            }
        }

        if (binding->uniform_locs.vsh.specular_power != -1) {
            uniform1f(&binding->vertex->uniforms,
                      binding->uniform_locs.vsh.specular_power,
                      pg->specular_power);
        }
    }

    /* update vertex program constants */
    uniform1iv(&binding->vertex->uniforms,
               binding->uniform_locs.vsh.vsh_constant,
               NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants);

    if (binding->uniform_locs.psh.surface_size != -1) {
        unsigned int aa_width = 1, aa_height = 1;
        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
        uniform2f(&binding->vertex->uniforms,
                  binding->uniform_locs.psh.surface_size,
                  pg->surface_binding_dim.width / aa_width,
                  pg->surface_binding_dim.height / aa_height);
    }

    if (binding->uniform_locs.vsh.clip_range != -1 ||
        binding->uniform_locs.psh.clip_range != -1) {
        uint32_t v[2];
        v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
        v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
        float zclip_min = *(float *)&v[0];
        float zclip_max = *(float *)&v[1];

        if (binding->uniform_locs.vsh.clip_range != -1) {
            uniform4f(&binding->vertex->uniforms,
                      binding->uniform_locs.vsh.clip_range, 0, zmax, zclip_min,
                      zclip_max);
        }
        if (binding->uniform_locs.psh.clip_range != -1) {
            uniform4f(&binding->fragment->uniforms,
                      binding->uniform_locs.psh.clip_range, 0, zmax, zclip_min,
                      zclip_max);
        }
    }

    if (binding->uniform_locs.psh.depth_offset != -1) {
        float zbias = 0.0f;

        /* Polygon offset applies only when enabled for fill/line/point. */
        if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
            (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
             NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
             NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
            uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
            zbias = *(float *)&zbias_u32;

            if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 &&
                (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
                 NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) {
                /* TODO: emulate zfactor when z_perspective true, i.e.
                 * w-buffering. Perhaps calculate an additional offset based on
                 * triangle orientation in geometry shader and pass the result
                 * to fragment shader and add it to gl_FragDepth as well.
                 */
                NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering");
            }
        }

        uniform1f(&binding->fragment->uniforms,
                  binding->uniform_locs.psh.depth_offset, zbias);
    }

    /* Clipping regions */
    unsigned int max_gl_width = pg->surface_binding_dim.width;
    unsigned int max_gl_height = pg->surface_binding_dim.height;
    pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);

    uint32_t clip_regions[8][4];

    /* Eight window clip rectangles, scaled to render resolution. */
    for (int i = 0; i < 8; i++) {
        uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
        unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
        unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
        uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
        unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
        unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
        pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
        pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);

        pgraph_apply_scaling_factor(pg, &x_min, &y_min);
        pgraph_apply_scaling_factor(pg, &x_max, &y_max);

        clip_regions[i][0] = x_min;
        clip_regions[i][1] = y_min;
        clip_regions[i][2] = x_max;
        clip_regions[i][3] = y_max;
    }
    uniform1iv(&binding->fragment->uniforms,
               binding->uniform_locs.psh.clip_region, 8 * 4,
               (void *)clip_regions);

    if (binding->uniform_locs.vsh.point_params != -1) {
        uniform1iv(&binding->vertex->uniforms,
                   binding->uniform_locs.vsh.point_params,
                   ARRAY_SIZE(pg->point_params), (void *)pg->point_params);
    }

    if (binding->uniform_locs.vsh.material_alpha != -1) {
        uniform1f(&binding->vertex->uniforms,
                  binding->uniform_locs.vsh.material_alpha, pg->material_alpha);
    }

    /* Inline attribute values go through a UBO only when push constants
     * can't hold them (see pgraph_vk_init_shaders). */
    if (!r->use_push_constants_for_uniform_attrs && state->vsh.uniform_attrs) {
        update_uniform_attr_values(pg, binding);
    }
}
|
|
|
|
// Quickly check PGRAPH state to see if any registers have changed that
|
|
// necessitate a full shader state inspection.
|
|
static bool check_shaders_dirty(PGRAPHState *pg)
|
|
{
|
|
PGRAPHVkState *r = pg->vk_renderer_state;
|
|
|
|
if (!r->shader_binding) {
|
|
return true;
|
|
}
|
|
if (pg->program_data_dirty) {
|
|
return true;
|
|
}
|
|
|
|
int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
|
|
for (int i = 0; i < num_stages; i++) {
|
|
if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4) ||
|
|
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4) ||
|
|
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4) ||
|
|
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4)) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
unsigned int regs[] = {
|
|
NV_PGRAPH_COMBINECTL,
|
|
NV_PGRAPH_COMBINESPECFOG0,
|
|
NV_PGRAPH_COMBINESPECFOG1,
|
|
NV_PGRAPH_CONTROL_0,
|
|
NV_PGRAPH_CONTROL_3,
|
|
NV_PGRAPH_CSV0_C,
|
|
NV_PGRAPH_CSV0_D,
|
|
NV_PGRAPH_CSV1_A,
|
|
NV_PGRAPH_CSV1_B,
|
|
NV_PGRAPH_POINTSIZE,
|
|
NV_PGRAPH_SETUPRASTER,
|
|
NV_PGRAPH_SHADERCLIPMODE,
|
|
NV_PGRAPH_SHADERCTL,
|
|
NV_PGRAPH_SHADERPROG,
|
|
NV_PGRAPH_SHADOWCTL,
|
|
NV_PGRAPH_ZCOMPRESSOCCLUDE,
|
|
};
|
|
for (int i = 0; i < ARRAY_SIZE(regs); i++) {
|
|
if (pgraph_is_reg_dirty(pg, regs[i])) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
ShaderState *state = &r->shader_binding->state;
|
|
if (pg->uniform_attrs != state->vsh.uniform_attrs ||
|
|
pg->swizzle_attrs != state->vsh.swizzle_attrs ||
|
|
pg->compressed_attrs != state->vsh.compressed_attrs ||
|
|
pg->primitive_mode != state->vsh.primitive_mode ||
|
|
pg->surface_scale_factor != state->vsh.surface_scale_factor) {
|
|
return true;
|
|
}
|
|
|
|
// Textures
|
|
for (int i = 0; i < 4; i++) {
|
|
if (pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + i * 4) ||
|
|
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + i * 4) ||
|
|
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + i * 4)) {
|
|
return true;
|
|
}
|
|
|
|
if (pg->vk_renderer_state->shader_binding->state.vsh
|
|
.is_fixed_function &&
|
|
(pg->texture_matrix_enable[i] !=
|
|
pg->vk_renderer_state->shader_binding->state.vsh.fixed_function
|
|
.texture_matrix_enable[i])) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Ensure r->shader_binding matches current PGRAPH state, regenerating
 * shaders when a quick dirty check fires and the derived state actually
 * differs, then refresh the uniform buffers. */
void pgraph_vk_bind_shaders(PGRAPHState *pg)
{
    NV2A_VK_DGROUP_BEGIN("%s", __func__);

    PGRAPHVkState *r = pg->vk_renderer_state;

    r->shader_bindings_changed = false;

    if (check_shaders_dirty(pg)) {
        ShaderState new_state = pgraph_get_shader_state(pg);
        bool state_differs =
            !r->shader_binding ||
            memcmp(&r->shader_binding->state, &new_state,
                   sizeof(ShaderState)) != 0;
        if (state_differs) {
            r->shader_binding = gen_shaders(pg, &new_state);
            r->shader_bindings_changed = true;
        }
    }

    // FIXME: Use dirty bits
    pgraph_vk_update_shader_uniforms(pg);

    NV2A_VK_DGROUP_END();
}
|
|
|
|
/* Refresh the CPU-side uniform buffers for the bound shaders and set
 * r->uniforms_changed by comparing a hash of each stage's uniform block
 * against the last uploaded one. */
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    NV2A_VK_DGROUP_BEGIN("%s", __func__);
    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);

    assert(r->shader_binding);
    ShaderBinding *binding = r->shader_binding;

    shader_update_constants(pg, binding);

    /* Hash each stage's uniform allocation to cheaply detect changes. */
    ShaderUniformLayout *stage_layouts[] = { &binding->vertex->uniforms,
                                             &binding->fragment->uniforms };
    for (int i = 0; i < ARRAY_SIZE(stage_layouts); i++) {
        uint64_t hash = fast_hash(stage_layouts[i]->allocation,
                                  stage_layouts[i]->total_size);
        if (hash != r->uniform_buffer_hashes[i]) {
            r->uniforms_changed = true;
        }
        r->uniform_buffer_hashes[i] = hash;
    }

    nv2a_profile_inc_counter(r->uniforms_changed ?
                                 NV2A_PROF_SHADER_UBO_DIRTY :
                                 NV2A_PROF_SHADER_UBO_NOTDIRTY);

    NV2A_VK_DGROUP_END();
}
|
|
|
|
/* One-time shader subsystem setup: GLSL compiler, descriptor machinery,
 * and the shader LRU cache. Also decides whether inline uniform attribute
 * values fit in push constants on this device. */
void pgraph_vk_init_shaders(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    pgraph_vk_init_glsl_compiler();
    create_descriptor_pool(pg);
    create_descriptor_set_layout(pg);
    create_descriptor_sets(pg);
    shader_cache_init(pg);

    bool push_constants_fit =
        r->device_props.limits.maxPushConstantsSize >=
        MAX_UNIFORM_ATTR_VALUES_SIZE;
    r->use_push_constants_for_uniform_attrs = push_constants_fit;
}
|
|
|
|
/* Tear down the shader subsystem in reverse order of initialization. */
void pgraph_vk_finalize_shaders(PGRAPHState *pg)
{
    shader_cache_finalize(pg);
    destroy_descriptor_sets(pg);
    destroy_descriptor_set_layout(pg);
    destroy_descriptor_pool(pg);
    pgraph_vk_finalize_glsl_compiler();
}
|