xemu/hw/xbox/nv2a/pgraph/vk/shaders.c

922 lines
34 KiB
C

/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* Based on GL implementation:
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "hw/xbox/nv2a/pgraph/util.h"
#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
#include "qemu/fast-hash.h"
#include "qemu/mstring.h"
#include "renderer.h"
#include <locale.h>
/*
 * Descriptor set binding slots used by this file: one uniform buffer for the
 * vertex stage, one for the fragment stage, then the texture samplers, which
 * occupy consecutive bindings starting at PSH_TEX_BINDING.
 */
#define VSH_UBO_BINDING 0
#define PSH_UBO_BINDING 1
#define PSH_TEX_BINDING 2
/*
 * Size in bytes of four floats per vertex shader attribute. Compared against
 * the device's maxPushConstantsSize in pgraph_vk_init_shaders() to decide
 * whether uniform attribute values can be passed as push constants.
 */
const size_t MAX_UNIFORM_ATTR_VALUES_SIZE = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
/*
 * Create the descriptor pool that backs every entry of r->descriptor_sets.
 *
 * Each set consumes two uniform buffer descriptors and NV2A_MAX_TEXTURES
 * combined image sampler descriptors, so the pool is sized for that many
 * descriptors multiplied by the number of sets.
 */
static void create_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    size_t set_count = ARRAY_SIZE(r->descriptor_sets);

    VkDescriptorPoolSize pool_sizes[] = {
        { .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
          .descriptorCount = 2 * set_count },
        { .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
          .descriptorCount = NV2A_MAX_TEXTURES * set_count },
    };

    /* FREE_DESCRIPTOR_SET_BIT is required because destroy_descriptor_sets()
     * hands the sets back individually with vkFreeDescriptorSets(). */
    VkDescriptorPoolCreateInfo pool_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
        .maxSets = ARRAY_SIZE(r->descriptor_sets),
        .poolSizeCount = ARRAY_SIZE(pool_sizes),
        .pPoolSizes = pool_sizes,
    };

    VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
                                    &r->descriptor_pool));
}
/* Destroy the descriptor pool and clear the stored handle. */
static void destroy_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *renderer = pg->vk_renderer_state;

    vkDestroyDescriptorPool(renderer->device, renderer->descriptor_pool, NULL);
    renderer->descriptor_pool = VK_NULL_HANDLE;
}
/*
 * Create the single descriptor set layout shared by all descriptor sets:
 * a vertex-stage UBO, a fragment-stage UBO, and one fragment-stage combined
 * image sampler per texture stage at consecutive binding numbers.
 */
static void create_descriptor_set_layout(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES];
    int slot = 0;

    /* Vertex shader uniform block. */
    bindings[slot++] = (VkDescriptorSetLayoutBinding){
        .binding = VSH_UBO_BINDING,
        .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .descriptorCount = 1,
        .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
    };

    /* Fragment shader uniform block. */
    bindings[slot++] = (VkDescriptorSetLayoutBinding){
        .binding = PSH_UBO_BINDING,
        .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
        .descriptorCount = 1,
        .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
    };

    /* One sampler per texture stage. */
    for (int stage = 0; stage < NV2A_MAX_TEXTURES; stage++) {
        bindings[slot++] = (VkDescriptorSetLayoutBinding){
            .binding = PSH_TEX_BINDING + stage,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = 1,
            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
        };
    }

    VkDescriptorSetLayoutCreateInfo layout_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pBindings = bindings,
        .bindingCount = ARRAY_SIZE(bindings),
    };

    VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
                                         &r->descriptor_set_layout));
}
/* Destroy the shared descriptor set layout and clear the stored handle. */
static void destroy_descriptor_set_layout(PGRAPHState *pg)
{
    PGRAPHVkState *renderer = pg->vk_renderer_state;

    vkDestroyDescriptorSetLayout(renderer->device,
                                 renderer->descriptor_set_layout, NULL);
    renderer->descriptor_set_layout = VK_NULL_HANDLE;
}
/*
 * Allocate every entry of r->descriptor_sets from the descriptor pool in a
 * single call. All sets use the same layout, so the layout handle is simply
 * repeated once per set.
 */
static void create_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)];

    int idx = 0;
    while (idx < ARRAY_SIZE(layouts)) {
        layouts[idx] = r->descriptor_set_layout;
        idx++;
    }

    VkDescriptorSetAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
        .descriptorPool = r->descriptor_pool,
        .pSetLayouts = layouts,
        .descriptorSetCount = ARRAY_SIZE(r->descriptor_sets),
    };

    VK_CHECK(
        vkAllocateDescriptorSets(r->device, &alloc_info, r->descriptor_sets));
}
/*
 * Return all descriptor sets to the pool and reset every stored handle to
 * VK_NULL_HANDLE.
 */
static void destroy_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *renderer = pg->vk_renderer_state;

    vkFreeDescriptorSets(renderer->device, renderer->descriptor_pool,
                         ARRAY_SIZE(renderer->descriptor_sets),
                         renderer->descriptor_sets);

    int idx = 0;
    while (idx < ARRAY_SIZE(renderer->descriptor_sets)) {
        renderer->descriptor_sets[idx] = VK_NULL_HANDLE;
        idx++;
    }
}
/*
 * Fill the next unused descriptor set with the current uniform buffer ranges
 * and texture bindings, then advance r->descriptor_set_index.
 *
 * Nothing is done when the shader binding, the texture bindings and the
 * uniform data are all unchanged and at least one descriptor set has already
 * been written.
 *
 * If either the descriptor sets or the uniform staging buffer are exhausted,
 * pgraph_vk_finish() is called first and the uniform data is re-appended.
 */
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    /* A staging offset of zero means nothing has been appended to the staging
     * buffer yet, so the uniforms must be written even if unchanged. */
    bool need_uniform_write =
        r->uniforms_changed ||
        !r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset;

    if (!(r->shader_bindings_changed || r->texture_bindings_changed ||
          (r->descriptor_set_index == 0) || need_uniform_write)) {
        return; // Nothing changed
    }

    ShaderBinding *binding = r->shader_binding;
    ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
                                       &binding->fragment->uniforms };

    /* Combined size of both uniform blocks, used for the space check. */
    VkDeviceSize ubo_buffer_total_size = 0;
    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
        ubo_buffer_total_size += layouts[i]->total_size;
    }

    bool need_ubo_staging_buffer_reset =
        r->uniforms_changed &&
        !pgraph_vk_buffer_has_space_for(
            pg, BUFFER_UNIFORM_STAGING, ubo_buffer_total_size,
            r->device_props.limits.minUniformBufferOffsetAlignment);

    bool need_descriptor_write_reset =
        (r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets));

    if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) {
        /* NOTE(review): the assert below relies on pgraph_vk_finish() bringing
         * descriptor_set_index back into range - presumably it resets both the
         * index and the staging buffer; confirm against its definition. */
        pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
        need_uniform_write = true;
    }

    VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES];

    assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets));

    if (need_uniform_write) {
        /* Append each stage's uniform block and remember where it landed. */
        for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
            void *data = layouts[i]->allocation;
            VkDeviceSize size = layouts[i]->total_size;
            r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer(
                pg, BUFFER_UNIFORM_STAGING, &data, &size, 1,
                r->device_props.limits.minUniformBufferOffsetAlignment);
        }

        r->uniforms_changed = false;
    }

    /* Writes [0] and [1]: vertex and fragment uniform buffers. */
    VkDescriptorBufferInfo ubo_buffer_infos[2];
    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
        ubo_buffer_infos[i] = (VkDescriptorBufferInfo){
            .buffer = r->storage_buffers[BUFFER_UNIFORM].buffer,
            .offset = r->uniform_buffer_offsets[i],
            .range = layouts[i]->total_size,
        };
        descriptor_writes[i] = (VkWriteDescriptorSet){
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = r->descriptor_sets[r->descriptor_set_index],
            .dstBinding = i == 0 ? VSH_UBO_BINDING : PSH_UBO_BINDING,
            .dstArrayElement = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
            .descriptorCount = 1,
            .pBufferInfo = &ubo_buffer_infos[i],
        };
    }

    /* Writes [2..]: one combined image sampler per texture stage. */
    VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES];
    for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
        image_infos[i] = (VkDescriptorImageInfo){
            .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
            .imageView = r->texture_bindings[i]->image_view,
            .sampler = r->texture_bindings[i]->sampler,
        };
        descriptor_writes[2 + i] = (VkWriteDescriptorSet){
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstSet = r->descriptor_sets[r->descriptor_set_index],
            .dstBinding = PSH_TEX_BINDING + i,
            .dstArrayElement = 0,
            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
            .descriptorCount = 1,
            .pImageInfo = &image_infos[i],
        };
    }

    /* Derive the write count from the array instead of hard-coding it, so it
     * cannot drift from the 2 + NV2A_MAX_TEXTURES entries filled in above. */
    vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
                           descriptor_writes, 0, NULL);

    r->descriptor_set_index++;
}
/*
 * Resolve every uniform name used by this file to its index in the vertex or
 * fragment uniform layout of `binding` and cache the result in
 * binding->uniform_locs. Callers compare the cached values against -1 to
 * detect uniforms that the generated shader does not declare.
 */
static void update_shader_constant_locations(ShaderBinding *binding)
{
    char name[64];

    /* ---- fragment shader uniforms ---- */

    /* Combiner constants are named "c<j>_<stage>" with j in {0, 1} and
     * stage in 0..8. */
    for (int stage = 0; stage < 9; stage++) {
        for (int j = 0; j < 2; j++) {
            snprintf(name, sizeof(name), "c%d_%d", j, stage);
            binding->uniform_locs.psh.psh_constant[stage][j] =
                uniform_index(&binding->fragment->uniforms, name);
        }
    }

    binding->uniform_locs.psh.alpha_ref =
        uniform_index(&binding->fragment->uniforms, "alphaRef");
    binding->uniform_locs.psh.fog_color =
        uniform_index(&binding->fragment->uniforms, "fogColor");

    /* Per texture stage: bump parameters exist for stages 1 and up only
     * (index 0 of the bump arrays is never written), texScale for all. */
    for (int stage = 0; stage < NV2A_MAX_TEXTURES; stage++) {
        if (stage >= 1) {
            snprintf(name, sizeof(name), "bumpMat%d", stage);
            binding->uniform_locs.psh.bump_mat[stage] =
                uniform_index(&binding->fragment->uniforms, name);

            snprintf(name, sizeof(name), "bumpScale%d", stage);
            binding->uniform_locs.psh.bump_scale[stage] =
                uniform_index(&binding->fragment->uniforms, name);

            snprintf(name, sizeof(name), "bumpOffset%d", stage);
            binding->uniform_locs.psh.bump_offset[stage] =
                uniform_index(&binding->fragment->uniforms, name);
        }
    }
    for (int stage = 0; stage < NV2A_MAX_TEXTURES; stage++) {
        snprintf(name, sizeof(name), "texScale%d", stage);
        binding->uniform_locs.psh.tex_scale[stage] =
            uniform_index(&binding->fragment->uniforms, name);
    }

    binding->uniform_locs.psh.clip_range =
        uniform_index(&binding->fragment->uniforms, "clipRange");
    binding->uniform_locs.psh.depth_offset =
        uniform_index(&binding->fragment->uniforms, "depthOffset");
    binding->uniform_locs.psh.clip_region =
        uniform_index(&binding->fragment->uniforms, "clipRegion");
    binding->uniform_locs.psh.color_key =
        uniform_index(&binding->fragment->uniforms, "colorKey");
    binding->uniform_locs.psh.color_key_mask =
        uniform_index(&binding->fragment->uniforms, "colorKeyMask");

    /* ---- vertex shader uniforms ---- */

    binding->uniform_locs.vsh.vsh_constant =
        uniform_index(&binding->vertex->uniforms, "c");

    /* NOTE(review): "surfaceSize" is looked up in the vertex layout but stored
     * in the psh group. shader_update_constants() reads it the same way, so
     * the two agree; confirm the grouping is intentional. */
    binding->uniform_locs.psh.surface_size =
        uniform_index(&binding->vertex->uniforms, "surfaceSize");

    binding->uniform_locs.vsh.clip_range =
        uniform_index(&binding->vertex->uniforms, "clipRange");
    binding->uniform_locs.vsh.fog_param =
        uniform_index(&binding->vertex->uniforms, "fogParam");

    binding->uniform_locs.vsh.ltctxa =
        uniform_index(&binding->vertex->uniforms, "ltctxa");
    binding->uniform_locs.vsh.ltctxb =
        uniform_index(&binding->vertex->uniforms, "ltctxb");
    binding->uniform_locs.vsh.ltc1 =
        uniform_index(&binding->vertex->uniforms, "ltc1");

    for (int light = 0; light < NV2A_MAX_LIGHTS; light++) {
        snprintf(name, sizeof(name), "lightInfiniteHalfVector%d", light);
        binding->uniform_locs.vsh.light_infinite_half_vector[light] =
            uniform_index(&binding->vertex->uniforms, name);

        snprintf(name, sizeof(name), "lightInfiniteDirection%d", light);
        binding->uniform_locs.vsh.light_infinite_direction[light] =
            uniform_index(&binding->vertex->uniforms, name);

        snprintf(name, sizeof(name), "lightLocalPosition%d", light);
        binding->uniform_locs.vsh.light_local_position[light] =
            uniform_index(&binding->vertex->uniforms, name);

        snprintf(name, sizeof(name), "lightLocalAttenuation%d", light);
        binding->uniform_locs.vsh.light_local_attenuation[light] =
            uniform_index(&binding->vertex->uniforms, name);
    }

    binding->uniform_locs.vsh.point_params =
        uniform_index(&binding->vertex->uniforms, "pointParams");
    binding->uniform_locs.vsh.material_alpha =
        uniform_index(&binding->vertex->uniforms, "material_alpha");
    binding->uniform_locs.vsh.uniform_attrs =
        uniform_index(&binding->vertex->uniforms, "inlineValue");
    binding->uniform_locs.vsh.specular_power =
        uniform_index(&binding->vertex->uniforms, "specularPower");
}
/*
 * LRU init_node callback: store the lookup key (a ShaderState) in the cache
 * entry and mark the entry as not yet holding compiled shaders.
 *
 * The key is copied byte-for-byte because entries are later compared with
 * memcmp() over the whole ShaderState.
 */
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
    ShaderBinding *entry = container_of(node, ShaderBinding, node);

    entry->initialized = false;
    memcpy(&entry->state, state, sizeof(ShaderState));
}
/*
 * LRU post_node_evict callback: destroy whichever of the entry's geometry,
 * vertex and fragment shader modules are present and mark the entry as
 * uninitialised. The module pointers themselves are left as they were.
 */
static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
{
    PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache);
    ShaderBinding *entry = container_of(node, ShaderBinding, node);

    ShaderModuleInfo *stages[] = {
        entry->geometry,
        entry->vertex,
        entry->fragment,
    };

    int idx = 0;
    while (idx < ARRAY_SIZE(stages)) {
        ShaderModuleInfo *module = stages[idx++];
        if (!module) {
            continue; /* stage not present for this entry */
        }
        pgraph_vk_destroy_shader_module(r, module);
    }

    entry->initialized = false;
}
/*
 * LRU compare_nodes callback. Returns true when the entry's stored
 * ShaderState DIFFERS from `key` (memcmp semantics), false when the bytes
 * are identical.
 */
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
    ShaderBinding *entry = container_of(node, ShaderBinding, node);
    int diff = memcmp(&entry->state, key, sizeof(ShaderState));

    return diff != 0;
}
/*
 * Set up the shader LRU cache: allocate a fixed array of 1024 ShaderBinding
 * entries, put each one on the cache's free list and install the
 * init/compare/evict callbacks.
 */
static void shader_cache_init(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    const size_t shader_cache_size = 1024;

    lru_init(&r->shader_cache);

    r->shader_cache_entries =
        g_malloc_n(shader_cache_size, sizeof(ShaderBinding));
    assert(r->shader_cache_entries != NULL);

    /* Walk the array front to back so entries enter the free list in index
     * order. */
    ShaderBinding *entry = r->shader_cache_entries;
    for (size_t left = shader_cache_size; left > 0; left--, entry++) {
        lru_add_free(&r->shader_cache, &entry->node);
    }

    r->shader_cache.init_node = shader_cache_entry_init;
    r->shader_cache.compare_nodes = shader_cache_entry_compare;
    r->shader_cache.post_node_evict = shader_cache_entry_post_evict;
}
/*
 * Tear down the shader cache: flush the LRU, then release the entry array
 * and clear the pointer.
 */
static void shader_cache_finalize(PGRAPHState *pg)
{
    PGRAPHVkState *renderer = pg->vk_renderer_state;

    lru_flush(&renderer->shader_cache);

    g_free(renderer->shader_cache_entries);
    renderer->shader_cache_entries = NULL;
}
/*
 * Return the cached ShaderBinding for `state`, generating and compiling the
 * geometry (optional), vertex and fragment shaders on a cache miss.
 *
 * The cache is keyed by a hash over the raw bytes of *state. An entry whose
 * `initialized` flag is clear is treated as a miss and is populated here.
 *
 * LC_NUMERIC is switched to "C" for the duration of GLSL generation and then
 * restored. setlocale() affects the whole process.
 */
static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint64_t hash = fast_hash((void *)state, sizeof(*state));
    LruNode *node = lru_lookup(&r->shader_cache, hash, state);
    ShaderBinding *snode = container_of(node, ShaderBinding, node);

    NV2A_VK_DPRINTF("shader state hash: %016" PRIx64 " %p", hash, snode);

    if (!snode->initialized) {
        NV2A_VK_DPRINTF("cache miss");
        nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);

        /* setlocale() may return a pointer into storage that the next
         * setlocale() call overwrites, so keep a private copy. */
        char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
        if (previous_numeric_locale) {
            previous_numeric_locale = g_strdup(previous_numeric_locale);
        }

        /* Ensure numeric values are printed with '.' radix, no grouping */
        setlocale(LC_NUMERIC, "C");

        MString *geometry_shader_code = pgraph_gen_geom_glsl(
            state->vsh.polygon_front_mode, state->vsh.polygon_back_mode,
            state->vsh.primitive_mode, state->vsh.smooth_shading, true);

        /* Record whether a geometry stage exists BEFORE the source string is
         * released: the pointer must not be inspected after mstring_unref(),
         * which may free it. */
        bool has_geometry_shader = (geometry_shader_code != NULL);

        if (has_geometry_shader) {
            NV2A_VK_DPRINTF("geometry shader: \n%s",
                            mstring_get_str(geometry_shader_code));
            snode->geometry = pgraph_vk_create_shader_module_from_glsl(
                r, VK_SHADER_STAGE_GEOMETRY_BIT,
                mstring_get_str(geometry_shader_code));
            mstring_unref(geometry_shader_code);
        } else {
            snode->geometry = NULL;
        }

        MString *vertex_shader_code = pgraph_gen_vsh_glsl(
            &state->vsh, (GenVshGlslOptions){
                             .vulkan = true,
                             .prefix_outputs = has_geometry_shader,
                             .use_push_constants_for_uniform_attrs =
                                 r->use_push_constants_for_uniform_attrs,
                             .ubo_binding = VSH_UBO_BINDING,
                         });
        NV2A_VK_DPRINTF("vertex shader: \n%s",
                        mstring_get_str(vertex_shader_code));
        snode->vertex = pgraph_vk_create_shader_module_from_glsl(
            r, VK_SHADER_STAGE_VERTEX_BIT,
            mstring_get_str(vertex_shader_code));
        mstring_unref(vertex_shader_code);

        MString *fragment_shader_code = pgraph_gen_psh_glsl(
            state->psh, (GenPshGlslOptions){
                            .vulkan = true,
                            .ubo_binding = PSH_UBO_BINDING,
                            .tex_binding = PSH_TEX_BINDING,
                        });
        NV2A_VK_DPRINTF("fragment shader: \n%s",
                        mstring_get_str(fragment_shader_code));
        snode->fragment = pgraph_vk_create_shader_module_from_glsl(
            r, VK_SHADER_STAGE_FRAGMENT_BIT,
            mstring_get_str(fragment_shader_code));
        mstring_unref(fragment_shader_code);

        /* Put the caller's numeric locale back. */
        if (previous_numeric_locale) {
            setlocale(LC_NUMERIC, previous_numeric_locale);
            g_free(previous_numeric_locale);
        }

        update_shader_constant_locations(snode);

        snode->initialized = true;
    }

    return snode;
}
/*
 * Fetch the inline values of the vertex attributes selected by the binding's
 * uniform_attrs and, if there are any, store them (four floats per attribute)
 * in the vertex shader's uniform_attrs uniform.
 */
static void update_uniform_attr_values(PGRAPHState *pg, ShaderBinding *binding)
{
    float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
    int count = 0;

    pgraph_get_inline_values(pg, binding->state.vsh.uniform_attrs, values,
                             &count);
    if (count <= 0) {
        return;
    }

    uniform1fv(&binding->vertex->uniforms,
               binding->uniform_locs.vsh.uniform_attrs, count * 4,
               &values[0][0]);
}
// FIXME: Move to common
/*
 * Refresh the uniform values of both shader stages of `binding` from the
 * current PGRAPH registers and state in `pg`.
 *
 * Values are written through the uniform*() helpers into the vertex and
 * fragment uniform layouts. Most uniforms are optional and are skipped when
 * their cached location is -1; the lighting arrays (fixed-function path only),
 * the vertex program constants and the clip regions are written without a
 * location check.
 */
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding)
{
PGRAPHVkState *r = pg->vk_renderer_state;
ShaderState *state = &binding->state;
/* update combiner constants */
/* Stages 0-7 read COMBINEFACTOR0/1 at a 4-byte stride; index 8 is the final
 * combiner and reads SPECFOGFACTOR0/1 instead. */
for (int i = 0; i < 9; i++) {
uint32_t constant[2];
if (i == 8) {
/* final combiner */
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
} else {
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
}
for (int j = 0; j < 2; j++) {
GLint loc = binding->uniform_locs.psh.psh_constant[i][j];
if (loc != -1) {
/* Expand the packed 32-bit colour into four floats. */
float value[4];
pgraph_argb_pack32_to_rgba_float(constant[j], value);
uniform1fv(&binding->fragment->uniforms, loc, 4, value);
}
}
}
/* Alpha test reference value, taken from the ALPHAREF field of CONTROL_0. */
if (binding->uniform_locs.psh.alpha_ref != -1) {
int alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
NV_PGRAPH_CONTROL_0_ALPHAREF);
uniform1i(&binding->fragment->uniforms,
binding->uniform_locs.psh.alpha_ref, alpha_ref);
}
/* Colour key colours: four registers passed through unchanged. */
if (binding->uniform_locs.psh.color_key != -1) {
uint32_t color_key_colors[4] = {
pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR0),
pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR1),
pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR2),
pgraph_reg_r(pg, NV_PGRAPH_COLORKEYCOLOR3),
};
uniform1uiv(&binding->fragment->uniforms,
binding->uniform_locs.psh.color_key, 4, color_key_colors);
}
/* Filled per texture stage in the loop below, uploaded after it. */
uint32_t color_key_mask[4] = { 0 };
/* For each texture stage */
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
int loc;
/* Bump luminance only during stages 1 - 3 */
if (i > 0) {
/* The bump registers are indexed from stage 1, hence (i - 1). */
loc = binding->uniform_locs.psh.bump_mat[i];
if (loc != -1) {
uint32_t m_u32[4];
m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
/* Reinterpret the 32-bit register contents as floats (bit pattern is
 * kept, no numeric conversion). */
float m[4];
m[0] = *(float *)&m_u32[0];
m[1] = *(float *)&m_u32[1];
m[2] = *(float *)&m_u32[2];
m[3] = *(float *)&m_u32[3];
uniformMatrix2fv(&binding->fragment->uniforms, loc, m);
}
loc = binding->uniform_locs.psh.bump_scale[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
uniform1f(&binding->fragment->uniforms, loc, *(float *)&v);
}
loc = binding->uniform_locs.psh.bump_offset[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
uniform1f(&binding->fragment->uniforms, loc, *(float *)&v);
}
}
/* Texture coordinate scale: the bound texture's key.scale is used only when
 * its colour format is flagged linear, otherwise 1.0. */
loc = binding->uniform_locs.psh.tex_scale[i];
if (loc != -1) {
assert(pg->vk_renderer_state->texture_bindings[i] != NULL);
float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale;
BasicColorFormatInfo f_basic =
kelvin_color_format_info_map[pg->vk_renderer_state
->texture_bindings[i]
->key.state.color_format];
if (!f_basic.linear) {
scale = 1.0;
}
uniform1f(&binding->fragment->uniforms, loc, scale);
}
color_key_mask[i] = pgraph_get_color_key_mask_for_texture(pg, i);
}
if (binding->uniform_locs.psh.color_key_mask != -1) {
uniform1uiv(&binding->fragment->uniforms,
binding->uniform_locs.psh.color_key_mask, 4,
color_key_mask);
}
/* Fog colour: each 8-bit channel is normalised by dividing by 255. */
if (binding->uniform_locs.psh.fog_color != -1) {
uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
uniform4f(&binding->fragment->uniforms,
binding->uniform_locs.psh.fog_color,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
}
/* Fog parameters: two registers reinterpreted as floats. */
if (binding->uniform_locs.vsh.fog_param != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
uniform2f(&binding->vertex->uniforms,
binding->uniform_locs.vsh.fog_param, *(float *)&v[0],
*(float *)&v[1]);
}
/* Largest depth value for the current zeta format: z_format selects the
 * floating-point limit (f16_max / f24_max) over the integer one.
 * NOTE(review): zmax stays uninitialised if the default case is reached with
 * assertions compiled out. */
float zmax;
switch (pg->surface_shape.zeta_format) {
case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
break;
case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
break;
default:
assert(0);
}
if (binding->state.vsh.is_fixed_function) {
/* update lighting constants */
/* Table of (source array, uniform location, element count). Each element is
 * uploaded as four ints, hence len * 4 below. No -1 check is made on these
 * locations. */
struct {
uint32_t *v;
int locs;
size_t len;
} lighting_arrays[] = {
{ &pg->ltctxa[0][0], binding->uniform_locs.vsh.ltctxa,
NV2A_LTCTXA_COUNT },
{ &pg->ltctxb[0][0], binding->uniform_locs.vsh.ltctxb,
NV2A_LTCTXB_COUNT },
{ &pg->ltc1[0][0], binding->uniform_locs.vsh.ltc1,
NV2A_LTC1_COUNT },
};
for (int i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
uniform1iv(&binding->vertex->uniforms, lighting_arrays[i].locs,
lighting_arrays[i].len * 4,
(void *)lighting_arrays[i].v);
}
/* Per-light vectors, three floats each, skipped when not declared. */
for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
int loc = binding->uniform_locs.vsh.light_infinite_half_vector[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_infinite_half_vector[i]);
}
loc = binding->uniform_locs.vsh.light_infinite_direction[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_infinite_direction[i]);
}
loc = binding->uniform_locs.vsh.light_local_position[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_local_position[i]);
}
loc = binding->uniform_locs.vsh.light_local_attenuation[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_local_attenuation[i]);
}
}
if (binding->uniform_locs.vsh.specular_power != -1) {
uniform1f(&binding->vertex->uniforms,
binding->uniform_locs.vsh.specular_power,
pg->specular_power);
}
}
/* update vertex program constants */
/* Written unconditionally, four ints per constant. */
uniform1iv(&binding->vertex->uniforms,
binding->uniform_locs.vsh.vsh_constant,
NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants);
/* Surface size divided by the anti-aliasing factor. The location is stored
 * in the psh group but was looked up in, and is written to, the vertex
 * layout. */
if (binding->uniform_locs.psh.surface_size != -1) {
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
uniform2f(&binding->vertex->uniforms,
binding->uniform_locs.psh.surface_size,
pg->surface_binding_dim.width / aa_width,
pg->surface_binding_dim.height / aa_height);
}
/* Clip range (0, zmax, zclip_min, zclip_max), sent to whichever of the two
 * stages declares it. */
if (binding->uniform_locs.vsh.clip_range != -1 ||
binding->uniform_locs.psh.clip_range != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
float zclip_min = *(float *)&v[0];
float zclip_max = *(float *)&v[1];
if (binding->uniform_locs.vsh.clip_range != -1) {
uniform4f(&binding->vertex->uniforms,
binding->uniform_locs.vsh.clip_range, 0, zmax, zclip_min,
zclip_max);
}
if (binding->uniform_locs.psh.clip_range != -1) {
uniform4f(&binding->fragment->uniforms,
binding->uniform_locs.psh.clip_range, 0, zmax, zclip_min,
zclip_max);
}
}
/* Depth offset: ZOFFSETBIAS when any polygon offset enable bit (fill, line
 * or point) is set in SETUPRASTER, otherwise 0. */
if (binding->uniform_locs.psh.depth_offset != -1) {
float zbias = 0.0f;
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
(NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
zbias = *(float *)&zbias_u32;
if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 &&
(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) {
/* TODO: emulate zfactor when z_perspective true, i.e.
* w-buffering. Perhaps calculate an additional offset based on
* triangle orientation in geometry shader and pass the result
* to fragment shader and add it to gl_FragDepth as well.
*/
NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering");
}
}
uniform1f(&binding->fragment->uniforms,
binding->uniform_locs.psh.depth_offset, zbias);
}
/* Clipping regions */
/* NOTE(review): max_gl_width / max_gl_height are computed here but not read
 * again in this function. */
unsigned int max_gl_width = pg->surface_binding_dim.width;
unsigned int max_gl_height = pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
/* Eight window clip rectangles stored as (x_min, y_min, x_max, y_max), each
 * passed through the anti-aliasing and surface scaling factors. The register
 * max fields get +1 - presumably converting an inclusive bound to an
 * exclusive one; confirm against the shader. */
uint32_t clip_regions[8][4];
for (int i = 0; i < 8; i++) {
uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
pgraph_apply_scaling_factor(pg, &x_min, &y_min);
pgraph_apply_scaling_factor(pg, &x_max, &y_max);
clip_regions[i][0] = x_min;
clip_regions[i][1] = y_min;
clip_regions[i][2] = x_max;
clip_regions[i][3] = y_max;
}
/* Written unconditionally as 8 * 4 ints. */
uniform1iv(&binding->fragment->uniforms,
binding->uniform_locs.psh.clip_region, 8 * 4,
(void *)clip_regions);
if (binding->uniform_locs.vsh.point_params != -1) {
uniform1iv(&binding->vertex->uniforms,
binding->uniform_locs.vsh.point_params,
ARRAY_SIZE(pg->point_params), (void *)pg->point_params);
}
if (binding->uniform_locs.vsh.material_alpha != -1) {
uniform1f(&binding->vertex->uniforms,
binding->uniform_locs.vsh.material_alpha, pg->material_alpha);
}
/* Inline attribute values go through the UBO only when push constants are
 * not being used for them and the shader has uniform attributes. */
if (!r->use_push_constants_for_uniform_attrs && state->vsh.uniform_attrs) {
update_uniform_attr_values(pg, binding);
}
}
/*
 * Cheap pre-check run before the full shader state is rebuilt.
 *
 * Returns true as soon as anything inspected here differs from the currently
 * bound shader (or no shader is bound); returns false, and bumps the
 * NV2A_PROF_SHADER_BIND_NOTDIRTY counter, when every check passes.
 */
static bool check_shaders_dirty(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    /* Nothing bound yet, or program data flagged dirty. */
    if (!r->shader_binding || pg->program_data_dirty) {
        return true;
    }

    /* Per-stage combiner registers, only for the stages currently enabled. */
    int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
    for (int stage = 0; stage < num_stages; stage++) {
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + stage * 4)) {
            return true;
        }
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + stage * 4)) {
            return true;
        }
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + stage * 4)) {
            return true;
        }
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + stage * 4)) {
            return true;
        }
    }

    /* Individual registers checked for dirtiness. */
    unsigned int watched_regs[] = {
        NV_PGRAPH_COMBINECTL,
        NV_PGRAPH_COMBINESPECFOG0,
        NV_PGRAPH_COMBINESPECFOG1,
        NV_PGRAPH_CONTROL_0,
        NV_PGRAPH_CONTROL_3,
        NV_PGRAPH_CSV0_C,
        NV_PGRAPH_CSV0_D,
        NV_PGRAPH_CSV1_A,
        NV_PGRAPH_CSV1_B,
        NV_PGRAPH_POINTSIZE,
        NV_PGRAPH_SETUPRASTER,
        NV_PGRAPH_SHADERCLIPMODE,
        NV_PGRAPH_SHADERCTL,
        NV_PGRAPH_SHADERPROG,
        NV_PGRAPH_SHADOWCTL,
        NV_PGRAPH_ZCOMPRESSOCCLUDE,
    };
    for (int idx = 0; idx < ARRAY_SIZE(watched_regs); idx++) {
        if (pgraph_is_reg_dirty(pg, watched_regs[idx])) {
            return true;
        }
    }

    /* Non-register inputs captured in the bound shader's state. */
    ShaderState *bound = &r->shader_binding->state;
    bool vsh_inputs_match =
        pg->uniform_attrs == bound->vsh.uniform_attrs &&
        pg->swizzle_attrs == bound->vsh.swizzle_attrs &&
        pg->compressed_attrs == bound->vsh.compressed_attrs &&
        pg->primitive_mode == bound->vsh.primitive_mode &&
        pg->surface_scale_factor == bound->vsh.surface_scale_factor;
    if (!vsh_inputs_match) {
        return true;
    }

    /* Texture stage registers, plus the texture matrix enables when the
     * bound shader is a fixed-function one. */
    for (int stage = 0; stage < 4; stage++) {
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + stage * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + stage * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + stage * 4)) {
            return true;
        }

        if (!bound->vsh.is_fixed_function) {
            continue;
        }
        if (pg->texture_matrix_enable[stage] !=
            bound->vsh.fixed_function.texture_matrix_enable[stage]) {
            return true;
        }
    }

    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
    return false;
}
/*
 * Select the shader binding matching the current PGRAPH state and refresh its
 * uniforms.
 *
 * r->shader_bindings_changed is cleared on entry and set only when a
 * different binding was selected. The full state is rebuilt and compared
 * only if check_shaders_dirty() reports a possible change.
 */
void pgraph_vk_bind_shaders(PGRAPHState *pg)
{
    NV2A_VK_DGROUP_BEGIN("%s", __func__);

    PGRAPHVkState *r = pg->vk_renderer_state;

    r->shader_bindings_changed = false;

    if (check_shaders_dirty(pg)) {
        ShaderState new_state = pgraph_get_shader_state(pg);

        /* Byte-wise comparison against the state of the bound shader. */
        bool state_unchanged =
            r->shader_binding &&
            memcmp(&r->shader_binding->state, &new_state,
                   sizeof(ShaderState)) == 0;

        if (!state_unchanged) {
            r->shader_binding = gen_shaders(pg, &new_state);
            r->shader_bindings_changed = true;
        }
    }

    // FIXME: Use dirty bits
    pgraph_vk_update_shader_uniforms(pg);

    NV2A_VK_DGROUP_END();
}
/*
 * Recompute the uniform values of the bound shader and detect whether they
 * changed.
 *
 * After the constants are refreshed, each stage's uniform block is hashed and
 * compared against the hash stored from the previous call; any difference
 * sets r->uniforms_changed (the flag is only ever set here, never cleared).
 * Requires a shader binding to be present.
 */
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    NV2A_VK_DGROUP_BEGIN("%s", __func__);
    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);

    assert(r->shader_binding);
    ShaderBinding *bound = r->shader_binding;
    ShaderUniformLayout *stage_layouts[] = {
        &bound->vertex->uniforms,
        &bound->fragment->uniforms,
    };

    shader_update_constants(pg, r->shader_binding);

    for (int stage = 0; stage < ARRAY_SIZE(stage_layouts); stage++) {
        ShaderUniformLayout *layout = stage_layouts[stage];
        uint64_t fresh = fast_hash(layout->allocation, layout->total_size);

        r->uniforms_changed |= (fresh != r->uniform_buffer_hashes[stage]);
        r->uniform_buffer_hashes[stage] = fresh;
    }

    nv2a_profile_inc_counter(r->uniforms_changed ?
                                 NV2A_PROF_SHADER_UBO_DIRTY :
                                 NV2A_PROF_SHADER_UBO_NOTDIRTY);

    NV2A_VK_DGROUP_END();
}
/*
 * Initialise everything this file owns: the GLSL compiler, the descriptor
 * pool, layout and sets (in that order), and the shader cache. Also decides
 * whether uniform attribute values are passed as push constants, which is
 * the case when the device's push constant limit can hold
 * MAX_UNIFORM_ATTR_VALUES_SIZE bytes.
 */
void pgraph_vk_init_shaders(PGRAPHState *pg)
{
    PGRAPHVkState *renderer = pg->vk_renderer_state;

    pgraph_vk_init_glsl_compiler();

    create_descriptor_pool(pg);
    create_descriptor_set_layout(pg);
    create_descriptor_sets(pg);

    shader_cache_init(pg);

    renderer->use_push_constants_for_uniform_attrs =
        (renderer->device_props.limits.maxPushConstantsSize >=
         MAX_UNIFORM_ATTR_VALUES_SIZE);
}
/*
 * Release everything set up by pgraph_vk_init_shaders(), in reverse order:
 * shader cache, descriptor sets, set layout, descriptor pool, and finally
 * the GLSL compiler.
 */
void pgraph_vk_finalize_shaders(PGRAPHState *pg)
{
    /* Cached shader modules go first. */
    shader_cache_finalize(pg);

    /* Sets are freed before the pool they were allocated from. */
    destroy_descriptor_sets(pg);
    destroy_descriptor_set_layout(pg);
    destroy_descriptor_pool(pg);

    pgraph_vk_finalize_glsl_compiler();
}