/*
 * Geforce NV2A PGRAPH OpenGL Renderer
 *
 * Copyright (c) 2015 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2020-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/fast-hash.h"
#include "qemu/mstring.h"
#include <locale.h>

#include "xemu-version.h"
#include "ui/xemu-settings.h"
#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "hw/xbox/nv2a/pgraph/util.h"
#include "debug.h"
#include "renderer.h"

static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
                                    bool binding_changed, bool vertex_program,
                                    bool fixed_function);

/*
 * Reinterpret the bit pattern of a 32-bit value as a float.
 *
 * memcpy() is used instead of a pointer cast so the conversion does not
 * depend on the compiler's aliasing rules.
 */
static inline float uint32_bits_to_float(uint32_t bits)
{
    float value;
    memcpy(&value, &bits, sizeof(value));
    return value;
}

/*
 * Map a polygon mode / primitive mode pair to the GL primitive passed to the
 * draw call.
 *
 * POLY_MODE_POINT always yields GL_POINTS. Quads and quad strips are
 * submitted as (strip) lines with adjacency. Asserts and returns 0 for
 * combinations that are not handled.
 */
static GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode,
                                    enum ShaderPrimitiveMode primitive_mode)
{
    if (polygon_mode == POLY_MODE_POINT) {
        return GL_POINTS;
    }

    switch (primitive_mode) {
    case PRIM_TYPE_POINTS:
        return GL_POINTS;
    case PRIM_TYPE_LINES:
        return GL_LINES;
    case PRIM_TYPE_LINE_LOOP:
        return GL_LINE_LOOP;
    case PRIM_TYPE_LINE_STRIP:
        return GL_LINE_STRIP;
    case PRIM_TYPE_TRIANGLES:
        return GL_TRIANGLES;
    case PRIM_TYPE_TRIANGLE_STRIP:
        return GL_TRIANGLE_STRIP;
    case PRIM_TYPE_TRIANGLE_FAN:
        return GL_TRIANGLE_FAN;
    case PRIM_TYPE_QUADS:
        return GL_LINES_ADJACENCY;
    case PRIM_TYPE_QUAD_STRIP:
        return GL_LINE_STRIP_ADJACENCY;
    case PRIM_TYPE_POLYGON:
        if (polygon_mode == POLY_MODE_LINE) {
            return GL_LINE_LOOP;
        } else if (polygon_mode == POLY_MODE_FILL) {
            return GL_TRIANGLE_FAN;
        }

        assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode");
        return 0;
    default:
        assert(!"Invalid primitive_mode");
        return 0;
    }
}

/*
 * Compile `code` as a shader of type `gl_shader_type`.
 *
 * `name` is only used for debug groups and diagnostics. On a compile failure
 * the source and the info log are printed to stderr and the process aborts,
 * so the returned handle always refers to a compiled shader.
 */
static GLuint create_gl_shader(GLenum gl_shader_type,
                               const char *code,
                               const char *name)
{
    GLint compiled = 0;

    NV2A_GL_DGROUP_BEGIN("Creating new %s", name);

    NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code);

    GLuint shader = glCreateShader(gl_shader_type);
    glShaderSource(shader, 1, &code, 0);
    glCompileShader(shader);

    /* Check it compiled */
    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
    if (!compiled) {
        GLint log_length = 0;
        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
        if (log_length < 0) {
            log_length = 0;
        }

        /*
         * Always allocate at least one zeroed byte so the message below
         * prints an empty string, not a NULL pointer, when no log exists.
         */
        GLchar *log = g_malloc0((size_t)log_length + 1);
        if (log_length > 0) {
            glGetShaderInfoLog(shader, log_length, NULL, log);
        }
        fprintf(stderr,
                "%s\n\n"
                "nv2a: %s compilation failed: %s\n",
                code, name, log);
        g_free(log);

        NV2A_GL_DGROUP_END();
        abort();
    }

    NV2A_GL_DGROUP_END();

    return shader;
}

/*
 * Bind the texture sampler units, validate the program and cache the
 * location of every uniform this file later updates.
 *
 * The program in `binding->gl_program` must be the one currently in use:
 * the sampler uniforms are set with glUniform1i(). Aborts if program
 * validation fails. `material_alpha_loc` is only looked up for
 * fixed-function states and is -1 otherwise.
 */
static void update_shader_constant_locations(ShaderBinding *binding,
                                             const ShaderState *state)
{
    int i, j;
    char tmp[64];

    /* set texture samplers */
    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
        char samplerName[16];
        snprintf(samplerName, sizeof(samplerName), "texSamp%d", i);
        GLint texSampLoc = glGetUniformLocation(binding->gl_program,
                                                samplerName);
        if (texSampLoc >= 0) {
            glUniform1i(texSampLoc, i);
        }
    }

    /* validate the program */
    glValidateProgram(binding->gl_program);
    GLint valid = 0;
    glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid);
    if (!valid) {
        GLchar log[1024];
        glGetProgramInfoLog(binding->gl_program, 1024, NULL, log);
        fprintf(stderr, "nv2a: shader validation failed: %s\n", log);
        abort();
    }

    /* lookup fragment shader uniforms */
    for (i = 0; i < 9; i++) {
        for (j = 0; j < 2; j++) {
            snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
            binding->psh_constant_loc[i][j] =
                glGetUniformLocation(binding->gl_program, tmp);
        }
    }
    binding->alpha_ref_loc =
        glGetUniformLocation(binding->gl_program, "alphaRef");

    /* Bump mapping uniforms are only looked up for stages 1 and above */
    for (i = 1; i < NV2A_MAX_TEXTURES; i++) {
        snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
        binding->bump_mat_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
        snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
        binding->bump_scale_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
        snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
        binding->bump_offset_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }

    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
        snprintf(tmp, sizeof(tmp), "texScale%d", i);
        binding->tex_scale_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }

    /* lookup vertex shader uniforms */
    for (i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) {
        snprintf(tmp, sizeof(tmp), "c[%d]", i);
        binding->vsh_constant_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }
    binding->surface_size_loc =
        glGetUniformLocation(binding->gl_program, "surfaceSize");
    binding->clip_range_loc =
        glGetUniformLocation(binding->gl_program, "clipRange");
    binding->fog_color_loc =
        glGetUniformLocation(binding->gl_program, "fogColor");
    binding->fog_param_loc =
        glGetUniformLocation(binding->gl_program, "fogParam");

    binding->inv_viewport_loc =
        glGetUniformLocation(binding->gl_program, "invViewport");
    for (i = 0; i < NV2A_LTCTXA_COUNT; i++) {
        snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i);
        binding->ltctxa_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }
    for (i = 0; i < NV2A_LTCTXB_COUNT; i++) {
        snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i);
        binding->ltctxb_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }
    for (i = 0; i < NV2A_LTC1_COUNT; i++) {
        snprintf(tmp, sizeof(tmp), "ltc1[%d]", i);
        binding->ltc1_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }
    for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
        snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
        binding->light_infinite_half_vector_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
        snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
        binding->light_infinite_direction_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);

        snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
        binding->light_local_position_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
        snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
        binding->light_local_attenuation_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }
    for (i = 0; i < 8; i++) {
        snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i);
        binding->clip_region_loc[i] =
            glGetUniformLocation(binding->gl_program, tmp);
    }

    if (state->fixed_function) {
        binding->material_alpha_loc =
            glGetUniformLocation(binding->gl_program, "material_alpha");
    } else {
        binding->material_alpha_loc = -1;
    }
}

/*
 * Generate, compile and link the GLSL program for `state`.
 *
 * Returns a newly allocated ShaderBinding (g_malloc0) owned by the caller.
 * The new program is left in use. Aborts if compiling or linking fails.
 * LC_NUMERIC is switched to "C" for the duration of the call and restored
 * before returning.
 */
static ShaderBinding *generate_shaders(const ShaderState *state)
{
    char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
    if (previous_numeric_locale) {
        previous_numeric_locale = g_strdup(previous_numeric_locale);
    }

    /* Ensure numeric values are printed with '.' radix, no grouping */
    setlocale(LC_NUMERIC, "C");
    GLuint program = glCreateProgram();

    /* Create an optional geometry shader and find primitive type */
    GLenum gl_primitive_mode =
        get_gl_primitive_mode(state->polygon_front_mode, state->primitive_mode);
    MString *geometry_shader_code =
        pgraph_gen_geom_glsl(state->polygon_front_mode,
                             state->polygon_back_mode,
                             state->primitive_mode,
                             state->smooth_shading,
                             false);
    /* Remember the result now; the string is released before it is needed */
    const bool has_geometry_shader = (geometry_shader_code != NULL);
    GLuint geometry_shader = 0;
    if (has_geometry_shader) {
        const char *geometry_shader_code_str =
            mstring_get_str(geometry_shader_code);
        geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER,
                                           geometry_shader_code_str,
                                           "geometry shader");
        glAttachShader(program, geometry_shader);
        mstring_unref(geometry_shader_code);
    }

    /* create the vertex shader */
    MString *vertex_shader_code =
        pgraph_gen_vsh_glsl(state, has_geometry_shader);
    GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER,
                                            mstring_get_str(vertex_shader_code),
                                            "vertex shader");
    glAttachShader(program, vertex_shader);
    mstring_unref(vertex_shader_code);

    /* generate a fragment shader from register combiners */
    MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh);
    const char *fragment_shader_code_str =
        mstring_get_str(fragment_shader_code);
    GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER,
                                              fragment_shader_code_str,
                                              "fragment shader");
    glAttachShader(program, fragment_shader);
    mstring_unref(fragment_shader_code);

    /* link the program */
    glLinkProgram(program);
    GLint linked = 0;
    glGetProgramiv(program, GL_LINK_STATUS, &linked);
    if (!linked) {
        GLchar log[2048];
        glGetProgramInfoLog(program, 2048, NULL, log);
        fprintf(stderr, "nv2a: shader linking failed: %s\n", log);
        abort();
    }

    /*
     * Flag the shader objects for deletion, as pgraph_gl_compile_shader()
     * does, so they are released together with the program.
     */
    if (has_geometry_shader) {
        glDeleteShader(geometry_shader);
    }
    glDeleteShader(vertex_shader);
    glDeleteShader(fragment_shader);

    glUseProgram(program);

    ShaderBinding *ret = g_malloc0(sizeof(ShaderBinding));
    ret->gl_program = program;
    ret->gl_primitive_mode = gl_primitive_mode;

    update_shader_constant_locations(ret, state);

    if (previous_numeric_locale) {
        setlocale(LC_NUMERIC, previous_numeric_locale);
        g_free(previous_numeric_locale);
    }

    return ret;
}

/* GL_VENDOR string, stored in and checked against cached program binaries */
static const char *shader_gl_vendor = NULL;

/* Create the "shaders" directory below the settings base path. */
static void shader_create_cache_folder(void)
{
    /*
     * Use the same "<base>/shaders" layout as shader_get_bin_directory() so
     * both agree whether or not the base path ends with a separator.
     */
    char *shader_path = g_strdup_printf("%s/shaders",
                                        xemu_settings_get_base_path());
    qemu_mkdir(shader_path);
    g_free(shader_path);
}

/* Return the path of the shader LRU list file; the caller frees it. */
static char *shader_get_lru_cache_path(void)
{
    return g_strdup_printf("%s/shader_cache_list",
                           xemu_settings_get_base_path());
}

/*
 * lru_visit_active() callback: append the hash of `node` to the FILE passed
 * in `opaque`. A failed write is reported on stderr and otherwise ignored.
 */
static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node,
                                                void *opaque)
{
    FILE *lru_list_file = (FILE *)opaque;
    size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file);
    if (written != 1) {
        fprintf(stderr,
                "nv2a: Failed to write shader list entry %llx to disk\n",
                (unsigned long long)node->hash);
    }
}

/*
 * Write the hashes of the active shader cache entries to the LRU list file
 * and flush the cache, then signal that the writeback has completed.
 *
 * When shader caching is disabled only the completion is signalled. The
 * completion is also signalled when the list file cannot be opened, so a
 * failed write never leaves the writeback marked as pending.
 */
void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg)
{
    PGRAPHGLState *r = pg->gl_renderer_state;

    if (!g_config.perf.cache_shaders) {
        qatomic_set(&r->shader_cache_writeback_pending, false);
        qemu_event_set(&r->shader_cache_writeback_complete);
        return;
    }

    char *shader_lru_path = shader_get_lru_cache_path();
    qemu_thread_join(&r->shader_disk_thread);

    FILE *lru_list = qemu_fopen(shader_lru_path, "wb");
    g_free(shader_lru_path);
    if (lru_list) {
        lru_visit_active(&r->shader_cache,
                         shader_write_lru_list_entry_to_disk, lru_list);
        fclose(lru_list);

        lru_flush(&r->shader_cache);
    } else {
        fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n");
    }

    qatomic_set(&r->shader_cache_writeback_pending, false);
    qemu_event_set(&r->shader_cache_writeback_complete);
}

/*
 * Try to turn the program binary held in `snode->program` into a usable
 * ShaderBinding.
 *
 * Returns false if the node holds no binary, or if the driver rejects it or
 * fails validation; the GL program created for the attempt is then deleted.
 * On success the program is left in use, the binary buffer is freed and
 * `snode->binding` is set.
 */
bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode)
{
    assert(glGetError() == GL_NO_ERROR);

    if (!snode->program) {
        return false;
    }

    GLuint gl_program = glCreateProgram();
    glProgramBinary(gl_program, snode->program_format, snode->program,
                    snode->program_size);
    GLint gl_error = glGetError();
    if (gl_error != GL_NO_ERROR) {
        NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n",
                     gl_error);
        glDeleteProgram(gl_program);
        return false;
    }

    glValidateProgram(gl_program);
    GLint valid = 0;
    glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid);
    if (!valid) {
        GLchar log[1024];
        glGetProgramInfoLog(gl_program, 1024, NULL, log);
        NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log);
        glDeleteProgram(gl_program);
        return false;
    }

    glUseProgram(gl_program);

    ShaderBinding *binding = g_malloc0(sizeof(ShaderBinding));
    binding->gl_program = gl_program;
    binding->gl_primitive_mode =
        get_gl_primitive_mode(snode->state.polygon_front_mode,
                              snode->state.primitive_mode);
    snode->binding = binding;

    g_free(snode->program);
    snode->program = NULL;

    update_shader_constant_locations(binding, &snode->state);

    return true;
}

/*
 * Return the directory that holds the binary for `hash`, named after the top
 * 16 bits of the hash. The caller frees the result.
 */
static char *shader_get_bin_directory(uint64_t hash)
{
    const char *cfg_dir = xemu_settings_get_base_path();
    char *shader_bin_dir = g_strdup_printf("%s/shaders/%04x",
                                           cfg_dir, (uint32_t)(hash >> 48));
    return shader_bin_dir;
}

/*
 * Return the file path for `hash` inside `shader_bin_dir`, named after the
 * low 48 bits of the hash. The caller frees the result.
 */
static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash)
{
    uint64_t bin_mask = (uint64_t)0xffff << 48;
    return g_strdup_printf("%s/%012" PRIx64, shader_bin_dir,
                           hash & ~bin_mask);
}

/*
 * Load the cached program binary for `hash` into the shader cache.
 *
 * File layout, as produced by shader_write_to_disk():
 *   uint64 version length, version string (NUL included),
 *   uint64 vendor length, vendor string (NUL included),
 *   program binary format, ShaderState, program size, program bytes.
 *
 * Nothing is done when the hash is already present in the cache. A file that
 * cannot be read in full, or whose version or vendor does not match the
 * running process, is deleted so it is not tried again.
 */
static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash)
{
    PGRAPHGLState *r = pg->gl_renderer_state;

    qemu_mutex_lock(&r->shader_cache_lock);
    bool already_cached = lru_contains_hash(&r->shader_cache, hash);
    qemu_mutex_unlock(&r->shader_cache_lock);
    if (already_cached) {
        return;
    }

    char *shader_bin_dir = shader_get_bin_directory(hash);
    char *shader_path = shader_get_binary_path(shader_bin_dir, hash);
    g_free(shader_bin_dir);

    /* Lengths as written to disk: the terminating NUL is part of the data */
    const uint64_t expected_version_len = (uint64_t)strlen(xemu_version) + 1;
    const uint64_t expected_vendor_len = (uint64_t)strlen(shader_gl_vendor) + 1;

    char *cached_xemu_version = NULL;
    char *cached_gl_vendor = NULL;
    void *program_buffer = NULL;
    uint64_t cached_xemu_version_len;
    uint64_t gl_vendor_len;
    GLenum program_binary_format;
    ShaderState state;
    size_t shader_size;
    LruNode *node;
    ShaderLruNode *snode;
    bool loaded = false;

    FILE *shader_file = qemu_fopen(shader_path, "rb");
    if (!shader_file) {
        goto out;
    }

    #define READ_OR_ERR(data, data_len) \
        do { \
            if (fread(data, data_len, 1, shader_file) != 1) { \
                goto out; \
            } \
        } while (0)

    /*
     * The lengths come from the file and are not trusted: anything other
     * than the exact expected length is a mismatch, which also bounds the
     * allocations and makes the comparisons independent of NUL termination.
     */
    READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len));
    if (cached_xemu_version_len != expected_version_len) {
        goto out;
    }
    cached_xemu_version = g_malloc(cached_xemu_version_len);
    READ_OR_ERR(cached_xemu_version, cached_xemu_version_len);
    if (memcmp(cached_xemu_version, xemu_version,
               cached_xemu_version_len) != 0) {
        goto out;
    }

    READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len));
    if (gl_vendor_len != expected_vendor_len) {
        goto out;
    }
    cached_gl_vendor = g_malloc(gl_vendor_len);
    READ_OR_ERR(cached_gl_vendor, gl_vendor_len);
    if (memcmp(cached_gl_vendor, shader_gl_vendor, gl_vendor_len) != 0) {
        goto out;
    }

    READ_OR_ERR(&program_binary_format, sizeof(program_binary_format));
    READ_OR_ERR(&state, sizeof(state));
    READ_OR_ERR(&shader_size, sizeof(shader_size));
    if (shader_size == 0) {
        goto out;
    }

    /* g_try_malloc: a corrupt size must not abort the whole process */
    program_buffer = g_try_malloc(shader_size);
    if (!program_buffer) {
        goto out;
    }
    READ_OR_ERR(program_buffer, shader_size);

    #undef READ_OR_ERR

    loaded = true;

    qemu_mutex_lock(&r->shader_cache_lock);
    node = lru_lookup(&r->shader_cache, hash, &state);
    snode = container_of(node, ShaderLruNode, node);

    /*
     * If we happened to regenerate this shader already, then we may as well
     * use the new one
     */
    if (!snode->binding) {
        snode->program_format = program_binary_format;
        snode->program_size = shader_size;
        snode->program = program_buffer;
        snode->cached = true;
        program_buffer = NULL; /* now owned by the cache node */
    }
    qemu_mutex_unlock(&r->shader_cache_lock);

out:
    if (shader_file) {
        fclose(shader_file);
    }
    if (!loaded) {
        /* Delete the shader so it won't be loaded again */
        qemu_unlink(shader_path);
    }
    g_free(shader_path);
    g_free(program_buffer);
    g_free(cached_xemu_version);
    g_free(cached_gl_vendor);
}

/*
 * Thread entry point: read the hashes stored in the LRU list file and load
 * the matching program binaries from disk. `arg` is the PGRAPHState.
 * Does nothing when shader caching is disabled or the list cannot be opened.
 */
static void *shader_reload_lru_from_disk(void *arg)
{
    if (!g_config.perf.cache_shaders) {
        return NULL;
    }

    PGRAPHState *pg = (PGRAPHState *)arg;
    char *shader_lru_path = shader_get_lru_cache_path();

    FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb");
    g_free(shader_lru_path);
    if (!lru_shaders_list) {
        return NULL;
    }

    uint64_t hash;
    while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) {
        shader_load_from_disk(pg, hash);
    }

    fclose(lru_shaders_list);

    return NULL;
}

/* LRU callback: initialise a node for the ShaderState passed as `state`. */
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
    ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
    memcpy(&snode->state, state, sizeof(ShaderState));
    snode->cached = false;
    snode->binding = NULL;
    snode->program = NULL;
    snode->save_thread = NULL;
}

/*
 * LRU callback: release everything owned by an evicted node. A pending save
 * thread is joined first, then the GL program, the binding and any program
 * binary are freed and the node is reset.
 */
static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
{
    ShaderLruNode *snode = container_of(node, ShaderLruNode, node);

    if (snode->save_thread) {
        qemu_thread_join(snode->save_thread);
        g_free(snode->save_thread);
    }

    if (snode->binding) {
        glDeleteProgram(snode->binding->gl_program);
        g_free(snode->binding);
    }

    g_free(snode->program);

    snode->cached = false;
    snode->save_thread = NULL;
    snode->binding = NULL;
    snode->program = NULL;
    memset(&snode->state, 0, sizeof(ShaderState));
}

/*
 * LRU callback: compare the node's ShaderState with `key`.
 * Returns the memcmp() result converted to bool, i.e. true when the two
 * states DIFFER and false when they are identical.
 */
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
    ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
    return memcmp(&snode->state, key, sizeof(ShaderState));
}

/*
 * Set up the shader cache: lock, completion event, cache folder and the LRU
 * with its node storage, then start the thread that reloads cached shaders
 * from disk.
 */
void pgraph_gl_init_shaders(PGRAPHState *pg)
{
    PGRAPHGLState *r = pg->gl_renderer_state;

    qemu_mutex_init(&r->shader_cache_lock);
    qemu_event_init(&r->shader_cache_writeback_complete, false);

    if (!shader_gl_vendor) {
        shader_gl_vendor = (const char *)glGetString(GL_VENDOR);
    }

    shader_create_cache_folder();

    /* FIXME: Make this configurable */
    const size_t shader_cache_size = 50 * 1024;
    lru_init(&r->shader_cache);
    r->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode));
    assert(r->shader_cache_entries != NULL);
    for (size_t i = 0; i < shader_cache_size; i++) {
        lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node);
    }

    r->shader_cache.init_node = shader_cache_entry_init;
    r->shader_cache.compare_nodes = shader_cache_entry_compare;
    r->shader_cache.post_node_evict = shader_cache_entry_post_evict;

    qemu_thread_create(&r->shader_disk_thread,
                       "pgraph.renderer_state->shader_cache",
                       shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE);
}

/* Write back the cache list, then release the node storage and the lock. */
void pgraph_gl_finalize_shaders(PGRAPHState *pg)
{
    PGRAPHGLState *r = pg->gl_renderer_state;

    // Clear out shader cache
    pgraph_gl_shader_write_cache_reload_list(pg); // FIXME: also flushes, rename for clarity
    free(r->shader_cache_entries);
    r->shader_cache_entries = NULL;

    qemu_mutex_destroy(&r->shader_cache_lock);
}

/*
 * Thread entry point: write the program binary held by the ShaderLruNode in
 * `arg` to its cache file, using the layout shader_load_from_disk() reads.
 *
 * The binary buffer is freed whether or not the write succeeds. A partially
 * written file is deleted.
 */
static void *shader_write_to_disk(void *arg)
{
    ShaderLruNode *snode = (ShaderLruNode *)arg;

    char *shader_bin = shader_get_bin_directory(snode->node.hash);
    char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash);

    /*
     * Computed per call, not cached in function-level statics: several save
     * threads can run this function at the same time.
     */
    uint64_t gl_vendor_len = (uint64_t)(strlen(shader_gl_vendor) + 1);
    uint64_t xemu_version_len = (uint64_t)(strlen(xemu_version) + 1);
    int close_status;

    qemu_mkdir(shader_bin);
    g_free(shader_bin);

    FILE *shader_file = qemu_fopen(shader_path, "wb");
    if (!shader_file) {
        goto error;
    }

    #define WRITE_OR_ERR(data, data_size) \
        do { \
            if (fwrite(data, data_size, 1, shader_file) != 1) { \
                goto error; \
            } \
        } while (0)

    WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len));
    WRITE_OR_ERR(xemu_version, xemu_version_len);

    WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len));
    WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len);

    WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format));
    WRITE_OR_ERR(&snode->state, sizeof(snode->state));

    WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size));
    WRITE_OR_ERR(snode->program, snode->program_size);

    #undef WRITE_OR_ERR

    /* fclose() flushes buffered data, so a failure here means a short file */
    close_status = fclose(shader_file);
    shader_file = NULL;
    if (close_status != 0) {
        goto error;
    }

    g_free(shader_path);
    g_free(snode->program);
    snode->program = NULL;

    return NULL;

error:
    if (shader_file) {
        fclose(shader_file);
    }
    fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n",
            shader_path);
    qemu_unlink(shader_path);
    g_free(shader_path);
    g_free(snode->program);
    snode->program = NULL;
    return NULL;
}

/*
 * Fetch the program binary of `snode`'s binding and start a joinable thread
 * that writes it to disk.
 *
 * Does nothing when the node has no binding, is already marked as cached, or
 * the driver reports no program binary.
 */
void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode)
{
    if (!snode->binding || snode->cached) {
        return;
    }

    GLint program_size = 0;
    glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH,
                   &program_size);

    g_free(snode->program);
    snode->program = NULL;

    /*
     * program_size might be zero on some systems, if no binary formats are
     * supported
     */
    if (program_size <= 0) {
        return;
    }

    snode->program = g_malloc(program_size);
    GLsizei program_size_copied;
    glGetProgramBinary(snode->binding->gl_program, program_size,
                       &program_size_copied, &snode->program_format,
                       snode->program);
    assert(glGetError() == GL_NO_ERROR);

    snode->program_size = program_size_copied;
    snode->cached = true;

    /* "scache-" + up to 16 hex digits + NUL fits exactly */
    char name[24];
    snprintf(name, sizeof(name), "scache-%llx",
             (unsigned long long)snode->node.hash);
    snode->save_thread = g_malloc0(sizeof(QemuThread));
    qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode,
                       QEMU_THREAD_JOINABLE);
}

/*
 * Upload the current register and constant state to the uniforms of
 * `binding`, which must be the program currently in use.
 *
 * `binding_changed` forces the dirty-tracked lighting and vertex program
 * constants to be considered for upload even when not flagged dirty.
 * Lighting constants and the inverse viewport matrix are only updated when
 * `fixed_function` is set. `vertex_program` is currently unused.
 */
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
                                    bool binding_changed,

                                    // FIXME: Remove these... We already know it from binding.state
                                    bool vertex_program, bool fixed_function)
{
    PGRAPHGLState *r = pg->gl_renderer_state;
    int i, j;

    /* update combiner constants */
    for (i = 0; i < 9; i++) {
        uint32_t constant[2];
        if (i == 8) {
            /* final combiner */
            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
        } else {
            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
        }

        for (j = 0; j < 2; j++) {
            GLint loc = binding->psh_constant_loc[i][j];
            if (loc != -1) {
                float value[4];
                pgraph_argb_pack32_to_rgba_float(constant[j], value);
                glUniform4fv(loc, 1, value);
            }
        }
    }
    if (binding->alpha_ref_loc != -1) {
        float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
                                   NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0;
        glUniform1f(binding->alpha_ref_loc, alpha_ref);
    }

    /* For each texture stage */
    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
        GLint loc;

        /* Bump luminance only during stages 1 - 3 */
        if (i > 0) {
            loc = binding->bump_mat_loc[i];
            if (loc != -1) {
                float m[4];
                m[0] = uint32_bits_to_float(
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)));
                m[1] = uint32_bits_to_float(
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)));
                m[2] = uint32_bits_to_float(
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)));
                m[3] = uint32_bits_to_float(
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)));
                glUniformMatrix2fv(loc, 1, GL_FALSE, m);
            }
            loc = binding->bump_scale_loc[i];
            if (loc != -1) {
                uint32_t v =
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
                glUniform1f(loc, uint32_bits_to_float(v));
            }
            loc = binding->bump_offset_loc[i];
            if (loc != -1) {
                uint32_t v =
                    pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
                glUniform1f(loc, uint32_bits_to_float(v));
            }
        }

        loc = binding->tex_scale_loc[i];
        if (loc != -1) {
            assert(r->texture_binding[i] != NULL);
            glUniform1f(loc, (float)r->texture_binding[i]->scale);
        }
    }

    if (binding->fog_color_loc != -1) {
        uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
        glUniform4f(binding->fog_color_loc,
                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
    }
    if (binding->fog_param_loc != -1) {
        uint32_t v[2];
        v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
        v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
        glUniform2f(binding->fog_param_loc, uint32_bits_to_float(v[0]),
                    uint32_bits_to_float(v[1]));
    }

    float zmax;
    switch (pg->surface_shape.zeta_format) {
    case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
        zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
        break;
    case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
        zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
        break;
    default:
        assert(0);
        /* Only reached if assertions are compiled out; keeps zmax defined */
        zmax = 1.0f;
        break;
    }

    if (fixed_function) {
        /* update lighting constants */
        struct {
            uint32_t *v;
            bool *dirty;
            GLint *locs;
            size_t len;
        } lighting_arrays[] = {
            {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT},
            {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT},
            {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT},
        };

        /*
         * NOTE(review): the text between "for (i=0; i" and
         * "light_infinite_half_vector_loc" was lost in the copy of this file
         * under review. This loop and the header of the light loop below are
         * reconstructed from the lighting_arrays layout above (one vec4 of
         * four uint32 per entry) - confirm against version control.
         */
        for (size_t a = 0; a < ARRAY_SIZE(lighting_arrays); a++) {
            uint32_t *lighting_v = lighting_arrays[a].v;
            bool *lighting_dirty = lighting_arrays[a].dirty;
            GLint *lighting_locs = lighting_arrays[a].locs;
            size_t lighting_len = lighting_arrays[a].len;
            for (size_t k = 0; k < lighting_len; k++) {
                if (!lighting_dirty[k] && !binding_changed) {
                    continue;
                }
                GLint loc = lighting_locs[k];
                if (loc != -1) {
                    glUniform4fv(loc, 1, (const GLfloat *)&lighting_v[k * 4]);
                }
                lighting_dirty[k] = false;
            }
        }

        for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
            GLint loc;
            loc = binding->light_infinite_half_vector_loc[i];
            if (loc != -1) {
                glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]);
            }
            loc = binding->light_infinite_direction_loc[i];
            if (loc != -1) {
                glUniform3fv(loc, 1, pg->light_infinite_direction[i]);
            }

            loc = binding->light_local_position_loc[i];
            if (loc != -1) {
                glUniform3fv(loc, 1, pg->light_local_position[i]);
            }
            loc = binding->light_local_attenuation_loc[i];
            if (loc != -1) {
                glUniform3fv(loc, 1, pg->light_local_attenuation[i]);
            }
        }

        /* estimate the viewport by assuming it matches the surface ... */
        unsigned int aa_width = 1, aa_height = 1;
        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);

        float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
        float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
        float m33 = zmax;
        float m41;
        float m42;
        /* The viewport offset is stored as raw float bits */
        memcpy(&m41, &pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0],
               sizeof(m41));
        memcpy(&m42, &pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1],
               sizeof(m42));

        float invViewport[16] = {
            1.0 / m11, 0, 0, 0,
            0, 1.0 / m22, 0, 0,
            0, 0, 1.0 / m33, 0,
            -1.0 + m41 / m11, 1.0 + m42 / m22, 0, 1.0
        };

        if (binding->inv_viewport_loc != -1) {
            glUniformMatrix4fv(binding->inv_viewport_loc,
                               1, GL_FALSE, &invViewport[0]);
        }
    }

    /* update vertex program constants */
    /*
     * NOTE(review): the loop header and the start of the dirty check were
     * lost in the copy of this file under review; reconstructed from the
     * matching location lookup loop - confirm against version control.
     */
    for (i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) {
        if (!pg->vsh_constants_dirty[i] && !binding_changed) {
            continue;
        }

        GLint loc = binding->vsh_constant_loc[i];
        /* Skip the upload when the program already holds this value */
        if ((loc != -1) &&
            memcmp(binding->vsh_constants[i], pg->vsh_constants[i],
                   sizeof(pg->vsh_constants[1]))) {
            glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]);
            memcpy(binding->vsh_constants[i], pg->vsh_constants[i],
                   sizeof(pg->vsh_constants[i]));
        }

        pg->vsh_constants_dirty[i] = false;
    }

    if (binding->surface_size_loc != -1) {
        unsigned int aa_width = 1, aa_height = 1;
        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
        glUniform2f(binding->surface_size_loc,
                    pg->surface_binding_dim.width / aa_width,
                    pg->surface_binding_dim.height / aa_height);
    }

    if (binding->clip_range_loc != -1) {
        uint32_t v[2];
        v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
        v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
        float zclip_min = uint32_bits_to_float(v[0]) / zmax * 2.0 - 1.0;
        float zclip_max = uint32_bits_to_float(v[1]) / zmax * 2.0 - 1.0;
        glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max);
    }

    /* Clipping regions */
    unsigned int max_gl_width = pg->surface_binding_dim.width;
    unsigned int max_gl_height = pg->surface_binding_dim.height;
    pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);

    for (i = 0; i < 8; i++) {
        uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
        unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
        unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
        uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
        unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
        unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
        pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
        pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);

        pgraph_apply_scaling_factor(pg, &x_min, &y_min);
        pgraph_apply_scaling_factor(pg, &x_max, &y_max);

        /* Translate for the GL viewport origin */
        int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0);
        int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height);

        glUniform4i(binding->clip_region_loc[i],
                    x_min, y_min_xlat, x_max, y_max_xlat);
    }

    if (binding->material_alpha_loc != -1) {
        glUniform1f(binding->material_alpha_loc, pg->material_alpha);
    }
}

/*
 * Report whether any register or field that feeds the shader state differs
 * from the values recorded by the previous "dirty" call.
 *
 * DIRTY_REGS is expanded three times with different meanings for CR_x/CF_x:
 * once to define a static shadow copy per entry, once to compare the live
 * values against the shadows, and - only when a difference is found - once
 * to refresh every shadow. Returns true when a difference was found.
 *
 * The shadow copies are function-level statics, so they are shared by every
 * caller in the process.
 */
static bool test_shaders_dirty(PGRAPHState *pg)
{
    #define CR_1(reg) CR_x(reg, 1)
    #define CR_4(reg) CR_x(reg, 4)
    #define CR_8(reg) CR_x(reg, 8)
    #define CF(src, name)  CF_x(typeof(src), (&src), name, 1)
    #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src))
    #define CNAME(name) reg_check__ ## name
    #define CX_x__define(type, name, x) static type CNAME(name)[x];
    #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x)
    #define CF_x__define(type, src, name, x) CX_x__define(type, name, x)
    #define CR_x__check(reg, x) \
        for (int i = 0; i < x; i++) { if (pgraph_reg_r(pg, reg+i*4) != CNAME(reg)[i]) goto dirty; }
    #define CF_x__check(type, src, name, x) \
        for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; }
    #define CR_x__update(reg, x) \
        for (int i = 0; i < x; i++) { CNAME(reg)[i] = pgraph_reg_r(pg, reg+i*4); }
    #define CF_x__update(type, src, name, x) \
        for (int i = 0; i < x; i++) { CNAME(name)[i] = src[i]; }

    #define DIRTY_REGS \
        CR_1(NV_PGRAPH_COMBINECTL) \
        CR_1(NV_PGRAPH_SHADERCTL) \
        CR_1(NV_PGRAPH_SHADOWCTL) \
        CR_1(NV_PGRAPH_COMBINESPECFOG0) \
        CR_1(NV_PGRAPH_COMBINESPECFOG1) \
        CR_1(NV_PGRAPH_CONTROL_0) \
        CR_1(NV_PGRAPH_CONTROL_3) \
        CR_1(NV_PGRAPH_CSV0_C) \
        CR_1(NV_PGRAPH_CSV0_D) \
        CR_1(NV_PGRAPH_CSV1_A) \
        CR_1(NV_PGRAPH_CSV1_B) \
        CR_1(NV_PGRAPH_SETUPRASTER) \
        CR_1(NV_PGRAPH_SHADERPROG) \
        CR_8(NV_PGRAPH_COMBINECOLORI0) \
        CR_8(NV_PGRAPH_COMBINECOLORO0) \
        CR_8(NV_PGRAPH_COMBINEALPHAI0) \
        CR_8(NV_PGRAPH_COMBINEALPHAO0) \
        CR_8(NV_PGRAPH_COMBINEFACTOR0) \
        CR_8(NV_PGRAPH_COMBINEFACTOR1) \
        CR_1(NV_PGRAPH_SHADERCLIPMODE) \
        CR_4(NV_PGRAPH_TEXCTL0_0) \
        CR_4(NV_PGRAPH_TEXFMT0) \
        CR_4(NV_PGRAPH_TEXFILTER0) \
        CR_8(NV_PGRAPH_WINDOWCLIPX0) \
        CR_8(NV_PGRAPH_WINDOWCLIPY0) \
        CF(pg->primitive_mode, primitive_mode) \
        CF(pg->surface_scale_factor, surface_scale_factor) \
        CF(pg->compressed_attrs, compressed_attrs) \
        CFA(pg->texture_matrix_enable, texture_matrix_enable)

    /* Pass 1: define one static shadow array per entry */
    #define CR_x(reg, x) CR_x__define(reg, x)
    #define CF_x(type, src, name, x) CF_x__define(type, src, name, x)
    DIRTY_REGS
    #undef CR_x
    #undef CF_x

    /* Pass 2: compare live values against the shadows */
    #define CR_x(reg, x) CR_x__check(reg, x)
    #define CF_x(type, src, name, x) CF_x__check(type, src, name, x)
    DIRTY_REGS
    #undef CR_x
    #undef CF_x
    return false;

dirty:
    /* Pass 3: something changed, refresh every shadow */
    #define CR_x(reg, x) CR_x__update(reg, x)
    #define CF_x(type, src, name, x) CF_x__update(type, src, name, x)
    DIRTY_REGS
    #undef CR_x
    #undef CF_x
    return true;
}

/*
 * Make sure the program matching the current PGRAPH state is bound, then
 * upload its constants.
 *
 * When a binding exists and neither the tracked registers nor the program
 * data changed, the existing binding is kept. Otherwise the shader state is
 * rebuilt and the program is taken from the cache, restored from a cached
 * binary, or generated (and handed to the disk cache when enabled).
 */
void pgraph_gl_bind_shaders(PGRAPHState *pg)
{
    /*
     * Mode flags of the most recently built shader state. The "not dirty"
     * path does not rebuild the state, so it reuses these instead of reading
     * a ShaderState that was never initialised on that path. They are static
     * for the same reason the shadow copies in test_shaders_dirty() are.
     */
    static bool vertex_program;
    static bool fixed_function;

    PGRAPHGLState *r = pg->gl_renderer_state;
    bool binding_changed = false;

    if (r->shader_binding && !test_shaders_dirty(pg) &&
        !pg->program_data_dirty) {
        nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
    } else {
        pg->program_data_dirty = false;

        ShaderBinding *old_binding = r->shader_binding;

        ShaderState state = pgraph_get_shader_state(pg);
        assert(!state.vulkan);
        vertex_program = state.vertex_program;
        fixed_function = state.fixed_function;

        NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__,
                             state.vertex_program ? "yes" : "no",
                             state.fixed_function ? "yes" : "no");

        uint64_t shader_state_hash =
            fast_hash((uint8_t *)&state, sizeof(ShaderState));
        qemu_mutex_lock(&r->shader_cache_lock);
        LruNode *node = lru_lookup(&r->shader_cache, shader_state_hash, &state);
        ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
        if (snode->binding || pgraph_gl_shader_load_from_memory(snode)) {
            r->shader_binding = snode->binding;
        } else {
            r->shader_binding = generate_shaders(&state);
            nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);

            /* cache it */
            snode->binding = r->shader_binding;
            if (g_config.perf.cache_shaders) {
                pgraph_gl_shader_cache_to_disk(snode);
            }
        }
        qemu_mutex_unlock(&r->shader_cache_lock);

        binding_changed = (r->shader_binding != old_binding);
        if (binding_changed) {
            nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);
            glUseProgram(r->shader_binding->gl_program);
        }

        NV2A_GL_DGROUP_END();
    }

    shader_update_constants(pg, r->shader_binding, binding_changed,
                            vertex_program, fixed_function);
}

/*
 * Compile `vs_src` and `fs_src`, link them into a new program and make it
 * current.
 *
 * Returns the program handle. Prints the info log and exits the process if a
 * shader fails to compile or the program fails to link.
 */
GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src)
{
    GLint status = GL_FALSE;
    char err_buf[512] = "";

    // Compile vertex shader
    GLuint vs = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vs, 1, &vs_src, NULL);
    glCompileShader(vs);
    glGetShaderiv(vs, GL_COMPILE_STATUS, &status);
    if (status != GL_TRUE) {
        glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf);
        err_buf[sizeof(err_buf) - 1] = '\0';
        fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf);
        exit(1);
    }

    // Compile fragment shader
    GLuint fs = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(fs, 1, &fs_src, NULL);
    glCompileShader(fs);
    status = GL_FALSE;
    glGetShaderiv(fs, GL_COMPILE_STATUS, &status);
    if (status != GL_TRUE) {
        glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf);
        err_buf[sizeof(err_buf) - 1] = '\0';
        fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf);
        exit(1);
    }

    // Link vertex and fragment shaders
    GLuint prog = glCreateProgram();
    glAttachShader(prog, vs);
    glAttachShader(prog, fs);
    glLinkProgram(prog);
    status = GL_FALSE;
    glGetProgramiv(prog, GL_LINK_STATUS, &status);
    if (status != GL_TRUE) {
        glGetProgramInfoLog(prog, sizeof(err_buf), NULL, err_buf);
        err_buf[sizeof(err_buf) - 1] = '\0';
        fprintf(stderr, "Shader linking failed: %s\n", err_buf);
        exit(1);
    }
    glUseProgram(prog);

    // Flag shaders for deletion (will still be retained for lifetime of prog)
    glDeleteShader(vs);
    glDeleteShader(fs);

    return prog;
}