From 0adb1c1b07684174b358340374018a4023b9961d Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Sun, 31 Oct 2021 18:18:41 -0700 Subject: [PATCH] wip --- hw/xbox/nv2a/nv2a_int.h | 6 + hw/xbox/nv2a/pgraph.c | 565 +++++++++++++++++++++++++++------------- ui/xemu-shaders.c | 35 ++- ui/xemu-shaders.h | 3 + 4 files changed, 425 insertions(+), 184 deletions(-) diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index c8eb214d95..b7c0b85611 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -48,6 +48,7 @@ #include "debug.h" #include "shaders.h" #include "nv2a_regs.h" +#include "ui/xemu-shaders.h" #define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask)) @@ -252,6 +253,9 @@ typedef struct PGRAPHState { GLint pvideo_pos_loc; } disp_rndr; + struct decal_shader *blit_shader, *depth_blit_shader, *stencil_blit_shader; + GLuint scale_fbo; + /* subchannels state we're not sure the location of... */ ContextSurfaces2DState context_surfaces_2d; ImageBlitState image_blit; @@ -374,6 +378,7 @@ typedef struct PGRAPHState { unsigned int surface_scale_factor; uint8_t *scale_buf; + bool clearing; } PGRAPHState; typedef struct NV2AState { @@ -464,6 +469,7 @@ typedef struct NV2ABlockInfo { } NV2ABlockInfo; extern GloContext *g_nv2a_context_render; +extern GloContext *g_nv2a_context_scale; extern GloContext *g_nv2a_context_display; void nv2a_update_irq(NV2AState *d); diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c index 291d4efe3a..215cda569c 100644 --- a/hw/xbox/nv2a/pgraph.c +++ b/hw/xbox/nv2a/pgraph.c @@ -29,6 +29,7 @@ static NV2AState *g_nv2a; GloContext *g_nv2a_context_render; +GloContext *g_nv2a_context_scale; GloContext *g_nv2a_context_display; NV2AStats g_nv2a_stats; @@ -359,6 +360,7 @@ static const SurfaceFormatInfo kelvin_surface_zeta_int_format_map[] = { // static void pgraph_set_context_user(NV2AState *d, uint32_t val); +static void pgraph_init_scaler(NV2AState *d); static void pgraph_gl_fence(void); static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src); static void pgraph_init_render_to_texture(NV2AState *d); @@ -616,7 +618,7 @@ void pgraph_flush(NV2AState *d) { PGRAPHState *pg = &d->pgraph; - bool update_surface = (pg->color_binding || pg->zeta_binding); + bool update_surface = false;//(pg->color_binding || pg->zeta_binding); /* Clear last surface shape to force recreation of buffers at next draw */ pg->surface_color.draw_dirty = false; @@ -3148,7 +3150,10 @@ DEF_METHOD(NV097, CLEAR_SURFACE) glClearColor(red, green, blue, alpha); } + pg->clearing = true; pgraph_update_surface(d, true, write_color, write_zeta); + pg->clearing = false; + /* FIXME: Needs confirmation */ unsigned int xmin = @@ -3401,6 +3406,7 @@ static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) void nv2a_gl_context_init(void) { g_nv2a_context_render = glo_context_create(); + g_nv2a_context_scale = glo_context_create(); g_nv2a_context_display = glo_context_create(); } @@ -3468,6 +3474,7 @@ void pgraph_init(NV2AState *d) pg->frame_time = 0; pg->draw_time = 0; pg->downloads_pending = false; + pg->clearing = false; qemu_mutex_init(&pg->lock); qemu_event_init(&pg->gl_sync_complete, false); @@ -3552,6 +3559,9 @@ void pgraph_init(NV2AState *d) glo_set_current(g_nv2a_context_display); pgraph_init_display_renderer(d); + glo_set_current(g_nv2a_context_scale); + pgraph_init_scaler(d); + glo_set_current(NULL); } @@ -4488,6 +4498,16 @@ static void pgraph_init_display_renderer(NV2AState *d) assert(glGetError() == GL_NO_ERROR); } +static void pgraph_init_scaler(NV2AState *d) +{ + struct PGRAPHState *pg = &d->pgraph; + pg->blit_shader = create_decal_shader(SHADER_TYPE_BLIT); + pg->depth_blit_shader = create_decal_shader(SHADER_TYPE_BLIT_DEPTH); + pg->stencil_blit_shader = create_decal_shader(SHADER_TYPE_BLIT_STENCIL); + glGenFramebuffers(1, &pg->scale_fbo); + assert(glGetError() == GL_NO_ERROR); +} + static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, unsigned int width, unsigned int height, @@ -4973,52 +4993,13 @@ static void pgraph_bind_current_surface(NV2AState *d) } } -static void surface_copy_shrink_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - if (bytes_per_pixel == 4) { - for (unsigned int x = 0; x < width; x++) { - *(uint32_t *)out = *(uint32_t *)in; - out += 4; - in += 4 * factor; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - *(uint16_t *)out = *(uint16_t *)in; - out += 2; - in += 2 * factor; - } - } else { - for (unsigned int x = 0; x < width; x++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - in += bytes_per_pixel * factor; - } - } -} - - -static void pgraph_download_surface_data_to_buffer(NV2AState *d, - SurfaceBinding *surface, - bool swizzle, bool flip, - bool downscale, - uint8_t *pixels) +static void pgraph_download_surface_data_to_buffer_no_scale( + NV2AState *d, SurfaceBinding *surface, bool swizzle, bool flip, + uint8_t *pixels) { PGRAPHState *pg = &d->pgraph; - swizzle &= surface->swizzle; - downscale &= (pg->surface_scale_factor != 1); - NV2A_XPRINTF(DBG_SURFACE_SYNC, - "[GPU->RAM] %s (%s) surface @ %" HWADDR_PRIx - " (w=%d,h=%d,p=%d,bpp=%d)\n", - surface->color ? "COLOR" : "ZETA", - surface->swizzle ? "sz" : "lin", surface->vram_addr, - surface->width, surface->height, surface->pitch, - surface->fmt.bytes_per_pixel); - - /* Bind destination surface to framebuffer */ + /* Bind target surface to framebuffer */ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, @@ -5033,48 +5014,25 @@ static void pgraph_download_surface_data_to_buffer(NV2AState *d, /* Read surface into memory */ uint8_t *gl_read_buf = pixels; - uint8_t *swizzle_buf = pixels; if (swizzle) { /* FIXME: Allocate big buffer up front and re-alloc if necessary. * FIXME: Consider swizzle in shader */ - assert(pg->surface_scale_factor == 1 || downscale); - swizzle_buf = (uint8_t *)g_malloc(surface->height * surface->pitch); - gl_read_buf = swizzle_buf; + assert(pg->surface_scale_factor == 1); + gl_read_buf = (uint8_t *)g_malloc(surface->height * surface->pitch); } - if (downscale) { - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor * - surface->height * surface->pitch * - surface->fmt.bytes_per_pixel); - gl_read_buf = pg->scale_buf; - } - - glo_readpixels( - surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel, - pg->surface_scale_factor * surface->pitch, - pg->surface_scale_factor * surface->width, - pg->surface_scale_factor * surface->height, flip, gl_read_buf); - - /* FIXME: Replace this with a hw accelerated version */ - if (downscale) { - assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel)); - uint8_t *out = swizzle_buf, *in = pg->scale_buf; - for (unsigned int y = 0; y < surface->height; y++) { - surface_copy_shrink_row(out, in, surface->width, - surface->fmt.bytes_per_pixel, - pg->surface_scale_factor); - in += surface->pitch * pg->surface_scale_factor * - pg->surface_scale_factor; - out += surface->pitch; - } - } + glo_readpixels(surface->fmt.gl_format, surface->fmt.gl_type, + surface->fmt.bytes_per_pixel, + surface->pitch * pg->surface_scale_factor, + surface->width * pg->surface_scale_factor, + surface->height * pg->surface_scale_factor, + flip, gl_read_buf); if (swizzle) { - swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, + swizzle_rect(gl_read_buf, surface->width, surface->height, pixels, surface->pitch, surface->fmt.bytes_per_pixel); - g_free(swizzle_buf); + g_free(gl_read_buf); } /* Re-bind original framebuffer target */ @@ -5083,6 +5041,170 @@ static void pgraph_download_surface_data_to_buffer(NV2AState *d, pgraph_bind_current_surface(d); } +/* + * TODO: Eliminate code duplication here + */ +static void pgraph_download_surface_data_to_buffer(NV2AState *d, + SurfaceBinding *surface, + bool swizzle, bool flip, + bool downscale, + uint8_t *pixels) +{ + NV2A_XPRINTF(DBG_SURFACE_SYNC, + "[GPU->RAM] %s (%s) surface @ %" HWADDR_PRIx + " (w=%d,h=%d,p=%d,bpp=%d)\n", + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + PGRAPHState *pg = &d->pgraph; + swizzle &= surface->swizzle; + downscale &= (pg->surface_scale_factor != 1); + + if (!downscale) { + pgraph_download_surface_data_to_buffer_no_scale(d, surface, swizzle, + flip, pixels); + return; + } + + /* XXX: May need to adjust this to run in a single context for Intel */ + pgraph_gl_fence(); /* XXX: We don't need client sync here */ + glo_set_current(g_nv2a_context_scale); + assert(glGetError() == GL_NO_ERROR); + + unsigned int width = surface->width, + height = surface->height, + pitch = surface->pitch; + if (!downscale) { + pgraph_apply_scaling_factor(pg, &width, &height); + pitch *= pg->surface_scale_factor; + } + + /* Create intermediate texture for transformed surface */ + GLuint dest_tex; + glGenTextures(1, &dest_tex); + glBindTexture(GL_TEXTURE_2D, dest_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, + width, height, 0, surface->fmt.gl_format, + surface->fmt.gl_type, NULL); + + /* Target intermediate texture */ + glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.scale_fbo); + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, GL_TEXTURE_2D, dest_tex, 0); + if (surface->color) { + GLenum DrawBuffers[1]; + DrawBuffers[0] = surface->fmt.gl_attachment; + glDrawBuffers(1, DrawBuffers); + } + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + /* Bind source surface to texture */ + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + + struct decal_shader *s; + switch (surface->fmt.gl_attachment) { + case GL_COLOR_ATTACHMENT0: + s = pg->blit_shader; + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + break; + case GL_DEPTH_ATTACHMENT: + case GL_DEPTH_STENCIL_ATTACHMENT: + s = pg->depth_blit_shader; + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + break; + default: + assert(0); + } + + /* Color or depth pass */ + glEnable(GL_DEPTH_TEST); + glEnable(GL_STENCIL_TEST); + glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + glDepthMask(GL_TRUE); + glDepthFunc(GL_ALWAYS); + + glViewport(0, 0, width, height); + glUseProgram(s->prog); + glBindVertexArray(s->vao); + glUniform1i(s->FlipY_loc, flip); + glUniform4f(s->ScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform4f(s->TexScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform1i(s->tex_loc, 0); + if (surface->color) { + glClearColor(0, 1, 0, 0); + } else { + glClearStencil(0); + } + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + glStencilFunc(GL_ALWAYS, 0, 0xff); + glStencilMask(0xff); + glDrawElements(GL_TRIANGLE_FAN, 4, GL_UNSIGNED_INT, NULL); + assert(glGetError() == GL_NO_ERROR); + + /* Stencil pass */ + if (surface->fmt.gl_attachment == GL_DEPTH_STENCIL_ATTACHMENT) { + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + s = pg->stencil_blit_shader; + glUseProgram(s->prog); + glUniform1i(s->FlipY_loc, flip); + glUniform4f(s->ScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform4f(s->TexScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform1i(s->tex_loc, 0); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_ALWAYS); + glEnable(GL_STENCIL_TEST); + glDepthMask(GL_FALSE); + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + + for (int i = 0; i < 8; i++) { + glStencilFunc(GL_ALWAYS, 0xff, 0xff); + glStencilMask(1 << i); + glUniform1ui(s->StencilBit_loc, i); + glDrawElements(GL_TRIANGLE_FAN, 4, GL_UNSIGNED_INT, NULL); + } + } + + /* Read framebuffer data into buffer */ + uint8_t *gl_read_buf = pixels; + uint8_t *swizzle_buf; + + if (swizzle) { + /* FIXME: Allocate big buffer up front and re-alloc if necessary. + * FIXME: Consider swizzle in shader + */ + assert(pg->surface_scale_factor == 1 || downscale); + swizzle_buf = (uint8_t *)g_malloc(height * pitch); + gl_read_buf = swizzle_buf; + } + + glo_readpixels(surface->fmt.gl_format, surface->fmt.gl_type, + surface->fmt.bytes_per_pixel, pitch, width, height, false, + gl_read_buf); + + if (swizzle) { + swizzle_rect(swizzle_buf, width, height, pixels, pitch, + surface->fmt.bytes_per_pixel); + g_free(swizzle_buf); + } + + /* Cleanup */ + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, GL_TEXTURE_2D, 0, 0); + glBindTexture(GL_TEXTURE_2D, 0); + glDeleteTextures(1, &dest_tex); + assert(glGetError() == GL_NO_ERROR); + + pgraph_gl_fence(); + glo_set_current(g_nv2a_context_render); + assert(glGetError() == GL_NO_ERROR); +} + static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, bool force) { @@ -5130,56 +5252,55 @@ void pgraph_download_dirty_surfaces(NV2AState *d) qemu_event_set(&d->pgraph.dirty_surfaces_download_complete); } - -static void surface_copy_expand_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) +static void pgraph_upload_surface_data_no_scale(NV2AState *d, + SurfaceBinding *surface, + bool force) { - if (bytes_per_pixel == 4) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint32_t *)out = *(uint32_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint16_t *)out = *(uint16_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } -} + // FIXME: Don't query GL for texture binding + GLint last_texture_binding; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); -static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, - unsigned int height, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - size_t out_pitch = width * bytes_per_pixel * factor; + // FIXME: Replace with FBO to not disturb current state + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); - for (unsigned int y = 0; y < height; y++) { - surface_copy_expand_row(out, in, width, bytes_per_pixel, factor); - uint8_t *row_in = out; - for (unsigned int i = 1; i < factor; i++) { - out += out_pitch; - memcpy(out, row_in, out_pitch); - } - in += width * bytes_per_pixel; - out += out_pitch; + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + surface->vram_addr; + + if (surface->swizzle) { + buf = (uint8_t *)g_malloc(surface->size); + unswizzle_rect(data + surface->vram_addr, surface->width, + surface->height, buf, surface->pitch, + surface->fmt.bytes_per_pixel); } + + uint8_t *flipped_buf = (uint8_t *)g_malloc( + surface->height * surface->width * surface->fmt.bytes_per_pixel); + unsigned int irow; + for (irow = 0; irow < surface->height; irow++) { + memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) * + surface->fmt.bytes_per_pixel], + &buf[surface->pitch * irow], + surface->width * surface->fmt.bytes_per_pixel); + } + + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, + surface->width, surface->height, 0, surface->fmt.gl_format, + surface->fmt.gl_type, flipped_buf); + + if (surface->swizzle) { + g_free(buf); + } + + // Rebind previous framebuffer binding + glBindTexture(GL_TEXTURE_2D, last_texture_binding); + + pgraph_bind_current_surface(d); } static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, @@ -5203,68 +5324,136 @@ static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, surface->upload_pending = false; - // FIXME: Don't query GL for texture binding - GLint last_texture_binding; - glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); + if (pg->surface_scale_factor == 1) { + pgraph_upload_surface_data_no_scale(d, surface, force); + return; + } - // FIXME: Replace with FBO to not disturb current state - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); + /* Go to scale context */ + pgraph_gl_fence(); /* XXX: We don't need client sync here */ + glo_set_current(g_nv2a_context_scale); + assert(glGetError() == GL_NO_ERROR); - uint8_t *data = d->vram_ptr; - uint8_t *buf = data + surface->vram_addr; + /* Create intermediate texture for pre-transformed surface */ + GLuint src_tex; + glGenTextures(1, &src_tex); + glBindTexture(GL_TEXTURE_2D, src_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, surface->color ? GL_LINEAR : GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, surface->color ? GL_LINEAR : GL_NEAREST); + /* Load surface data */ + uint8_t *surface_data = d->vram_ptr + surface->vram_addr; if (surface->swizzle) { - buf = (uint8_t*)g_malloc(surface->size); - unswizzle_rect(data + surface->vram_addr, + surface_data = (uint8_t*)g_malloc(surface->size); + unswizzle_rect(d->vram_ptr + surface->vram_addr, surface->width, surface->height, - buf, + surface_data, surface->pitch, surface->fmt.bytes_per_pixel); } - /* FIXME: Replace this flip/scaling */ + glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, + surface->width, surface->height, 0, surface->fmt.gl_format, + surface->fmt.gl_type, surface_data); - // This is VRAM so we can't do this inplace! - uint8_t *flipped_buf = (uint8_t *)g_malloc( - surface->height * surface->width * surface->fmt.bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < surface->height; irow++) { - memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) - * surface->fmt.bytes_per_pixel], - &buf[surface->pitch * irow], - surface->width * surface->fmt.bytes_per_pixel); - } - - uint8_t *gl_read_buf = flipped_buf; - unsigned int width = surface->width, height = surface->height; - - if (pg->surface_scale_factor > 1) { - pgraph_apply_scaling_factor(pg, &width, &height); - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); - gl_read_buf = pg->scale_buf; - uint8_t *out = gl_read_buf, *in = flipped_buf; - surface_copy_expand(out, in, surface->width, surface->height, - surface->fmt.bytes_per_pixel, - d->pgraph.surface_scale_factor); - } - - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, - height, 0, surface->fmt.gl_format, surface->fmt.gl_type, - gl_read_buf); - g_free(flipped_buf); if (surface->swizzle) { - g_free(buf); + g_free(surface_data); } - // Rebind previous framebuffer binding - glBindTexture(GL_TEXTURE_2D, last_texture_binding); + /* Target destination surface texture */ + glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.scale_fbo); + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, GL_TEXTURE_2D, surface->gl_buffer, 0); + if (surface->color) { + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; + glDrawBuffers(1, DrawBuffers); + } + assert(glGetError() == GL_NO_ERROR); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + /* Bind source surface to texture */ + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, src_tex); + + struct decal_shader *s; + switch (surface->fmt.gl_attachment) { + case GL_COLOR_ATTACHMENT0: + s = pg->blit_shader; + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + break; + case GL_DEPTH_ATTACHMENT: + case GL_DEPTH_STENCIL_ATTACHMENT: + s = pg->depth_blit_shader; + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + break; + default: + assert(0); + } + + /* Render scaled, flipped surface */ + unsigned int width = surface->width, height = surface->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + /* Color or depth pass */ + glEnable(GL_DEPTH_TEST); + glEnable(GL_STENCIL_TEST); + glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + glDepthMask(GL_TRUE); + glDepthFunc(GL_ALWAYS); + + glViewport(0, 0, width, height); + glUseProgram(s->prog); + glBindVertexArray(s->vao); + glUniform1i(s->FlipY_loc, 1); + glUniform4f(s->ScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform4f(s->TexScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform1i(s->tex_loc, 0); + if (surface->color) { + glClearColor(0, 1, 0, 0); + } else { + glClearStencil(0); + } + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); + glStencilFunc(GL_ALWAYS, 0, 0xff); + glStencilMask(0xff); + glDrawElements(GL_TRIANGLE_FAN, 4, GL_UNSIGNED_INT, NULL); + assert(glGetError() == GL_NO_ERROR); + + /* Stencil pass */ + if (surface->fmt.gl_attachment == GL_DEPTH_STENCIL_ATTACHMENT) { + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + s = pg->stencil_blit_shader; + glUseProgram(s->prog); + glUniform1i(s->FlipY_loc, 1); + glUniform4f(s->ScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform4f(s->TexScaleOffset_loc, 1.0, 1.0, 0, 0); + glUniform1i(s->tex_loc, 0); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_ALWAYS); + glEnable(GL_STENCIL_TEST); + glDepthMask(GL_FALSE); + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + + for (int i = 0; i < 8; i++) { + glStencilFunc(GL_ALWAYS, 0xff, 0xff); + glStencilMask(1 << i); + glUniform1ui(s->StencilBit_loc, i); + glDrawElements(GL_TRIANGLE_FAN, 4, GL_UNSIGNED_INT, NULL); + } + } + + /* Cleanup */ + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, GL_TEXTURE_2D, 0, 0); + glBindTexture(GL_TEXTURE_2D, 0); + glDeleteTextures(1, &src_tex); + assert(glGetError() == GL_NO_ERROR); + + /* Return to rendering context */ + pgraph_gl_fence(); + glo_set_current(g_nv2a_context_render); + assert(glGetError() == GL_NO_ERROR); pgraph_bind_current_surface(d); } @@ -5272,13 +5461,16 @@ static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, static void pgraph_compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2) { #define DO_CMP(fld) \ - if (s1->fld != s2->fld) \ - NV2A_XPRINTF(DBG_SURFACES, "%20s -- %8ld vs %8ld\n", \ - #fld, (long int)s1->fld, (long int)s2->fld); + NV2A_XPRINTF(DBG_SURFACES, "%25s -- %8lx vs %8lx%s\n", \ + #fld, (long int)s1->fld, (long int)s2->fld, \ + s1->fld != s2->fld ? " ***" : ""); DO_CMP(shape.clip_x) DO_CMP(shape.clip_width) DO_CMP(shape.clip_y) DO_CMP(shape.clip_height) + DO_CMP(shape.color_format) + DO_CMP(shape.zeta_format) + DO_CMP(shape.z_format) DO_CMP(gl_buffer) DO_CMP(fmt.bytes_per_pixel) DO_CMP(fmt.gl_attachment) @@ -5308,7 +5500,7 @@ static void pgraph_populate_surface_binding_entry_sized(NV2AState *d, PGRAPHState *pg = &d->pgraph; Surface *surface; hwaddr dma_address; - SurfaceFormatInfo fmt; + SurfaceFormatInfo fmt; /* FIXME: Pointer */ if (color) { surface = &pg->surface_color; @@ -5358,7 +5550,8 @@ static void pgraph_populate_surface_binding_entry_sized(NV2AState *d, entry->vram_addr = dma.address + surface->offset; entry->width = width; entry->height = height; - entry->pitch = surface->pitch; + entry->pitch = entry->swizzle ? (entry->width * entry->fmt.bytes_per_pixel) + : surface->pitch; entry->size = height * surface->pitch; entry->upload_pending = true; entry->download_pending = false; @@ -5367,6 +5560,8 @@ static void pgraph_populate_surface_binding_entry_sized(NV2AState *d, entry->dma_len = dma.limit; entry->frame_time = pg->frame_time; entry->draw_time = pg->draw_time; + + assert(entry->pitch >= (entry->width * entry->fmt.bytes_per_pixel)); } static void pgraph_populate_surface_binding_entry(NV2AState *d, bool color, @@ -5382,8 +5577,10 @@ static void pgraph_populate_surface_binding_entry(NV2AState *d, bool color, /* Since we determine surface dimensions based on the clipping * rectangle, make sure to include the surface offset as well. */ - width += pg->surface_shape.clip_x; - height += pg->surface_shape.clip_y; + if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { + width += pg->surface_shape.clip_x; + height += pg->surface_shape.clip_y; + } } else { width = pg->color_binding->width; height = pg->color_binding->height; @@ -5431,6 +5628,7 @@ static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) pg->surface_shape.clip_height); bool should_create = true; + bool skip_sync = pg->clearing; /* FIXME: Incorrect if partial. */ if (found != NULL) { bool is_compatible = @@ -5475,7 +5673,9 @@ static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) } else { NV2A_XPRINTF(DBG_SURFACES, "Evicting incompatible surface\n"); pgraph_compare_surfaces(found, &entry); - pgraph_download_surface_data_if_dirty(d, found); + if (!skip_sync) { + pgraph_download_surface_data_if_dirty(d, found); + } pgraph_surface_invalidate(d, found); } } @@ -5492,7 +5692,8 @@ static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) entry.width, entry.height, surface->offset); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, color ? GL_LINEAR : GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, color ? GL_LINEAR : GL_NEAREST); unsigned int width = entry.width, height = entry.height; pgraph_apply_scaling_factor(pg, &width, &height); glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width, @@ -5509,6 +5710,8 @@ static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) pg->surface_binding_dim.clip_height = entry.shape.clip_height; } + found->upload_pending &= !skip_sync; + NV2A_XPRINTF(DBG_SURFACES, "%6s: [%5s @ %" HWADDR_PRIx " (%dx%d)] (%s) " "aa:%d, clip:x=%d,w=%d,y=%d,h=%d\n", diff --git a/ui/xemu-shaders.c b/ui/xemu-shaders.c index 9f43d06e4a..cff5576385 100644 --- a/ui/xemu-shaders.c +++ b/ui/xemu-shaders.c @@ -64,7 +64,7 @@ struct decal_shader *create_decal_shader(enum SHADER_TYPE type) s->time = 0; const char *vert_src = - "#version 150 core\n" + "#version 400 core\n" "uniform bool in_FlipY;\n" "uniform vec4 in_ScaleOffset;\n" "uniform vec4 in_TexScaleOffset;\n" @@ -81,7 +81,7 @@ struct decal_shader *create_decal_shader(enum SHADER_TYPE type) assert(vert != 0); const char *image_frag_src = - "#version 150 core\n" + "#version 400 core\n" "uniform sampler2D tex;\n" "uniform vec4 in_ColorPrimary;\n" "uniform vec4 in_ColorSecondary;\n" @@ -92,6 +92,30 @@ struct decal_shader *create_decal_shader(enum SHADER_TYPE type) " vec4 t = texture(tex, Texcoord);\n" " out_Color.rgba = t;\n" "}\n"; + + const char *depth_image_frag_src = + "#version 400 core\n" + "uniform sampler2D tex;\n" + "in vec2 Texcoord;\n" + "out vec4 out_Color;\n" + "void main() {\n" + " ivec2 icoord = ivec2(Texcoord*textureSize(tex, 0));\n" + " gl_FragDepth = texelFetch(tex, icoord, 0).r;\n" + " out_Color.rgba = vec4(1);\n" + "}\n"; + + const char *stencil_image_frag_src = + "#version 400 core\n" + "uniform usampler2D tex;\n" + "uniform uint stencil_bit;\n" + "in vec2 Texcoord;\n" + "out vec4 out_Color;\n" + "void main() {\n" + " ivec2 icoord = ivec2(Texcoord*textureSize(tex, 0));\n" + " uint stencil_val = texelFetch(tex, icoord, 0).r;\n" + " if ((stencil_val & (1u << stencil_bit)) == 0u) discard;\n" + " out_Color.rgba = vec4(1);\n" + "}\n"; // Simple 2-color decal shader // - in_ColorFill is first pass // - Red channel of the texture is used as primary color, mixed with 1-Red for @@ -99,7 +123,7 @@ struct decal_shader *create_decal_shader(enum SHADER_TYPE type) // - Blue is a lazy alpha removal for now // - Alpha channel passed through const char *mask_frag_src = - "#version 150 core\n" + "#version 400 core\n" "uniform sampler2D tex;\n" "uniform vec4 in_ColorPrimary;\n" "uniform vec4 in_ColorSecondary;\n" @@ -118,6 +142,10 @@ struct decal_shader *create_decal_shader(enum SHADER_TYPE type) frag_src = mask_frag_src; } else if (type == SHADER_TYPE_BLIT) { frag_src = image_frag_src; + } else if (type == SHADER_TYPE_BLIT_DEPTH) { + frag_src = depth_image_frag_src; + } else if (type == SHADER_TYPE_BLIT_STENCIL) { + frag_src = stencil_image_frag_src; } else if (type == SHADER_TYPE_LOGO) { frag_src = xemu_logo_frag_src; } else { @@ -147,6 +175,7 @@ struct decal_shader *create_decal_shader(enum SHADER_TYPE type) s->ColorFill_loc = glGetUniformLocation(s->prog, "in_ColorFill"); s->time_loc = glGetUniformLocation(s->prog, "iTime"); s->scale_loc = glGetUniformLocation(s->prog, "scale"); + s->StencilBit_loc = glGetUniformLocation(s->prog, "stencil_bit"); // Create a vertex array object glGenVertexArrays(1, &s->vao); diff --git a/ui/xemu-shaders.h b/ui/xemu-shaders.h index 37cf340a01..cd4c9bbacb 100644 --- a/ui/xemu-shaders.h +++ b/ui/xemu-shaders.h @@ -27,6 +27,8 @@ enum SHADER_TYPE { SHADER_TYPE_BLIT, + SHADER_TYPE_BLIT_DEPTH, + SHADER_TYPE_BLIT_STENCIL, SHADER_TYPE_MASK, SHADER_TYPE_LOGO, }; @@ -48,6 +50,7 @@ struct decal_shader GLint tex_loc; GLint ScaleOffset_loc; GLint TexScaleOffset_loc; + GLint StencilBit_loc; GLint ColorPrimary_loc; GLint ColorSecondary_loc;