diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c index 6d52a5c3b3..2cc9a44bf5 100644 --- a/hw/xbox/nv2a/pgraph/gl/display.c +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -68,7 +68,7 @@ void pgraph_gl_init_display(NV2AState *d) "{\n" " vec2 texCoord = gl_FragCoord.xy/display_size;\n" " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" - " texCoord.y = 1 + rel*(texCoord.y - 1);" + " texCoord.y = 1 + rel*(texCoord.y - 1);\n" " out_Color.rgba = texture(tex, texCoord);\n" " if (pvideo_enable) {\n" " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" @@ -102,7 +102,19 @@ void pgraph_gl_init_display(NV2AState *d) glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); glGenFramebuffers(1, &r->disp_rndr.fbo); + + glGenTextures(1, &r->disp_rndr.vga_framebuffer_tex); + glBindTexture(GL_TEXTURE_2D, r->disp_rndr.vga_framebuffer_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glGenTextures(1, &r->disp_rndr.pvideo_tex); + glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + assert(glGetError() == GL_NO_ERROR); glo_set_current(g_nv2a_context_render); @@ -280,34 +292,132 @@ static void render_display_pvideo_overlay(NV2AState *d) scale_x, scale_y, 1.0f / pg->surface_scale_factor); } +void pgraph_gl_download_overlapping_surfaces(NV2AState *d, hwaddr start, hwaddr end) +{ + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &d->pgraph.gl_renderer_state->surfaces, entry) { + hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= end || + start >= surf_vram_end); + if (overlapping) { + pgraph_gl_surface_download_if_dirty(d, surface); + } + } +} + +static bool check_framebuffer_dirty(NV2AState *d, + hwaddr framebuffer, + hwaddr framebuffer_end) +{ + framebuffer &= TARGET_PAGE_MASK; + assert(framebuffer_end < memory_region_size(d->vram)); + return memory_region_test_and_clear_dirty(d->vram, + framebuffer, + framebuffer_end - framebuffer, + DIRTY_MEMORY_VGA); +} + +static inline void get_vga_buffer_format(NV2AState *d, + const SurfaceFormatInfo **format, + int *framebuffer_bytes_per_pixel) +{ + int framebuffer_bpp = d->vga.get_bpp(&d->vga); + switch (framebuffer_bpp) { + case 15: + *format = &kelvin_surface_color_format_gl_map[NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5]; + *framebuffer_bytes_per_pixel = 2; + break; + case 16: + *format = &kelvin_surface_color_format_gl_map[NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5]; + *framebuffer_bytes_per_pixel = 2; + break; + case 0: + /* See note in nv2a_get_bpp. For the purposes of selecting a surface, + * this is treated as 32bpp. */ + case 32: + *format = &kelvin_surface_color_format_gl_map[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8]; + *framebuffer_bytes_per_pixel = 4; + break; + default: + fprintf(stderr, "Unexpected framebuffer_bpp %d\n", framebuffer_bpp); + assert(!"Unexpected framebuffer_bpp value"); + } +} + static void render_display(NV2AState *d, SurfaceBinding *surface) { struct PGRAPHState *pg = &d->pgraph; PGRAPHGLState *r = pg->gl_renderer_state; - unsigned int width, height; + int vga_width, vga_height; VGADisplayParams vga_display_params; - d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); + d->vga.get_resolution(&d->vga, &vga_width, &vga_height); d->vga.get_params(&d->vga, &vga_display_params); - int line_offset = vga_display_params.line_offset ? surface->pitch / vga_display_params.line_offset : 1; /* Adjust viewport height for interlaced mode, used only in 1080i */ if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { - height *= 2; + vga_height *= 2; } + unsigned int width = vga_width; + unsigned int height = vga_height; pgraph_apply_scaling_factor(pg, &width, &height); + int line_offset = 1; + const SurfaceFormatInfo *format; + int framebuffer_bytes_per_pixel; + get_vga_buffer_format(d, &format, &framebuffer_bytes_per_pixel); + + if (surface + && surface->color + && surface->width == width + && surface->height == height) { + + line_offset = vga_display_params.line_offset ? surface->pitch / vga_display_params.line_offset : 1; + format = &surface->fmt; + } else { + if (vga_width * framebuffer_bytes_per_pixel > vga_display_params.line_offset) { + // Some games without widescreen support (e.g., + // Pirates: The Legend of Black Kat) will set a VGA resolution that + // is wider than a single line when run with widescreen enabled in + // the dashboard. + vga_width = vga_display_params.line_offset / framebuffer_bytes_per_pixel; + width = vga_width; + height = vga_height; + pgraph_apply_scaling_factor(pg, &width, &height); + } + hwaddr framebuffer = d->pcrtc.start; + size_t length = vga_display_params.line_offset * vga_height; + hwaddr framebuffer_end = framebuffer + length - 1; + + pgraph_gl_download_overlapping_surfaces(d, framebuffer, framebuffer_end); + + bool dirty = check_framebuffer_dirty(d, framebuffer, framebuffer_end); + if (dirty) { + nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); + glBindTexture(GL_TEXTURE_2D, r->disp_rndr.vga_framebuffer_tex); + pgraph_gl_upload_vram_to_bound_texture(d, + framebuffer, + false, + vga_width, + vga_height, + vga_display_params.line_offset, + vga_display_params.line_offset * vga_height, + format); + assert(glGetError() == GL_NO_ERROR); + } + surface = NULL; + } + glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer); - bool recreate = ( - surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format - || width != r->gl_display_buffer_width - || height != r->gl_display_buffer_height - || surface->fmt.gl_format != r->gl_display_buffer_format - || surface->fmt.gl_type != r->gl_display_buffer_type - ); + + bool recreate = width != r->gl_display_buffer_width + || height != r->gl_display_buffer_height + || format->gl_internal_format != r->gl_display_buffer_internal_format + || format->gl_format != r->gl_display_buffer_format + || format->gl_type != r->gl_display_buffer_type; if (recreate) { /* XXX: There's apparently a bug in some Intel OpenGL drivers for @@ -321,11 +431,11 @@ static void render_display(NV2AState *d, SurfaceBinding *surface) glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format; r->gl_display_buffer_width = width; r->gl_display_buffer_height = height; - r->gl_display_buffer_format = surface->fmt.gl_format; - r->gl_display_buffer_type = surface->fmt.gl_type; + r->gl_display_buffer_internal_format = format->gl_internal_format; + r->gl_display_buffer_format = format->gl_format; + r->gl_display_buffer_type = format->gl_type; glTexImage2D(GL_TEXTURE_2D, 0, r->gl_display_buffer_internal_format, r->gl_display_buffer_width, @@ -342,7 +452,8 @@ static void render_display(NV2AState *d, SurfaceBinding *surface) glDrawBuffers(1, DrawBuffers); assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glBindTexture(GL_TEXTURE_2D, + surface ? surface->gl_buffer : r->disp_rndr.vga_framebuffer_tex); glBindVertexArray(r->disp_rndr.vao); glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); glUseProgram(r->disp_rndr.prog); @@ -381,16 +492,19 @@ void pgraph_gl_sync(NV2AState *d) VGADisplayParams vga_display_params; d->vga.get_params(&d->vga, &vga_display_params); - SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + vga_display_params.line_offset); - if (surface == NULL) { + hwaddr framebuffer = d->pcrtc.start + vga_display_params.line_offset; + if (!framebuffer) { qemu_event_set(&d->pgraph.sync_complete); return; } + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, framebuffer); + if (surface) { + /* FIXME: Sanity check surface dimensions */ - /* FIXME: Sanity check surface dimensions */ + /* Wait for queued commands to complete */ + pgraph_gl_upload_surface_data(d, surface, !tcg_enabled()); + } - /* Wait for queued commands to complete */ - pgraph_gl_upload_surface_data(d, surface, !tcg_enabled()); gl_fence(); assert(glGetError() == GL_NO_ERROR); @@ -418,22 +532,23 @@ int pgraph_gl_get_framebuffer_surface(NV2AState *d) VGADisplayParams vga_display_params; d->vga.get_params(&d->vga, &vga_display_params); - SurfaceBinding *surface = pgraph_gl_surface_get_within( - d, d->pcrtc.start + vga_display_params.line_offset); - if (surface == NULL || !surface->color) { + const hwaddr framebuffer = d->pcrtc.start + vga_display_params.line_offset; + if (!framebuffer) { qemu_mutex_unlock(&d->pfifo.lock); return 0; } - assert(surface->color); - assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); - assert(surface->fmt.gl_format == GL_RGBA - || surface->fmt.gl_format == GL_RGB - || surface->fmt.gl_format == GL_BGR - || surface->fmt.gl_format == GL_BGRA + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, framebuffer); + if (surface && surface->color) { + assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); + assert(surface->fmt.gl_format == GL_RGBA + || surface->fmt.gl_format == GL_RGB + || surface->fmt.gl_format == GL_BGR + || surface->fmt.gl_format == GL_BGRA ); + surface->frame_time = pg->frame_time; + } - surface->frame_time = pg->frame_time; qemu_event_reset(&d->pgraph.sync_complete); qatomic_set(&pg->sync_pending, true); pfifo_kick(d); diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h index 5c765361d6..d3e05b7ae0 100644 --- a/hw/xbox/nv2a/pgraph/gl/renderer.h +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -218,6 +218,7 @@ typedef struct PGRAPHGLState { GLuint display_size_loc; GLuint line_offset_loc; GLuint tex_loc; + GLuint vga_framebuffer_tex; GLuint pvideo_tex; GLint pvideo_enable_loc; GLint pvideo_tex_loc; @@ -284,5 +285,7 @@ void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg); void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale); unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d); int pgraph_gl_get_framebuffer_surface(NV2AState *d); +void pgraph_gl_download_overlapping_surfaces(NV2AState *d, hwaddr start, hwaddr end); +void pgraph_gl_upload_vram_to_bound_texture(NV2AState *d, hwaddr vram_addr, bool swizzle, unsigned int surface_width, unsigned int surface_height, unsigned int pitch, size_t size, const SurfaceFormatInfo *fmt); #endif diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c index cd6dd145ca..dbb254ae5a 100644 --- a/hw/xbox/nv2a/pgraph/gl/surface.c +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -833,6 +833,72 @@ static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, } } +// Uploads the pixel data at the given VRAM address into the currently bound +// texture. +void pgraph_gl_upload_vram_to_bound_texture(NV2AState *d, + hwaddr vram_addr, + bool swizzle, + unsigned int surface_width, + unsigned int surface_height, + unsigned int pitch, + size_t size, + const SurfaceFormatInfo *fmt) +{ + PGRAPHState *pg = &d->pgraph; + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + vram_addr; + + if (swizzle) { + buf = (uint8_t*)g_malloc(size); + unswizzle_rect(data + vram_addr, + surface_width, surface_height, + buf, + pitch, + fmt->bytes_per_pixel); + } + + /* FIXME: Replace this flip/scaling */ + + // This is VRAM so we can't do this inplace! + unsigned int compact_pitch = surface_width * fmt->bytes_per_pixel; + uint8_t *flipped_buf = (uint8_t *)g_malloc(surface_height * compact_pitch); + unsigned int irow; + for (irow = 0; irow < surface_height; irow++) { + memcpy(&flipped_buf[compact_pitch * (surface_height - irow - 1)], + &buf[pitch * irow], + compact_pitch); + } + + uint8_t *gl_read_buf = flipped_buf; + unsigned int width = surface_width; + unsigned int height = surface_height; + + if (pg->surface_scale_factor > 1) { + pgraph_apply_scaling_factor(pg, &width, &height); + pg->scale_buf = (uint8_t *)g_realloc( + pg->scale_buf, width * height * fmt->bytes_per_pixel); + gl_read_buf = pg->scale_buf; + uint8_t *out = gl_read_buf, *in = flipped_buf; + surface_copy_expand(out, in, surface_width, surface_height, + fmt->bytes_per_pixel, + d->pgraph.surface_scale_factor); + } + + if (unlikely((width * fmt->bytes_per_pixel) % 4 != 0)) { + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + } else { + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + } + + glTexImage2D(GL_TEXTURE_2D, 0, fmt->gl_internal_format, width, + height, 0, fmt->gl_format, fmt->gl_type, + gl_read_buf); + g_free(flipped_buf); + if (swizzle) { + g_free(buf); + } +} + void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force) { @@ -865,62 +931,20 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - uint8_t *data = d->vram_ptr; - uint8_t *buf = data + surface->vram_addr; - - if (surface->swizzle) { - buf = (uint8_t*)g_malloc(surface->size); - unswizzle_rect(data + surface->vram_addr, - surface->width, surface->height, - buf, - surface->pitch, - surface->fmt.bytes_per_pixel); - } - - /* FIXME: Replace this flip/scaling */ - - // This is VRAM so we can't do this inplace! - uint8_t *flipped_buf = (uint8_t *)g_malloc( - surface->height * surface->width * surface->fmt.bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < surface->height; irow++) { - memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) - * surface->fmt.bytes_per_pixel], - &buf[surface->pitch * irow], - surface->width * surface->fmt.bytes_per_pixel); - } - - uint8_t *gl_read_buf = flipped_buf; - unsigned int width = surface->width, height = surface->height; - - if (pg->surface_scale_factor > 1) { - pgraph_apply_scaling_factor(pg, &width, &height); - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); - gl_read_buf = pg->scale_buf; - uint8_t *out = gl_read_buf, *in = flipped_buf; - surface_copy_expand(out, in, surface->width, surface->height, - surface->fmt.bytes_per_pixel, - d->pgraph.surface_scale_factor); - } - int prev_unpack_alignment; glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment); - if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) { - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - } else { - glPixelStorei(GL_UNPACK_ALIGNMENT, 4); - } glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, - height, 0, surface->fmt.gl_format, surface->fmt.gl_type, - gl_read_buf); + pgraph_gl_upload_vram_to_bound_texture(d, + surface->vram_addr, + surface->swizzle, + surface->width, + surface->height, + surface->pitch, + surface->size, + &surface->fmt); + glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); - g_free(flipped_buf); - if (surface->swizzle) { - g_free(buf); - } // Rebind previous framebuffer binding glBindTexture(GL_TEXTURE_2D, last_texture_binding); diff --git a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c index 1f2d599eda..69d1866154 100644 --- a/hw/xbox/nv2a/pgraph/gl/texture.c +++ b/hw/xbox/nv2a/pgraph/gl/texture.c @@ -284,14 +284,7 @@ void pgraph_gl_bind_textures(NV2AState *d) // Writeback any surfaces which this texture may index hwaddr tex_vram_end = texture_vram_offset + length - 1; - QTAILQ_FOREACH(surface, &r->surfaces, entry) { - hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr >= tex_vram_end - || texture_vram_offset >= surf_vram_end); - if (overlapping) { - pgraph_gl_surface_download_if_dirty(d, surface); - } - } + pgraph_gl_download_overlapping_surfaces(d, texture_vram_offset, tex_vram_end); } TextureKey key;