nv2a: Handle framebuffer CPU blit and PVIDEO only rendering

Handles two edge cases:
1) CPU blits to the framebuffer without using 3D rendering.
2) Fullscreen PVIDEO rendering without any 3D rendering.

In both cases, this change stops the pgraph code from returning early.
Instead of falling back to the special-case VGA handling in
`sdl2_gl_refresh`, the contents of VRAM are rendered through a dedicated
framebuffer texture.

Fixes #652
Fixes #1165

[Tests](https://github.com/abaire/nxdk_pgraph_tests/blob/main/src/tests/antialiasing_tests.cpp)
[HW results](https://github.com/abaire/nxdk_pgraph_tests_golden_results/wiki/Results-Antialiasing_tests)
This commit is contained in:
Erik Abair 2022-06-30 21:11:11 -07:00
parent f701573d44
commit dc881c700a
4 changed files with 225 additions and 90 deletions

View File

@ -68,7 +68,7 @@ void pgraph_gl_init_display(NV2AState *d)
"{\n"
" vec2 texCoord = gl_FragCoord.xy/display_size;\n"
" float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
" texCoord.y = 1 + rel*(texCoord.y - 1);"
" texCoord.y = 1 + rel*(texCoord.y - 1);\n"
" out_Color.rgba = texture(tex, texCoord);\n"
" if (pvideo_enable) {\n"
" vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
@ -102,7 +102,19 @@ void pgraph_gl_init_display(NV2AState *d)
glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
glGenFramebuffers(1, &r->disp_rndr.fbo);
glGenTextures(1, &r->disp_rndr.vga_framebuffer_tex);
glBindTexture(GL_TEXTURE_2D, r->disp_rndr.vga_framebuffer_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glGenTextures(1, &r->disp_rndr.pvideo_tex);
glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
assert(glGetError() == GL_NO_ERROR);
glo_set_current(g_nv2a_context_render);
@ -280,34 +292,132 @@ static void render_display_pvideo_overlay(NV2AState *d)
scale_x, scale_y, 1.0f / pg->surface_scale_factor);
}
/*
 * Hands every tracked surface whose VRAM range intersects [start, end] to
 * pgraph_gl_surface_download_if_dirty().
 *
 * d:     device state; the surface list is read from
 *        d->pgraph.gl_renderer_state->surfaces.
 * start: address of the first byte of the range of interest.
 * end:   address of the LAST byte of the range of interest (inclusive; the
 *        callers in this file pass start + length - 1).
 */
void pgraph_gl_download_overlapping_surfaces(NV2AState *d, hwaddr start, hwaddr end)
{
    SurfaceBinding *surface;

    QTAILQ_FOREACH (surface, &d->pgraph.gl_renderer_state->surfaces, entry) {
        /* Address of the last byte covered by this surface (inclusive). */
        hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;

        /*
         * Both ranges are closed intervals, so they intersect exactly when
         * each one starts at or before the other one ends. Using strict
         * comparisons here would miss ranges sharing a single boundary byte.
         */
        bool overlapping = surface->vram_addr <= end &&
                           start <= surf_vram_end;
        if (overlapping) {
            pgraph_gl_surface_download_if_dirty(d, surface);
        }
    }
}
/*
 * Tests and clears the DIRTY_MEMORY_VGA state of the VRAM range that holds
 * the framebuffer.
 *
 * d:               device state; the range is checked against d->vram.
 * framebuffer:     address of the first framebuffer byte. It is masked with
 *                  TARGET_PAGE_MASK before use.
 * framebuffer_end: address of the LAST framebuffer byte (inclusive); must lie
 *                  inside the VRAM region.
 *
 * Returns the result of memory_region_test_and_clear_dirty() for that range.
 */
static bool check_framebuffer_dirty(NV2AState *d,
                                    hwaddr framebuffer,
                                    hwaddr framebuffer_end)
{
    framebuffer &= TARGET_PAGE_MASK;
    assert(framebuffer_end < memory_region_size(d->vram));

    /*
     * framebuffer_end is inclusive, so the byte count is end - start + 1.
     * Dropping the +1 would leave out the final page whenever the last
     * framebuffer byte is the first byte of a page.
     */
    return memory_region_test_and_clear_dirty(d->vram,
                                              framebuffer,
                                              framebuffer_end - framebuffer + 1,
                                              DIRTY_MEMORY_VGA);
}
/*
 * Selects the surface color format and pixel size that correspond to the
 * bit depth currently reported by the VGA device.
 *
 * d:                           device state; the depth comes from
 *                              d->vga.get_bpp().
 * format:                      out; set to the matching entry of
 *                              kelvin_surface_color_format_gl_map.
 * framebuffer_bytes_per_pixel: out; set to 2 for 15/16 bpp and 4 for 0/32 bpp.
 *
 * For any other depth a message is printed, the assertion fires, and neither
 * output is written.
 */
static inline void get_vga_buffer_format(NV2AState *d,
                                         const SurfaceFormatInfo **format,
                                         int *framebuffer_bytes_per_pixel)
{
    const int framebuffer_bpp = d->vga.get_bpp(&d->vga);
    int color_format;
    int bytes_per_pixel;

    if (framebuffer_bpp == 15) {
        color_format = NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5;
        bytes_per_pixel = 2;
    } else if (framebuffer_bpp == 16) {
        color_format = NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5;
        bytes_per_pixel = 2;
    } else if (framebuffer_bpp == 0 || framebuffer_bpp == 32) {
        /* See note in nv2a_get_bpp. For the purposes of selecting a surface,
         * a reported depth of 0 is treated as 32bpp. */
        color_format = NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8;
        bytes_per_pixel = 4;
    } else {
        fprintf(stderr, "Unexpected framebuffer_bpp %d\n", framebuffer_bpp);
        assert(!"Unexpected framebuffer_bpp value");
        return;
    }

    *format = &kelvin_surface_color_format_gl_map[color_format];
    *framebuffer_bytes_per_pixel = bytes_per_pixel;
}
static void render_display(NV2AState *d, SurfaceBinding *surface)
{
struct PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
unsigned int width, height;
int vga_width, vga_height;
VGADisplayParams vga_display_params;
d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
d->vga.get_resolution(&d->vga, &vga_width, &vga_height);
d->vga.get_params(&d->vga, &vga_display_params);
int line_offset = vga_display_params.line_offset ? surface->pitch / vga_display_params.line_offset : 1;
/* Adjust viewport height for interlaced mode, used only in 1080i */
if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
height *= 2;
vga_height *= 2;
}
unsigned int width = vga_width;
unsigned int height = vga_height;
pgraph_apply_scaling_factor(pg, &width, &height);
int line_offset = 1;
const SurfaceFormatInfo *format;
int framebuffer_bytes_per_pixel;
get_vga_buffer_format(d, &format, &framebuffer_bytes_per_pixel);
if (surface
&& surface->color
&& surface->width == width
&& surface->height == height) {
line_offset = vga_display_params.line_offset ? surface->pitch / vga_display_params.line_offset : 1;
format = &surface->fmt;
} else {
if (vga_width * framebuffer_bytes_per_pixel > vga_display_params.line_offset) {
// Some games without widescreen support (e.g.,
// Pirates: The Legend of Black Kat) will set a VGA resolution that
// is wider than a single line when run with widescreen enabled in
// the dashboard.
vga_width = vga_display_params.line_offset / framebuffer_bytes_per_pixel;
width = vga_width;
height = vga_height;
pgraph_apply_scaling_factor(pg, &width, &height);
}
hwaddr framebuffer = d->pcrtc.start;
size_t length = vga_display_params.line_offset * vga_height;
hwaddr framebuffer_end = framebuffer + length - 1;
pgraph_gl_download_overlapping_surfaces(d, framebuffer, framebuffer_end);
bool dirty = check_framebuffer_dirty(d, framebuffer, framebuffer_end);
if (dirty) {
nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD);
glBindTexture(GL_TEXTURE_2D, r->disp_rndr.vga_framebuffer_tex);
pgraph_gl_upload_vram_to_bound_texture(d,
framebuffer,
false,
vga_width,
vga_height,
vga_display_params.line_offset,
vga_display_params.line_offset * vga_height,
format);
assert(glGetError() == GL_NO_ERROR);
}
surface = NULL;
}
glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer);
bool recreate = (
surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format
|| width != r->gl_display_buffer_width
|| height != r->gl_display_buffer_height
|| surface->fmt.gl_format != r->gl_display_buffer_format
|| surface->fmt.gl_type != r->gl_display_buffer_type
);
bool recreate = width != r->gl_display_buffer_width
|| height != r->gl_display_buffer_height
|| format->gl_internal_format != r->gl_display_buffer_internal_format
|| format->gl_format != r->gl_display_buffer_format
|| format->gl_type != r->gl_display_buffer_type;
if (recreate) {
/* XXX: There's apparently a bug in some Intel OpenGL drivers for
@ -321,11 +431,11 @@ static void render_display(NV2AState *d, SurfaceBinding *surface)
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format;
r->gl_display_buffer_width = width;
r->gl_display_buffer_height = height;
r->gl_display_buffer_format = surface->fmt.gl_format;
r->gl_display_buffer_type = surface->fmt.gl_type;
r->gl_display_buffer_internal_format = format->gl_internal_format;
r->gl_display_buffer_format = format->gl_format;
r->gl_display_buffer_type = format->gl_type;
glTexImage2D(GL_TEXTURE_2D, 0,
r->gl_display_buffer_internal_format,
r->gl_display_buffer_width,
@ -342,7 +452,8 @@ static void render_display(NV2AState *d, SurfaceBinding *surface)
glDrawBuffers(1, DrawBuffers);
assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
glBindTexture(GL_TEXTURE_2D,
surface ? surface->gl_buffer : r->disp_rndr.vga_framebuffer_tex);
glBindVertexArray(r->disp_rndr.vao);
glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
glUseProgram(r->disp_rndr.prog);
@ -381,16 +492,19 @@ void pgraph_gl_sync(NV2AState *d)
VGADisplayParams vga_display_params;
d->vga.get_params(&d->vga, &vga_display_params);
SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + vga_display_params.line_offset);
if (surface == NULL) {
hwaddr framebuffer = d->pcrtc.start + vga_display_params.line_offset;
if (!framebuffer) {
qemu_event_set(&d->pgraph.sync_complete);
return;
}
SurfaceBinding *surface = pgraph_gl_surface_get_within(d, framebuffer);
if (surface) {
/* FIXME: Sanity check surface dimensions */
/* FIXME: Sanity check surface dimensions */
/* Wait for queued commands to complete */
pgraph_gl_upload_surface_data(d, surface, !tcg_enabled());
}
/* Wait for queued commands to complete */
pgraph_gl_upload_surface_data(d, surface, !tcg_enabled());
gl_fence();
assert(glGetError() == GL_NO_ERROR);
@ -418,22 +532,23 @@ int pgraph_gl_get_framebuffer_surface(NV2AState *d)
VGADisplayParams vga_display_params;
d->vga.get_params(&d->vga, &vga_display_params);
SurfaceBinding *surface = pgraph_gl_surface_get_within(
d, d->pcrtc.start + vga_display_params.line_offset);
if (surface == NULL || !surface->color) {
const hwaddr framebuffer = d->pcrtc.start + vga_display_params.line_offset;
if (!framebuffer) {
qemu_mutex_unlock(&d->pfifo.lock);
return 0;
}
assert(surface->color);
assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
assert(surface->fmt.gl_format == GL_RGBA
|| surface->fmt.gl_format == GL_RGB
|| surface->fmt.gl_format == GL_BGR
|| surface->fmt.gl_format == GL_BGRA
SurfaceBinding *surface = pgraph_gl_surface_get_within(d, framebuffer);
if (surface && surface->color) {
assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
assert(surface->fmt.gl_format == GL_RGBA
|| surface->fmt.gl_format == GL_RGB
|| surface->fmt.gl_format == GL_BGR
|| surface->fmt.gl_format == GL_BGRA
);
surface->frame_time = pg->frame_time;
}
surface->frame_time = pg->frame_time;
qemu_event_reset(&d->pgraph.sync_complete);
qatomic_set(&pg->sync_pending, true);
pfifo_kick(d);

View File

@ -218,6 +218,7 @@ typedef struct PGRAPHGLState {
GLuint display_size_loc;
GLuint line_offset_loc;
GLuint tex_loc;
GLuint vga_framebuffer_tex;
GLuint pvideo_tex;
GLint pvideo_enable_loc;
GLint pvideo_tex_loc;
@ -284,5 +285,7 @@ void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg);
void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale);
unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d);
int pgraph_gl_get_framebuffer_surface(NV2AState *d);
void pgraph_gl_download_overlapping_surfaces(NV2AState *d, hwaddr start, hwaddr end);
void pgraph_gl_upload_vram_to_bound_texture(NV2AState *d, hwaddr vram_addr, bool swizzle, unsigned int surface_width, unsigned int surface_height, unsigned int pitch, size_t size, const SurfaceFormatInfo *fmt);
#endif

View File

@ -833,6 +833,72 @@ static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width,
}
}
/*
 * Uploads the pixel data at the given VRAM address into the texture currently
 * bound to GL_TEXTURE_2D.
 *
 * d:              device state; VRAM is read through d->vram_ptr.
 * vram_addr:      byte offset of the first source row within VRAM.
 * swizzle:        when true, the source is first unswizzled into a temporary
 *                 buffer of `size` bytes.
 * surface_width:  source width in pixels (before surface scaling).
 * surface_height: source height in pixels (before surface scaling).
 * pitch:          byte stride between consecutive source rows.
 * size:           size in bytes of the temporary unswizzle buffer; only used
 *                 when `swizzle` is true.
 * fmt:            supplies bytes_per_pixel and the GL internal format, format
 *                 and type passed to glTexImage2D.
 *
 * The rows are copied in reverse vertical order into a tightly packed buffer,
 * expanded by pg->surface_scale_factor when that factor is greater than 1, and
 * the bound texture is respecified with glTexImage2D at the resulting size.
 * GL_UNPACK_ALIGNMENT is changed only for the duration of the upload and is
 * restored to its previous value before returning.
 */
void pgraph_gl_upload_vram_to_bound_texture(NV2AState *d,
                                            hwaddr vram_addr,
                                            bool swizzle,
                                            unsigned int surface_width,
                                            unsigned int surface_height,
                                            unsigned int pitch,
                                            size_t size,
                                            const SurfaceFormatInfo *fmt)
{
    PGRAPHState *pg = &d->pgraph;
    uint8_t *data = d->vram_ptr;
    uint8_t *buf = data + vram_addr;

    if (swizzle) {
        buf = (uint8_t *)g_malloc(size);
        unswizzle_rect(data + vram_addr,
                       surface_width, surface_height,
                       buf,
                       pitch,
                       fmt->bytes_per_pixel);
    }

    /* FIXME: Replace this flip/scaling */

    // This is VRAM so we can't do this inplace!
    unsigned int compact_pitch = surface_width * fmt->bytes_per_pixel;
    uint8_t *flipped_buf = (uint8_t *)g_malloc(surface_height * compact_pitch);
    unsigned int irow;
    for (irow = 0; irow < surface_height; irow++) {
        /* Source row irow lands in destination row (height - 1 - irow). */
        memcpy(&flipped_buf[compact_pitch * (surface_height - irow - 1)],
               &buf[pitch * irow],
               compact_pitch);
    }

    uint8_t *gl_read_buf = flipped_buf;
    unsigned int width = surface_width;
    unsigned int height = surface_height;

    if (pg->surface_scale_factor > 1) {
        pgraph_apply_scaling_factor(pg, &width, &height);
        /* scale_buf is kept in pg and reused across uploads. */
        pg->scale_buf = (uint8_t *)g_realloc(
            pg->scale_buf, width * height * fmt->bytes_per_pixel);
        gl_read_buf = pg->scale_buf;
        uint8_t *out = gl_read_buf, *in = flipped_buf;
        surface_copy_expand(out, in, surface_width, surface_height,
                            fmt->bytes_per_pixel,
                            pg->surface_scale_factor);
    }

    /*
     * The unpack alignment is context-wide state. Remember the current value
     * so that callers which do not save it themselves are not left with the
     * value chosen for this particular upload.
     */
    int prev_unpack_alignment;
    glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment);

    /* Rows are tightly packed; drop to byte alignment when the row size is
     * not a multiple of 4. */
    if (unlikely((width * fmt->bytes_per_pixel) % 4 != 0)) {
        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    } else {
        glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
    }

    glTexImage2D(GL_TEXTURE_2D, 0, fmt->gl_internal_format, width,
                 height, 0, fmt->gl_format, fmt->gl_type,
                 gl_read_buf);

    glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment);

    g_free(flipped_buf);
    if (swizzle) {
        g_free(buf);
    }
}
void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
bool force)
{
@ -865,62 +931,20 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
GL_TEXTURE_2D, 0, 0);
uint8_t *data = d->vram_ptr;
uint8_t *buf = data + surface->vram_addr;
if (surface->swizzle) {
buf = (uint8_t*)g_malloc(surface->size);
unswizzle_rect(data + surface->vram_addr,
surface->width, surface->height,
buf,
surface->pitch,
surface->fmt.bytes_per_pixel);
}
/* FIXME: Replace this flip/scaling */
// This is VRAM so we can't do this inplace!
uint8_t *flipped_buf = (uint8_t *)g_malloc(
surface->height * surface->width * surface->fmt.bytes_per_pixel);
unsigned int irow;
for (irow = 0; irow < surface->height; irow++) {
memcpy(&flipped_buf[surface->width * (surface->height - irow - 1)
* surface->fmt.bytes_per_pixel],
&buf[surface->pitch * irow],
surface->width * surface->fmt.bytes_per_pixel);
}
uint8_t *gl_read_buf = flipped_buf;
unsigned int width = surface->width, height = surface->height;
if (pg->surface_scale_factor > 1) {
pgraph_apply_scaling_factor(pg, &width, &height);
pg->scale_buf = (uint8_t *)g_realloc(
pg->scale_buf, width * height * surface->fmt.bytes_per_pixel);
gl_read_buf = pg->scale_buf;
uint8_t *out = gl_read_buf, *in = flipped_buf;
surface_copy_expand(out, in, surface->width, surface->height,
surface->fmt.bytes_per_pixel,
d->pgraph.surface_scale_factor);
}
int prev_unpack_alignment;
glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment);
if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) {
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
} else {
glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
}
glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width,
height, 0, surface->fmt.gl_format, surface->fmt.gl_type,
gl_read_buf);
pgraph_gl_upload_vram_to_bound_texture(d,
surface->vram_addr,
surface->swizzle,
surface->width,
surface->height,
surface->pitch,
surface->size,
&surface->fmt);
glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment);
g_free(flipped_buf);
if (surface->swizzle) {
g_free(buf);
}
// Rebind previous framebuffer binding
glBindTexture(GL_TEXTURE_2D, last_texture_binding);

View File

@ -284,14 +284,7 @@ void pgraph_gl_bind_textures(NV2AState *d)
// Writeback any surfaces which this texture may index
hwaddr tex_vram_end = texture_vram_offset + length - 1;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
bool overlapping = !(surface->vram_addr >= tex_vram_end
|| texture_vram_offset >= surf_vram_end);
if (overlapping) {
pgraph_gl_surface_download_if_dirty(d, surface);
}
}
pgraph_gl_download_overlapping_surfaces(d, texture_vram_offset, tex_vram_end);
}
TextureKey key;