diff --git a/.clang-format b/.clang-format
index 8750a94dc8..3779a03403 100644
--- a/.clang-format
+++ b/.clang-format
@@ -71,8 +71,8 @@ IndentWidth: 4
AccessModifierOffset: -4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ?
-MacroBlockEnd: '.*_END$'
+#MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ?
+#MacroBlockEnd: '.*_END$'
MaxEmptyLinesToKeep: 2
#PenaltyBreakBeforeFirstCallParameter: 19
#PenaltyBreakComment: 300
diff --git a/.gitmodules b/.gitmodules
index 4118661130..420d7d9cd2 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -82,9 +82,18 @@
[submodule "tomlplusplus"]
path = tomlplusplus
url = https://github.com/marzer/tomlplusplus
-[submodule "hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"]
- path = hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu
+[submodule "hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu"]
+ path = hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu
url = https://github.com/abaire/nv2a_vsh_cpu.git
[submodule "ui/thirdparty/httplib"]
path = ui/thirdparty/httplib
url = https://github.com/yhirose/cpp-httplib
+[submodule "thirdparty/VulkanMemoryAllocator"]
+ path = thirdparty/VulkanMemoryAllocator
+ url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
+[submodule "thirdparty/volk"]
+ path = thirdparty/volk
+ url = https://github.com/zeux/volk
+[submodule "thirdparty/SPIRV-Reflect"]
+ path = thirdparty/SPIRV-Reflect
+ url = https://github.com/KhronosGroup/SPIRV-Reflect
diff --git a/config_spec.yml b/config_spec.yml
index b858606e68..f2c3736a8f 100644
--- a/config_spec.yml
+++ b/config_spec.yml
@@ -130,6 +130,12 @@ input:
default: 18 # w
display:
+ renderer:
+ type: enum
+ values: ["NULL", OPENGL, VULKAN]
+ default: OPENGL
+ vulkan:
+ validation_layers: bool
quality:
surface_scale:
type: integer
diff --git a/configure b/configure
index 11471698b6..880f30c4bd 100755
--- a/configure
+++ b/configure
@@ -237,7 +237,7 @@ else
git_submodules_action="ignore"
fi
-git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"
+git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect"
git="git"
# Don't accept a target_list environment variable.
diff --git a/debian/control b/debian/control
index 91ed61433f..30603057ea 100644
--- a/debian/control
+++ b/debian/control
@@ -16,6 +16,8 @@ Build-Depends: debhelper (>= 11),
libssl-dev,
libpcap-dev,
libslirp-dev,
+ glslang-dev,
+ libvulkan-dev,
Standards-Version: 3.9.8
Homepage: https://xemu.app
XS-Debian-Vcs-Browser: https://github.com/mborgerson/xemu
diff --git a/hw/xbox/nv2a/debug.h b/hw/xbox/nv2a/debug.h
index 0c2c3d5f76..8a7fcc1449 100644
--- a/hw/xbox/nv2a/debug.h
+++ b/hw/xbox/nv2a/debug.h
@@ -1,8 +1,9 @@
/*
- * QEMU Geforce NV2A debug helpers
+ * QEMU Geforce NV2A profiling and debug helpers
*
- * Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2023 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -18,8 +19,8 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef HW_NV2A_DEBUG_H
-#define HW_NV2A_DEBUG_H
+#ifndef HW_XBOX_NV2A_DEBUG_H
+#define HW_XBOX_NV2A_DEBUG_H
#include <stdint.h>
@@ -36,54 +37,6 @@
# define NV2A_DPRINTF(format, ...) do { } while (0)
#endif
-// #define DEBUG_NV2A_GL
-#ifdef DEBUG_NV2A_GL
-
-#include <stdbool.h>
-#include "gl/gloffscreen.h"
-#include "config-host.h"
-
-void gl_debug_initialize(void);
-void gl_debug_message(bool cc, const char *fmt, ...);
-void gl_debug_group_begin(const char *fmt, ...);
-void gl_debug_group_end(void);
-void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
-void gl_debug_frame_terminator(void);
-
-# define NV2A_GL_DPRINTF(cc, format, ...) \
- gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__)
-# define NV2A_GL_DGROUP_BEGIN(format, ...) \
- gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__)
-# define NV2A_GL_DGROUP_END() \
- gl_debug_group_end()
-# define NV2A_GL_DLABEL(target, name, format, ...) \
- gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__)
-#define NV2A_GL_DFRAME_TERMINATOR() \
- gl_debug_frame_terminator()
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef CONFIG_RENDERDOC
-bool nv2a_dbg_renderdoc_available(void);
-void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#else
-# define NV2A_GL_DPRINTF(cc, format, ...) do { \
- if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \
- } while (0)
-# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0)
-# define NV2A_GL_DGROUP_END() do { } while (0)
-# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0)
-# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0)
-#endif
-
/* Debug prints to identify when unimplemented or unconfirmed features
* are being exercised. These cases likely result in graphical problems of
* varying degree, but should otherwise not crash the system. Enable this
@@ -111,6 +64,22 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
#endif
#define NV2A_PROF_COUNTERS_XMAC \
+ _X(NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY) \
+ _X(NV2A_PROF_FINISH_SURFACE_CREATE) \
+ _X(NV2A_PROF_FINISH_SURFACE_DOWN) \
+ _X(NV2A_PROF_FINISH_NEED_BUFFER_SPACE) \
+ _X(NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY) \
+ _X(NV2A_PROF_FINISH_PRESENTING) \
+ _X(NV2A_PROF_FINISH_FLIP_STALL) \
+ _X(NV2A_PROF_FINISH_FLUSH) \
+ _X(NV2A_PROF_CLEAR) \
+ _X(NV2A_PROF_QUEUE_SUBMIT) \
+ _X(NV2A_PROF_QUEUE_SUBMIT_AUX) \
+ _X(NV2A_PROF_PIPELINE_NOTDIRTY) \
+ _X(NV2A_PROF_PIPELINE_GEN) \
+ _X(NV2A_PROF_PIPELINE_BIND) \
+ _X(NV2A_PROF_PIPELINE_MERGE) \
+ _X(NV2A_PROF_PIPELINE_RENDERPASSES) \
_X(NV2A_PROF_BEGIN_ENDS) \
_X(NV2A_PROF_DRAW_ARRAYS) \
_X(NV2A_PROF_INLINE_BUFFERS) \
@@ -120,18 +89,26 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
_X(NV2A_PROF_SHADER_GEN) \
_X(NV2A_PROF_SHADER_BIND) \
_X(NV2A_PROF_SHADER_BIND_NOTDIRTY) \
+ _X(NV2A_PROF_SHADER_UBO_DIRTY) \
+ _X(NV2A_PROF_SHADER_UBO_NOTDIRTY) \
_X(NV2A_PROF_ATTR_BIND) \
_X(NV2A_PROF_TEX_UPLOAD) \
- _X(NV2A_PROF_TEX_BIND) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_1) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_2) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_3) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_4) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY) \
+ _X(NV2A_PROF_SURF_SWIZZLE) \
+ _X(NV2A_PROF_SURF_CREATE) \
_X(NV2A_PROF_SURF_DOWNLOAD) \
_X(NV2A_PROF_SURF_UPLOAD) \
_X(NV2A_PROF_SURF_TO_TEX) \
_X(NV2A_PROF_SURF_TO_TEX_FALLBACK) \
+ _X(NV2A_PROF_QUEUE_SUBMIT_1) \
+ _X(NV2A_PROF_QUEUE_SUBMIT_2) \
+ _X(NV2A_PROF_QUEUE_SUBMIT_3) \
+ _X(NV2A_PROF_QUEUE_SUBMIT_4) \
+ _X(NV2A_PROF_QUEUE_SUBMIT_5) \
enum NV2A_PROF_COUNTERS_ENUM {
#define _X(x) x,
@@ -161,6 +138,21 @@ extern NV2AStats g_nv2a_stats;
const char *nv2a_profile_get_counter_name(unsigned int cnt);
int nv2a_profile_get_counter_value(unsigned int cnt);
+void nv2a_profile_increment(void);
+void nv2a_profile_flip_stall(void);
+
+static inline void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt)
+{
+ g_nv2a_stats.frame_working.counters[cnt] += 1;
+}
+
+#ifdef CONFIG_RENDERDOC
+void nv2a_dbg_renderdoc_init(void);
+void *nv2a_dbg_renderdoc_get_api(void);
+bool nv2a_dbg_renderdoc_available(void);
+void nv2a_dbg_renderdoc_capture_frames(int num_frames);
+extern int renderdoc_capture_frames;
+#endif
#ifdef __cplusplus
}
diff --git a/hw/xbox/nv2a/gl/meson.build b/hw/xbox/nv2a/gl/meson.build
deleted file mode 100644
index 973a9aa8c1..0000000000
--- a/hw/xbox/nv2a/gl/meson.build
+++ /dev/null
@@ -1,6 +0,0 @@
-softmmu_ss.add([sdl, files(
- 'gloffscreen_common.c',
- 'gloffscreen_sdl.c',
- )])
-
-# gloffscreen_sdl.o-cflags := $(SDL_CFLAGS)
diff --git a/hw/xbox/nv2a/meson.build b/hw/xbox/nv2a/meson.build
index d3b159a3bc..29eff86e27 100644
--- a/hw/xbox/nv2a/meson.build
+++ b/hw/xbox/nv2a/meson.build
@@ -1,27 +1,17 @@
specific_ss.add(files(
'nv2a.c',
- 'debug.c',
'pbus.c',
'pcrtc.c',
'pfb.c',
'pfifo.c',
- 'pgraph.c',
'pmc.c',
'pramdac.c',
'prmcio.c',
'prmdio.c',
'prmvio.c',
- 'psh.c',
'ptimer.c',
'pvideo.c',
- 'shaders.c',
'stubs.c',
'user.c',
- 'vsh.c',
- 'swizzle.c',
- 's3tc.c',
))
-subdir('gl')
-
-subdir('thirdparty')
-specific_ss.add(nv2a_vsh_cpu)
+subdir('pgraph')
diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c
index e068f76dc9..7b16113115 100644
--- a/hw/xbox/nv2a/nv2a.c
+++ b/hw/xbox/nv2a/nv2a.c
@@ -172,6 +172,16 @@ static void nv2a_get_offsets(VGACommonState *s,
*pline_compare = line_compare;
}
+const uint8_t *nv2a_get_dac_palette(void)
+{
+ return g_nv2a->puserdac.palette;
+}
+
+int nv2a_get_screen_off(void)
+{
+ return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF;
+}
+
static void nv2a_vga_gfx_update(void *opaque)
{
VGACommonState *vga = opaque;
@@ -277,7 +287,7 @@ static void nv2a_reset(NV2AState *d)
}
memset(d->pfifo.regs, 0, sizeof(d->pfifo.regs));
- memset(d->pgraph.regs, 0, sizeof(d->pgraph.regs));
+ memset(d->pgraph.regs_, 0, sizeof(d->pgraph.regs_));
memset(d->pvideo.regs, 0, sizeof(d->pvideo.regs));
d->pcrtc.start = 0;
@@ -365,11 +375,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state)
if (state == RUN_STATE_SAVE_VM) {
nv2a_lock_fifo(d);
qatomic_set(&d->pfifo.halt, true);
- qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true);
- qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete);
+ d->pgraph.renderer->ops.pre_savevm_trigger(d);
nv2a_unlock_fifo(d);
qemu_mutex_unlock_iothread();
- qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete);
+ d->pgraph.renderer->ops.pre_savevm_wait(d);
qemu_mutex_lock_iothread();
nv2a_lock_fifo(d);
} else if (state == RUN_STATE_RESTORE_VM) {
@@ -382,11 +391,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state)
nv2a_unlock_fifo(d);
} else if (state == RUN_STATE_SHUTDOWN) {
nv2a_lock_fifo(d);
- qatomic_set(&d->pgraph.shader_cache_writeback_pending, true);
- qemu_event_reset(&d->pgraph.shader_cache_writeback_complete);
+ d->pgraph.renderer->ops.pre_shutdown_trigger(d);
nv2a_unlock_fifo(d);
qemu_mutex_unlock_iothread();
- qemu_event_wait(&d->pgraph.shader_cache_writeback_complete);
+ d->pgraph.renderer->ops.pre_shutdown_wait(d);
qemu_mutex_lock_iothread();
}
}
@@ -515,9 +523,9 @@ static const VMStateDescription vmstate_nv2a = {
VMSTATE_UINT32(pgraph.inline_buffer_length, NV2AState), // fixme
VMSTATE_UINT32(pgraph.draw_arrays_length, NV2AState),
VMSTATE_UINT32(pgraph.draw_arrays_max_count, NV2AState),
- VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_start, NV2AState, 1250),
- VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_count, NV2AState, 1250),
- VMSTATE_UINT32_ARRAY(pgraph.regs, NV2AState, 0x2000),
+ VMSTATE_INT32_ARRAY(pgraph.draw_arrays_start, NV2AState, 1250),
+ VMSTATE_INT32_ARRAY(pgraph.draw_arrays_count, NV2AState, 1250),
+ VMSTATE_UINT32_ARRAY(pgraph.regs_, NV2AState, 0x2000),
VMSTATE_UINT32(pmc.pending_interrupts, NV2AState),
VMSTATE_UINT32(pmc.enabled_interrupts, NV2AState),
VMSTATE_UINT32(pfifo.pending_interrupts, NV2AState),
diff --git a/hw/xbox/nv2a/nv2a.h b/hw/xbox/nv2a/nv2a.h
index 35b63749e4..a5c4468deb 100644
--- a/hw/xbox/nv2a/nv2a.h
+++ b/hw/xbox/nv2a/nv2a.h
@@ -22,7 +22,7 @@
#define HW_NV2A_H
void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram);
-void nv2a_gl_context_init(void);
+void nv2a_context_init(void);
int nv2a_get_framebuffer_surface(void);
void nv2a_set_surface_scale_factor(unsigned int scale);
unsigned int nv2a_get_surface_scale_factor(void);
diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h
index 31ab6d89ca..9b0189ebc8 100644
--- a/hw/xbox/nv2a/nv2a_int.h
+++ b/hw/xbox/nv2a/nv2a_int.h
@@ -44,25 +44,12 @@
#include "cpu.h"
#include "trace.h"
-#include "swizzle.h"
-#include "lru.h"
-#include "gl/gloffscreen.h"
#include "nv2a.h"
+#include "pgraph/pgraph.h"
#include "debug.h"
-#include "shaders.h"
#include "nv2a_regs.h"
-#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask))
-
-#define SET_MASK(v, mask, val) \
- ({ \
- const unsigned int __val = (val); \
- const unsigned int __mask = (mask); \
- (v) &= ~(__mask); \
- (v) |= ((__val) << ctz32(__mask)) & (__mask); \
- })
-
#define NV2A_DEVICE(obj) OBJECT_CHECK(NV2AState, (obj), "nv2a")
enum FIFOEngine {
@@ -78,347 +65,6 @@ typedef struct DMAObject {
hwaddr limit;
} DMAObject;
-typedef struct VertexAttribute {
- bool dma_select;
- hwaddr offset;
-
- /* inline arrays are packed in order?
- * Need to pass the offset to converted attributes */
- unsigned int inline_array_offset;
-
- float inline_value[4];
-
- unsigned int format;
- unsigned int size; /* size of the data type */
- unsigned int count; /* number of components */
- uint32_t stride;
-
- bool needs_conversion;
-
- float *inline_buffer;
- bool inline_buffer_populated;
-
- GLint gl_count;
- GLenum gl_type;
- GLboolean gl_normalize;
-
- GLuint gl_inline_buffer;
-} VertexAttribute;
-
-typedef struct SurfaceFormatInfo {
- unsigned int bytes_per_pixel;
- GLint gl_internal_format;
- GLenum gl_format;
- GLenum gl_type;
- GLenum gl_attachment;
-} SurfaceFormatInfo;
-
-typedef struct Surface {
- bool draw_dirty;
- bool buffer_dirty;
- bool write_enabled_cache;
- unsigned int pitch;
-
- hwaddr offset;
-} Surface;
-
-typedef struct SurfaceShape {
- unsigned int z_format;
- unsigned int color_format;
- unsigned int zeta_format;
- unsigned int log_width, log_height;
- unsigned int clip_x, clip_y;
- unsigned int clip_width, clip_height;
- unsigned int anti_aliasing;
-} SurfaceShape;
-
-typedef struct SurfaceBinding {
- QTAILQ_ENTRY(SurfaceBinding) entry;
- MemAccessCallback *access_cb;
-
- hwaddr vram_addr;
-
- SurfaceFormatInfo fmt;
- SurfaceShape shape;
- uintptr_t dma_addr;
- uintptr_t dma_len;
- bool color;
- bool swizzle;
-
- unsigned int width;
- unsigned int height;
- unsigned int pitch;
- size_t size;
-
- GLuint gl_buffer;
-
- bool cleared;
- int frame_time;
- int draw_time;
- bool draw_dirty;
- bool download_pending;
- bool upload_pending;
-} SurfaceBinding;
-
-typedef struct TextureShape {
- bool cubemap;
- unsigned int dimensionality;
- unsigned int color_format;
- unsigned int levels;
- unsigned int width, height, depth;
- bool border;
-
- unsigned int min_mipmap_level, max_mipmap_level;
- unsigned int pitch;
-} TextureShape;
-
-typedef struct TextureBinding {
- GLenum gl_target;
- GLuint gl_texture;
- unsigned int refcnt;
- int draw_time;
- uint64_t data_hash;
- unsigned int scale;
- unsigned int min_filter;
- unsigned int mag_filter;
- unsigned int addru;
- unsigned int addrv;
- unsigned int addrp;
- uint32_t border_color;
- bool border_color_set;
-} TextureBinding;
-
-typedef struct TextureKey {
- TextureShape state;
- hwaddr texture_vram_offset;
- hwaddr texture_length;
- hwaddr palette_vram_offset;
- hwaddr palette_length;
-} TextureKey;
-
-typedef struct TextureLruNode {
- LruNode node;
- TextureKey key;
- TextureBinding *binding;
- bool possibly_dirty;
-} TextureLruNode;
-
-typedef struct VertexKey {
- size_t count;
- GLuint gl_type;
- GLboolean gl_normalize;
- size_t stride;
- hwaddr addr;
-} VertexKey;
-
-typedef struct VertexLruNode {
- LruNode node;
- VertexKey key;
- GLuint gl_buffer;
- bool initialized;
-} VertexLruNode;
-
-typedef struct KelvinState {
- hwaddr object_instance;
-} KelvinState;
-
-typedef struct ContextSurfaces2DState {
- hwaddr object_instance;
- hwaddr dma_image_source;
- hwaddr dma_image_dest;
- unsigned int color_format;
- unsigned int source_pitch, dest_pitch;
- hwaddr source_offset, dest_offset;
-} ContextSurfaces2DState;
-
-typedef struct ImageBlitState {
- hwaddr object_instance;
- hwaddr context_surfaces;
- unsigned int operation;
- unsigned int in_x, in_y;
- unsigned int out_x, out_y;
- unsigned int width, height;
-} ImageBlitState;
-
-typedef struct BetaState {
- hwaddr object_instance;
- uint32_t beta;
-} BetaState;
-
-typedef struct QueryReport {
- QSIMPLEQ_ENTRY(QueryReport) entry;
- bool clear;
- uint32_t parameter;
- unsigned int query_count;
- GLuint *queries;
-} QueryReport;
-
-typedef struct PGRAPHState {
- QemuMutex lock;
-
- uint32_t pending_interrupts;
- uint32_t enabled_interrupts;
-
- int frame_time;
- int draw_time;
-
- struct s2t_rndr {
- GLuint fbo, vao, vbo, prog;
- GLuint tex_loc, surface_size_loc;
- } s2t_rndr;
-
- struct disp_rndr {
- GLuint fbo, vao, vbo, prog;
- GLuint display_size_loc;
- GLuint line_offset_loc;
- GLuint tex_loc;
- GLuint pvideo_tex;
- GLint pvideo_enable_loc;
- GLint pvideo_tex_loc;
- GLint pvideo_in_pos_loc;
- GLint pvideo_pos_loc;
- GLint pvideo_scale_loc;
- GLint pvideo_color_key_enable_loc;
- GLint pvideo_color_key_loc;
- GLint palette_loc[256];
- } disp_rndr;
-
- /* subchannels state we're not sure the location of... */
- ContextSurfaces2DState context_surfaces_2d;
- ImageBlitState image_blit;
- KelvinState kelvin;
- BetaState beta;
-
- hwaddr dma_color, dma_zeta;
- Surface surface_color, surface_zeta;
- unsigned int surface_type;
- SurfaceShape surface_shape;
- SurfaceShape last_surface_shape;
- QTAILQ_HEAD(, SurfaceBinding) surfaces;
- SurfaceBinding *color_binding, *zeta_binding;
- struct {
- int clip_x;
- int clip_width;
- int clip_y;
- int clip_height;
- int width;
- int height;
- } surface_binding_dim; // FIXME: Refactor
-
- hwaddr dma_a, dma_b;
- Lru texture_cache;
- TextureLruNode *texture_cache_entries;
- bool texture_dirty[NV2A_MAX_TEXTURES];
- TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
-
- Lru shader_cache;
- ShaderLruNode *shader_cache_entries;
- ShaderBinding *shader_binding;
- QemuMutex shader_cache_lock;
- QemuThread shader_disk_thread;
-
- bool texture_matrix_enable[NV2A_MAX_TEXTURES];
-
- GLuint gl_framebuffer;
-
- GLuint gl_display_buffer;
- GLint gl_display_buffer_internal_format;
- GLsizei gl_display_buffer_width;
- GLsizei gl_display_buffer_height;
- GLenum gl_display_buffer_format;
- GLenum gl_display_buffer_type;
-
- hwaddr dma_state;
- hwaddr dma_notifies;
- hwaddr dma_semaphore;
-
- hwaddr dma_report;
- hwaddr report_offset;
- bool zpass_pixel_count_enable;
- unsigned int zpass_pixel_count_result;
- unsigned int gl_zpass_pixel_count_query_count;
- GLuint *gl_zpass_pixel_count_queries;
- QSIMPLEQ_HEAD(, QueryReport) report_queue;
-
- hwaddr dma_vertex_a, dma_vertex_b;
-
- uint32_t primitive_mode;
-
- bool enable_vertex_program_write;
-
- uint32_t vertex_state_shader_v0[4];
- uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
- bool program_data_dirty;
-
- uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
- bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
-
- /* lighting constant arrays */
- uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
- bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
- uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
- bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
- uint32_t ltc1[NV2A_LTC1_COUNT][4];
- bool ltc1_dirty[NV2A_LTC1_COUNT];
-
- float material_alpha;
-
- // should figure out where these are in lighting context
- float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
- float light_infinite_direction[NV2A_MAX_LIGHTS][3];
- float light_local_position[NV2A_MAX_LIGHTS][3];
- float light_local_attenuation[NV2A_MAX_LIGHTS][3];
-
- float point_params[8];
-
- VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
- uint16_t compressed_attrs;
-
- Lru element_cache;
- VertexLruNode *element_cache_entries;
-
- unsigned int inline_array_length;
- uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
- GLuint gl_inline_array_buffer;
-
- unsigned int inline_elements_length;
- uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
-
- unsigned int inline_buffer_length;
-
- unsigned int draw_arrays_length;
- unsigned int draw_arrays_min_start;
- unsigned int draw_arrays_max_count;
- /* FIXME: Unknown size, possibly endless, 1250 will do for now */
- /* Keep in sync with size used in nv2a.c */
- GLint gl_draw_arrays_start[1250];
- GLsizei gl_draw_arrays_count[1250];
- bool draw_arrays_prevent_connect;
-
- GLuint gl_memory_buffer;
- GLuint gl_vertex_array;
-
- uint32_t regs[0x2000];
-
- bool clearing;
- bool waiting_for_nop;
- bool waiting_for_flip;
- bool waiting_for_context_switch;
- bool downloads_pending;
- bool download_dirty_surfaces_pending;
- bool flush_pending;
- bool gl_sync_pending;
- bool shader_cache_writeback_pending;
- QemuEvent downloads_complete;
- QemuEvent dirty_surfaces_download_complete;
- QemuEvent flush_complete;
- QemuEvent gl_sync_complete;
- QemuEvent shader_cache_writeback_complete;
-
- unsigned int surface_scale_factor;
- uint8_t *scale_buf;
-} PGRAPHState;
-
typedef struct NV2AState {
/*< private >*/
PCIDevice parent_obj;
@@ -512,9 +158,6 @@ typedef struct NV2ABlockInfo {
} NV2ABlockInfo;
extern const NV2ABlockInfo blocktable[NV_NUM_BLOCKS];
-extern GloContext *g_nv2a_context_render;
-extern GloContext *g_nv2a_context_display;
-
void nv2a_update_irq(NV2AState *d);
static inline
@@ -566,20 +209,5 @@ DEFINE_PROTO(user)
DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address);
void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len);
-void pgraph_init(NV2AState *d);
-void pgraph_destroy(PGRAPHState *pg);
-void pgraph_context_switch(NV2AState *d, unsigned int channel_id);
-int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method,
- uint32_t parameter, uint32_t *parameters,
- size_t num_words_available, size_t max_lookahead_words,
- bool inc);
-void pgraph_gl_sync(NV2AState *d);
-void pgraph_process_pending_reports(NV2AState *d);
-void pgraph_process_pending_downloads(NV2AState *d);
-void pgraph_download_dirty_surfaces(NV2AState *d);
-void pgraph_flush(NV2AState *d);
-
-void *pfifo_thread(void *arg);
-void pfifo_kick(NV2AState *d);
#endif
diff --git a/hw/xbox/nv2a/nv2a_regs.h b/hw/xbox/nv2a/nv2a_regs.h
index 108db8f716..78a9091eb5 100644
--- a/hw/xbox/nv2a/nv2a_regs.h
+++ b/hw/xbox/nv2a/nv2a_regs.h
@@ -21,6 +21,17 @@
#ifndef HW_NV2A_REGS_H
#define HW_NV2A_REGS_H
+
+#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask))
+
+#define SET_MASK(v, mask, val) \
+ ({ \
+ const unsigned int __val = (val); \
+ const unsigned int __mask = (mask); \
+ (v) &= ~(__mask); \
+ (v) |= ((__val) << ctz32(__mask)) & (__mask); \
+ })
+
#define NV_NUM_BLOCKS 21
#define NV_PMC 0 /* card master control */
#define NV_PBUS 1 /* bus control */
diff --git a/hw/xbox/nv2a/pfifo.c b/hw/xbox/nv2a/pfifo.c
index 77dd175098..295cbbf27b 100644
--- a/hw/xbox/nv2a/pfifo.c
+++ b/hw/xbox/nv2a/pfifo.c
@@ -95,23 +95,25 @@ void pfifo_kick(NV2AState *d)
qemu_cond_broadcast(&d->pfifo.fifo_cond);
}
-static bool pgraph_can_fifo_access(NV2AState *d) {
- return qatomic_read(&d->pgraph.regs[NV_PGRAPH_FIFO]) & NV_PGRAPH_FIFO_ACCESS;
+static bool can_fifo_access(NV2AState *d) {
+ return qatomic_read(&d->pgraph.regs_[NV_PGRAPH_FIFO]) &
+ NV_PGRAPH_FIFO_ACCESS;
}
/* If NV097_FLIP_STALL was executed, check if the flip has completed.
* This will usually happen in the VSYNC interrupt handler.
*/
-static bool pgraph_is_flip_stall_complete(NV2AState *d)
+static bool is_flip_stall_complete(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
- NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n",
- GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D),
- GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D),
- GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D));
+ uint32_t s = pgraph_reg_r(pg, NV_PGRAPH_SURFACE);
+
+ NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n",
+ GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D),
+ GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D),
+ GET_MASK(s, NV_PGRAPH_SURFACE_MODULO_3D));
- uint32_t s = pg->regs[NV_PGRAPH_SURFACE];
if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D)
!= GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) {
return true;
@@ -126,7 +128,7 @@ static bool pfifo_stall_for_flip(NV2AState *d)
if (qatomic_read(&d->pgraph.waiting_for_flip)) {
qemu_mutex_lock(&d->pgraph.lock);
- if (!pgraph_is_flip_stall_complete(d)) {
+ if (!is_flip_stall_complete(d)) {
should_stall = true;
} else {
d->pgraph.waiting_for_flip = false;
@@ -141,7 +143,7 @@ static bool pfifo_puller_should_stall(NV2AState *d)
{
return pfifo_stall_for_flip(d) || qatomic_read(&d->pgraph.waiting_for_nop) ||
qatomic_read(&d->pgraph.waiting_for_context_switch) ||
- !pgraph_can_fifo_access(d);
+ !can_fifo_access(d);
}
static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
@@ -187,7 +189,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
qemu_mutex_lock(&d->pgraph.lock);
// Switch contexts if necessary
- if (pgraph_can_fifo_access(d)) {
+ if (can_fifo_access(d)) {
pgraph_context_switch(d, entry.channel_id);
if (!d->pgraph.waiting_for_context_switch) {
num_proc =
@@ -221,7 +223,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
- if (pgraph_can_fifo_access(d)) {
+ if (can_fifo_access(d)) {
num_proc =
pgraph_method(d, subchannel, method, parameter, parameters,
num_words_available, max_lookahead_words, inc);
@@ -242,7 +244,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
static bool pfifo_pusher_should_stall(NV2AState *d)
{
- return !pgraph_can_fifo_access(d) ||
+ return !can_fifo_access(d) ||
qatomic_read(&d->pgraph.waiting_for_nop);
}
@@ -447,39 +449,11 @@ static void pfifo_run_pusher(NV2AState *d)
}
}
-static void process_requests(NV2AState *d)
-{
- if (qatomic_read(&d->pgraph.downloads_pending) ||
- qatomic_read(&d->pgraph.download_dirty_surfaces_pending) ||
- qatomic_read(&d->pgraph.gl_sync_pending) ||
- qatomic_read(&d->pgraph.flush_pending) ||
- qatomic_read(&d->pgraph.shader_cache_writeback_pending)) {
- qemu_mutex_unlock(&d->pfifo.lock);
- qemu_mutex_lock(&d->pgraph.lock);
- if (qatomic_read(&d->pgraph.downloads_pending)) {
- pgraph_process_pending_downloads(d);
- }
- if (qatomic_read(&d->pgraph.download_dirty_surfaces_pending)) {
- pgraph_download_dirty_surfaces(d);
- }
- if (qatomic_read(&d->pgraph.gl_sync_pending)) {
- pgraph_gl_sync(d);
- }
- if (qatomic_read(&d->pgraph.flush_pending)) {
- pgraph_flush(d);
- }
- if (qatomic_read(&d->pgraph.shader_cache_writeback_pending)) {
- shader_write_cache_reload_list(&d->pgraph);
- }
- qemu_mutex_unlock(&d->pgraph.lock);
- qemu_mutex_lock(&d->pfifo.lock);
- }
-}
-
void *pfifo_thread(void *arg)
{
NV2AState *d = (NV2AState *)arg;
- glo_set_current(g_nv2a_context_render);
+
+ pgraph_init_thread(d);
rcu_register_thread();
@@ -487,13 +461,13 @@ void *pfifo_thread(void *arg)
while (true) {
d->pfifo.fifo_kick = false;
- process_requests(d);
+ d->pgraph.renderer->ops.process_pending(d);
if (!d->pfifo.halt) {
pfifo_run_pusher(d);
}
- pgraph_process_pending_reports(d);
+ d->pgraph.renderer->ops.process_pending_reports(d);
if (!d->pfifo.fifo_kick) {
qemu_cond_broadcast(&d->pfifo.fifo_idle_cond);
diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c
deleted file mode 100644
index 335c73cc0f..0000000000
--- a/hw/xbox/nv2a/pgraph.c
+++ /dev/null
@@ -1,7775 +0,0 @@
-/*
- * QEMU Geforce NV2A implementation
- *
- * Copyright (c) 2012 espes
- * Copyright (c) 2015 Jannik Vogel
- * Copyright (c) 2018-2021 Matt Borgerson
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "nv2a_int.h"
-
-#include <math.h>
-
-#include "nv2a_vsh_emulator.h"
-#include "s3tc.h"
-#include "ui/xemu-settings.h"
-#include "qemu/fast-hash.h"
-
-const float f16_max = 511.9375f;
-const float f24_max = 1.0E30;
-
-static NV2AState *g_nv2a;
-GloContext *g_nv2a_context_render;
-GloContext *g_nv2a_context_display;
-
-NV2AStats g_nv2a_stats;
-
-static void nv2a_profile_increment(void)
-{
- int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
- const int64_t fps_update_interval = 250000;
- g_nv2a_stats.last_flip_time = now;
-
- static int64_t frame_count = 0;
- frame_count++;
-
- static int64_t ts = 0;
- int64_t delta = now - ts;
- if (delta >= fps_update_interval) {
- g_nv2a_stats.increment_fps = frame_count * 1000000 / delta;
- ts = now;
- frame_count = 0;
- }
-}
-
-static void nv2a_profile_flip_stall(void)
-{
- glFinish();
-
- int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
- int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000;
-
- g_nv2a_stats.frame_working.mspf = render_time;
- g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] =
- g_nv2a_stats.frame_working;
- g_nv2a_stats.frame_ptr =
- (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES;
- g_nv2a_stats.frame_count++;
- memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working));
-}
-
-static void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt)
-{
- g_nv2a_stats.frame_working.counters[cnt] += 1;
-}
-
-const char *nv2a_profile_get_counter_name(unsigned int cnt)
-{
- const char *default_names[NV2A_PROF__COUNT] = {
- #define _X(x) stringify(x),
- NV2A_PROF_COUNTERS_XMAC
- #undef _X
- };
-
- assert(cnt < NV2A_PROF__COUNT);
- return default_names[cnt] + 10; /* 'NV2A_PROF_' */
-}
-
-int nv2a_profile_get_counter_value(unsigned int cnt)
-{
- assert(cnt < NV2A_PROF__COUNT);
- unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) %
- NV2A_PROF_NUM_FRAMES;
- return g_nv2a_stats.frame_history[idx].counters[cnt];
-}
-
-static const GLenum pgraph_texture_min_filter_map[] = {
- 0,
- GL_NEAREST,
- GL_LINEAR,
- GL_NEAREST_MIPMAP_NEAREST,
- GL_LINEAR_MIPMAP_NEAREST,
- GL_NEAREST_MIPMAP_LINEAR,
- GL_LINEAR_MIPMAP_LINEAR,
- GL_LINEAR,
-};
-
-static const GLenum pgraph_texture_mag_filter_map[] = {
- 0,
- GL_NEAREST,
- GL_LINEAR,
- 0,
- GL_LINEAR /* TODO: Convolution filter... */
-};
-
-static const GLenum pgraph_texture_addr_map[] = {
- 0,
- GL_REPEAT,
- GL_MIRRORED_REPEAT,
- GL_CLAMP_TO_EDGE,
- GL_CLAMP_TO_BORDER,
- GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */
-};
-
-static const GLenum pgraph_blend_factor_map[] = {
- GL_ZERO,
- GL_ONE,
- GL_SRC_COLOR,
- GL_ONE_MINUS_SRC_COLOR,
- GL_SRC_ALPHA,
- GL_ONE_MINUS_SRC_ALPHA,
- GL_DST_ALPHA,
- GL_ONE_MINUS_DST_ALPHA,
- GL_DST_COLOR,
- GL_ONE_MINUS_DST_COLOR,
- GL_SRC_ALPHA_SATURATE,
- 0,
- GL_CONSTANT_COLOR,
- GL_ONE_MINUS_CONSTANT_COLOR,
- GL_CONSTANT_ALPHA,
- GL_ONE_MINUS_CONSTANT_ALPHA,
-};
-
-static const GLenum pgraph_blend_equation_map[] = {
- GL_FUNC_SUBTRACT,
- GL_FUNC_REVERSE_SUBTRACT,
- GL_FUNC_ADD,
- GL_MIN,
- GL_MAX,
- GL_FUNC_REVERSE_SUBTRACT,
- GL_FUNC_ADD,
-};
-
-/* FIXME
-static const GLenum pgraph_blend_logicop_map[] = {
- GL_CLEAR,
- GL_AND,
- GL_AND_REVERSE,
- GL_COPY,
- GL_AND_INVERTED,
- GL_NOOP,
- GL_XOR,
- GL_OR,
- GL_NOR,
- GL_EQUIV,
- GL_INVERT,
- GL_OR_REVERSE,
- GL_COPY_INVERTED,
- GL_OR_INVERTED,
- GL_NAND,
- GL_SET,
-};
-*/
-
-static const GLenum pgraph_cull_face_map[] = {
- 0,
- GL_FRONT,
- GL_BACK,
- GL_FRONT_AND_BACK
-};
-
-static const GLenum pgraph_depth_func_map[] = {
- GL_NEVER,
- GL_LESS,
- GL_EQUAL,
- GL_LEQUAL,
- GL_GREATER,
- GL_NOTEQUAL,
- GL_GEQUAL,
- GL_ALWAYS,
-};
-
-static const GLenum pgraph_stencil_func_map[] = {
- GL_NEVER,
- GL_LESS,
- GL_EQUAL,
- GL_LEQUAL,
- GL_GREATER,
- GL_NOTEQUAL,
- GL_GEQUAL,
- GL_ALWAYS,
-};
-
-static const GLenum pgraph_stencil_op_map[] = {
- 0,
- GL_KEEP,
- GL_ZERO,
- GL_REPLACE,
- GL_INCR,
- GL_DECR,
- GL_INVERT,
- GL_INCR_WRAP,
- GL_DECR_WRAP,
-};
-
-typedef struct ColorFormatInfo {
- unsigned int bytes_per_pixel;
- bool linear;
- GLint gl_internal_format;
- GLenum gl_format;
- GLenum gl_type;
- GLenum gl_swizzle_mask[4];
- bool depth;
-} ColorFormatInfo;
-
-static const ColorFormatInfo kelvin_color_format_map[66] = {
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] =
- {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
- {GL_RED, GL_RED, GL_RED, GL_ONE}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] =
- {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
- {GL_RED, GL_RED, GL_RED, GL_RED}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] =
- {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] =
- {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] =
- {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] =
- {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] =
- {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] =
- {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
-
- /* paletted texture */
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] =
- {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] =
- {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA},
- [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] =
- {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA},
- [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] =
- {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] =
- {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] =
- {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] =
- {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] =
- {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
- {GL_RED, GL_RED, GL_RED, GL_ONE}},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] =
- {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
- {GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] =
- {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
- {GL_ONE, GL_ONE, GL_ONE, GL_RED}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] =
- {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
- {GL_RED, GL_RED, GL_RED, GL_GREEN}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] =
- {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
- {GL_RED, GL_RED, GL_RED, GL_RED}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] =
- {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] =
- {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] =
- {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] =
- {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
- {GL_ONE, GL_ONE, GL_ONE, GL_RED}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] =
- {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
- {GL_RED, GL_RED, GL_RED, GL_GREEN}},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] =
- {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] =
- {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
- {GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] =
- {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
- {GL_GREEN, GL_RED, GL_RED, GL_GREEN}},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] =
- {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] =
- {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
-
- /* Additional information is passed to the pixel shader via the swizzle:
- * RED: The depth value.
- * GREEN: 0 for 16-bit, 1 for 24 bit
- * BLUE: 0 for fixed, 1 for float
- */
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] =
- {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
- {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] =
- {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
- {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] =
- /* FIXME: Uses fixed-point format to match surface format hack below. */
- {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
- {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] =
- {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
- {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] =
- {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT,
- {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] =
- {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT,
- {GL_RED, GL_RED, GL_RED, GL_ONE}},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] =
- {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] =
- {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] =
- {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},
-
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] =
- {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] =
- {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
- [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] =
- {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}
-};
-
-static const SurfaceFormatInfo kelvin_surface_color_format_map[] = {
- [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
- {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0},
- [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
- {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0},
- [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
- {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
- [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
- {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
-
- // FIXME: Map channel color
- [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
- {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0},
- [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
- {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0},
-};
-
-static const SurfaceFormatInfo kelvin_surface_zeta_float_format_map[] = {
- [NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
- {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT},
- [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
- /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for
- * now just emulate this with fixed-point Z24S8. Possible compat
- * improvement with custom conversion.
- */
- {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
-};
-
-static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_map[] = {
- [NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
- {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT},
- [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
- {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
-};
-
-static GLfloat supportedAliasedLineWidthRange[2] = { 0.0f, 0.0f };
-static GLfloat supportedSmoothLineWidthRange[2] = { 0.0f, 0.0f };
-
-// static void pgraph_set_context_user(NV2AState *d, uint32_t val);
-static void pgraph_gl_fence(void);
-static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src);
-static void pgraph_init_render_to_texture(NV2AState *d);
-static void pgraph_init_display_renderer(NV2AState *d);
-static void pgraph_method_log(unsigned int subchannel, unsigned int graphics_class, unsigned int method, uint32_t parameter);
-static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr);
-static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg);
-static void pgraph_shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function);
-static void pgraph_bind_shaders(PGRAPHState *pg);
-static bool pgraph_framebuffer_dirty(PGRAPHState *pg);
-static bool pgraph_color_write_enabled(PGRAPHState *pg);
-static bool pgraph_zeta_write_enabled(PGRAPHState *pg);
-static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta);
-static void pgraph_wait_for_surface_download(SurfaceBinding *e);
-static void pgraph_surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, hwaddr len, bool write);
-static SurfaceBinding *pgraph_surface_put(NV2AState *d, hwaddr addr, SurfaceBinding *e);
-static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr);
-static SurfaceBinding *pgraph_surface_get_within(NV2AState *d, hwaddr addr);
-static void pgraph_unbind_surface(NV2AState *d, bool color);
-static void pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *e);
-static void pgraph_surface_evict_old(NV2AState *d);
-static void pgraph_download_surface_data_if_dirty(NV2AState *d, SurfaceBinding *surface);
-static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, bool force);
-static void pgraph_download_surface_data_to_buffer(NV2AState *d,
- SurfaceBinding *surface,
- bool swizzle, bool flip,
- bool downscale,
- uint8_t *pixels);
-static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force);
-static bool pgraph_check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2, bool strict);
-static bool pgraph_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape);
-static void pgraph_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit);
-static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color);
-static void pgraph_update_surface(NV2AState *d, bool upload, bool color_write, bool zeta_write);
-static void pgraph_bind_textures(NV2AState *d);
-static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height);
-static void pgraph_apply_scaling_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height);
-static void pgraph_get_surface_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height);
-static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, bool quick);
-static void pgraph_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element);
-static unsigned int pgraph_bind_inline_array(NV2AState *d);
-static bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage);
-
-static float convert_f16_to_float(uint16_t f16);
-static float convert_f24_to_float(uint32_t f24);
-static uint8_t cliptobyte(int x);
-static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b);
-static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b);
-static uint8_t* convert_texture_data(const TextureShape s, const uint8_t *data, const uint8_t *palette_data, unsigned int width, unsigned int height, unsigned int depth, unsigned int row_pitch, unsigned int slice_pitch);
-static void upload_gl_texture(GLenum gl_target, const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
-static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
-static void texture_binding_destroy(gpointer data);
-static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key);
-static void texture_cache_entry_post_evict(Lru *lru, LruNode *node);
-static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key);
-
-static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key)
-{
- VertexLruNode *vnode = container_of(node, VertexLruNode, node);
- memcpy(&vnode->key, key, sizeof(struct VertexKey));
- vnode->initialized = false;
-}
-
-static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key)
-{
- VertexLruNode *vnode = container_of(node, VertexLruNode, node);
- return memcmp(&vnode->key, key, sizeof(VertexKey));
-}
-
-static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size);
-static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size);
-static unsigned int kelvin_map_stencil_op(uint32_t parameter);
-static unsigned int kelvin_map_polygon_mode(uint32_t parameter);
-static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel);
-static void pgraph_reload_surface_scale_factor(NV2AState *d);
-
-static uint32_t pgraph_rdi_read(PGRAPHState *pg,
- unsigned int select, unsigned int address)
-{
- uint32_t r = 0;
- switch(select) {
- case RDI_INDEX_VTX_CONSTANTS0:
- case RDI_INDEX_VTX_CONSTANTS1:
- assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
- r = pg->vsh_constants[address / 4][3 - address % 4];
- break;
- default:
- fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n",
- select, address);
- assert(false);
- break;
- }
- return r;
-}
-
-static void pgraph_rdi_write(PGRAPHState *pg,
- unsigned int select, unsigned int address,
- uint32_t val)
-{
- switch(select) {
- case RDI_INDEX_VTX_CONSTANTS0:
- case RDI_INDEX_VTX_CONSTANTS1:
- assert(false); /* Untested */
- assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
- pg->vsh_constants_dirty[address / 4] |=
- (val != pg->vsh_constants[address / 4][3 - address % 4]);
- pg->vsh_constants[address / 4][3 - address % 4] = val;
- break;
- default:
- NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n",
- select, address, val);
- break;
- }
-}
-
-uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size)
-{
- NV2AState *d = (NV2AState *)opaque;
- PGRAPHState *pg = &d->pgraph;
-
- qemu_mutex_lock(&pg->lock);
-
- uint64_t r = 0;
- switch (addr) {
- case NV_PGRAPH_INTR:
- r = pg->pending_interrupts;
- break;
- case NV_PGRAPH_INTR_EN:
- r = pg->enabled_interrupts;
- break;
- case NV_PGRAPH_RDI_DATA: {
- unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX],
- NV_PGRAPH_RDI_INDEX_SELECT);
- unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX],
- NV_PGRAPH_RDI_INDEX_ADDRESS);
-
- r = pgraph_rdi_read(pg, select, address);
-
- /* FIXME: Overflow into select? */
- assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS,
- NV_PGRAPH_RDI_INDEX_ADDRESS));
- SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX],
- NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1);
- break;
- }
- default:
- r = pg->regs[addr];
- break;
- }
-
- qemu_mutex_unlock(&pg->lock);
-
- nv2a_reg_log_read(NV_PGRAPH, addr, size, r);
- return r;
-}
-
-void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
-{
- NV2AState *d = (NV2AState *)opaque;
- PGRAPHState *pg = &d->pgraph;
-
- nv2a_reg_log_write(NV_PGRAPH, addr, size, val);
-
- qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here
- qemu_mutex_lock(&pg->lock);
-
- switch (addr) {
- case NV_PGRAPH_INTR:
- pg->pending_interrupts &= ~val;
-
- if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) {
- pg->waiting_for_nop = false;
- }
- if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) {
- pg->waiting_for_context_switch = false;
- }
- pfifo_kick(d);
- break;
- case NV_PGRAPH_INTR_EN:
- pg->enabled_interrupts = val;
- break;
- case NV_PGRAPH_INCREMENT:
- if (val & NV_PGRAPH_INCREMENT_READ_3D) {
- SET_MASK(pg->regs[NV_PGRAPH_SURFACE],
- NV_PGRAPH_SURFACE_READ_3D,
- (GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
- NV_PGRAPH_SURFACE_READ_3D)+1)
- % GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
- NV_PGRAPH_SURFACE_MODULO_3D) );
- nv2a_profile_increment();
- pfifo_kick(d);
- }
- break;
- case NV_PGRAPH_RDI_DATA: {
- unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX],
- NV_PGRAPH_RDI_INDEX_SELECT);
- unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX],
- NV_PGRAPH_RDI_INDEX_ADDRESS);
-
- pgraph_rdi_write(pg, select, address, val);
-
- /* FIXME: Overflow into select? */
- assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS,
- NV_PGRAPH_RDI_INDEX_ADDRESS));
- SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX],
- NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1);
- break;
- }
- case NV_PGRAPH_CHANNEL_CTX_TRIGGER: {
- hwaddr context_address =
- GET_MASK(pg->regs[NV_PGRAPH_CHANNEL_CTX_POINTER],
- NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4;
-
- if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) {
-#ifdef DEBUG_NV2A
- unsigned pgraph_channel_id =
- GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID);
-#endif
- NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n",
- pgraph_channel_id, context_address);
-
- assert(context_address < memory_region_size(&d->ramin));
-
- uint8_t *context_ptr = d->ramin_ptr + context_address;
- uint32_t context_user = ldl_le_p((uint32_t*)context_ptr);
-
- NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user);
-
- pg->regs[NV_PGRAPH_CTX_USER] = context_user;
- // pgraph_set_context_user(d, context_user);
- }
- if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) {
- /* do stuff ... */
- }
-
- break;
- }
- default:
- pg->regs[addr] = val;
- break;
- }
-
- // events
- switch (addr) {
- case NV_PGRAPH_FIFO:
- pfifo_kick(d);
- break;
- }
-
- qemu_mutex_unlock(&pg->lock);
- qemu_mutex_unlock(&d->pfifo.lock);
-}
-
-void pgraph_flush(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
-
- bool update_surface = (pg->color_binding || pg->zeta_binding);
-
- /* Clear last surface shape to force recreation of buffers at next draw */
- pg->surface_color.draw_dirty = false;
- pg->surface_zeta.draw_dirty = false;
- memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape));
- pgraph_unbind_surface(d, true);
- pgraph_unbind_surface(d, false);
-
- SurfaceBinding *s, *next;
- QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) {
- pgraph_surface_invalidate(d, s);
- }
-
- pgraph_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
-
- /* Sync all RAM */
- glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer);
- glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr);
-
- /* FIXME: Flush more? */
-
- pgraph_reload_surface_scale_factor(d);
-
- if (update_surface) {
- pgraph_update_surface(d, true, true, true);
- }
-
- qatomic_set(&d->pgraph.flush_pending, false);
- qemu_event_set(&d->pgraph.flush_complete);
-}
-
-#define METHOD_ADDR(gclass, name) \
- gclass ## _ ## name
-#define METHOD_ADDR_TO_INDEX(x) ((x)>>2)
-#define METHOD_NAME_STR(gclass, name) \
- tostring(gclass ## _ ## name)
-#define METHOD_FUNC_NAME(gclass, name) \
- pgraph_ ## gclass ## _ ## name ## _handler
-#define METHOD_HANDLER_ARG_DECL \
- NV2AState *d, PGRAPHState *pg, \
- unsigned int subchannel, unsigned int method, \
- uint32_t parameter, uint32_t *parameters, \
- size_t num_words_available, size_t *num_words_consumed, bool inc
-#define METHOD_HANDLER_ARGS \
- d, pg, subchannel, method, parameter, parameters, \
- num_words_available, num_words_consumed, inc
-#define DEF_METHOD_PROTO(gclass, name) \
- static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL)
-
-#define DEF_METHOD(gclass, name) \
- DEF_METHOD_PROTO(gclass, name);
-#define DEF_METHOD_RANGE(gclass, name, range) \
- DEF_METHOD_PROTO(gclass, name);
-#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* Drop */
-#define DEF_METHOD_CASE_4(gclass, name, stride) \
- DEF_METHOD_PROTO(gclass, name);
-#include "pgraph_methods.h"
-#undef DEF_METHOD
-#undef DEF_METHOD_RANGE
-#undef DEF_METHOD_CASE_4_OFFSET
-#undef DEF_METHOD_CASE_4
-
-typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL);
-static const struct {
- uint32_t base;
- const char *name;
- MethodFunc handler;
-} pgraph_kelvin_methods[0x800] = {
-#define DEF_METHOD(gclass, name) \
- [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \
- { \
- METHOD_ADDR(gclass, name), \
- METHOD_NAME_STR(gclass, name), \
- METHOD_FUNC_NAME(gclass, name), \
- },
-#define DEF_METHOD_RANGE(gclass, name, range) \
- [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \
- ... METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \
- { \
- METHOD_ADDR(gclass, name), \
- METHOD_NAME_STR(gclass, name), \
- METHOD_FUNC_NAME(gclass, name), \
- },
-#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \
- [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \
- { \
- METHOD_ADDR(gclass, name), \
- METHOD_NAME_STR(gclass, name), \
- METHOD_FUNC_NAME(gclass, name), \
- }, \
- [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \
- { \
- METHOD_ADDR(gclass, name), \
- METHOD_NAME_STR(gclass, name), \
- METHOD_FUNC_NAME(gclass, name), \
- }, \
- [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \
- { \
- METHOD_ADDR(gclass, name), \
- METHOD_NAME_STR(gclass, name), \
- METHOD_FUNC_NAME(gclass, name), \
- }, \
- [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \
- { \
- METHOD_ADDR(gclass, name), \
- METHOD_NAME_STR(gclass, name), \
- METHOD_FUNC_NAME(gclass, name), \
- },
-#define DEF_METHOD_CASE_4(gclass, name, stride) \
- DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride)
-#include "pgraph_methods.h"
-#undef DEF_METHOD
-#undef DEF_METHOD_RANGE
-#undef DEF_METHOD_CASE_4_OFFSET
-#undef DEF_METHOD_CASE_4
-};
-
-#define METHOD_RANGE_END_NAME(gclass, name) \
- pgraph_ ## gclass ## _ ## name ## __END
-#define DEF_METHOD(gclass, name) \
- static const size_t METHOD_RANGE_END_NAME(gclass, name) = \
- METHOD_ADDR(gclass, name) + 4;
-#define DEF_METHOD_RANGE(gclass, name, range) \
- static const size_t METHOD_RANGE_END_NAME(gclass, name) = \
- METHOD_ADDR(gclass, name) + 4*range;
-#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */
-#define DEF_METHOD_CASE_4(gclass, name, stride) \
- static const size_t METHOD_RANGE_END_NAME(gclass, name) = \
- METHOD_ADDR(gclass, name) + 4*stride;
-#include "pgraph_methods.h"
-#undef DEF_METHOD
-#undef DEF_METHOD_RANGE
-#undef DEF_METHOD_CASE_4_OFFSET
-#undef DEF_METHOD_CASE_4
-
-static void pgraph_method_inc(MethodFunc handler, uint32_t end,
- METHOD_HANDLER_ARG_DECL)
-{
- if (!inc) {
- handler(METHOD_HANDLER_ARGS);
- return;
- }
- size_t count = MIN(num_words_available, (end - method) / 4);
- for (size_t i = 0; i < count; i++) {
- parameter = ldl_le_p(parameters + i);
- if (i) {
- pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method,
- parameter);
- }
- handler(METHOD_HANDLER_ARGS);
- method += 4;
- }
- *num_words_consumed = count;
-}
-
-static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL)
-{
- if (inc) {
- handler(METHOD_HANDLER_ARGS);
- return;
- }
-
- for (size_t i = 0; i < num_words_available; i++) {
- parameter = ldl_le_p(parameters + i);
- if (i) {
- pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method,
- parameter);
- }
- handler(METHOD_HANDLER_ARGS);
- }
- *num_words_consumed = num_words_available;
-}
-
-#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int)
-#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int)
-#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name)
-
-#define DEF_METHOD_INC(gclass, name) \
- DEF_METHOD_INT(gclass, name); \
- DEF_METHOD(gclass, name) \
- { \
- pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \
- METHOD_RANGE_END_NAME(gclass, name), \
- METHOD_HANDLER_ARGS); \
- } \
- DEF_METHOD_INT(gclass, name)
-
-#define DEF_METHOD_NON_INC(gclass, name) \
- DEF_METHOD_INT(gclass, name); \
- DEF_METHOD(gclass, name) \
- { \
- pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \
- METHOD_HANDLER_ARGS); \
- } \
- DEF_METHOD_INT(gclass, name)
-
-// TODO: Optimize. Ideally this should all be done via OpenGL.
-static void pgraph_image_blit(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
- ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d;
- ImageBlitState *image_blit = &pg->image_blit;
- BetaState *beta = &pg->beta;
-
- pgraph_update_surface(d, false, true, true);
-
- assert(context_surfaces->object_instance == image_blit->context_surfaces);
-
- unsigned int bytes_per_pixel;
- switch (context_surfaces->color_format) {
- case NV062_SET_COLOR_FORMAT_LE_Y8:
- bytes_per_pixel = 1;
- break;
- case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
- bytes_per_pixel = 2;
- break;
- case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
- case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
- case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
- case NV062_SET_COLOR_FORMAT_LE_Y32:
- bytes_per_pixel = 4;
- break;
- default:
- fprintf(stderr, "Unknown blit surface format: 0x%x\n",
- context_surfaces->color_format);
- assert(false);
- break;
- }
-
- hwaddr source_dma_len, dest_dma_len;
-
- uint8_t *source = (uint8_t *)nv_dma_map(
- d, context_surfaces->dma_image_source, &source_dma_len);
- assert(context_surfaces->source_offset < source_dma_len);
- source += context_surfaces->source_offset;
-
- uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest,
- &dest_dma_len);
- assert(context_surfaces->dest_offset < dest_dma_len);
- dest += context_surfaces->dest_offset;
-
- hwaddr source_addr = source - d->vram_ptr;
- hwaddr dest_addr = dest - d->vram_ptr;
-
- SurfaceBinding *surf_src = pgraph_surface_get(d, source_addr);
- if (surf_src) {
- pgraph_download_surface_data_if_dirty(d, surf_src);
- }
-
- SurfaceBinding *surf_dest = pgraph_surface_get(d, dest_addr);
- if (surf_dest) {
- if (image_blit->height < surf_dest->height ||
- image_blit->width < surf_dest->width) {
- pgraph_download_surface_data_if_dirty(d, surf_dest);
- } else {
- // The blit will completely replace the surface so any pending
- // download should be discarded.
- surf_dest->download_pending = false;
- surf_dest->draw_dirty = false;
- }
- surf_dest->upload_pending = true;
- pg->draw_time++;
- }
-
- hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch +
- image_blit->in_x * bytes_per_pixel;
- hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch +
- image_blit->out_x * bytes_per_pixel;
-
- hwaddr source_size =
- (image_blit->height - 1) * context_surfaces->source_pitch +
- image_blit->width * bytes_per_pixel;
- hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch +
- image_blit->width * bytes_per_pixel;
-
- /* FIXME: What does hardware do in this case? */
- assert(source_addr + source_offset + source_size <=
- memory_region_size(d->vram));
- assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram));
-
- uint8_t *source_row = source + source_offset;
- uint8_t *dest_row = dest + dest_offset;
-
- if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
- NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY");
- for (unsigned int y = 0; y < image_blit->height; y++) {
- memmove(dest_row, source_row, image_blit->width * bytes_per_pixel);
- source_row += context_surfaces->source_pitch;
- dest_row += context_surfaces->dest_pitch;
- }
- } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) {
- NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND");
- uint32_t max_beta_mult = 0x7f80;
- uint32_t beta_mult = beta->beta >> 16;
- uint32_t inv_beta_mult = max_beta_mult - beta_mult;
- for (unsigned int y = 0; y < image_blit->height; y++) {
- for (unsigned int x = 0; x < image_blit->width; x++) {
- for (unsigned int ch = 0; ch < 3; ch++) {
- uint32_t a = source_row[x * 4 + ch] * beta_mult;
- uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult;
- dest_row[x * 4 + ch] = (a + b) / max_beta_mult;
- }
- }
- source_row += context_surfaces->source_pitch;
- dest_row += context_surfaces->dest_pitch;
- }
- } else {
- fprintf(stderr, "Unknown blit operation: 0x%x\n",
- image_blit->operation);
- assert(false && "Unknown blit operation");
- }
-
- NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr);
-
- bool needs_alpha_patching;
- uint8_t alpha_override;
- switch (context_surfaces->color_format) {
- case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
- needs_alpha_patching = true;
- alpha_override = 0xff;
- break;
- case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
- needs_alpha_patching = true;
- alpha_override = 0;
- break;
- default:
- needs_alpha_patching = false;
- alpha_override = 0;
- }
-
- if (needs_alpha_patching) {
- dest_row = dest + dest_offset;
- for (unsigned int y = 0; y < image_blit->height; y++) {
- for (unsigned int x = 0; x < image_blit->width; x++) {
- dest_row[x * 4 + 3] = alpha_override;
- }
- dest_row += context_surfaces->dest_pitch;
- }
- }
-
- dest_addr += dest_offset;
- memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
- DIRTY_MEMORY_VGA);
- memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
- DIRTY_MEMORY_NV2A_TEX);
-}
-
-int pgraph_method(NV2AState *d, unsigned int subchannel,
- unsigned int method, uint32_t parameter,
- uint32_t *parameters, size_t num_words_available,
- size_t max_lookahead_words, bool inc)
-{
- int num_processed = 1;
-
- assert(glGetError() == GL_NO_ERROR);
-
- PGRAPHState *pg = &d->pgraph;
-
- bool channel_valid =
- d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID;
- assert(channel_valid);
-
- ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d;
- ImageBlitState *image_blit = &pg->image_blit;
- BetaState *beta = &pg->beta;
-
- assert(subchannel < 8);
-
- if (method == NV_SET_OBJECT) {
- assert(parameter < memory_region_size(&d->ramin));
- uint8_t *obj_ptr = d->ramin_ptr + parameter;
-
- uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr);
- uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4));
- uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8));
- uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12));
- uint32_t ctx_5 = parameter;
-
- pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4] = ctx_1;
- pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4] = ctx_2;
- pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4] = ctx_3;
- pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4] = ctx_4;
- pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4] = ctx_5;
- }
-
- // is this right?
- pg->regs[NV_PGRAPH_CTX_SWITCH1] = pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4];
- pg->regs[NV_PGRAPH_CTX_SWITCH2] = pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4];
- pg->regs[NV_PGRAPH_CTX_SWITCH3] = pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4];
- pg->regs[NV_PGRAPH_CTX_SWITCH4] = pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4];
- pg->regs[NV_PGRAPH_CTX_SWITCH5] = pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4];
-
- uint32_t graphics_class = GET_MASK(pg->regs[NV_PGRAPH_CTX_SWITCH1],
- NV_PGRAPH_CTX_SWITCH1_GRCLASS);
-
- pgraph_method_log(subchannel, graphics_class, method, parameter);
-
- if (subchannel != 0) {
- // catches context switching issues on xbox d3d
- assert(graphics_class != 0x97);
- }
-
- /* ugly switch for now */
- switch (graphics_class) {
- case NV_BETA: {
- switch (method) {
- case NV012_SET_OBJECT:
- beta->object_instance = parameter;
- break;
- case NV012_SET_BETA:
- if (parameter & 0x80000000) {
- beta->beta = 0;
- } else {
- // The parameter is a signed fixed-point number with a sign bit
- // and 31 fractional bits. Note that negative values are clamped
- // to 0, and only 8 fractional bits are actually implemented in
- // hardware.
- beta->beta = parameter & 0x7f800000;
- }
- break;
- default:
- goto unhandled;
- }
- break;
- }
- case NV_CONTEXT_PATTERN: {
- switch (method) {
- case NV044_SET_MONOCHROME_COLOR0:
- pg->regs[NV_PGRAPH_PATT_COLOR0] = parameter;
- break;
- default:
- goto unhandled;
- }
- break;
- }
- case NV_CONTEXT_SURFACES_2D: {
- switch (method) {
- case NV062_SET_OBJECT:
- context_surfaces_2d->object_instance = parameter;
- break;
- case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE:
- context_surfaces_2d->dma_image_source = parameter;
- break;
- case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN:
- context_surfaces_2d->dma_image_dest = parameter;
- break;
- case NV062_SET_COLOR_FORMAT:
- context_surfaces_2d->color_format = parameter;
- break;
- case NV062_SET_PITCH:
- context_surfaces_2d->source_pitch = parameter & 0xFFFF;
- context_surfaces_2d->dest_pitch = parameter >> 16;
- break;
- case NV062_SET_OFFSET_SOURCE:
- context_surfaces_2d->source_offset = parameter & 0x07FFFFFF;
- break;
- case NV062_SET_OFFSET_DESTIN:
- context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF;
- break;
- default:
- goto unhandled;
- }
- break;
- }
- case NV_IMAGE_BLIT: {
- switch (method) {
- case NV09F_SET_OBJECT:
- image_blit->object_instance = parameter;
- break;
- case NV09F_SET_CONTEXT_SURFACES:
- image_blit->context_surfaces = parameter;
- break;
- case NV09F_SET_OPERATION:
- image_blit->operation = parameter;
- break;
- case NV09F_CONTROL_POINT_IN:
- image_blit->in_x = parameter & 0xFFFF;
- image_blit->in_y = parameter >> 16;
- break;
- case NV09F_CONTROL_POINT_OUT:
- image_blit->out_x = parameter & 0xFFFF;
- image_blit->out_y = parameter >> 16;
- break;
- case NV09F_SIZE:
- image_blit->width = parameter & 0xFFFF;
- image_blit->height = parameter >> 16;
-
- if (image_blit->width && image_blit->height) {
- pgraph_image_blit(d);
- }
- break;
- default:
- goto unhandled;
- }
- break;
- }
- case NV_KELVIN_PRIMITIVE: {
- MethodFunc handler =
- pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler;
- if (handler == NULL) {
- goto unhandled;
- }
- size_t num_words_consumed = 1;
- handler(d, pg, subchannel, method, parameter, parameters,
- num_words_available, &num_words_consumed, inc);
-
- /* Squash repeated BEGIN,DRAW_ARRAYS,END */
- #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd))
- #define LAP(i, prm) (parameters[i*2+2] == (prm))
- #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm))
-
- if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) &&
- pg->inline_elements_length == 0 &&
- pg->draw_arrays_length <
- (ARRAY_SIZE(pg->gl_draw_arrays_start) - 1) &&
- LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) &&
- LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) &&
- LAM(2, NV097_DRAW_ARRAYS)) {
- num_words_consumed += 4;
- pg->draw_arrays_prevent_connect = true;
- }
-
- #undef LAM
- #undef LAP
- #undef LAMP
-
- num_processed = num_words_consumed;
- break;
- }
- default:
- goto unhandled;
- }
-
- return num_processed;
-
-unhandled:
- trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class,
- method, parameter);
- return num_processed;
-}
-
-DEF_METHOD(NV097, SET_OBJECT)
-{
- pg->kelvin.object_instance = parameter;
-}
-
-DEF_METHOD(NV097, NO_OPERATION)
-{
- /* The bios uses nop as a software method call -
- * it seems to expect a notify interrupt if the parameter isn't 0.
- * According to a nouveau guy it should still be a nop regardless
- * of the parameter. It's possible a debug register enables this,
- * but nothing obvious sticks out. Weird.
- */
- if (parameter == 0) {
- return;
- }
-
- unsigned channel_id =
- GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID);
-
- assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR));
-
- SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_CHID,
- channel_id);
- SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_SUBCH,
- subchannel);
- SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_MTHD,
- method);
- pg->regs[NV_PGRAPH_TRAPPED_DATA_LOW] = parameter;
- pg->regs[NV_PGRAPH_NSOURCE] =
- NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */
- pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR;
- pg->waiting_for_nop = true;
-
- qemu_mutex_unlock(&pg->lock);
- qemu_mutex_lock_iothread();
- nv2a_update_irq(d);
- qemu_mutex_unlock_iothread();
- qemu_mutex_lock(&pg->lock);
-}
-
-DEF_METHOD(NV097, WAIT_FOR_IDLE)
-{
- pgraph_update_surface(d, false, true, true);
-}
-
-DEF_METHOD(NV097, SET_FLIP_READ)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_FLIP_WRITE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_FLIP_MODULO)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D,
- parameter);
-}
-
-DEF_METHOD(NV097, FLIP_INCREMENT_WRITE)
-{
- uint32_t old =
- GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D);
-
- SET_MASK(pg->regs[NV_PGRAPH_SURFACE],
- NV_PGRAPH_SURFACE_WRITE_3D,
- (GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
- NV_PGRAPH_SURFACE_WRITE_3D)+1)
- % GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
- NV_PGRAPH_SURFACE_MODULO_3D) );
-
- uint32_t new =
- GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D);
-
- trace_nv2a_pgraph_flip_increment_write(old, new);
- NV2A_GL_DFRAME_TERMINATOR();
- pg->frame_time++;
-}
-
-DEF_METHOD(NV097, FLIP_STALL)
-{
- trace_nv2a_pgraph_flip_stall();
- pgraph_update_surface(d, false, true, true);
- nv2a_profile_flip_stall();
- pg->waiting_for_flip = true;
-}
-
-// TODO: these should be loading the dma objects from ramin here?
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES)
-{
- pg->dma_notifies = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_A)
-{
- pg->dma_a = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_B)
-{
- pg->dma_b = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE)
-{
- pg->dma_state = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR)
-{
- /* try to get any straggling draws in before the surface's changed :/ */
- pgraph_update_surface(d, false, true, true);
-
- pg->dma_color = parameter;
- pg->surface_color.buffer_dirty = true;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA)
-{
- pg->dma_zeta = parameter;
- pg->surface_zeta.buffer_dirty = true;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A)
-{
- pg->dma_vertex_a = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B)
-{
- pg->dma_vertex_b = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE)
-{
- pg->dma_semaphore = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT)
-{
- pgraph_process_pending_reports(d);
-
- pg->dma_report = parameter;
-}
-
-DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL)
-{
- pgraph_update_surface(d, false, true, true);
-
- pg->surface_shape.clip_x =
- GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X);
- pg->surface_shape.clip_width =
- GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH);
-}
-
-DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL)
-{
- pgraph_update_surface(d, false, true, true);
-
- pg->surface_shape.clip_y =
- GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y);
- pg->surface_shape.clip_height =
- GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT);
-}
-
-DEF_METHOD(NV097, SET_SURFACE_FORMAT)
-{
- pgraph_update_surface(d, false, true, true);
-
- pg->surface_shape.color_format =
- GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR);
- pg->surface_shape.zeta_format =
- GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA);
- pg->surface_shape.anti_aliasing =
- GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING);
- pg->surface_shape.log_width =
- GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH);
- pg->surface_shape.log_height =
- GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT);
-
- int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE);
- if (surface_type != pg->surface_type) {
- pg->surface_type = surface_type;
- pg->surface_color.buffer_dirty = true;
- pg->surface_zeta.buffer_dirty = true;
- }
-}
-
-DEF_METHOD(NV097, SET_SURFACE_PITCH)
-{
- pgraph_update_surface(d, false, true, true);
- unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR);
- unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA);
-
- pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch);
- pg->surface_color.pitch = color_pitch;
-
- pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch);
- pg->surface_zeta.pitch = zeta_pitch;
-}
-
-DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET)
-{
- pgraph_update_surface(d, false, true, true);
- pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter);
- pg->surface_color.offset = parameter;
-}
-
-DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET)
-{
- pgraph_update_surface(d, false, true, true);
- pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter);
- pg->surface_zeta.offset = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW)
-{
- int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4;
- pg->regs[NV_PGRAPH_COMBINEALPHAI0 + slot*4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0)
-{
- pg->regs[NV_PGRAPH_COMBINESPECFOG0] = parameter;
-}
-
-DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1)
-{
- pg->regs[NV_PGRAPH_COMBINESPECFOG1] = parameter;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_ADDRESS)
-{
- int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64;
- pg->regs[NV_PGRAPH_TEXADDRESS0 + slot * 4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_CONTROL0)
-{
- pgraph_update_surface(d, false, true, true);
-
- bool stencil_write_enable =
- parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE;
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE,
- stencil_write_enable);
-
- uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT);
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format);
-
- bool z_perspective =
- parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE;
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE,
- z_perspective);
-}
-
-DEF_METHOD(NV097, SET_COLOR_MATERIAL)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION,
- (parameter >> 0) & 3);
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT,
- (parameter >> 2) & 3);
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE,
- (parameter >> 4) & 3);
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR,
- (parameter >> 6) & 3);
-}
-
-DEF_METHOD(NV097, SET_FOG_MODE)
-{
- /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */
- unsigned int mode;
- switch (parameter) {
- case NV097_SET_FOG_MODE_V_LINEAR:
- mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break;
- case NV097_SET_FOG_MODE_V_EXP:
- mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break;
- case NV097_SET_FOG_MODE_V_EXP2:
- mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break;
- case NV097_SET_FOG_MODE_V_EXP_ABS:
- mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break;
- case NV097_SET_FOG_MODE_V_EXP2_ABS:
- mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break;
- case NV097_SET_FOG_MODE_V_LINEAR_ABS:
- mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break;
- default:
- assert(false);
- break;
- }
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOG_MODE,
- mode);
-}
-
-DEF_METHOD(NV097, SET_FOG_GEN_MODE)
-{
- unsigned int mode;
- switch (parameter) {
- case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA:
- mode = NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break;
- case NV097_SET_FOG_GEN_MODE_V_RADIAL:
- mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break;
- case NV097_SET_FOG_GEN_MODE_V_PLANAR:
- mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break;
- case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR:
- mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break;
- case NV097_SET_FOG_GEN_MODE_V_FOG_X:
- mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break;
- default:
- assert(false);
- break;
- }
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGGENMODE, mode);
-}
-
-DEF_METHOD(NV097, SET_FOG_ENABLE)
-{
- /*
- FIXME: There is also:
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGENABLE,
- parameter);
- */
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOGENABLE,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_FOG_COLOR)
-{
- /* PGRAPH channels are ARGB, parameter channels are ABGR */
- uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED);
- uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN);
- uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE);
- uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA);
- SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_RED, red);
- SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_GREEN, green);
- SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_BLUE, blue);
- SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_ALPHA, alpha);
-}
-
-DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter);
-}
-
-DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL)
-{
- int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4;
- for (; slot < 8; ++slot) {
- pg->regs[NV_PGRAPH_WINDOWCLIPX0 + slot * 4] = parameter;
- }
-}
-
-DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL)
-{
- int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4;
- for (; slot < 8; ++slot) {
- pg->regs[NV_PGRAPH_WINDOWCLIPY0 + slot * 4] = parameter;
- }
-}
-
-DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_BLEND_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EN, parameter);
-}
-
-DEF_METHOD(NV097, SET_CULL_FACE_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_CULLENABLE,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZENABLE,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_DITHER_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_LIGHTING_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_LIGHTING,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_POINTPARAMSENABLE,
- parameter);
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3],
- NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_SKIN_MODE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_SKIN,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_ALPHA_FUNC)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF);
-}
-
-DEF_METHOD(NV097, SET_ALPHA_REF)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ALPHAREF, parameter);
-}
-
-DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR)
-{
- unsigned int factor;
- switch (parameter) {
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO:
- factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE:
- factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR:
- factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR:
- factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA:
- factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA:
- factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA:
- factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA:
- factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR:
- factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR:
- factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE:
- factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR:
- factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR:
- factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA:
- factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break;
- case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA:
- factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break;
- default:
- NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter);
- return; /* discard */
- }
- SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_SFACTOR, factor);
-}
-
-DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR)
-{
- unsigned int factor;
- switch (parameter) {
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO:
- factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE:
- factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR:
- factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR:
- factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA:
- factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA:
- factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA:
- factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA:
- factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR:
- factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR:
- factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE:
- factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR:
- factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR:
- factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA:
- factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break;
- case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA:
- factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break;
- default:
- NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter);
- return; /* discard */
- }
- SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_DFACTOR, factor);
-}
-
-DEF_METHOD(NV097, SET_BLEND_COLOR)
-{
- pg->regs[NV_PGRAPH_BLENDCOLOR] = parameter;
-}
-
-DEF_METHOD(NV097, SET_BLEND_EQUATION)
-{
- unsigned int equation;
- switch (parameter) {
- case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT:
- equation = 0; break;
- case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT:
- equation = 1; break;
- case NV097_SET_BLEND_EQUATION_V_FUNC_ADD:
- equation = 2; break;
- case NV097_SET_BLEND_EQUATION_V_MIN:
- equation = 3; break;
- case NV097_SET_BLEND_EQUATION_V_MAX:
- equation = 4; break;
- case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED:
- equation = 5; break;
- case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED:
- equation = 6; break;
- default:
- NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter);
- return; /* discard */
- }
- SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EQN, equation);
-}
-
-DEF_METHOD(NV097, SET_DEPTH_FUNC)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZFUNC,
- parameter & 0xF);
-}
-
-DEF_METHOD(NV097, SET_COLOR_MASK)
-{
- pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg);
-
- bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE;
- bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE;
- bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE;
- bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE;
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha);
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red);
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green);
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue);
-}
-
-DEF_METHOD(NV097, SET_DEPTH_MASK)
-{
- pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg);
-
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter);
-}
-
-DEF_METHOD(NV097, SET_STENCIL_MASK)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter);
-}
-
-DEF_METHOD(NV097, SET_STENCIL_FUNC)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF);
-}
-
-DEF_METHOD(NV097, SET_STENCIL_FUNC_REF)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter);
-}
-
-DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter);
-}
-
-DEF_METHOD(NV097, SET_STENCIL_OP_FAIL)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
- NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL,
- kelvin_map_stencil_op(parameter));
-}
-
-DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
- NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL,
- kelvin_map_stencil_op(parameter));
-}
-
-DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
- NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS,
- kelvin_map_stencil_op(parameter));
-}
-
-DEF_METHOD(NV097, SET_SHADE_MODE)
-{
- switch (parameter) {
- case NV097_SET_SHADE_MODE_V_FLAT:
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE,
- NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT);
- break;
- case NV097_SET_SHADE_MODE_V_SMOOTH:
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE,
- NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH);
- break;
- default:
- /* Discard */
- break;
- }
-}
-
-DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR)
-{
- pg->regs[NV_PGRAPH_ZOFFSETFACTOR] = parameter;
-}
-
-DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS)
-{
- pg->regs[NV_PGRAPH_ZOFFSETBIAS] = parameter;
-}
-
-DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_FRONTFACEMODE,
- kelvin_map_polygon_mode(parameter));
-}
-
-DEF_METHOD(NV097, SET_BACK_POLYGON_MODE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_BACKFACEMODE,
- kelvin_map_polygon_mode(parameter));
-}
-
-DEF_METHOD(NV097, SET_CLIP_MIN)
-{
- pg->regs[NV_PGRAPH_ZCLIPMIN] = parameter;
-}
-
-DEF_METHOD(NV097, SET_CLIP_MAX)
-{
- pg->regs[NV_PGRAPH_ZCLIPMAX] = parameter;
-}
-
-DEF_METHOD(NV097, SET_CULL_FACE)
-{
- unsigned int face;
- switch (parameter) {
- case NV097_SET_CULL_FACE_V_FRONT:
- face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break;
- case NV097_SET_CULL_FACE_V_BACK:
- face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break;
- case NV097_SET_CULL_FACE_V_FRONT_AND_BACK:
- face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break;
- default:
- assert(false);
- break;
- }
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_CULLCTRL,
- face);
-}
-
-DEF_METHOD(NV097, SET_FRONT_FACE)
-{
- bool ccw;
- switch (parameter) {
- case NV097_SET_FRONT_FACE_V_CW:
- ccw = false; break;
- case NV097_SET_FRONT_FACE_V_CCW:
- ccw = true; break;
- default:
- NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter);
- return; /* discard */
- }
- SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_FRONTFACE,
- ccw ? 1 : 0);
-}
-
-DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
- NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE,
- parameter);
-}
-
-DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION)
-{
- int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4;
- // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct
- pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter;
- pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true;
-}
-
-DEF_METHOD(NV097, SET_MATERIAL_ALPHA)
-{
- pg->material_alpha = *(float*)¶meter;
-}
-
-DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK)
-{
- SET_MASK(d->pgraph.regs[NV_PGRAPH_CSV0_D],
- NV_PGRAPH_CSV0_D_LIGHTS,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_TEXGEN_S)
-{
- int slot = (method - NV097_SET_TEXGEN_S) / 16;
- unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
- : NV_PGRAPH_CSV1_B;
- unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S
- : NV_PGRAPH_CSV1_A_T0_S;
- SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 0));
-}
-
-DEF_METHOD(NV097, SET_TEXGEN_T)
-{
- int slot = (method - NV097_SET_TEXGEN_T) / 16;
- unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
- : NV_PGRAPH_CSV1_B;
- unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T
- : NV_PGRAPH_CSV1_A_T0_T;
- SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 1));
-}
-
-DEF_METHOD(NV097, SET_TEXGEN_R)
-{
- int slot = (method - NV097_SET_TEXGEN_R) / 16;
- unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
- : NV_PGRAPH_CSV1_B;
- unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R
- : NV_PGRAPH_CSV1_A_T0_R;
- SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 2));
-}
-
-DEF_METHOD(NV097, SET_TEXGEN_Q)
-{
- int slot = (method - NV097_SET_TEXGEN_Q) / 16;
- unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
- : NV_PGRAPH_CSV1_B;
- unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_Q
- : NV_PGRAPH_CSV1_A_T0_Q;
- SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 3));
-}
-
-DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE)
-{
- int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4;
- pg->texture_matrix_enable[slot] = parameter;
-}
-
-DEF_METHOD(NV097, SET_POINT_SIZE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V, parameter);
-}
-
-DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX)
-{
- int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4;
- // pg->projection_matrix[slot] = *(float*)¶meter;
- unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4;
- pg->vsh_constants[row][slot%4] = parameter;
- pg->vsh_constants_dirty[row] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX)
-{
- int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4;
- unsigned int matnum = slot / 16;
- unsigned int entry = slot % 16;
- unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4;
- pg->vsh_constants[row][entry % 4] = parameter;
- pg->vsh_constants_dirty[row] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX)
-{
- int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4;
- unsigned int matnum = slot / 16;
- unsigned int entry = slot % 16;
- unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4;
- pg->vsh_constants[row][entry % 4] = parameter;
- pg->vsh_constants_dirty[row] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX)
-{
- int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4;
- unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4;
- pg->vsh_constants[row][slot%4] = parameter;
- pg->vsh_constants_dirty[row] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX)
-{
- int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4;
- unsigned int tex = slot / 16;
- unsigned int entry = slot % 16;
- unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4;
- pg->vsh_constants[row][entry%4] = parameter;
- pg->vsh_constants_dirty[row] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_FOG_PARAMS)
-{
- int slot = (method - NV097_SET_FOG_PARAMS) / 4;
- if (slot < 2) {
- pg->regs[NV_PGRAPH_FOGPARAM0 + slot*4] = parameter;
- } else {
- /* FIXME: No idea where slot = 2 is */
- }
-
- pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter;
- pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true;
-}
-
-/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */
-DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S)
-{
- int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4;
- unsigned int tex = slot / 16;
- unsigned int entry = slot % 16;
- unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4;
- pg->vsh_constants[row][entry%4] = parameter;
- pg->vsh_constants_dirty[row] = true;
-}
-
-DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_TEXGEN_REF,
- parameter);
-}
-
-DEF_METHOD_INC(NV097, SET_FOG_PLANE)
-{
- int slot = (method - NV097_SET_FOG_PLANE) / 4;
- pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter;
- pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR)
-{
- int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4;
- // ??
- pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter;
- pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET)
-{
- int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4;
- pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter;
- pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_POINT_PARAMS)
-{
- int slot = (method - NV097_SET_POINT_PARAMS) / 4;
- pg->point_params[slot] = *(float *)¶meter; /* FIXME: Where? */
-}
-
-DEF_METHOD_INC(NV097, SET_EYE_POSITION)
-{
- int slot = (method - NV097_SET_EYE_POSITION) / 4;
- pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter;
- pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0)
-{
- int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4;
- pg->regs[NV_PGRAPH_COMBINEFACTOR0 + slot*4] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1)
-{
- int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4;
- pg->regs[NV_PGRAPH_COMBINEFACTOR1 + slot*4] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW)
-{
- int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4;
- pg->regs[NV_PGRAPH_COMBINEALPHAO0 + slot*4] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW)
-{
- int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4;
- pg->regs[NV_PGRAPH_COMBINECOLORI0 + slot*4] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE)
-{
- int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4;
- pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter;
- pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true;
-}
-
-DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM)
-{
- int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4;
-
- int program_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
- NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR);
-
- assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
- pg->program_data[program_load][slot%4] = parameter;
- pg->program_data_dirty = true;
-
- if (slot % 4 == 3) {
- SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
- NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT)
-{
- int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4;
- int const_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
- NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR);
-
- assert(const_load < NV2A_VERTEXSHADER_CONSTANTS);
- // VertexShaderConstant *constant = &pg->constants[const_load];
- pg->vsh_constants_dirty[const_load] |=
- (parameter != pg->vsh_constants[const_load][slot%4]);
- pg->vsh_constants[const_load][slot%4] = parameter;
-
- if (slot % 4 == 3) {
- SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
- NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX3F)
-{
- int slot = (method - NV097_SET_VERTEX3F) / 4;
- VertexAttribute *attribute =
- &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION];
- pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION);
- attribute->inline_value[slot] = *(float*)¶meter;
- attribute->inline_value[3] = 1.0f;
- if (slot == 2) {
- pgraph_finish_inline_buffer_vertex(pg);
- }
-}
-
-/* Handles NV097_SET_BACK_LIGHT_* */
-DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR)
-{
- int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4;
- unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16;
- slot /= 16; /* [Light index] */
- assert(slot < 8);
- switch(part * 4) {
- case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ...
- NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8:
- part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4;
- pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter;
- pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true;
- break;
- case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ...
- NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8:
- part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4;
- pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter;
- pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true;
- break;
- case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ...
- NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8:
- part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4;
- pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter;
- pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true;
- break;
- default:
- assert(false);
- break;
- }
-}
-
-/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */
-DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR)
-{
- int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4;
- unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32;
- slot /= 32; /* [Light index] */
- assert(slot < 8);
- switch(part * 4) {
- case NV097_SET_LIGHT_AMBIENT_COLOR ...
- NV097_SET_LIGHT_AMBIENT_COLOR + 8:
- part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4;
- pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter;
- pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true;
- break;
- case NV097_SET_LIGHT_DIFFUSE_COLOR ...
- NV097_SET_LIGHT_DIFFUSE_COLOR + 8:
- part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4;
- pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter;
- pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true;
- break;
- case NV097_SET_LIGHT_SPECULAR_COLOR ...
- NV097_SET_LIGHT_SPECULAR_COLOR + 8:
- part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4;
- pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter;
- pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true;
- break;
- case NV097_SET_LIGHT_LOCAL_RANGE:
- pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter;
- pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true;
- break;
- case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ...
- NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8:
- part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4;
- pg->light_infinite_half_vector[slot][part] = *(float*)¶meter;
- break;
- case NV097_SET_LIGHT_INFINITE_DIRECTION ...
- NV097_SET_LIGHT_INFINITE_DIRECTION + 8:
- part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4;
- pg->light_infinite_direction[slot][part] = *(float*)¶meter;
- break;
- case NV097_SET_LIGHT_SPOT_FALLOFF ...
- NV097_SET_LIGHT_SPOT_FALLOFF + 8:
- part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4;
- pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter;
- pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true;
- break;
- case NV097_SET_LIGHT_SPOT_DIRECTION ...
- NV097_SET_LIGHT_SPOT_DIRECTION + 12:
- part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4;
- pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter;
- pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true;
- break;
- case NV097_SET_LIGHT_LOCAL_POSITION ...
- NV097_SET_LIGHT_LOCAL_POSITION + 8:
- part -= NV097_SET_LIGHT_LOCAL_POSITION / 4;
- pg->light_local_position[slot][part] = *(float*)¶meter;
- break;
- case NV097_SET_LIGHT_LOCAL_ATTENUATION ...
- NV097_SET_LIGHT_LOCAL_ATTENUATION + 8:
- part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4;
- pg->light_local_attenuation[slot][part] = *(float*)¶meter;
- break;
- default:
- assert(false);
- break;
- }
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX4F)
-{
- int slot = (method - NV097_SET_VERTEX4F) / 4;
- VertexAttribute *attribute =
- &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION];
- pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION);
- attribute->inline_value[slot] = *(float*)¶meter;
- if (slot == 3) {
- pgraph_finish_inline_buffer_vertex(pg);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_NORMAL3S)
-{
- int slot = (method - NV097_SET_NORMAL3S) / 4;
- unsigned int part = slot % 2;
- VertexAttribute *attribute =
- &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL];
- pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL);
- int16_t val = parameter & 0xFFFF;
- attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f);
- val = parameter >> 16;
- attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f);
-}
-
-#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \
- do { \
- int slot = (method - (command)) / 4; \
- unsigned int part = slot % 2; \
- VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
- pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
- attribute->inline_value[part * 2 + 0] = \
- (float)(int16_t)(parameter & 0xFFFF); \
- attribute->inline_value[part * 2 + 1] = \
- (float)(int16_t)(parameter >> 16); \
- } while (0)
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S)
-{
- SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S)
-{
- SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S)
-{
- SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S)
-{
- SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3);
-}
-
-#undef SET_VERTEX_ATTRIBUTE_4S
-
-#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \
- do { \
- VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
- pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
- attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \
- attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \
- attribute->inline_value[2] = 0.0f; \
- attribute->inline_value[3] = 1.0f; \
- } while (0)
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S)
-{
- SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S)
-{
- SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S)
-{
- SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S)
-{
- SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3);
-}
-
-#undef SET_VERTEX_ATRIBUTE_TEX_2S
-
-#define SET_VERTEX_COLOR_3F(command, attr_index) \
- do { \
- int slot = (method - (command)) / 4; \
- VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
- pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
- attribute->inline_value[slot] = *(float*)¶meter; \
- attribute->inline_value[3] = 1.0f; \
- } while (0)
-
-DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F)
-{
- SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE);
-}
-
-DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F)
-{
- SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR);
-}
-
-#undef SET_VERTEX_COLOR_3F
-
-#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \
- do { \
- int slot = (method - (command)) / 4; \
- VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
- pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
- attribute->inline_value[slot] = *(float*)¶meter; \
- } while (0)
-
-DEF_METHOD_INC(NV097, SET_NORMAL3F)
-{
- SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL);
-}
-
-DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F)
-{
- SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE);
-}
-
-DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F)
-{
- SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F,
- NV2A_VERTEX_ATTR_SPECULAR);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F)
-{
- SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F)
-{
- SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1);
-}
-
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F)
-{
- SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F)
-{
- SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3);
-}
-
-#undef SET_VERTEX_ATTRIBUTE_F
-
-#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \
- do { \
- int slot = (method - (command)) / 4; \
- VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
- pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
- attribute->inline_value[slot] = *(float*)¶meter; \
- attribute->inline_value[2] = 0.0f; \
- attribute->inline_value[3] = 1.0f; \
- } while (0)
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F)
-{
- SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F,
- NV2A_VERTEX_ATTR_TEXTURE0);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F)
-{
- SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F,
- NV2A_VERTEX_ATTR_TEXTURE1);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F)
-{
- SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F,
- NV2A_VERTEX_ATTR_TEXTURE2);
-}
-
-DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F)
-{
- SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F,
- NV2A_VERTEX_ATTR_TEXTURE3);
-}
-
-#undef SET_VERTEX_ATRIBUTE_TEX_2F
-
-#define SET_VERTEX_ATTRIBUTE_4UB(command, attr_index) \
- do { \
- VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
- pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
- attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \
- attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \
- attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \
- attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \
- } while (0)
-
-DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB)
-{
- SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_DIFFUSE_COLOR4UB,
- NV2A_VERTEX_ATTR_DIFFUSE);
-}
-
-DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB)
-{
- SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_SPECULAR_COLOR4UB,
- NV2A_VERTEX_ATTR_SPECULAR);
-}
-
-#undef SET_VERTEX_ATTRIBUTE_4UB
-
-DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT)
-{
- int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4;
- VertexAttribute *attr = &pg->vertex_attributes[slot];
- attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE);
- attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE);
- attr->stride = GET_MASK(parameter,
- NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE);
- attr->gl_count = attr->count;
-
- NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n",
- attr->format, attr->count, attr->stride);
-
- switch (attr->format) {
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
- attr->gl_type = GL_UNSIGNED_BYTE;
- attr->gl_normalize = GL_TRUE;
- attr->size = 1;
- assert(attr->count == 4);
- // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt
- attr->gl_count = GL_BGRA;
- attr->needs_conversion = false;
- break;
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
- attr->gl_type = GL_UNSIGNED_BYTE;
- attr->gl_normalize = GL_TRUE;
- attr->size = 1;
- attr->needs_conversion = false;
- break;
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
- attr->gl_type = GL_SHORT;
- attr->gl_normalize = GL_TRUE;
- attr->size = 2;
- attr->needs_conversion = false;
- break;
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
- attr->gl_type = GL_FLOAT;
- attr->gl_normalize = GL_FALSE;
- attr->size = 4;
- attr->needs_conversion = false;
- break;
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
- attr->gl_type = GL_SHORT;
- attr->gl_normalize = GL_FALSE;
- attr->size = 2;
- attr->needs_conversion = false;
- break;
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
- /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
- attr->gl_type = GL_INT;
- attr->size = 4;
- assert(attr->count == 1);
- attr->needs_conversion = true;
- break;
- default:
- fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
- assert(false);
- break;
- }
-
- if (attr->needs_conversion) {
- pg->compressed_attrs |= (1 << slot);
- } else {
- pg->compressed_attrs &= ~(1 << slot);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET)
-{
- int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4;
-
- pg->vertex_attributes[slot].dma_select = parameter & 0x80000000;
- pg->vertex_attributes[slot].offset = parameter & 0x7fffffff;
-}
-
-DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP_ENABLE,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_LOGIC_OP)
-{
- SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP,
- parameter & 0xF);
-}
-
-static void pgraph_process_pending_report(NV2AState *d, QueryReport *r)
-{
- PGRAPHState *pg = &d->pgraph;
-
- if (r->clear) {
- pg->zpass_pixel_count_result = 0;
- return;
- }
-
- uint8_t type = GET_MASK(r->parameter, NV097_GET_REPORT_TYPE);
- assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
-
- /* FIXME: Multisampling affects this (both: OGL and Xbox GPU),
- * not sure if CLEARs also count
- */
- /* FIXME: What about clipping regions etc? */
- for (int i = 0; i < r->query_count; i++) {
- GLuint gl_query_result = 0;
- glGetQueryObjectuiv(r->queries[i], GL_QUERY_RESULT, &gl_query_result);
- gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor;
- pg->zpass_pixel_count_result += gl_query_result;
- }
-
- if (r->query_count) {
- glDeleteQueries(r->query_count, r->queries);
- g_free(r->queries);
- }
-
- uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! */
- uint32_t done = 0;
-
- hwaddr report_dma_len;
- uint8_t *report_data =
- (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len);
-
- hwaddr offset = GET_MASK(r->parameter, NV097_GET_REPORT_OFFSET);
- assert(offset < report_dma_len);
- report_data += offset;
-
- stq_le_p((uint64_t *)&report_data[0], timestamp);
- stl_le_p((uint32_t *)&report_data[8], pg->zpass_pixel_count_result);
- stl_le_p((uint32_t *)&report_data[12], done);
-}
-
-void pgraph_process_pending_reports(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
- QueryReport *r, *next;
-
- QSIMPLEQ_FOREACH_SAFE(r, &pg->report_queue, entry, next) {
- pgraph_process_pending_report(d, r);
- QSIMPLEQ_REMOVE_HEAD(&pg->report_queue, entry);
- g_free(r);
- }
-}
-
-DEF_METHOD(NV097, CLEAR_REPORT_VALUE)
-{
- /* FIXME: Does this have a value in parameter? Also does this (also?) modify
- * the report memory block?
- */
- if (pg->gl_zpass_pixel_count_query_count) {
- glDeleteQueries(pg->gl_zpass_pixel_count_query_count,
- pg->gl_zpass_pixel_count_queries);
- pg->gl_zpass_pixel_count_query_count = 0;
- }
-
- QueryReport *r = g_malloc(sizeof(QueryReport));
- r->clear = true;
- QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry);
-}
-
-DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE)
-{
- pg->zpass_pixel_count_enable = parameter;
-}
-
-DEF_METHOD(NV097, GET_REPORT)
-{
- uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE);
- assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
-
- QueryReport *r = g_malloc(sizeof(QueryReport));
- r->clear = false;
- r->parameter = parameter;
- r->query_count = pg->gl_zpass_pixel_count_query_count;
- r->queries = pg->gl_zpass_pixel_count_queries;
- QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry);
-
- pg->gl_zpass_pixel_count_query_count = 0;
- pg->gl_zpass_pixel_count_queries = NULL;
-}
-
-DEF_METHOD_INC(NV097, SET_EYE_DIRECTION)
-{
- int slot = (method - NV097_SET_EYE_DIRECTION) / 4;
- pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] = parameter;
- pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true;
-}
-
-static void pgraph_reset_draw_arrays(PGRAPHState *pg)
-{
- pg->draw_arrays_length = 0;
- pg->draw_arrays_min_start = -1;
- pg->draw_arrays_max_count = 0;
- pg->draw_arrays_prevent_connect = false;
-}
-
-static void pgraph_reset_inline_buffers(PGRAPHState *pg)
-{
- pg->inline_elements_length = 0;
- pg->inline_array_length = 0;
- pg->inline_buffer_length = 0;
- pgraph_reset_draw_arrays(pg);
-}
-
-static void pgraph_flush_draw(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
- if (!(pg->color_binding || pg->zeta_binding)) {
- pgraph_reset_inline_buffers(pg);
- return;
- }
- assert(pg->shader_binding);
-
- if (pg->draw_arrays_length) {
- NV2A_GL_DPRINTF(false, "Draw Arrays");
- nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);
- assert(pg->inline_elements_length == 0);
- assert(pg->inline_buffer_length == 0);
- assert(pg->inline_array_length == 0);
-
- pgraph_bind_vertex_attributes(d, pg->draw_arrays_min_start,
- pg->draw_arrays_max_count - 1,
- false, 0,
- pg->draw_arrays_max_count - 1);
- glMultiDrawArrays(pg->shader_binding->gl_primitive_mode,
- pg->gl_draw_arrays_start,
- pg->gl_draw_arrays_count,
- pg->draw_arrays_length);
- } else if (pg->inline_elements_length) {
- NV2A_GL_DPRINTF(false, "Inline Elements");
- nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);
- assert(pg->inline_buffer_length == 0);
- assert(pg->inline_array_length == 0);
-
- uint32_t min_element = (uint32_t)-1;
- uint32_t max_element = 0;
- for (int i=0; i < pg->inline_elements_length; i++) {
- max_element = MAX(pg->inline_elements[i], max_element);
- min_element = MIN(pg->inline_elements[i], min_element);
- }
-
- pgraph_bind_vertex_attributes(
- d, min_element, max_element, false, 0,
- pg->inline_elements[pg->inline_elements_length - 1]);
-
- VertexKey k;
- memset(&k, 0, sizeof(VertexKey));
- k.count = pg->inline_elements_length;
- k.gl_type = GL_UNSIGNED_INT;
- k.gl_normalize = GL_FALSE;
- k.stride = sizeof(uint32_t);
- uint64_t h = fast_hash((uint8_t*)pg->inline_elements,
- pg->inline_elements_length * 4);
-
- LruNode *node = lru_lookup(&pg->element_cache, h, &k);
- VertexLruNode *found = container_of(node, VertexLruNode, node);
- glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer);
- if (!found->initialized) {
- nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4);
- glBufferData(GL_ELEMENT_ARRAY_BUFFER,
- pg->inline_elements_length * 4,
- pg->inline_elements, GL_STATIC_DRAW);
- found->initialized = true;
- } else {
- nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY);
- }
- glDrawElements(pg->shader_binding->gl_primitive_mode,
- pg->inline_elements_length, GL_UNSIGNED_INT,
- (void *)0);
- } else if (pg->inline_buffer_length) {
- NV2A_GL_DPRINTF(false, "Inline Buffer");
- nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
- assert(pg->inline_array_length == 0);
-
- if (pg->compressed_attrs) {
- pg->compressed_attrs = 0;
- pgraph_bind_shaders(pg);
- }
-
- for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
- VertexAttribute *attr = &pg->vertex_attributes[i];
- if (attr->inline_buffer_populated) {
- nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
- glBindBuffer(GL_ARRAY_BUFFER, attr->gl_inline_buffer);
- glBufferData(GL_ARRAY_BUFFER,
- pg->inline_buffer_length * sizeof(float) * 4,
- attr->inline_buffer, GL_STREAM_DRAW);
- glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
- glEnableVertexAttribArray(i);
- attr->inline_buffer_populated = false;
- memcpy(attr->inline_value,
- attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
- sizeof(attr->inline_value));
- } else {
- glDisableVertexAttribArray(i);
- glVertexAttrib4fv(i, attr->inline_value);
- }
- }
-
- glDrawArrays(pg->shader_binding->gl_primitive_mode,
- 0, pg->inline_buffer_length);
- } else if (pg->inline_array_length) {
- NV2A_GL_DPRINTF(false, "Inline Array");
- nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);
-
- unsigned int index_count = pgraph_bind_inline_array(d);
- glDrawArrays(pg->shader_binding->gl_primitive_mode,
- 0, index_count);
- } else {
- NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END");
- NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
- }
-
- pgraph_reset_inline_buffers(pg);
-}
-
-DEF_METHOD(NV097, SET_BEGIN_END)
-{
- uint32_t control_0 = pg->regs[NV_PGRAPH_CONTROL_0];
- bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
- bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
- bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
- bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
- bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
- bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
- bool stencil_test =
- pg->regs[NV_PGRAPH_CONTROL_1] & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
- bool is_nop_draw = !(color_write || depth_test || stencil_test);
-
- if (parameter == NV097_SET_BEGIN_END_OP_END) {
- if (pg->primitive_mode == PRIM_TYPE_INVALID) {
- NV2A_DPRINTF("End without Begin!\n");
- }
- nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS);
-
- if (is_nop_draw) {
- // FIXME: Check PGRAPH register 0x880.
- // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
- // check that will raise an exception in the case that a draw should
- // modify the color and/or zeta buffer but the target(s) are masked
- // off. This check only seems to trigger during the fragment
- // processing, it is legal to attempt a draw that is entirely
- // clipped regardless of 0x880. See xemu#635 for context.
- return;
- }
-
- pgraph_flush_draw(d);
-
- /* End of visibility testing */
- if (pg->zpass_pixel_count_enable) {
- nv2a_profile_inc_counter(NV2A_PROF_QUERY);
- glEndQuery(GL_SAMPLES_PASSED);
- }
-
- pg->draw_time++;
- if (pg->color_binding && pgraph_color_write_enabled(pg)) {
- pg->color_binding->draw_time = pg->draw_time;
- }
- if (pg->zeta_binding && pgraph_zeta_write_enabled(pg)) {
- pg->zeta_binding->draw_time = pg->draw_time;
- }
-
- pgraph_set_surface_dirty(pg, color_write, depth_test || stencil_test);
-
- NV2A_GL_DGROUP_END();
- pg->primitive_mode = PRIM_TYPE_INVALID;
- } else {
- NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", parameter);
- if (pg->primitive_mode != PRIM_TYPE_INVALID) {
- NV2A_DPRINTF("Begin without End!\n");
- }
- assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON);
- pg->primitive_mode = parameter;
-
- pgraph_update_surface(d, true, true, depth_test || stencil_test);
- pgraph_reset_inline_buffers(pg);
-
- if (is_nop_draw) {
- return;
- }
-
- assert(pg->color_binding || pg->zeta_binding);
-
- pgraph_bind_textures(d);
- pgraph_bind_shaders(pg);
-
- glColorMask(mask_red, mask_green, mask_blue, mask_alpha);
- glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE));
- glStencilMask(GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE));
-
- if (pg->regs[NV_PGRAPH_BLEND] & NV_PGRAPH_BLEND_EN) {
- glEnable(GL_BLEND);
- uint32_t sfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND],
- NV_PGRAPH_BLEND_SFACTOR);
- uint32_t dfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND],
- NV_PGRAPH_BLEND_DFACTOR);
- assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_map));
- assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_map));
- glBlendFunc(pgraph_blend_factor_map[sfactor],
- pgraph_blend_factor_map[dfactor]);
-
- uint32_t equation = GET_MASK(pg->regs[NV_PGRAPH_BLEND],
- NV_PGRAPH_BLEND_EQN);
- assert(equation < ARRAY_SIZE(pgraph_blend_equation_map));
- glBlendEquation(pgraph_blend_equation_map[equation]);
-
- uint32_t blend_color = pg->regs[NV_PGRAPH_BLENDCOLOR];
- glBlendColor( ((blend_color >> 16) & 0xFF) / 255.0f, /* red */
- ((blend_color >> 8) & 0xFF) / 255.0f, /* green */
- (blend_color & 0xFF) / 255.0f, /* blue */
- ((blend_color >> 24) & 0xFF) / 255.0f);/* alpha */
- } else {
- glDisable(GL_BLEND);
- }
-
- /* Face culling */
- if (pg->regs[NV_PGRAPH_SETUPRASTER]
- & NV_PGRAPH_SETUPRASTER_CULLENABLE) {
- uint32_t cull_face = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_CULLCTRL);
- assert(cull_face < ARRAY_SIZE(pgraph_cull_face_map));
- glCullFace(pgraph_cull_face_map[cull_face]);
- glEnable(GL_CULL_FACE);
- } else {
- glDisable(GL_CULL_FACE);
- }
-
- /* Clipping */
- glEnable(GL_CLIP_DISTANCE0);
- glEnable(GL_CLIP_DISTANCE1);
-
- /* Front-face select */
- glFrontFace(pg->regs[NV_PGRAPH_SETUPRASTER]
- & NV_PGRAPH_SETUPRASTER_FRONTFACE
- ? GL_CCW : GL_CW);
-
- /* Polygon offset */
- /* FIXME: GL implementation-specific, maybe do this in VS? */
- if (pg->regs[NV_PGRAPH_SETUPRASTER] &
- NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) {
- glEnable(GL_POLYGON_OFFSET_FILL);
- } else {
- glDisable(GL_POLYGON_OFFSET_FILL);
- }
- if (pg->regs[NV_PGRAPH_SETUPRASTER] &
- NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) {
- glEnable(GL_POLYGON_OFFSET_LINE);
- } else {
- glDisable(GL_POLYGON_OFFSET_LINE);
- }
- if (pg->regs[NV_PGRAPH_SETUPRASTER] &
- NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) {
- glEnable(GL_POLYGON_OFFSET_POINT);
- } else {
- glDisable(GL_POLYGON_OFFSET_POINT);
- }
- if (pg->regs[NV_PGRAPH_SETUPRASTER] &
- (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
- NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
- NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
- GLfloat zfactor = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETFACTOR];
- GLfloat zbias = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETBIAS];
- glPolygonOffset(zfactor, zbias);
- }
-
- /* Depth testing */
- if (depth_test) {
- glEnable(GL_DEPTH_TEST);
-
- uint32_t depth_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ZFUNC);
- assert(depth_func < ARRAY_SIZE(pgraph_depth_func_map));
- glDepthFunc(pgraph_depth_func_map[depth_func]);
- } else {
- glDisable(GL_DEPTH_TEST);
- }
-
- if (GET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE],
- NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
- NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
- glEnable(GL_DEPTH_CLAMP);
- } else {
- glDisable(GL_DEPTH_CLAMP);
- }
-
- if (GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3],
- NV_PGRAPH_CONTROL_3_SHADEMODE) ==
- NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) {
- glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
- }
-
- if (stencil_test) {
- glEnable(GL_STENCIL_TEST);
-
- uint32_t stencil_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
- uint32_t stencil_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_REF);
- uint32_t func_mask = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
- NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
- uint32_t op_fail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
- NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
- uint32_t op_zfail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
- NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
- uint32_t op_zpass = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
- NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
-
- assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_map));
- assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_map));
- assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_map));
- assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_map));
-
- glStencilFunc(
- pgraph_stencil_func_map[stencil_func],
- stencil_ref,
- func_mask);
-
- glStencilOp(
- pgraph_stencil_op_map[op_fail],
- pgraph_stencil_op_map[op_zfail],
- pgraph_stencil_op_map[op_zpass]);
-
- } else {
- glDisable(GL_STENCIL_TEST);
- }
-
- /* Dither */
- /* FIXME: GL implementation dependent */
- if (pg->regs[NV_PGRAPH_CONTROL_0] &
- NV_PGRAPH_CONTROL_0_DITHERENABLE) {
- glEnable(GL_DITHER);
- } else {
- glDisable(GL_DITHER);
- }
-
- glEnable(GL_PROGRAM_POINT_SIZE);
-
- bool anti_aliasing = GET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE);
-
- /* Edge Antialiasing */
- if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] &
- NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
- glEnable(GL_LINE_SMOOTH);
- glLineWidth(MIN(supportedSmoothLineWidthRange[1], pg->surface_scale_factor));
- } else {
- glDisable(GL_LINE_SMOOTH);
- glLineWidth(MIN(supportedAliasedLineWidthRange[1], pg->surface_scale_factor));
- }
- if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] &
- NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
- glEnable(GL_POLYGON_SMOOTH);
- } else {
- glDisable(GL_POLYGON_SMOOTH);
- }
-
- unsigned int vp_width = pg->surface_binding_dim.width,
- vp_height = pg->surface_binding_dim.height;
- pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
- glViewport(0, 0, vp_width, vp_height);
-
- /* Surface clip */
- /* FIXME: Consider moving to PSH w/ window clip */
- unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x,
- ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y;
- unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
- ymax = ymin + pg->surface_shape.clip_height - 1;
-
- unsigned int scissor_width = xmax - xmin + 1,
- scissor_height = ymax - ymin + 1;
- pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
- pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
- ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
- pgraph_apply_scaling_factor(pg, &xmin, &ymin);
- pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
-
- glEnable(GL_SCISSOR_TEST);
- glScissor(xmin, ymin, scissor_width, scissor_height);
-
- /* Visibility testing */
- if (pg->zpass_pixel_count_enable) {
- pg->gl_zpass_pixel_count_query_count++;
- pg->gl_zpass_pixel_count_queries = (GLuint*)g_realloc(
- pg->gl_zpass_pixel_count_queries,
- sizeof(GLuint) * pg->gl_zpass_pixel_count_query_count);
-
- GLuint gl_query;
- glGenQueries(1, &gl_query);
- pg->gl_zpass_pixel_count_queries[
- pg->gl_zpass_pixel_count_query_count - 1] = gl_query;
- glBeginQuery(GL_SAMPLES_PASSED, gl_query);
- }
- }
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_OFFSET)
-{
- int slot = (method - NV097_SET_TEXTURE_OFFSET) / 64;
- pg->regs[NV_PGRAPH_TEXOFFSET0 + slot * 4] = parameter;
- pg->texture_dirty[slot] = true;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_FORMAT)
-{
- int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64;
-
- bool dma_select =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2;
- bool cubemap =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE);
- unsigned int border_source =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE);
- unsigned int dimensionality =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY);
- unsigned int color_format =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR);
- unsigned int levels =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS);
- unsigned int log_width =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U);
- unsigned int log_height =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V);
- unsigned int log_depth =
- GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P);
-
- uint32_t *reg = &pg->regs[NV_PGRAPH_TEXFMT0 + slot * 4];
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_COLOR, color_format);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height);
- SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth);
-
- pg->texture_dirty[slot] = true;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_CONTROL0)
-{
- int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64;
- pg->regs[NV_PGRAPH_TEXCTL0_0 + slot*4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_CONTROL1)
-{
- int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64;
- pg->regs[NV_PGRAPH_TEXCTL1_0 + slot*4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_FILTER)
-{
- int slot = (method - NV097_SET_TEXTURE_FILTER) / 64;
- pg->regs[NV_PGRAPH_TEXFILTER0 + slot * 4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT)
-{
- int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64;
- pg->regs[NV_PGRAPH_TEXIMAGERECT0 + slot * 4] = parameter;
- pg->texture_dirty[slot] = true;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_PALETTE)
-{
- int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64;
-
- bool dma_select =
- GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1;
- unsigned int length =
- GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH);
- unsigned int offset =
- GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET);
-
- uint32_t *reg = &pg->regs[NV_PGRAPH_TEXPALETTE0 + slot * 4];
- SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select);
- SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length);
- SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset);
-
- pg->texture_dirty[slot] = true;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR)
-{
- int slot = (method - NV097_SET_TEXTURE_BORDER_COLOR) / 64;
- pg->regs[NV_PGRAPH_BORDERCOLOR0 + slot * 4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT)
-{
- int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4;
- if (slot < 16) {
- /* discard */
- return;
- }
-
- slot -= 16;
- const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01,
- NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 };
- pg->regs[swizzle[slot % 4] + slot / 4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE)
-{
- int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64;
- if (slot == 0) {
- /* discard */
- return;
- }
-
- slot--;
- pg->regs[NV_PGRAPH_BUMPSCALE1 + slot * 4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET)
-{
- int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64;
- if (slot == 0) {
- /* discard */
- return;
- }
-
- slot--;
- pg->regs[NV_PGRAPH_BUMPOFFSET1 + slot * 4] = parameter;
-}
-
-static void pgraph_expand_draw_arrays(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
- GLint start = pg->gl_draw_arrays_start[pg->draw_arrays_length - 1];
- GLsizei count = pg->gl_draw_arrays_count[pg->draw_arrays_length - 1];
-
- /* Render any previously squashed DRAW_ARRAYS calls. This case would be
- * triggered if a set of BEGIN+DA+END triplets is followed by the
- * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. */
- if (pg->draw_arrays_length > 1) {
- pgraph_flush_draw(d);
- }
- assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH);
- for (unsigned int i = 0; i < count; i++) {
- pg->inline_elements[pg->inline_elements_length++] = start + i;
- }
-
- pgraph_reset_draw_arrays(pg);
-}
-
-static void pgraph_check_within_begin_end_block(PGRAPHState *pg)
-{
- if (pg->primitive_mode == PRIM_TYPE_INVALID) {
- NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n");
- }
-}
-
-DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16)
-{
- pgraph_check_within_begin_end_block(pg);
-
- if (pg->draw_arrays_length) {
- pgraph_expand_draw_arrays(d);
- }
-
- assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
- pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF;
- pg->inline_elements[pg->inline_elements_length++] = parameter >> 16;
-}
-
-DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32)
-{
- pgraph_check_within_begin_end_block(pg);
-
- if (pg->draw_arrays_length) {
- pgraph_expand_draw_arrays(d);
- }
-
- assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
- pg->inline_elements[pg->inline_elements_length++] = parameter;
-}
-
-DEF_METHOD(NV097, DRAW_ARRAYS)
-{
- pgraph_check_within_begin_end_block(pg);
-
- unsigned int start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX);
- unsigned int count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT) + 1;
-
- if (pg->inline_elements_length) {
- /* FIXME: Determine HW behavior for overflow case. */
- assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH);
- assert(!pg->draw_arrays_prevent_connect);
-
- for (unsigned int i = 0; i < count; i++) {
- pg->inline_elements[pg->inline_elements_length++] = start + i;
- }
- return;
- }
-
- pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start);
- pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count);
-
- assert(pg->draw_arrays_length < ARRAY_SIZE(pg->gl_draw_arrays_start));
-
- /* Attempt to connect contiguous primitives */
- if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) {
- unsigned int last_start =
- pg->gl_draw_arrays_start[pg->draw_arrays_length - 1];
- GLsizei* last_count =
- &pg->gl_draw_arrays_count[pg->draw_arrays_length - 1];
- if (start == (last_start + *last_count)) {
- *last_count += count;
- return;
- }
- }
-
- pg->gl_draw_arrays_start[pg->draw_arrays_length] = start;
- pg->gl_draw_arrays_count[pg->draw_arrays_length] = count;
- pg->draw_arrays_length++;
- pg->draw_arrays_prevent_connect = false;
-}
-
-DEF_METHOD_NON_INC(NV097, INLINE_ARRAY)
-{
- pgraph_check_within_begin_end_block(pg);
- assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH);
- pg->inline_array[pg->inline_array_length++] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_EYE_VECTOR)
-{
- int slot = (method - NV097_SET_EYE_VECTOR) / 4;
- pg->regs[NV_PGRAPH_EYEVEC0 + slot * 4] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M)
-{
- int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4;
- unsigned int part = slot % 2;
- slot /= 2;
- VertexAttribute *attribute = &pg->vertex_attributes[slot];
- pgraph_allocate_inline_buffer_vertices(pg, slot);
- attribute->inline_value[part] = *(float*)¶meter;
- /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? */
- attribute->inline_value[2] = 0.0;
- attribute->inline_value[3] = 1.0;
- if ((slot == 0) && (part == 1)) {
- pgraph_finish_inline_buffer_vertex(pg);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M)
-{
- int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4;
- unsigned int part = slot % 4;
- slot /= 4;
- VertexAttribute *attribute = &pg->vertex_attributes[slot];
- pgraph_allocate_inline_buffer_vertices(pg, slot);
- attribute->inline_value[part] = *(float*)¶meter;
- if ((slot == 0) && (part == 3)) {
- pgraph_finish_inline_buffer_vertex(pg);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S)
-{
- int slot = (method - NV097_SET_VERTEX_DATA2S) / 4;
- VertexAttribute *attribute = &pg->vertex_attributes[slot];
- pgraph_allocate_inline_buffer_vertices(pg, slot);
- attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF);
- attribute->inline_value[1] = (float)(int16_t)(parameter >> 16);
- attribute->inline_value[2] = 0.0;
- attribute->inline_value[3] = 1.0;
- if (slot == 0) {
- pgraph_finish_inline_buffer_vertex(pg);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB)
-{
- int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4;
- VertexAttribute *attribute = &pg->vertex_attributes[slot];
- pgraph_allocate_inline_buffer_vertices(pg, slot);
- attribute->inline_value[0] = (parameter & 0xFF) / 255.0;
- attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0;
- attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0;
- attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0;
- if (slot == 0) {
- pgraph_finish_inline_buffer_vertex(pg);
- }
-}
-
-DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M)
-{
- int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4;
- unsigned int part = slot % 2;
- slot /= 2;
- VertexAttribute *attribute = &pg->vertex_attributes[slot];
- pgraph_allocate_inline_buffer_vertices(pg, slot);
-
- attribute->inline_value[part * 2 + 0] = (float)(int16_t)(parameter & 0xFFFF);
- attribute->inline_value[part * 2 + 1] = (float)(int16_t)(parameter >> 16);
- if ((slot == 0) && (part == 1)) {
- pgraph_finish_inline_buffer_vertex(pg);
- }
-}
-
-DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET)
-{
- pg->regs[NV_PGRAPH_SEMAPHOREOFFSET] = parameter;
-}
-
-DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE)
-{
- pgraph_update_surface(d, false, true, true);
-
- //qemu_mutex_unlock(&d->pgraph.lock);
- //qemu_mutex_lock_iothread();
-
- uint32_t semaphore_offset = pg->regs[NV_PGRAPH_SEMAPHOREOFFSET];
-
- hwaddr semaphore_dma_len;
- uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore,
- &semaphore_dma_len);
- assert(semaphore_offset < semaphore_dma_len);
- semaphore_data += semaphore_offset;
-
- stl_le_p((uint32_t*)semaphore_data, parameter);
-
- //qemu_mutex_lock(&d->pgraph.lock);
- //qemu_mutex_unlock_iothread();
-}
-
-DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL)
-{
- switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) {
- case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL:
- SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE],
- NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN,
- NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL);
- break;
- case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP:
- SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE],
- NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN,
- NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP);
- break;
- default:
- /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */
- assert(!"Invalid zclamp value");
- break;
- }
-}
-
-DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL)
-{
- SET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE,
- GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE));
- // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001)
-}
-
-DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE)
-{
- pg->regs[NV_PGRAPH_ZSTENCILCLEARVALUE] = parameter;
-}
-
-DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE)
-{
- pg->regs[NV_PGRAPH_COLORCLEARVALUE] = parameter;
-}
-
-DEF_METHOD(NV097, CLEAR_SURFACE)
-{
- pg->clearing = true;
-
- NV2A_DPRINTF("---------PRE CLEAR ------\n");
- GLbitfield gl_mask = 0;
-
- bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
- bool write_zeta =
- (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
-
- if (write_zeta) {
- uint32_t clear_zstencil =
- d->pgraph.regs[NV_PGRAPH_ZSTENCILCLEARVALUE];
- GLint gl_clear_stencil;
- GLfloat gl_clear_depth;
-
- switch(pg->surface_shape.zeta_format) {
- case NV097_SET_SURFACE_FORMAT_ZETA_Z16: {
- uint16_t z = clear_zstencil & 0xFFFF;
- /* FIXME: Remove bit for stencil clear? */
- if (pg->surface_shape.z_format) {
- gl_clear_depth = convert_f16_to_float(z) / f16_max;
- } else {
- gl_clear_depth = z / (float)0xFFFF;
- }
- break;
- }
- case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: {
- gl_clear_stencil = clear_zstencil & 0xFF;
- uint32_t z = clear_zstencil >> 8;
- if (pg->surface_shape.z_format) {
- gl_clear_depth = convert_f24_to_float(z) / f24_max;
- } else {
- gl_clear_depth = z / (float)0xFFFFFF;
- }
- break;
- }
- default:
- fprintf(stderr, "Unknown zeta surface format: 0x%x\n", pg->surface_shape.zeta_format);
- assert(false);
- break;
- }
- if (parameter & NV097_CLEAR_SURFACE_Z) {
- gl_mask |= GL_DEPTH_BUFFER_BIT;
- glDepthMask(GL_TRUE);
- glClearDepth(gl_clear_depth);
- }
- if (parameter & NV097_CLEAR_SURFACE_STENCIL) {
- gl_mask |= GL_STENCIL_BUFFER_BIT;
- glStencilMask(0xff);
- glClearStencil(gl_clear_stencil);
- }
- }
- if (write_color) {
- gl_mask |= GL_COLOR_BUFFER_BIT;
- glColorMask((parameter & NV097_CLEAR_SURFACE_R)
- ? GL_TRUE : GL_FALSE,
- (parameter & NV097_CLEAR_SURFACE_G)
- ? GL_TRUE : GL_FALSE,
- (parameter & NV097_CLEAR_SURFACE_B)
- ? GL_TRUE : GL_FALSE,
- (parameter & NV097_CLEAR_SURFACE_A)
- ? GL_TRUE : GL_FALSE);
- uint32_t clear_color = d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE];
-
- /* Handle RGB */
- GLfloat red, green, blue;
- switch(pg->surface_shape.color_format) {
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5:
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5:
- red = ((clear_color >> 10) & 0x1F) / 31.0f;
- green = ((clear_color >> 5) & 0x1F) / 31.0f;
- blue = (clear_color & 0x1F) / 31.0f;
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5:
- red = ((clear_color >> 11) & 0x1F) / 31.0f;
- green = ((clear_color >> 5) & 0x3F) / 63.0f;
- blue = (clear_color & 0x1F) / 31.0f;
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8:
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8:
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8:
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8:
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
- red = ((clear_color >> 16) & 0xFF) / 255.0f;
- green = ((clear_color >> 8) & 0xFF) / 255.0f;
- blue = (clear_color & 0xFF) / 255.0f;
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8:
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8:
- /* Xbox D3D doesn't support clearing those */
- default:
- red = 1.0f;
- green = 0.0f;
- blue = 1.0f;
- fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported",
- pg->surface_shape.color_format);
- assert(false);
- break;
- }
-
- /* Handle alpha */
- GLfloat alpha;
- switch(pg->surface_shape.color_format) {
- /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we
- * also have to clear non-alpha bits with alpha value?
- * As GL doesn't own those pixels we'd have to do this on
- * our own in xbox memory.
- */
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8:
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8:
- alpha = ((clear_color >> 24) & 0x7F) / 127.0f;
- assert(false); /* Untested */
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
- alpha = ((clear_color >> 24) & 0xFF) / 255.0f;
- break;
- default:
- alpha = 1.0f;
- break;
- }
-
- glClearColor(red, green, blue, alpha);
- }
-
- pgraph_update_surface(d, true, write_color, write_zeta);
-
- /* FIXME: Needs confirmation */
- unsigned int xmin =
- GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMIN);
- unsigned int xmax =
- GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMAX);
- unsigned int ymin =
- GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMIN);
- unsigned int ymax =
- GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMAX);
-
- NV2A_DPRINTF(
- "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n",
- parameter, xmin, ymin, xmax, ymax,
- d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]);
-
- unsigned int scissor_width = xmax - xmin + 1,
- scissor_height = ymax - ymin + 1;
- pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
- pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
- ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
-
- NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin,
- xmin + scissor_width - 1, ymin + scissor_height - 1);
-
- bool full_clear = !xmin && !ymin &&
- scissor_width >= pg->surface_binding_dim.width &&
- scissor_height >= pg->surface_binding_dim.height;
-
- pgraph_apply_scaling_factor(pg, &xmin, &ymin);
- pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
-
- /* FIXME: Respect window clip?!?! */
- glEnable(GL_SCISSOR_TEST);
- glScissor(xmin, ymin, scissor_width, scissor_height);
-
- /* Dither */
- /* FIXME: Maybe also disable it here? + GL implementation dependent */
- if (pg->regs[NV_PGRAPH_CONTROL_0] & NV_PGRAPH_CONTROL_0_DITHERENABLE) {
- glEnable(GL_DITHER);
- } else {
- glDisable(GL_DITHER);
- }
-
- glClear(gl_mask);
-
- glDisable(GL_SCISSOR_TEST);
-
- pgraph_set_surface_dirty(pg, write_color, write_zeta);
-
- if (pg->color_binding) {
- pg->color_binding->cleared = full_clear && write_color;
- }
- if (pg->zeta_binding) {
- pg->zeta_binding->cleared = full_clear && write_zeta;
- }
-
- pg->clearing = false;
-}
-
-DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL)
-{
- pg->regs[NV_PGRAPH_CLEARRECTX] = parameter;
-}
-
-DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL)
-{
- pg->regs[NV_PGRAPH_CLEARRECTY] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR)
-{
- int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4;
- pg->regs[NV_PGRAPH_SPECFOGFACTOR0 + slot*4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE)
-{
- pg->regs[NV_PGRAPH_SHADERCLIPMODE] = parameter;
-}
-
-DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW)
-{
- int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4;
- pg->regs[NV_PGRAPH_COMBINECOLORO0 + slot*4] = parameter;
-}
-
-DEF_METHOD(NV097, SET_COMBINER_CONTROL)
-{
- pg->regs[NV_PGRAPH_COMBINECTL] = parameter;
-}
-
-DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD)
-{
- pg->regs[NV_PGRAPH_SHADOWZSLOPETHRESHOLD] = parameter;
- assert(parameter == 0x7F800000); /* FIXME: Unimplemented */
-}
-
-DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC,
- parameter);
-}
-
-DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM)
-{
- pg->regs[NV_PGRAPH_SHADERPROG] = parameter;
-}
-
-DEF_METHOD(NV097, SET_DOT_RGBMAPPING)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFF,
- GET_MASK(parameter, 0xFFF));
-}
-
-DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT)
-{
- SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFFF000,
- GET_MASK(parameter, 0xFFFF000));
-}
-
-DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA)
-{
- int slot = (method - NV097_SET_TRANSFORM_DATA) / 4;
- pg->vertex_state_shader_v0[slot] = parameter;
-}
-
-DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM)
-{
- unsigned int program_start = parameter;
- assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
- Nv2aVshProgram program;
- Nv2aVshParseResult result = nv2a_vsh_parse_program(
- &program,
- pg->program_data[program_start],
- NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - program_start);
- assert(result == NV2AVPR_SUCCESS);
-
- Nv2aVshCPUXVSSExecutionState state_linkage;
- Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state(
- &state_linkage, (float*)pg->vsh_constants);
- memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0));
-
- nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty);
-
- nv2a_vsh_program_destroy(&program);
-}
-
-DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE)
-{
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_MODE,
- GET_MASK(parameter,
- NV097_SET_TRANSFORM_EXECUTION_MODE_MODE));
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_RANGE_MODE,
- GET_MASK(parameter,
- NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE));
-}
-
-DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN)
-{
- pg->enable_vertex_program_write = parameter;
-}
-
-DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD)
-{
- assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
- SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
- NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter);
-}
-
-DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START)
-{
- assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
- SET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
- NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter);
-}
-
-DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD)
-{
- assert(parameter < NV2A_VERTEXSHADER_CONSTANTS);
- SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
- NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter);
-}
-
-
-void pgraph_context_switch(NV2AState *d, unsigned int channel_id)
-{
- bool channel_valid =
- d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID;
- unsigned pgraph_channel_id = GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID);
-
- bool valid = channel_valid && pgraph_channel_id == channel_id;
- if (!valid) {
- SET_MASK(d->pgraph.regs[NV_PGRAPH_TRAPPED_ADDR],
- NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id);
-
- NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id);
-
- /* TODO: hardware context switching */
- assert(!(d->pgraph.regs[NV_PGRAPH_DEBUG_3]
- & NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH));
-
- d->pgraph.waiting_for_context_switch = true;
- qemu_mutex_unlock(&d->pgraph.lock);
- qemu_mutex_lock_iothread();
- d->pgraph.pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH;
- nv2a_update_irq(d);
- qemu_mutex_unlock_iothread();
- qemu_mutex_lock(&d->pgraph.lock);
- }
-}
-
-static void pgraph_method_log(unsigned int subchannel,
- unsigned int graphics_class,
- unsigned int method, uint32_t parameter)
-{
- const char *method_name = "?";
- static unsigned int last = 0;
- static unsigned int count = 0;
-
- if (last == NV097_ARRAY_ELEMENT16 && method != last) {
- method_name = "NV097_ARRAY_ELEMENT16";
- trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last,
- method_name, count);
- NV2A_GL_DPRINTF(false, "pgraph method (%d) 0x%x %s * %d", subchannel,
- last, method_name, count);
- }
-
- if (method != NV097_ARRAY_ELEMENT16) {
- uint32_t base = method;
- switch (graphics_class) {
- case NV_KELVIN_PRIMITIVE: {
- int idx = METHOD_ADDR_TO_INDEX(method);
- if (idx < ARRAY_SIZE(pgraph_kelvin_methods) &&
- pgraph_kelvin_methods[idx].handler) {
- method_name = pgraph_kelvin_methods[idx].name;
- base = pgraph_kelvin_methods[idx].base;
- }
- break;
- }
- default:
- break;
- }
-
- uint32_t offset = method - base;
- trace_nv2a_pgraph_method(subchannel, graphics_class, method,
- method_name, offset, parameter);
- NV2A_GL_DPRINTF(false,
- "pgraph method (%d): 0x%" PRIx32 " -> 0x%04" PRIx32
- " %s[%" PRId32 "] 0x%" PRIx32,
- subchannel, graphics_class, method, method_name, offset,
- parameter);
- }
-
- if (method == last) {
- count++;
- } else {
- count = 0;
- }
- last = method;
-}
-
-static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg,
- unsigned int attr)
-{
- VertexAttribute *attribute = &pg->vertex_attributes[attr];
-
- if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) {
- return;
- }
-
- /* Now upload the previous attribute value */
- attribute->inline_buffer_populated = true;
- for (int i = 0; i < pg->inline_buffer_length; i++) {
- memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value,
- sizeof(float) * 4);
- }
-}
-
-static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg)
-{
- pgraph_check_within_begin_end_block(pg);
- assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH);
-
- for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
- VertexAttribute *attribute = &pg->vertex_attributes[i];
- if (attribute->inline_buffer_populated) {
- memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4],
- attribute->inline_value, sizeof(float) * 4);
- }
- }
-
- pg->inline_buffer_length++;
-}
-
-void nv2a_gl_context_init(void)
-{
- g_nv2a_context_render = glo_context_create();
- g_nv2a_context_display = glo_context_create();
-
- glGetFloatv(GL_SMOOTH_LINE_WIDTH_RANGE, supportedSmoothLineWidthRange);
- glGetFloatv(GL_ALIASED_LINE_WIDTH_RANGE, supportedAliasedLineWidthRange);
-}
-
-void nv2a_set_surface_scale_factor(unsigned int scale)
-{
- NV2AState *d = g_nv2a;
-
- g_config.display.quality.surface_scale = scale < 1 ? 1 : scale;
-
- qemu_mutex_unlock_iothread();
-
- qemu_mutex_lock(&d->pfifo.lock);
- qatomic_set(&d->pfifo.halt, true);
- qemu_mutex_unlock(&d->pfifo.lock);
-
- qemu_mutex_lock(&d->pgraph.lock);
- qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete);
- qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true);
- qemu_mutex_unlock(&d->pgraph.lock);
- qemu_mutex_lock(&d->pfifo.lock);
- pfifo_kick(d);
- qemu_mutex_unlock(&d->pfifo.lock);
- qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete);
-
- qemu_mutex_lock(&d->pgraph.lock);
- qemu_event_reset(&d->pgraph.flush_complete);
- qatomic_set(&d->pgraph.flush_pending, true);
- qemu_mutex_unlock(&d->pgraph.lock);
- qemu_mutex_lock(&d->pfifo.lock);
- pfifo_kick(d);
- qemu_mutex_unlock(&d->pfifo.lock);
- qemu_event_wait(&d->pgraph.flush_complete);
-
- qemu_mutex_lock(&d->pfifo.lock);
- qatomic_set(&d->pfifo.halt, false);
- pfifo_kick(d);
- qemu_mutex_unlock(&d->pfifo.lock);
-
- qemu_mutex_lock_iothread();
-}
-
-unsigned int nv2a_get_surface_scale_factor(void)
-{
- return g_nv2a->pgraph.surface_scale_factor;
-}
-
-static void pgraph_reload_surface_scale_factor(NV2AState *d)
-{
- int factor = g_config.display.quality.surface_scale;
- d->pgraph.surface_scale_factor = factor < 1 ? 1 : factor;
-}
-
-void pgraph_init(NV2AState *d)
-{
- int i;
-
- g_nv2a = d;
- PGRAPHState *pg = &d->pgraph;
-
- pgraph_reload_surface_scale_factor(d);
-
- pg->frame_time = 0;
- pg->draw_time = 0;
- pg->downloads_pending = false;
-
- qemu_mutex_init(&pg->lock);
- qemu_mutex_init(&pg->shader_cache_lock);
- qemu_event_init(&pg->gl_sync_complete, false);
- qemu_event_init(&pg->downloads_complete, false);
- qemu_event_init(&pg->dirty_surfaces_download_complete, false);
- qemu_event_init(&pg->flush_complete, false);
- qemu_event_init(&pg->shader_cache_writeback_complete, false);
-
- /* fire up opengl */
- glo_set_current(g_nv2a_context_render);
-
-#ifdef DEBUG_NV2A_GL
- gl_debug_initialize();
-#endif
-
- /* DXT textures */
- assert(glo_check_extension("GL_EXT_texture_compression_s3tc"));
- /* Internal RGB565 texture format */
- assert(glo_check_extension("GL_ARB_ES2_compatibility"));
-
- GLint max_vertex_attributes;
- glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes);
- assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES);
-
-
- glGenFramebuffers(1, &pg->gl_framebuffer);
- glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer);
-
- pgraph_init_render_to_texture(d);
- QTAILQ_INIT(&pg->surfaces);
-
- QSIMPLEQ_INIT(&pg->report_queue);
-
- //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE );
-
- // Initialize texture cache
- const size_t texture_cache_size = 512;
- lru_init(&pg->texture_cache);
- pg->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode));
- assert(pg->texture_cache_entries != NULL);
- for (i = 0; i < texture_cache_size; i++) {
- lru_add_free(&pg->texture_cache, &pg->texture_cache_entries[i].node);
- }
-
- pg->texture_cache.init_node = texture_cache_entry_init;
- pg->texture_cache.compare_nodes = texture_cache_entry_compare;
- pg->texture_cache.post_node_evict = texture_cache_entry_post_evict;
-
- // Initialize element cache
- const size_t element_cache_size = 50*1024;
- lru_init(&pg->element_cache);
- pg->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode));
- assert(pg->element_cache_entries != NULL);
- GLuint element_cache_buffers[element_cache_size];
- glGenBuffers(element_cache_size, element_cache_buffers);
- for (i = 0; i < element_cache_size; i++) {
- pg->element_cache_entries[i].gl_buffer = element_cache_buffers[i];
- lru_add_free(&pg->element_cache, &pg->element_cache_entries[i].node);
- }
-
- pg->element_cache.init_node = vertex_cache_entry_init;
- pg->element_cache.compare_nodes = vertex_cache_entry_compare;
-
- shader_cache_init(pg);
-
- pg->material_alpha = 0.0f;
- SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE,
- NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH);
- pg->primitive_mode = PRIM_TYPE_INVALID;
-
- for (i=0; ivertex_attributes[i];
- glGenBuffers(1, &attribute->gl_inline_buffer);
- attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH
- * sizeof(float) * 4);
- attribute->inline_buffer_populated = false;
- }
- glGenBuffers(1, &pg->gl_inline_array_buffer);
-
- glGenBuffers(1, &pg->gl_memory_buffer);
- glBindBuffer(GL_ARRAY_BUFFER, pg->gl_memory_buffer);
- glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram),
- NULL, GL_DYNAMIC_DRAW);
-
- glGenVertexArrays(1, &pg->gl_vertex_array);
- glBindVertexArray(pg->gl_vertex_array);
-
- assert(glGetError() == GL_NO_ERROR);
-
- glo_set_current(g_nv2a_context_display);
- pgraph_init_display_renderer(d);
-
- glo_set_current(NULL);
-}
-
-void pgraph_destroy(PGRAPHState *pg)
-{
- qemu_mutex_destroy(&pg->lock);
- qemu_mutex_destroy(&pg->shader_cache_lock);
-
- glo_set_current(g_nv2a_context_render);
-
- // TODO: clear out surfaces
-
- glDeleteFramebuffers(1, &pg->gl_framebuffer);
-
- // Clear out shader cache
- shader_write_cache_reload_list(pg);
- free(pg->shader_cache_entries);
-
- // Clear out texture cache
- lru_flush(&pg->texture_cache);
- free(pg->texture_cache_entries);
-
- glo_set_current(NULL);
- glo_context_destroy(g_nv2a_context_render);
- glo_context_destroy(g_nv2a_context_display);
-}
-
-static void pgraph_shader_update_constants(PGRAPHState *pg,
- ShaderBinding *binding,
- bool binding_changed,
- bool vertex_program,
- bool fixed_function)
-{
- int i, j;
-
- /* update combiner constants */
- for (i = 0; i < 9; i++) {
- uint32_t constant[2];
- if (i == 8) {
- /* final combiner */
- constant[0] = pg->regs[NV_PGRAPH_SPECFOGFACTOR0];
- constant[1] = pg->regs[NV_PGRAPH_SPECFOGFACTOR1];
- } else {
- constant[0] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4];
- constant[1] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4];
- }
-
- for (j = 0; j < 2; j++) {
- GLint loc = binding->psh_constant_loc[i][j];
- if (loc != -1) {
- float value[4];
- value[0] = (float) ((constant[j] >> 16) & 0xFF) / 255.0f;
- value[1] = (float) ((constant[j] >> 8) & 0xFF) / 255.0f;
- value[2] = (float) (constant[j] & 0xFF) / 255.0f;
- value[3] = (float) ((constant[j] >> 24) & 0xFF) / 255.0f;
-
- glUniform4fv(loc, 1, value);
- }
- }
- }
- if (binding->alpha_ref_loc != -1) {
- float alpha_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0;
- glUniform1f(binding->alpha_ref_loc, alpha_ref);
- }
-
-
- /* For each texture stage */
- for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
- GLint loc;
-
- /* Bump luminance only during stages 1 - 3 */
- if (i > 0) {
- loc = binding->bump_mat_loc[i];
- if (loc != -1) {
- float m[4];
- m[0] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)];
- m[1] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)];
- m[2] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)];
- m[3] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)];
- glUniformMatrix2fv(loc, 1, GL_FALSE, m);
- }
- loc = binding->bump_scale_loc[i];
- if (loc != -1) {
- glUniform1f(loc, *(float*)&pg->regs[
- NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4]);
- }
- loc = binding->bump_offset_loc[i];
- if (loc != -1) {
- glUniform1f(loc, *(float*)&pg->regs[
- NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4]);
- }
- }
-
- loc = pg->shader_binding->tex_scale_loc[i];
- if (loc != -1) {
- assert(pg->texture_binding[i] != NULL);
- glUniform1f(loc, (float)pg->texture_binding[i]->scale);
- }
- }
-
- if (binding->fog_color_loc != -1) {
- uint32_t fog_color = pg->regs[NV_PGRAPH_FOGCOLOR];
- glUniform4f(binding->fog_color_loc,
- GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
- GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
- GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
- GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
- }
- if (binding->fog_param_loc[0] != -1) {
- glUniform1f(binding->fog_param_loc[0],
- *(float*)&pg->regs[NV_PGRAPH_FOGPARAM0]);
- }
- if (binding->fog_param_loc[1] != -1) {
- glUniform1f(binding->fog_param_loc[1],
- *(float*)&pg->regs[NV_PGRAPH_FOGPARAM1]);
- }
-
- float zmax;
- switch (pg->surface_shape.zeta_format) {
- case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
- zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
- break;
- case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
- zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
- break;
- default:
- assert(0);
- }
-
- if (fixed_function) {
- /* update lighting constants */
- struct {
- uint32_t* v;
- bool* dirty;
- GLint* locs;
- size_t len;
- } lighting_arrays[] = {
- {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT},
- {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT},
- {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT},
- };
-
- for (i=0; ilight_infinite_half_vector_loc[i];
- if (loc != -1) {
- glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]);
- }
- loc = binding->light_infinite_direction_loc[i];
- if (loc != -1) {
- glUniform3fv(loc, 1, pg->light_infinite_direction[i]);
- }
-
- loc = binding->light_local_position_loc[i];
- if (loc != -1) {
- glUniform3fv(loc, 1, pg->light_local_position[i]);
- }
- loc = binding->light_local_attenuation_loc[i];
- if (loc != -1) {
- glUniform3fv(loc, 1, pg->light_local_attenuation[i]);
- }
- }
-
- /* estimate the viewport by assuming it matches the surface ... */
- unsigned int aa_width = 1, aa_height = 1;
- pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
-
- float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width);
- float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height);
- float m33 = zmax;
- float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
- float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
-
- float invViewport[16] = {
- 1.0/m11, 0, 0, 0,
- 0, 1.0/m22, 0, 0,
- 0, 0, 1.0/m33, 0,
- -1.0+m41/m11, 1.0+m42/m22, 0, 1.0
- };
-
- if (binding->inv_viewport_loc != -1) {
- glUniformMatrix4fv(binding->inv_viewport_loc,
- 1, GL_FALSE, &invViewport[0]);
- }
- }
-
- /* update vertex program constants */
- for (i=0; i<NV2A_VERTEXSHADER_CONSTANTS; i++) {
- if (!pg->vsh_constants_dirty[i] && !binding_changed) continue;
-
- GLint loc = binding->vsh_constant_loc[i];
- if ((loc != -1) &&
- memcmp(binding->vsh_constants[i], pg->vsh_constants[i],
- sizeof(pg->vsh_constants[1]))) {
- glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]);
- memcpy(binding->vsh_constants[i], pg->vsh_constants[i],
- sizeof(pg->vsh_constants[i]));
- }
-
- pg->vsh_constants_dirty[i] = false;
- }
-
- if (binding->surface_size_loc != -1) {
- unsigned int aa_width = 1, aa_height = 1;
- pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
- glUniform2f(binding->surface_size_loc,
- pg->surface_binding_dim.width / aa_width,
- pg->surface_binding_dim.height / aa_height);
- }
-
- if (binding->clip_range_loc != -1) {
- float zclip_min = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMIN] / zmax * 2.0 - 1.0;
- float zclip_max = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMAX] / zmax * 2.0 - 1.0;
- glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max);
- }
-
- /* Clipping regions */
- unsigned int max_gl_width = pg->surface_binding_dim.width;
- unsigned int max_gl_height = pg->surface_binding_dim.height;
- pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
-
- for (i = 0; i < 8; i++) {
- uint32_t x = pg->regs[NV_PGRAPH_WINDOWCLIPX0 + i * 4];
- unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
- unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
- uint32_t y = pg->regs[NV_PGRAPH_WINDOWCLIPY0 + i * 4];
- unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
- unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
- pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
- pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
-
- pgraph_apply_scaling_factor(pg, &x_min, &y_min);
- pgraph_apply_scaling_factor(pg, &x_max, &y_max);
-
- /* Translate for the GL viewport origin */
- int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0);
- int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height);
-
- glUniform4i(pg->shader_binding->clip_region_loc[i],
- x_min, y_min_xlat, x_max, y_max_xlat);
- }
-
- if (binding->material_alpha_loc != -1) {
- glUniform1f(binding->material_alpha_loc, pg->material_alpha);
- }
-}
-
-static bool pgraph_bind_shaders_test_dirty(PGRAPHState *pg)
-{
- #define CR_1(reg) CR_x(reg, 1)
- #define CR_4(reg) CR_x(reg, 4)
- #define CR_8(reg) CR_x(reg, 8)
- #define CF(src, name) CF_x(typeof(src), (&src), name, 1)
- #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src))
- #define CNAME(name) reg_check__ ## name
- #define CX_x__define(type, name, x) static type CNAME(name)[x];
- #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x)
- #define CF_x__define(type, src, name, x) CX_x__define(type, name, x)
- #define CR_x__check(reg, x) \
- for (int i = 0; i < x; i++) { if (pg->regs[reg+i*4] != CNAME(reg)[i]) goto dirty; }
- #define CF_x__check(type, src, name, x) \
- for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; }
- #define CR_x__update(reg, x) \
- for (int i = 0; i < x; i++) { CNAME(reg)[i] = pg->regs[reg+i*4]; }
- #define CF_x__update(type, src, name, x) \
- for (int i = 0; i < x; i++) { CNAME(name)[i] = src[i]; }
-
- #define DIRTY_REGS \
- CR_1(NV_PGRAPH_COMBINECTL) \
- CR_1(NV_PGRAPH_SHADERCTL) \
- CR_1(NV_PGRAPH_SHADOWCTL) \
- CR_1(NV_PGRAPH_COMBINESPECFOG0) \
- CR_1(NV_PGRAPH_COMBINESPECFOG1) \
- CR_1(NV_PGRAPH_CONTROL_0) \
- CR_1(NV_PGRAPH_CONTROL_3) \
- CR_1(NV_PGRAPH_CSV0_C) \
- CR_1(NV_PGRAPH_CSV0_D) \
- CR_1(NV_PGRAPH_CSV1_A) \
- CR_1(NV_PGRAPH_CSV1_B) \
- CR_1(NV_PGRAPH_SETUPRASTER) \
- CR_1(NV_PGRAPH_SHADERPROG) \
- CR_8(NV_PGRAPH_COMBINECOLORI0) \
- CR_8(NV_PGRAPH_COMBINECOLORO0) \
- CR_8(NV_PGRAPH_COMBINEALPHAI0) \
- CR_8(NV_PGRAPH_COMBINEALPHAO0) \
- CR_8(NV_PGRAPH_COMBINEFACTOR0) \
- CR_8(NV_PGRAPH_COMBINEFACTOR1) \
- CR_1(NV_PGRAPH_SHADERCLIPMODE) \
- CR_4(NV_PGRAPH_TEXCTL0_0) \
- CR_4(NV_PGRAPH_TEXFMT0) \
- CR_4(NV_PGRAPH_TEXFILTER0) \
- CR_8(NV_PGRAPH_WINDOWCLIPX0) \
- CR_8(NV_PGRAPH_WINDOWCLIPY0) \
- CF(pg->primitive_mode, primitive_mode) \
- CF(pg->surface_scale_factor, surface_scale_factor) \
- CF(pg->compressed_attrs, compressed_attrs) \
- CFA(pg->texture_matrix_enable, texture_matrix_enable)
-
- #define CR_x(reg, x) CR_x__define(reg, x)
- #define CF_x(type, src, name, x) CF_x__define(type, src, name, x)
- DIRTY_REGS
- #undef CR_x
- #undef CF_x
-
- #define CR_x(reg, x) CR_x__check(reg, x)
- #define CF_x(type, src, name, x) CF_x__check(type, src, name, x)
- DIRTY_REGS
- #undef CR_x
- #undef CF_x
- return false;
-
-dirty:
- #define CR_x(reg, x) CR_x__update(reg, x)
- #define CF_x(type, src, name, x) CF_x__update(type, src, name, x)
- DIRTY_REGS
- #undef CR_x
- #undef CF_x
- return true;
-}
-
-static void pgraph_bind_shaders(PGRAPHState *pg)
-{
- int i, j;
-
- bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
- NV_PGRAPH_CSV0_D_MODE) == 2;
-
- bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
- NV_PGRAPH_CSV0_D_MODE) == 0;
-
- int program_start = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
- NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START);
-
- NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__,
- vertex_program ? "yes" : "no",
- fixed_function ? "yes" : "no");
-
- bool binding_changed = false;
- if (!pgraph_bind_shaders_test_dirty(pg) && !pg->program_data_dirty) {
- nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
- goto update_constants;
- }
-
- pg->program_data_dirty = false;
-
- ShaderBinding* old_binding = pg->shader_binding;
-
- ShaderState state;
- memset(&state, 0, sizeof(ShaderState));
-
- state.surface_scale_factor = pg->surface_scale_factor;
-
- state.compressed_attrs = pg->compressed_attrs;
-
- /* register combiner stuff */
- state.psh.window_clip_exclusive = pg->regs[NV_PGRAPH_SETUPRASTER]
- & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE;
- state.psh.combiner_control = pg->regs[NV_PGRAPH_COMBINECTL];
- state.psh.shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG];
- state.psh.other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL];
- state.psh.final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0];
- state.psh.final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1];
-
- state.psh.alpha_test = pg->regs[NV_PGRAPH_CONTROL_0]
- & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
- state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
- NV_PGRAPH_CONTROL_0_ALPHAFUNC);
-
- state.psh.point_sprite = pg->regs[NV_PGRAPH_SETUPRASTER] &
- NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE;
-
- state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK(
- pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC);
-
- state.fixed_function = fixed_function;
-
- /* fixed function stuff */
- if (fixed_function) {
- state.skinning = (enum VshSkinning)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
- NV_PGRAPH_CSV0_D_SKIN);
- state.lighting = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
- NV_PGRAPH_CSV0_C_LIGHTING);
- state.normalization = pg->regs[NV_PGRAPH_CSV0_C]
- & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE;
-
- /* color material */
- state.emission_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION);
- state.ambient_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT);
- state.diffuse_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE);
- state.specular_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR);
- }
-
- /* vertex program stuff */
- state.vertex_program = vertex_program,
- state.z_perspective = pg->regs[NV_PGRAPH_CONTROL_0]
- & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;
-
- state.point_params_enable = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
- NV_PGRAPH_CSV0_D_POINTPARAMSENABLE);
- state.point_size =
- GET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V) / 8.0f;
- if (state.point_params_enable) {
- for (int i = 0; i < 8; i++) {
- state.point_params[i] = pg->point_params[i];
- }
- }
-
- /* geometry shader stuff */
- state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode;
- state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_FRONTFACEMODE);
- state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_BACKFACEMODE);
-
- state.smooth_shading = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3],
- NV_PGRAPH_CONTROL_3_SHADEMODE) ==
- NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;
- state.psh.smooth_shading = state.smooth_shading;
-
- state.program_length = 0;
-
- if (vertex_program) {
- // copy in vertex program tokens
- for (i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; i++) {
- uint32_t *cur_token = (uint32_t*)&pg->program_data[i];
- memcpy(&state.program_data[state.program_length],
- cur_token,
- VSH_TOKEN_SIZE * sizeof(uint32_t));
- state.program_length++;
-
- if (vsh_get_field(cur_token, FLD_FINAL)) {
- break;
- }
- }
- }
-
- /* Texgen */
- for (i = 0; i < 4; i++) {
- unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B;
- for (j = 0; j < 4; j++) {
- unsigned int masks[] = {
- (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S,
- (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T,
- (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R,
- (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q
- };
- state.texgen[i][j] = (enum VshTexgen)GET_MASK(pg->regs[reg], masks[j]);
- }
- }
-
- /* Fog */
- state.fog_enable = pg->regs[NV_PGRAPH_CONTROL_3]
- & NV_PGRAPH_CONTROL_3_FOGENABLE;
- if (state.fog_enable) {
- /*FIXME: Use CSV0_D? */
- state.fog_mode = (enum VshFogMode)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3],
- NV_PGRAPH_CONTROL_3_FOG_MODE);
- state.foggen = (enum VshFoggen)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
- NV_PGRAPH_CSV0_D_FOGGENMODE);
- } else {
- /* FIXME: Do we still pass the fogmode? */
- state.fog_mode = (enum VshFogMode)0;
- state.foggen = (enum VshFoggen)0;
- }
-
- /* Texture matrices */
- for (i = 0; i < 4; i++) {
- state.texture_matrix_enable[i] = pg->texture_matrix_enable[i];
- }
-
- /* Lighting */
- if (state.lighting) {
- for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
- state.light[i] = (enum VshLight)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
- NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2));
- }
- }
-
- /* Copy content of enabled combiner stages */
- int num_stages = pg->regs[NV_PGRAPH_COMBINECTL] & 0xFF;
- for (i = 0; i < num_stages; i++) {
- state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4];
- state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4];
- state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4];
- state.psh.alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4];
- //constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4];
- //constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4];
- }
-
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 4; j++) {
- state.psh.compare_mode[i][j] =
- (pg->regs[NV_PGRAPH_SHADERCLIPMODE] >> (4 * i + j)) & 1;
- }
-
- uint32_t ctl_0 = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4];
- bool enabled = pgraph_is_texture_stage_active(pg, i) &&
- (ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE);
- if (!enabled) {
- continue;
- }
-
- state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN;
-
- uint32_t tex_fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4];
- unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR);
- ColorFormatInfo f = kelvin_color_format_map[color_format];
- state.psh.rect_tex[i] = f.linear;
-
- uint32_t border_source = GET_MASK(tex_fmt,
- NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
- bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
- state.psh.border_logical_size[i][0] = 0.0f;
- state.psh.border_logical_size[i][1] = 0.0f;
- state.psh.border_logical_size[i][2] = 0.0f;
- if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) {
- if (!f.linear && !cubemap) {
- // The actual texture will be (at least) double the reported
- // size and shifted by a 4 texel border but texture coordinates
- // will still be relative to the reported size.
- unsigned int reported_width =
- 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
- unsigned int reported_height =
- 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
- unsigned int reported_depth =
- 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
-
- state.psh.border_logical_size[i][0] = reported_width;
- state.psh.border_logical_size[i][1] = reported_height;
- state.psh.border_logical_size[i][2] = reported_depth;
-
- if (reported_width < 8) {
- state.psh.border_inv_real_size[i][0] = 0.0625f;
- } else {
- state.psh.border_inv_real_size[i][0] =
- 1.0f / (reported_width * 2.0f);
- }
- if (reported_height < 8) {
- state.psh.border_inv_real_size[i][1] = 0.0625f;
- } else {
- state.psh.border_inv_real_size[i][1] =
- 1.0f / (reported_height * 2.0f);
- }
- if (reported_depth < 8) {
- state.psh.border_inv_real_size[i][2] = 0.0625f;
- } else {
- state.psh.border_inv_real_size[i][2] =
- 1.0f / (reported_depth * 2.0f);
- }
- } else {
- NV2A_UNIMPLEMENTED("Border source texture with linear %d cubemap %d",
- f.linear, cubemap);
- }
- }
-
- /* Keep track of whether texture data has been loaded as signed
- * normalized integers or not. This dictates whether or not we will need
- * to re-map in fragment shader for certain texture modes (e.g.
- * bumpenvmap).
- *
- * FIXME: When signed texture data is loaded as unsigned and remapped in
- * fragment shader, there may be interpolation artifacts. Fix this to
- * support signed textures more appropriately.
- */
- state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM)
- || (f.gl_internal_format == GL_RG8_SNORM);
-
- state.psh.shadow_map[i] = f.depth;
-
- uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4];
- unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
- enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED;
- /* FIXME: We do not distinguish between min and mag when
- * performing convolution. Just use it if specified for min (common AA
- * case).
- */
- if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) {
- int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL);
- assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX ||
- k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3);
- kernel = (enum ConvolutionFilter)k;
- }
-
- state.psh.conv_tex[i] = kernel;
- }
-
- uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState));
- qemu_mutex_lock(&pg->shader_cache_lock);
- LruNode *node = lru_lookup(&pg->shader_cache, shader_state_hash, &state);
- ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
- if (snode->binding || shader_load_from_memory(snode)) {
- pg->shader_binding = snode->binding;
- } else {
- pg->shader_binding = generate_shaders(&state);
- nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
-
- /* cache it */
- snode->binding = pg->shader_binding;
- if (g_config.perf.cache_shaders) {
- shader_cache_to_disk(snode);
- }
- }
-
- qemu_mutex_unlock(&pg->shader_cache_lock);
-
- binding_changed = (pg->shader_binding != old_binding);
- if (binding_changed) {
- nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);
- glUseProgram(pg->shader_binding->gl_program);
- }
-
-update_constants:
- pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed,
- vertex_program, fixed_function);
-
- NV2A_GL_DGROUP_END();
-}
-
-static bool pgraph_framebuffer_dirty(PGRAPHState *pg)
-{
- bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape,
- sizeof(SurfaceShape)) != 0;
- if (!shape_changed || (!pg->surface_shape.color_format
- && !pg->surface_shape.zeta_format)) {
- return false;
- }
- return true;
-}
-
-static bool pgraph_color_write_enabled(PGRAPHState *pg)
-{
- return pg->regs[NV_PGRAPH_CONTROL_0] & (
- NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE
- | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE
- | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE
- | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE);
-}
-
-static bool pgraph_zeta_write_enabled(PGRAPHState *pg)
-{
- return pg->regs[NV_PGRAPH_CONTROL_0] & (
- NV_PGRAPH_CONTROL_0_ZWRITEENABLE
- | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE);
-}
-
-static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta)
-{
- NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n",
- color, zeta,
- pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg));
- /* FIXME: Does this apply to CLEARs too? */
- color = color && pgraph_color_write_enabled(pg);
- zeta = zeta && pgraph_zeta_write_enabled(pg);
- pg->surface_color.draw_dirty |= color;
- pg->surface_zeta.draw_dirty |= zeta;
-
- if (pg->color_binding) {
- pg->color_binding->draw_dirty |= color;
- pg->color_binding->frame_time = pg->frame_time;
- pg->color_binding->cleared = false;
-
- }
-
- if (pg->zeta_binding) {
- pg->zeta_binding->draw_dirty |= zeta;
- pg->zeta_binding->frame_time = pg->frame_time;
- pg->zeta_binding->cleared = false;
-
- }
-}
-
-static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src)
-{
- GLint status;
- char err_buf[512];
-
- // Compile vertex shader
- GLuint vs = glCreateShader(GL_VERTEX_SHADER);
- glShaderSource(vs, 1, &vs_src, NULL);
- glCompileShader(vs);
- glGetShaderiv(vs, GL_COMPILE_STATUS, &status);
- if (status != GL_TRUE) {
- glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf);
- err_buf[sizeof(err_buf)-1] = '\0';
- fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf);
- exit(1);
- }
-
- // Compile fragment shader
- GLuint fs = glCreateShader(GL_FRAGMENT_SHADER);
- glShaderSource(fs, 1, &fs_src, NULL);
- glCompileShader(fs);
- glGetShaderiv(fs, GL_COMPILE_STATUS, &status);
- if (status != GL_TRUE) {
- glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf);
- err_buf[sizeof(err_buf)-1] = '\0';
- fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf);
- exit(1);
- }
-
- // Link vertex and fragment shaders
- GLuint prog = glCreateProgram();
- glAttachShader(prog, vs);
- glAttachShader(prog, fs);
- glLinkProgram(prog);
- glUseProgram(prog);
-
- // Flag shaders for deletion (will still be retained for lifetime of prog)
- glDeleteShader(vs);
- glDeleteShader(fs);
-
- return prog;
-}
-
-static void pgraph_init_render_to_texture(NV2AState *d)
-{
- struct PGRAPHState *pg = &d->pgraph;
- const char *vs =
- "#version 330\n"
- "void main()\n"
- "{\n"
- " float x = -1.0 + float((gl_VertexID & 1) << 2);\n"
- " float y = -1.0 + float((gl_VertexID & 2) << 1);\n"
- " gl_Position = vec4(x, y, 0, 1);\n"
- "}\n";
- const char *fs =
- "#version 330\n"
- "uniform sampler2D tex;\n"
- "uniform vec2 surface_size;\n"
- "layout(location = 0) out vec4 out_Color;\n"
- "void main()\n"
- "{\n"
- " vec2 texCoord;\n"
- " texCoord.x = gl_FragCoord.x;\n"
- " texCoord.y = (surface_size.y - gl_FragCoord.y)\n"
- " + (textureSize(tex,0).y - surface_size.y);\n"
- " texCoord /= textureSize(tex,0).xy;\n"
- " out_Color.rgba = texture(tex, texCoord);\n"
- "}\n";
-
- pg->s2t_rndr.prog = pgraph_compile_shader(vs, fs);
- pg->s2t_rndr.tex_loc = glGetUniformLocation(pg->s2t_rndr.prog, "tex");
- pg->s2t_rndr.surface_size_loc = glGetUniformLocation(pg->s2t_rndr.prog,
- "surface_size");
-
- glGenVertexArrays(1, &pg->s2t_rndr.vao);
- glBindVertexArray(pg->s2t_rndr.vao);
- glGenBuffers(1, &pg->s2t_rndr.vbo);
- glBindBuffer(GL_ARRAY_BUFFER, pg->s2t_rndr.vbo);
- glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
- glGenFramebuffers(1, &pg->s2t_rndr.fbo);
-}
-
-static bool pgraph_surface_to_texture_can_fastpath(SurfaceBinding *surface,
- TextureShape *shape)
-{
- // FIXME: Better checks/handling on formats and surface-texture compat
-
- int surface_fmt = surface->shape.color_format;
- int texture_fmt = shape->color_format;
-
- if (!surface->color) {
- // FIXME: Support zeta to color
- return false;
- }
-
- switch (surface_fmt) {
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true;
- default: break;
- }
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true;
- default: break;
- }
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true;
- default: break;
- }
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true;
- default: break;
- }
- break;
- default: break;
- }
-
- trace_nv2a_pgraph_surface_texture_compat_failed(
- surface_fmt, texture_fmt);
- return false;
-}
-
-
-static void pgraph_render_surface_to(NV2AState *d, SurfaceBinding *surface,
- int texture_unit, GLuint gl_target,
- GLuint gl_texture, unsigned int width,
- unsigned int height)
-{
- glActiveTexture(GL_TEXTURE0 + texture_unit);
- glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.s2t_rndr.fbo);
-
- GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 };
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target,
- gl_texture, 0);
- glDrawBuffers(1, draw_buffers);
- assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
- assert(glGetError() == GL_NO_ERROR);
-
- float color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
- glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
- glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color);
-
- glBindVertexArray(d->pgraph.s2t_rndr.vao);
- glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.s2t_rndr.vbo);
- glUseProgram(d->pgraph.s2t_rndr.prog);
- glProgramUniform1i(d->pgraph.s2t_rndr.prog, d->pgraph.s2t_rndr.tex_loc,
- texture_unit);
- glProgramUniform2f(d->pgraph.s2t_rndr.prog,
- d->pgraph.s2t_rndr.surface_size_loc, width, height);
-
- glViewport(0, 0, width, height);
- glColorMask(true, true, true, true);
- glDisable(GL_DITHER);
- glDisable(GL_SCISSOR_TEST);
- glDisable(GL_BLEND);
- glDisable(GL_STENCIL_TEST);
- glDisable(GL_CULL_FACE);
- glDisable(GL_DEPTH_TEST);
- glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
- glClearColor(0.0f, 0.0f, 1.0f, 1.0f);
- glClear(GL_COLOR_BUFFER_BIT);
- glDrawArrays(GL_TRIANGLES, 0, 3);
-
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0,
- 0);
- glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.gl_framebuffer);
- glBindVertexArray(d->pgraph.gl_vertex_array);
- glBindTexture(gl_target, gl_texture);
- glUseProgram(
- d->pgraph.shader_binding ? d->pgraph.shader_binding->gl_program : 0);
-}
-
-static void pgraph_render_surface_to_texture_slow(
- NV2AState *d, SurfaceBinding *surface, TextureBinding *texture,
- TextureShape *texture_shape, int texture_unit)
-{
- PGRAPHState *pg = &d->pgraph;
-
- const ColorFormatInfo *f = &kelvin_color_format_map[texture_shape->color_format];
- assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map));
- nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK);
-
- glActiveTexture(GL_TEXTURE0 + texture_unit);
- glBindTexture(texture->gl_target, texture->gl_texture);
-
- unsigned int width = surface->width,
- height = surface->height;
- pgraph_apply_scaling_factor(pg, &width, &height);
-
- size_t bufsize = width * height * surface->fmt.bytes_per_pixel;
-
- uint8_t *buf = g_malloc(bufsize);
- pgraph_download_surface_data_to_buffer(d, surface, false, true, false, buf);
-
- width = texture_shape->width;
- height = texture_shape->height;
- pgraph_apply_scaling_factor(pg, &width, &height);
-
- glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0,
- f->gl_format, f->gl_type, buf);
- g_free(buf);
- glBindTexture(texture->gl_target, texture->gl_texture);
-}
-
-/* Note: This function is intended to be called before PGRAPH configures GL
- * state for rendering; it will configure GL state here but only restore a
- * couple of items.
- */
-static void pgraph_render_surface_to_texture(NV2AState *d,
- SurfaceBinding *surface,
- TextureBinding *texture,
- TextureShape *texture_shape,
- int texture_unit)
-{
- PGRAPHState *pg = &d->pgraph;
-
- const ColorFormatInfo *f =
- &kelvin_color_format_map[texture_shape->color_format];
- assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map));
-
- nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX);
-
- if (!pgraph_surface_to_texture_can_fastpath(surface, texture_shape)) {
- pgraph_render_surface_to_texture_slow(d, surface, texture,
- texture_shape, texture_unit);
- return;
- }
-
-
- unsigned int width = texture_shape->width,
- height = texture_shape->height;
- pgraph_apply_scaling_factor(pg, &width, &height);
-
- glActiveTexture(GL_TEXTURE0 + texture_unit);
- glBindTexture(texture->gl_target, texture->gl_texture);
- glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0);
- glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0,
- f->gl_format, f->gl_type, NULL);
- glBindTexture(texture->gl_target, 0);
- pgraph_render_surface_to(d, surface, texture_unit, texture->gl_target,
- texture->gl_texture, width, height);
- glBindTexture(texture->gl_target, texture->gl_texture);
- glUseProgram(
- d->pgraph.shader_binding ? d->pgraph.shader_binding->gl_program : 0);
-}
-
-static void pgraph_gl_fence(void)
-{
- GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT,
- (GLuint64)(5000000000));
- assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED);
- glDeleteSync(fence);
-}
-
-static void pgraph_init_display_renderer(NV2AState *d)
-{
- struct PGRAPHState *pg = &d->pgraph;
-
- glGenTextures(1, &pg->gl_display_buffer);
- pg->gl_display_buffer_internal_format = 0;
- pg->gl_display_buffer_width = 0;
- pg->gl_display_buffer_height = 0;
- pg->gl_display_buffer_format = 0;
- pg->gl_display_buffer_type = 0;
-
- const char *vs =
- "#version 330\n"
- "void main()\n"
- "{\n"
- " float x = -1.0 + float((gl_VertexID & 1) << 2);\n"
- " float y = -1.0 + float((gl_VertexID & 2) << 1);\n"
- " gl_Position = vec4(x, y, 0, 1);\n"
- "}\n";
- /* FIXME: improve interlace handling, pvideo */
-
- const char *fs =
- "#version 330\n"
- "uniform sampler2D tex;\n"
- "uniform bool pvideo_enable;\n"
- "uniform sampler2D pvideo_tex;\n"
- "uniform vec2 pvideo_in_pos;\n"
- "uniform vec4 pvideo_pos;\n"
- "uniform vec3 pvideo_scale;\n"
- "uniform bool pvideo_color_key_enable;\n"
- "uniform vec4 pvideo_color_key;\n"
- "uniform vec2 display_size;\n"
- "uniform float line_offset;\n"
- "layout(location = 0) out vec4 out_Color;\n"
- "void main()\n"
- "{\n"
- " vec2 texCoord = gl_FragCoord.xy/display_size;\n"
- " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
- " texCoord.y = 1 + rel*(texCoord.y - 1);"
- " out_Color.rgba = texture(tex, texCoord);\n"
- " if (pvideo_enable) {\n"
- " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
- " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
- " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
- " greaterThan(screenCoord, output_region.zw));\n"
- " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
- " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
- " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
- " in_st.y *= -1.0;\n"
- " out_Color.rgba = texture(pvideo_tex, in_st);\n"
- " }\n"
- " }\n"
- "}\n";
-
- pg->disp_rndr.prog = pgraph_compile_shader(vs, fs);
- pg->disp_rndr.tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "tex");
- pg->disp_rndr.pvideo_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_enable");
- pg->disp_rndr.pvideo_tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_tex");
- pg->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_in_pos");
- pg->disp_rndr.pvideo_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_pos");
- pg->disp_rndr.pvideo_scale_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_scale");
- pg->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key_enable");
- pg->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key");
- pg->disp_rndr.display_size_loc = glGetUniformLocation(pg->disp_rndr.prog, "display_size");
- pg->disp_rndr.line_offset_loc = glGetUniformLocation(pg->disp_rndr.prog, "line_offset");
-
- glGenVertexArrays(1, &pg->disp_rndr.vao);
- glBindVertexArray(pg->disp_rndr.vao);
- glGenBuffers(1, &pg->disp_rndr.vbo);
- glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo);
- glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
- glGenFramebuffers(1, &pg->disp_rndr.fbo);
- glGenTextures(1, &pg->disp_rndr.pvideo_tex);
- assert(glGetError() == GL_NO_ERROR);
-}
-
-static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data,
- unsigned int width,
- unsigned int height,
- unsigned int pitch)
-{
- uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4);
- int x, y;
- for (y = 0; y < height; y++) {
- const uint8_t *line = &data[y * pitch];
- const uint32_t row_offset = y * width;
- for (x = 0; x < width; x++) {
- uint8_t *pixel = &converted_data[(row_offset + x) * 4];
- convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]);
- pixel[3] = 255;
- }
- }
- return converted_data;
-}
-
-static inline float pvideo_calculate_scale(unsigned int din_dout,
- unsigned int output_size)
-{
- float calculated_in = din_dout * (output_size - 1);
- calculated_in = floorf(calculated_in / (1 << 20) + 0.5f);
- return (calculated_in + 1.0f) / output_size;
-}
-
-static void pgraph_render_display_pvideo_overlay(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
-
- // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
- // Many games seem to pass this value when initializing or tearing down
- // PVIDEO. On its own, this generally does not result in the overlay being
- // hidden, however there are certain games (e.g., Ultimate Beach Soccer)
- // that use an unknown mechanism to hide the overlay without explicitly
- // stopping it.
- // Since the value seems to be set to 0xFFFFFFFF only in cases where the
- // content is not valid, it is probably good enough to treat it as an
- // implicit stop.
- bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
- && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
- glUniform1ui(d->pgraph.disp_rndr.pvideo_enable_loc, enabled);
- if (!enabled) {
- return;
- }
-
- hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
- hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
- hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
-
- int in_width =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
- int in_height =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
-
- int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
- NV_PVIDEO_POINT_IN_S);
- int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
- NV_PVIDEO_POINT_IN_T);
-
- int in_pitch =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
- int in_color =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
-
- unsigned int out_width =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
- unsigned int out_height =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
-
- float scale_x = 1.0f;
- float scale_y = 1.0f;
- unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
- unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
- if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
- scale_x = pvideo_calculate_scale(ds_dx, out_width);
- }
- if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
- scale_y = pvideo_calculate_scale(dt_dy, out_height);
- }
-
- // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
- // in them being capped to the output size, content is not scaled. This is
- // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
- // during initialization or teardown.
- if (in_width > out_width) {
- in_width = floorf((float)out_width * scale_x + 0.5f);
- }
- if (in_height > out_height) {
- in_height = floorf((float)out_height * scale_y + 0.5f);
- }
-
- /* TODO: support other color formats */
- assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
-
- unsigned int out_x =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
- unsigned int out_y =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
-
- unsigned int color_key_enabled =
- GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
- glUniform1ui(d->pgraph.disp_rndr.pvideo_color_key_enable_loc,
- color_key_enabled);
-
- // TODO: Verify that masking off the top byte is correct.
- // SeaBlade sets a color key of 0x80000000 but the texture passed into the
- // shader is cleared to 0 alpha.
- unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
- glUniform4f(d->pgraph.disp_rndr.pvideo_color_key_loc,
- GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
- GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
- GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
- GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
-
- assert(offset + in_pitch * in_height <= limit);
- hwaddr end = base + offset + in_pitch * in_height;
- assert(end <= memory_region_size(d->vram));
-
- pgraph_apply_scaling_factor(pg, &out_x, &out_y);
- pgraph_apply_scaling_factor(pg, &out_width, &out_height);
-
- // Translate for the GL viewport origin.
- out_y = MAX(pg->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
-
- glActiveTexture(GL_TEXTURE0 + 1);
- glBindTexture(GL_TEXTURE_2D, g_nv2a->pgraph.disp_rndr.pvideo_tex);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
- d->vram_ptr + base + offset, in_width, in_height, in_pitch);
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
- GL_UNSIGNED_BYTE, tex_rgba);
- g_free(tex_rgba);
- glUniform1i(d->pgraph.disp_rndr.pvideo_tex_loc, 1);
- glUniform2f(d->pgraph.disp_rndr.pvideo_in_pos_loc, in_s, in_t);
- glUniform4f(d->pgraph.disp_rndr.pvideo_pos_loc,
- out_x, out_y, out_width, out_height);
- glUniform3f(d->pgraph.disp_rndr.pvideo_scale_loc,
- scale_x, scale_y, 1.0f / pg->surface_scale_factor);
-}
-
-static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface)
-{
- struct PGRAPHState *pg = &d->pgraph;
-
- unsigned int width, height;
- uint32_t pline_offset, pstart_addr, pline_compare;
- d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
- d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
- int line_offset = surface->pitch / pline_offset;
-
- /* Adjust viewport height for interlaced mode, used only in 1080i */
- if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
- height *= 2;
- }
-
- pgraph_apply_scaling_factor(pg, &width, &height);
-
- glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.disp_rndr.fbo);
- glActiveTexture(GL_TEXTURE0);
- glBindTexture(GL_TEXTURE_2D, pg->gl_display_buffer);
- bool recreate = (
- surface->fmt.gl_internal_format != pg->gl_display_buffer_internal_format
- || width != pg->gl_display_buffer_width
- || height != pg->gl_display_buffer_height
- || surface->fmt.gl_format != pg->gl_display_buffer_format
- || surface->fmt.gl_type != pg->gl_display_buffer_type
- );
-
- if (recreate) {
- /* XXX: There's apparently a bug in some Intel OpenGL drivers for
- * Windows that will leak this texture when its orphaned after use in
- * another context, apparently regardless of which thread it's created
- * or released on.
- *
- * Driver: 27.20.100.8729 9/11/2020 W10 x64
- * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423
- */
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- pg->gl_display_buffer_internal_format = surface->fmt.gl_internal_format;
- pg->gl_display_buffer_width = width;
- pg->gl_display_buffer_height = height;
- pg->gl_display_buffer_format = surface->fmt.gl_format;
- pg->gl_display_buffer_type = surface->fmt.gl_type;
- glTexImage2D(GL_TEXTURE_2D, 0,
- pg->gl_display_buffer_internal_format,
- pg->gl_display_buffer_width,
- pg->gl_display_buffer_height,
- 0,
- pg->gl_display_buffer_format,
- pg->gl_display_buffer_type,
- NULL);
- }
-
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
- GL_TEXTURE_2D, pg->gl_display_buffer, 0);
- GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
- glDrawBuffers(1, DrawBuffers);
- assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
-
- glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
- glBindVertexArray(pg->disp_rndr.vao);
- glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo);
- glUseProgram(pg->disp_rndr.prog);
- glProgramUniform1i(pg->disp_rndr.prog, pg->disp_rndr.tex_loc, 0);
- glUniform2f(d->pgraph.disp_rndr.display_size_loc, width, height);
- glUniform1f(d->pgraph.disp_rndr.line_offset_loc, line_offset);
- pgraph_render_display_pvideo_overlay(d);
-
- glViewport(0, 0, width, height);
- glColorMask(true, true, true, true);
- glDisable(GL_SCISSOR_TEST);
- glDisable(GL_BLEND);
- glDisable(GL_STENCIL_TEST);
- glDisable(GL_CULL_FACE);
- glDisable(GL_DEPTH_TEST);
- glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
- glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
- glClear(GL_COLOR_BUFFER_BIT);
- glDrawArrays(GL_TRIANGLES, 0, 3);
-
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
- GL_TEXTURE_2D, 0, 0);
-}
-
-void pgraph_gl_sync(NV2AState *d)
-{
- uint32_t pline_offset, pstart_addr, pline_compare;
- d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
- SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset);
- if (surface == NULL) {
- qemu_event_set(&d->pgraph.gl_sync_complete);
- return;
- }
-
- /* FIXME: Sanity check surface dimensions */
-
- /* Wait for queued commands to complete */
- pgraph_upload_surface_data(d, surface, !tcg_enabled());
- pgraph_gl_fence();
- assert(glGetError() == GL_NO_ERROR);
-
- /* Render framebuffer in display context */
- glo_set_current(g_nv2a_context_display);
- pgraph_render_display(d, surface);
- pgraph_gl_fence();
- assert(glGetError() == GL_NO_ERROR);
-
- /* Switch back to original context */
- glo_set_current(g_nv2a_context_render);
-
- qatomic_set(&d->pgraph.gl_sync_pending, false);
- qemu_event_set(&d->pgraph.gl_sync_complete);
-}
-
-const uint8_t *nv2a_get_dac_palette(void)
-{
- return g_nv2a->puserdac.palette;
-}
-
-int nv2a_get_screen_off(void)
-{
- return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF;
-}
-
-int nv2a_get_framebuffer_surface(void)
-{
- NV2AState *d = g_nv2a;
- PGRAPHState *pg = &d->pgraph;
-
- qemu_mutex_lock(&d->pfifo.lock);
- // FIXME: Possible race condition with pgraph, consider lock
- uint32_t pline_offset, pstart_addr, pline_compare;
- d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
- SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset);
- if (surface == NULL || !surface->color) {
- qemu_mutex_unlock(&d->pfifo.lock);
- return 0;
- }
-
- assert(surface->color);
- assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
- assert(surface->fmt.gl_format == GL_RGBA
- || surface->fmt.gl_format == GL_RGB
- || surface->fmt.gl_format == GL_BGR
- || surface->fmt.gl_format == GL_BGRA
- );
-
- surface->frame_time = pg->frame_time;
- qemu_event_reset(&d->pgraph.gl_sync_complete);
- qatomic_set(&pg->gl_sync_pending, true);
- pfifo_kick(d);
- qemu_mutex_unlock(&d->pfifo.lock);
- qemu_event_wait(&d->pgraph.gl_sync_complete);
-
- return pg->gl_display_buffer;
-}
-
-static bool pgraph_check_surface_to_texture_compatibility(
- const SurfaceBinding *surface,
- const TextureShape *shape)
-{
- // FIXME: Better checks/handling on formats and surface-texture compat
-
- if ((!surface->swizzle && surface->pitch != shape->pitch) ||
- surface->width != shape->width ||
- surface->height != shape->height) {
- return false;
- }
-
- int surface_fmt = surface->shape.color_format;
- int texture_fmt = shape->color_format;
-
- if (!surface->color) {
- // FIXME: Support zeta to color
- return false;
- }
-
- if (shape->cubemap) {
- // FIXME: Support rendering surface to cubemap face
- return false;
- }
-
- if (shape->levels > 1) {
- // FIXME: Support rendering surface to mip levels
- return false;
- }
-
- switch (surface_fmt) {
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true;
- default: break;
- }
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true;
- default: break;
- }
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true;
- default: break;
- }
- break;
- case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) {
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true;
- case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true;
- default: break;
- }
- break;
- default:
- break;
- }
-
- trace_nv2a_pgraph_surface_texture_compat_failed(
- surface_fmt, texture_fmt);
- return false;
-}
-
-static void pgraph_wait_for_surface_download(SurfaceBinding *e)
-{
- NV2AState *d = g_nv2a;
-
- if (qatomic_read(&e->draw_dirty)) {
- qemu_mutex_lock(&d->pfifo.lock);
- qemu_event_reset(&d->pgraph.downloads_complete);
- qatomic_set(&e->download_pending, true);
- qatomic_set(&d->pgraph.downloads_pending, true);
- pfifo_kick(d);
- qemu_mutex_unlock(&d->pfifo.lock);
- qemu_event_wait(&d->pgraph.downloads_complete);
- }
-}
-
-static void pgraph_surface_access_callback(
- void *opaque,
- MemoryRegion *mr,
- hwaddr addr,
- hwaddr len,
- bool write)
-{
- SurfaceBinding *e = opaque;
- assert(addr >= e->vram_addr);
- hwaddr offset = addr - e->vram_addr;
- assert(offset < e->size);
-
- if (qatomic_read(&e->draw_dirty)) {
- trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
- pgraph_wait_for_surface_download(e);
- }
-
- if (write && !qatomic_read(&e->upload_pending)) {
- trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
- qatomic_set(&e->upload_pending, true);
- }
-}
-
-static SurfaceBinding *pgraph_surface_put(NV2AState *d,
- hwaddr addr,
- SurfaceBinding *surface_in)
-{
- assert(pgraph_surface_get(d, addr) == NULL);
-
- SurfaceBinding *surface, *next;
- uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1;
- QTAILQ_FOREACH_SAFE(surface, &d->pgraph.surfaces, entry, next) {
- uintptr_t s_end = surface->vram_addr + surface->size - 1;
- bool overlapping = !(surface->vram_addr > e_end
- || surface_in->vram_addr > s_end);
- if (overlapping) {
- trace_nv2a_pgraph_surface_evict_overlapping(
- surface->vram_addr, surface->width, surface->height,
- surface->pitch);
- pgraph_download_surface_data_if_dirty(d, surface);
- pgraph_surface_invalidate(d, surface);
- }
- }
-
- SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding));
- assert(surface_out != NULL);
- *surface_out = *surface_in;
-
- if (tcg_enabled()) {
- qemu_mutex_unlock(&d->pgraph.lock);
- qemu_mutex_lock_iothread();
- mem_access_callback_insert(qemu_get_cpu(0),
- d->vram, surface_out->vram_addr, surface_out->size,
- &surface_out->access_cb, &pgraph_surface_access_callback,
- surface_out);
- qemu_mutex_unlock_iothread();
- qemu_mutex_lock(&d->pgraph.lock);
- }
-
- QTAILQ_INSERT_TAIL(&d->pgraph.surfaces, surface_out, entry);
-
- return surface_out;
-}
-
-static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr)
-{
- SurfaceBinding *surface;
- QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) {
- if (surface->vram_addr == addr) {
- return surface;
- }
- }
-
- return NULL;
-}
-
-static SurfaceBinding *pgraph_surface_get_within(NV2AState *d, hwaddr addr)
-{
- SurfaceBinding *surface;
- QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) {
- if (addr >= surface->vram_addr &&
- addr < (surface->vram_addr + surface->size)) {
- return surface;
- }
- }
-
- return NULL;
-}
-
-static void pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *surface)
-{
- trace_nv2a_pgraph_surface_invalidated(surface->vram_addr);
-
- if (surface == d->pgraph.color_binding) {
- assert(d->pgraph.surface_color.buffer_dirty);
- pgraph_unbind_surface(d, true);
- }
- if (surface == d->pgraph.zeta_binding) {
- assert(d->pgraph.surface_zeta.buffer_dirty);
- pgraph_unbind_surface(d, false);
- }
-
- if (tcg_enabled()) {
- qemu_mutex_unlock(&d->pgraph.lock);
- qemu_mutex_lock_iothread();
- mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
- qemu_mutex_unlock_iothread();
- qemu_mutex_lock(&d->pgraph.lock);
- }
-
- glDeleteTextures(1, &surface->gl_buffer);
-
- QTAILQ_REMOVE(&d->pgraph.surfaces, surface, entry);
- g_free(surface);
-}
-
-static void pgraph_surface_evict_old(NV2AState *d)
-{
- const int surface_age_limit = 5;
-
- SurfaceBinding *s, *next;
- QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) {
- int last_used = d->pgraph.frame_time - s->frame_time;
- if (last_used >= surface_age_limit) {
- trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr);
- pgraph_download_surface_data_if_dirty(d, s);
- pgraph_surface_invalidate(d, s);
- }
- }
-}
-
-static bool pgraph_check_surface_compatibility(SurfaceBinding *s1,
- SurfaceBinding *s2, bool strict)
-{
- bool format_compatible =
- (s1->color == s2->color) &&
- (s1->fmt.gl_attachment == s2->fmt.gl_attachment) &&
- (s1->fmt.gl_internal_format == s2->fmt.gl_internal_format) &&
- (s1->pitch == s2->pitch) &&
- (s1->shape.clip_x <= s2->shape.clip_x) &&
- (s1->shape.clip_y <= s2->shape.clip_y);
- if (!format_compatible) {
- return false;
- }
-
- if (!strict) {
- return (s1->width >= s2->width) && (s1->height >= s2->height);
- } else {
- return (s1->width == s2->width) && (s1->height == s2->height);
- }
-}
-
-static void pgraph_download_surface_data_if_dirty(NV2AState *d,
- SurfaceBinding *surface)
-{
- if (surface->draw_dirty) {
- pgraph_download_surface_data(d, surface, true);
- }
-}
-
-static void pgraph_bind_current_surface(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
-
- if (pg->color_binding) {
- glFramebufferTexture2D(GL_FRAMEBUFFER, pg->color_binding->fmt.gl_attachment,
- GL_TEXTURE_2D, pg->color_binding->gl_buffer, 0);
- }
-
- if (pg->zeta_binding) {
- glFramebufferTexture2D(GL_FRAMEBUFFER, pg->zeta_binding->fmt.gl_attachment,
- GL_TEXTURE_2D, pg->zeta_binding->gl_buffer, 0);
- }
-
- if (pg->color_binding || pg->zeta_binding) {
- assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) ==
- GL_FRAMEBUFFER_COMPLETE);
- }
-}
-
-static void surface_copy_shrink_row(uint8_t *out, uint8_t *in,
- unsigned int width,
- unsigned int bytes_per_pixel,
- unsigned int factor)
-{
- if (bytes_per_pixel == 4) {
- for (unsigned int x = 0; x < width; x++) {
- *(uint32_t *)out = *(uint32_t *)in;
- out += 4;
- in += 4 * factor;
- }
- } else if (bytes_per_pixel == 2) {
- for (unsigned int x = 0; x < width; x++) {
- *(uint16_t *)out = *(uint16_t *)in;
- out += 2;
- in += 2 * factor;
- }
- } else {
- for (unsigned int x = 0; x < width; x++) {
- memcpy(out, in, bytes_per_pixel);
- out += bytes_per_pixel;
- in += bytes_per_pixel * factor;
- }
- }
-}
-
-
-static void pgraph_download_surface_data_to_buffer(NV2AState *d,
- SurfaceBinding *surface,
- bool swizzle, bool flip,
- bool downscale,
- uint8_t *pixels)
-{
- PGRAPHState *pg = &d->pgraph;
- swizzle &= surface->swizzle;
- downscale &= (pg->surface_scale_factor != 1);
-
- trace_nv2a_pgraph_surface_download(
- surface->color ? "COLOR" : "ZETA",
- surface->swizzle ? "sz" : "lin", surface->vram_addr,
- surface->width, surface->height, surface->pitch,
- surface->fmt.bytes_per_pixel);
-
- /* Bind destination surface to framebuffer */
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- 0, 0);
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment,
- GL_TEXTURE_2D, surface->gl_buffer, 0);
-
- assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
-
- /* Read surface into memory */
- uint8_t *gl_read_buf = pixels;
-
- uint8_t *swizzle_buf = pixels;
- if (swizzle) {
- /* FIXME: Allocate big buffer up front and re-alloc if necessary.
- * FIXME: Consider swizzle in shader
- */
- assert(pg->surface_scale_factor == 1 || downscale);
- swizzle_buf = (uint8_t *)g_malloc(surface->size);
- gl_read_buf = swizzle_buf;
- }
-
- if (downscale) {
- pg->scale_buf = (uint8_t *)g_realloc(
- pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor *
- surface->size);
- gl_read_buf = pg->scale_buf;
- }
-
- glo_readpixels(
- surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel,
- pg->surface_scale_factor * surface->pitch,
- pg->surface_scale_factor * surface->width,
- pg->surface_scale_factor * surface->height, flip, gl_read_buf);
-
- /* FIXME: Replace this with a hw accelerated version */
- if (downscale) {
- assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel));
- uint8_t *out = swizzle_buf, *in = pg->scale_buf;
- for (unsigned int y = 0; y < surface->height; y++) {
- surface_copy_shrink_row(out, in, surface->width,
- surface->fmt.bytes_per_pixel,
- pg->surface_scale_factor);
- in += surface->pitch * pg->surface_scale_factor *
- pg->surface_scale_factor;
- out += surface->pitch;
- }
- }
-
- if (swizzle) {
- swizzle_rect(swizzle_buf, surface->width, surface->height, pixels,
- surface->pitch, surface->fmt.bytes_per_pixel);
- g_free(swizzle_buf);
- }
-
- /* Re-bind original framebuffer target */
- glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment,
- GL_TEXTURE_2D, 0, 0);
- pgraph_bind_current_surface(d);
-}
-
-static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface,
- bool force)
-{
- if (!(surface->download_pending || force)) {
- return;
- }
-
- /* FIXME: Respect write enable at last TOU? */
-
- nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD);
-
- pgraph_download_surface_data_to_buffer(
- d, surface, true, true, true, d->vram_ptr + surface->vram_addr);
-
- memory_region_set_client_dirty(d->vram, surface->vram_addr,
- surface->pitch * surface->height,
- DIRTY_MEMORY_VGA);
- memory_region_set_client_dirty(d->vram, surface->vram_addr,
- surface->pitch * surface->height,
- DIRTY_MEMORY_NV2A_TEX);
-
- surface->download_pending = false;
- surface->draw_dirty = false;
-}
-
-void pgraph_process_pending_downloads(NV2AState *d)
-{
- SurfaceBinding *surface;
- QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) {
- pgraph_download_surface_data(d, surface, false);
- }
-
- qatomic_set(&d->pgraph.downloads_pending, false);
- qemu_event_set(&d->pgraph.downloads_complete);
-}
-
-void pgraph_download_dirty_surfaces(NV2AState *d)
-{
- SurfaceBinding *surface;
- QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) {
- pgraph_download_surface_data_if_dirty(d, surface);
- }
-
- qatomic_set(&d->pgraph.download_dirty_surfaces_pending, false);
- qemu_event_set(&d->pgraph.dirty_surfaces_download_complete);
-}
-
-
-static void surface_copy_expand_row(uint8_t *out, uint8_t *in,
- unsigned int width,
- unsigned int bytes_per_pixel,
- unsigned int factor)
-{
- if (bytes_per_pixel == 4) {
- for (unsigned int x = 0; x < width; x++) {
- for (unsigned int i = 0; i < factor; i++) {
- *(uint32_t *)out = *(uint32_t *)in;
- out += bytes_per_pixel;
- }
- in += bytes_per_pixel;
- }
- } else if (bytes_per_pixel == 2) {
- for (unsigned int x = 0; x < width; x++) {
- for (unsigned int i = 0; i < factor; i++) {
- *(uint16_t *)out = *(uint16_t *)in;
- out += bytes_per_pixel;
- }
- in += bytes_per_pixel;
- }
- } else {
- for (unsigned int x = 0; x < width; x++) {
- for (unsigned int i = 0; i < factor; i++) {
- memcpy(out, in, bytes_per_pixel);
- out += bytes_per_pixel;
- }
- in += bytes_per_pixel;
- }
- }
-}
-
-static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width,
- unsigned int height,
- unsigned int bytes_per_pixel,
- unsigned int factor)
-{
- size_t out_pitch = width * bytes_per_pixel * factor;
-
- for (unsigned int y = 0; y < height; y++) {
- surface_copy_expand_row(out, in, width, bytes_per_pixel, factor);
- uint8_t *row_in = out;
- for (unsigned int i = 1; i < factor; i++) {
- out += out_pitch;
- memcpy(out, row_in, out_pitch);
- }
- in += width * bytes_per_pixel;
- out += out_pitch;
- }
-}
-
-static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
- bool force)
-{
- if (!(surface->upload_pending || force)) {
- return;
- }
-
- nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD);
-
- trace_nv2a_pgraph_surface_upload(
- surface->color ? "COLOR" : "ZETA",
- surface->swizzle ? "sz" : "lin", surface->vram_addr,
- surface->width, surface->height, surface->pitch,
- surface->fmt.bytes_per_pixel);
-
- PGRAPHState *pg = &d->pgraph;
-
- surface->upload_pending = false;
- surface->draw_time = pg->draw_time;
-
- // FIXME: Don't query GL for texture binding
- GLint last_texture_binding;
- glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding);
-
- // FIXME: Replace with FBO to not disturb current state
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
- 0, 0);
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
- 0, 0);
- glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
- GL_TEXTURE_2D, 0, 0);
-
- uint8_t *data = d->vram_ptr;
- uint8_t *buf = data + surface->vram_addr;
-
- if (surface->swizzle) {
- buf = (uint8_t*)g_malloc(surface->size);
- unswizzle_rect(data + surface->vram_addr,
- surface->width, surface->height,
- buf,
- surface->pitch,
- surface->fmt.bytes_per_pixel);
- }
-
- /* FIXME: Replace this flip/scaling */
-
- // This is VRAM so we can't do this inplace!
- uint8_t *flipped_buf = (uint8_t *)g_malloc(
- surface->height * surface->width * surface->fmt.bytes_per_pixel);
- unsigned int irow;
- for (irow = 0; irow < surface->height; irow++) {
- memcpy(&flipped_buf[surface->width * (surface->height - irow - 1)
- * surface->fmt.bytes_per_pixel],
- &buf[surface->pitch * irow],
- surface->width * surface->fmt.bytes_per_pixel);
- }
-
- uint8_t *gl_read_buf = flipped_buf;
- unsigned int width = surface->width, height = surface->height;
-
- if (pg->surface_scale_factor > 1) {
- pgraph_apply_scaling_factor(pg, &width, &height);
- pg->scale_buf = (uint8_t *)g_realloc(
- pg->scale_buf, width * height * surface->fmt.bytes_per_pixel);
- gl_read_buf = pg->scale_buf;
- uint8_t *out = gl_read_buf, *in = flipped_buf;
- surface_copy_expand(out, in, surface->width, surface->height,
- surface->fmt.bytes_per_pixel,
- d->pgraph.surface_scale_factor);
- }
-
- int prev_unpack_alignment;
- glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment);
- if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) {
- glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
- } else {
- glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
- }
-
- glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
- glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width,
- height, 0, surface->fmt.gl_format, surface->fmt.gl_type,
- gl_read_buf);
- glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment);
- g_free(flipped_buf);
- if (surface->swizzle) {
- g_free(buf);
- }
-
- // Rebind previous framebuffer binding
- glBindTexture(GL_TEXTURE_2D, last_texture_binding);
-
- pgraph_bind_current_surface(d);
-}
-
-static void pgraph_compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2)
-{
- #define DO_CMP(fld) \
- if (s1->fld != s2->fld) \
- trace_nv2a_pgraph_surface_compare_mismatch( \
- #fld, (long int)s1->fld, (long int)s2->fld);
- DO_CMP(shape.clip_x)
- DO_CMP(shape.clip_width)
- DO_CMP(shape.clip_y)
- DO_CMP(shape.clip_height)
- DO_CMP(gl_buffer)
- DO_CMP(fmt.bytes_per_pixel)
- DO_CMP(fmt.gl_attachment)
- DO_CMP(fmt.gl_internal_format)
- DO_CMP(fmt.gl_format)
- DO_CMP(fmt.gl_type)
- DO_CMP(color)
- DO_CMP(swizzle)
- DO_CMP(vram_addr)
- DO_CMP(width)
- DO_CMP(height)
- DO_CMP(pitch)
- DO_CMP(size)
- DO_CMP(dma_addr)
- DO_CMP(dma_len)
- DO_CMP(frame_time)
- DO_CMP(draw_time)
- #undef DO_CMP
-}
-
-static void pgraph_populate_surface_binding_entry_sized(NV2AState *d,
- bool color,
- unsigned int width,
- unsigned int height,
- SurfaceBinding *entry)
-{
- PGRAPHState *pg = &d->pgraph;
- Surface *surface;
- hwaddr dma_address;
- SurfaceFormatInfo fmt;
-
- if (color) {
- surface = &pg->surface_color;
- dma_address = pg->dma_color;
- assert(pg->surface_shape.color_format != 0);
- assert(pg->surface_shape.color_format <
- ARRAY_SIZE(kelvin_surface_color_format_map));
- fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format];
- if (fmt.bytes_per_pixel == 0) {
- fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n",
- pg->surface_shape.color_format);
- abort();
- }
- } else {
- surface = &pg->surface_zeta;
- dma_address = pg->dma_zeta;
- assert(pg->surface_shape.zeta_format != 0);
- assert(pg->surface_shape.zeta_format <
- ARRAY_SIZE(kelvin_surface_zeta_float_format_map));
- const SurfaceFormatInfo *map =
- pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_map :
- kelvin_surface_zeta_fixed_format_map;
- fmt = map[pg->surface_shape.zeta_format];
- }
-
- DMAObject dma = nv_dma_load(d, dma_address);
- /* There's a bunch of bugs that could cause us to hit this function
- * at the wrong time and get a invalid dma object.
- * Check that it's sane. */
- assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS);
- // assert(dma.address + surface->offset != 0);
- assert(surface->offset <= dma.limit);
- assert(surface->offset + surface->pitch * height <= dma.limit + 1);
- assert(surface->pitch % fmt.bytes_per_pixel == 0);
- assert((dma.address & ~0x07FFFFFF) == 0);
-
- entry->shape = (color || !pg->color_binding) ? pg->surface_shape :
- pg->color_binding->shape;
- entry->gl_buffer = 0;
- entry->fmt = fmt;
- entry->color = color;
- entry->swizzle =
- (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
- entry->vram_addr = dma.address + surface->offset;
- entry->width = width;
- entry->height = height;
- entry->pitch = surface->pitch;
- entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel);
- entry->upload_pending = true;
- entry->download_pending = false;
- entry->draw_dirty = false;
- entry->dma_addr = dma.address;
- entry->dma_len = dma.limit;
- entry->frame_time = pg->frame_time;
- entry->draw_time = pg->draw_time;
- entry->cleared = false;
-}
-
-static void pgraph_populate_surface_binding_entry(NV2AState *d, bool color,
- SurfaceBinding *entry)
-{
- PGRAPHState *pg = &d->pgraph;
- unsigned int width, height;
-
- if (color || !pg->color_binding) {
- pgraph_get_surface_dimensions(pg, &width, &height);
- pgraph_apply_anti_aliasing_factor(pg, &width, &height);
-
- /* Since we determine surface dimensions based on the clipping
- * rectangle, make sure to include the surface offset as well.
- */
- if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
- width += pg->surface_shape.clip_x;
- height += pg->surface_shape.clip_y;
- }
- } else {
- width = pg->color_binding->width;
- height = pg->color_binding->height;
- }
-
- pgraph_populate_surface_binding_entry_sized(d, color, width, height, entry);
-}
-
-static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color)
-{
- PGRAPHState *pg = &d->pgraph;
-
- SurfaceBinding entry;
- pgraph_populate_surface_binding_entry(d, color, &entry);
-
- Surface *surface = color ? &pg->surface_color : &pg->surface_zeta;
-
- bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty(
- d->vram, entry.vram_addr, entry.size,
- DIRTY_MEMORY_NV2A);
-
- if (upload && (surface->buffer_dirty || mem_dirty)) {
- pgraph_unbind_surface(d, color);
-
- SurfaceBinding *found = pgraph_surface_get(d, entry.vram_addr);
- if (found != NULL) {
- /* FIXME: Support same color/zeta surface target? In the mean time,
- * if the surface we just found is currently bound, just unbind it.
- */
- SurfaceBinding *other = (color ? pg->zeta_binding
- : pg->color_binding);
- if (found == other) {
- NV2A_UNIMPLEMENTED("Same color & zeta surface offset");
- pgraph_unbind_surface(d, !color);
- }
- }
-
- trace_nv2a_pgraph_surface_target(
- color ? "COLOR" : "ZETA", entry.vram_addr,
- entry.swizzle ? "sz" : "ln",
- pg->surface_shape.anti_aliasing,
- pg->surface_shape.clip_x,
- pg->surface_shape.clip_width, pg->surface_shape.clip_y,
- pg->surface_shape.clip_height);
-
- bool should_create = true;
-
- if (found != NULL) {
- bool is_compatible =
- pgraph_check_surface_compatibility(found, &entry, false);
-
-#define TRACE_ARGS found->vram_addr, found->width, found->height, \
- found->swizzle ? "sz" : "ln", \
- found->shape.anti_aliasing, found->shape.clip_x, \
- found->shape.clip_width, found->shape.clip_y, \
- found->shape.clip_height, found->pitch
- if (found->color) {
- trace_nv2a_pgraph_surface_match_color(TRACE_ARGS);
- } else {
- trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS);
- }
-#undef TRACE_ARGS
-
- assert(!(entry.swizzle && pg->clearing));
-
- if (found->swizzle != entry.swizzle) {
- /* Clears should only be done on linear surfaces. Avoid
- * synchronization by allowing (1) a surface marked swizzled to
- * be cleared under the assumption the entire surface is
- * destined to be cleared and (2) a fully cleared linear surface
- * to be marked swizzled. Strictly match size to avoid
- * pathological cases.
- */
- is_compatible &= (pg->clearing || found->cleared) &&
- pgraph_check_surface_compatibility(found, &entry, true);
- if (is_compatible) {
- trace_nv2a_pgraph_surface_migrate_type(
- entry.swizzle ? "swizzled" : "linear");
- }
- }
-
- if (is_compatible && color &&
- !pgraph_check_surface_compatibility(found, &entry, true)) {
- SurfaceBinding zeta_entry;
- pgraph_populate_surface_binding_entry_sized(
- d, !color, found->width, found->height, &zeta_entry);
- hwaddr color_end = found->vram_addr + found->size;
- hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size;
- is_compatible &= found->vram_addr >= zeta_end ||
- zeta_entry.vram_addr >= color_end;
- }
-
- if (is_compatible && !color && pg->color_binding) {
- is_compatible &= (found->width == pg->color_binding->width) &&
- (found->height == pg->color_binding->height);
- }
-
- if (is_compatible) {
- /* FIXME: Refactor */
- pg->surface_binding_dim.width = found->width;
- pg->surface_binding_dim.clip_x = found->shape.clip_x;
- pg->surface_binding_dim.clip_width = found->shape.clip_width;
- pg->surface_binding_dim.height = found->height;
- pg->surface_binding_dim.clip_y = found->shape.clip_y;
- pg->surface_binding_dim.clip_height = found->shape.clip_height;
- found->upload_pending |= mem_dirty;
- pg->surface_zeta.buffer_dirty |= color;
- should_create = false;
- } else {
- trace_nv2a_pgraph_surface_evict_reason(
- "incompatible", found->vram_addr);
- pgraph_compare_surfaces(found, &entry);
- pgraph_download_surface_data_if_dirty(d, found);
- pgraph_surface_invalidate(d, found);
- }
- }
-
- if (should_create) {
- glGenTextures(1, &entry.gl_buffer);
- glBindTexture(GL_TEXTURE_2D, entry.gl_buffer);
- NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer,
- "%s format: %0X, width: %d, height: %d "
- "(addr %" HWADDR_PRIx ")",
- color ? "color" : "zeta",
- color ? pg->surface_shape.color_format
- : pg->surface_shape.zeta_format,
- entry.width, entry.height, surface->offset);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
- unsigned int width = entry.width, height = entry.height;
- pgraph_apply_scaling_factor(pg, &width, &height);
- glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width,
- height, 0, entry.fmt.gl_format, entry.fmt.gl_type,
- NULL);
- found = pgraph_surface_put(d, entry.vram_addr, &entry);
-
- /* FIXME: Refactor */
- pg->surface_binding_dim.width = entry.width;
- pg->surface_binding_dim.clip_x = entry.shape.clip_x;
- pg->surface_binding_dim.clip_width = entry.shape.clip_width;
- pg->surface_binding_dim.height = entry.height;
- pg->surface_binding_dim.clip_y = entry.shape.clip_y;
- pg->surface_binding_dim.clip_height = entry.shape.clip_height;
-
- if (color && pg->zeta_binding && (pg->zeta_binding->width != entry.width || pg->zeta_binding->height != entry.height)) {
- pg->surface_zeta.buffer_dirty = true;
- }
- }
-
-#define TRACE_ARGS found->vram_addr, found->width, found->height, \
- found->swizzle ? "sz" : "ln", found->shape.anti_aliasing, \
- found->shape.clip_x, found->shape.clip_width, \
- found->shape.clip_y, found->shape.clip_height, found->pitch
-
- if (color) {
- if (should_create) {
- trace_nv2a_pgraph_surface_create_color(TRACE_ARGS);
- } else {
- trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS);
- }
-
- pg->color_binding = found;
- } else {
- if (should_create) {
- trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS);
- } else {
- trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS);
- }
- pg->zeta_binding = found;
- }
-#undef TRACE_ARGS
-
- glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment,
- GL_TEXTURE_2D, found->gl_buffer, 0);
- assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) ==
- GL_FRAMEBUFFER_COMPLETE);
-
- surface->buffer_dirty = false;
- }
-
- if (!upload && surface->draw_dirty) {
- if (!tcg_enabled()) {
- /* FIXME: Cannot monitor for reads/writes; flush now */
- pgraph_download_surface_data(d,
- color ? pg->color_binding : pg->zeta_binding, true);
- }
-
- surface->write_enabled_cache = false;
- surface->draw_dirty = false;
- }
-}
-
-static void pgraph_unbind_surface(NV2AState *d, bool color)
-{
- PGRAPHState *pg = &d->pgraph;
-
- if (color) {
- if (pg->color_binding) {
- glFramebufferTexture2D(GL_FRAMEBUFFER,
- GL_COLOR_ATTACHMENT0,
- GL_TEXTURE_2D, 0, 0);
- pg->color_binding = NULL;
- }
- } else {
- if (pg->zeta_binding) {
- glFramebufferTexture2D(GL_FRAMEBUFFER,
- GL_DEPTH_ATTACHMENT,
- GL_TEXTURE_2D, 0, 0);
- glFramebufferTexture2D(GL_FRAMEBUFFER,
- GL_DEPTH_STENCIL_ATTACHMENT,
- GL_TEXTURE_2D, 0, 0);
- pg->zeta_binding = NULL;
- }
- }
-}
-
-static void pgraph_update_surface(NV2AState *d, bool upload,
- bool color_write, bool zeta_write)
-{
- PGRAPHState *pg = &d->pgraph;
-
- pg->surface_shape.z_format = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
- NV_PGRAPH_SETUPRASTER_Z_FORMAT);
-
- color_write = color_write &&
- (pg->clearing || pgraph_color_write_enabled(pg));
- zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg));
-
- if (upload) {
- bool fb_dirty = pgraph_framebuffer_dirty(pg);
- if (fb_dirty) {
- memcpy(&pg->last_surface_shape, &pg->surface_shape,
- sizeof(SurfaceShape));
- pg->surface_color.buffer_dirty = true;
- pg->surface_zeta.buffer_dirty = true;
- }
-
- if (pg->surface_color.buffer_dirty) {
- pgraph_unbind_surface(d, true);
- }
-
- if (color_write) {
- pgraph_update_surface_part(d, true, true);
- }
-
- if (pg->surface_zeta.buffer_dirty) {
- pgraph_unbind_surface(d, false);
- }
-
- if (zeta_write) {
- pgraph_update_surface_part(d, true, false);
- }
- } else {
- if ((color_write || pg->surface_color.write_enabled_cache)
- && pg->surface_color.draw_dirty) {
- pgraph_update_surface_part(d, false, true);
- }
- if ((zeta_write || pg->surface_zeta.write_enabled_cache)
- && pg->surface_zeta.draw_dirty) {
- pgraph_update_surface_part(d, false, false);
- }
- }
-
- if (upload) {
- pg->draw_time++;
- }
-
- bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
-
- if (pg->color_binding) {
- pg->color_binding->frame_time = pg->frame_time;
- if (upload) {
- pgraph_upload_surface_data(d, pg->color_binding, false);
- pg->color_binding->draw_time = pg->draw_time;
- pg->color_binding->swizzle = swizzle;
- }
- }
-
- if (pg->zeta_binding) {
- pg->zeta_binding->frame_time = pg->frame_time;
- if (upload) {
- pgraph_upload_surface_data(d, pg->zeta_binding, false);
- pg->zeta_binding->draw_time = pg->draw_time;
- pg->zeta_binding->swizzle = swizzle;
- }
- }
-
- // Sanity check color and zeta dimensions match
- if (pg->color_binding && pg->zeta_binding) {
- assert((pg->color_binding->width == pg->zeta_binding->width)
- && (pg->color_binding->height == pg->zeta_binding->height));
- }
-
- pgraph_surface_evict_old(d);
-}
-
-struct pgraph_texture_possibly_dirty_struct {
- hwaddr addr, end;
-};
-
-static void pgraph_mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque)
-{
- struct pgraph_texture_possibly_dirty_struct *test =
- (struct pgraph_texture_possibly_dirty_struct *)opaque;
-
- struct TextureLruNode *tnode = container_of(node, TextureLruNode, node);
- if (tnode->binding == NULL || tnode->possibly_dirty) {
- return;
- }
-
- uintptr_t k_tex_addr = tnode->key.texture_vram_offset;
- uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1;
- bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end);
-
- if (tnode->key.palette_length > 0) {
- uintptr_t k_pal_addr = tnode->key.palette_vram_offset;
- uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1;
- overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end);
- }
-
- tnode->possibly_dirty |= overlapping;
-}
-
-
-static void pgraph_mark_textures_possibly_dirty(NV2AState *d,
- hwaddr addr, hwaddr size)
-{
- hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1;
- addr &= TARGET_PAGE_MASK;
- assert(end <= memory_region_size(d->vram));
-
- struct pgraph_texture_possibly_dirty_struct test = {
- .addr = addr,
- .end = end,
- };
-
- lru_visit_active(&d->pgraph.texture_cache,
- pgraph_mark_textures_possibly_dirty_visitor,
- &test);
-}
-
-static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size)
-{
- hwaddr end = TARGET_PAGE_ALIGN(addr + size);
- addr &= TARGET_PAGE_MASK;
- assert(end < memory_region_size(d->vram));
- return memory_region_test_and_clear_dirty(d->vram, addr, end - addr,
- DIRTY_MEMORY_NV2A_TEX);
-}
-
-static bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage)
-{
- assert(stage < NV2A_MAX_TEXTURES);
- uint32_t mode = (pg->regs[NV_PGRAPH_SHADERPROG] >> (stage * 5)) & 0x1F;
- return !!mode;
-}
-
-// Check if any of the pages spanned by the a texture are dirty.
-static bool pgraph_check_texture_possibly_dirty(NV2AState *d, hwaddr texture_vram_offset, unsigned int length, hwaddr palette_vram_offset, unsigned int palette_length)
-{
- bool possibly_dirty = false;
- if (pgraph_check_texture_dirty(d, texture_vram_offset, length)) {
- possibly_dirty = true;
- pgraph_mark_textures_possibly_dirty(d, texture_vram_offset, length);
- }
- if (palette_length && pgraph_check_texture_dirty(d, palette_vram_offset,
- palette_length)) {
- possibly_dirty = true;
- pgraph_mark_textures_possibly_dirty(d, palette_vram_offset,
- palette_length);
- }
- return possibly_dirty;
-}
-
-static void apply_texture_parameters(TextureBinding *binding,
- const ColorFormatInfo *f,
- unsigned int dimensionality,
- unsigned int filter,
- unsigned int address,
- bool is_bordered,
- uint32_t border_color)
-{
- unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
- unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG);
- unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU);
- unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV);
- unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP);
-
- if (f->linear) {
- /* somtimes games try to set mipmap min filters on linear textures.
- * this could indicate a bug... */
- switch (min_filter) {
- case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD:
- case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD:
- min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0;
- break;
- case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD:
- case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD:
- min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0;
- break;
- }
- }
-
- if (min_filter != binding->min_filter) {
- glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER,
- pgraph_texture_min_filter_map[min_filter]);
- binding->min_filter = min_filter;
- }
- if (mag_filter != binding->mag_filter) {
- glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER,
- pgraph_texture_mag_filter_map[mag_filter]);
- binding->mag_filter = mag_filter;
- }
-
- /* Texture wrapping */
- assert(addru < ARRAY_SIZE(pgraph_texture_addr_map));
- if (addru != binding->addru) {
- glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S,
- pgraph_texture_addr_map[addru]);
- binding->addru = addru;
- }
- bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
- if (dimensionality > 1) {
- if (addrv != binding->addrv) {
- assert(addrv < ARRAY_SIZE(pgraph_texture_addr_map));
- glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T,
- pgraph_texture_addr_map[addrv]);
- binding->addrv = addrv;
- }
- needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
- }
- if (dimensionality > 2) {
- if (addrp != binding->addrp) {
- assert(addrp < ARRAY_SIZE(pgraph_texture_addr_map));
- glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R,
- pgraph_texture_addr_map[addrp]);
- binding->addrp = addrp;
- }
- needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
- }
-
- if (!is_bordered && needs_border_color) {
- if (!binding->border_color_set || binding->border_color != border_color) {
- GLfloat gl_border_color[] = {
- /* FIXME: Color channels might be wrong order */
- ((border_color >> 16) & 0xFF) / 255.0f, /* red */
- ((border_color >> 8) & 0xFF) / 255.0f, /* green */
- (border_color & 0xFF) / 255.0f, /* blue */
- ((border_color >> 24) & 0xFF) / 255.0f /* alpha */
- };
- glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR,
- gl_border_color);
-
- binding->border_color_set = true;
- binding->border_color = border_color;
- }
- }
-}
-
-static void pgraph_bind_textures(NV2AState *d)
-{
- int i;
- PGRAPHState *pg = &d->pgraph;
-
- NV2A_GL_DGROUP_BEGIN("%s", __func__);
-
- for (i=0; iregs[NV_PGRAPH_TEXCTL0_0 + i*4];
- bool enabled = pgraph_is_texture_stage_active(pg, i) &&
- GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE);
- /* FIXME: What happens if texture is disabled but stage is active? */
-
- glActiveTexture(GL_TEXTURE0 + i);
- if (!enabled) {
- glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
- glBindTexture(GL_TEXTURE_RECTANGLE, 0);
- glBindTexture(GL_TEXTURE_1D, 0);
- glBindTexture(GL_TEXTURE_2D, 0);
- glBindTexture(GL_TEXTURE_3D, 0);
- continue;
- }
-
- uint32_t ctl_1 = pg->regs[NV_PGRAPH_TEXCTL1_0 + i*4];
- uint32_t fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4];
- uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4];
- uint32_t address = pg->regs[NV_PGRAPH_TEXADDRESS0 + i*4];
- uint32_t palette = pg->regs[NV_PGRAPH_TEXPALETTE0 + i*4];
-
- unsigned int min_mipmap_level =
- GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP);
- unsigned int max_mipmap_level =
- GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP);
-
- unsigned int pitch =
- GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH);
-
- unsigned int dma_select =
- GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA);
- bool cubemap =
- GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
- unsigned int dimensionality =
- GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY);
- unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR);
- unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS);
- unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
- unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
- unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
-
- unsigned int rect_width =
- GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4],
- NV_PGRAPH_TEXIMAGERECT0_WIDTH);
- unsigned int rect_height =
- GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4],
- NV_PGRAPH_TEXIMAGERECT0_HEIGHT);
-#ifdef DEBUG_NV2A
- unsigned int lod_bias =
- GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS);
-#endif
- unsigned int border_source = GET_MASK(fmt,
- NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
- uint32_t border_color = pg->regs[NV_PGRAPH_BORDERCOLOR0 + i*4];
-
- hwaddr offset = pg->regs[NV_PGRAPH_TEXOFFSET0 + i*4];
-
- bool palette_dma_select =
- GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA);
- unsigned int palette_length_index =
- GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH);
- unsigned int palette_offset =
- palette & NV_PGRAPH_TEXPALETTE0_OFFSET;
-
- unsigned int palette_length = 0;
- switch (palette_length_index) {
- case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break;
- case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break;
- case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break;
- case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break;
- default: assert(false); break;
- }
-
- /* Check for unsupported features */
- if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED");
- if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED");
- if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED");
- if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED");
-
- nv2a_profile_inc_counter(NV2A_PROF_TEX_BIND);
-
- hwaddr dma_len;
- uint8_t *texture_data;
- if (dma_select) {
- texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len);
- } else {
- texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len);
- }
- assert(offset < dma_len);
- texture_data += offset;
- hwaddr texture_vram_offset = texture_data - d->vram_ptr;
-
- hwaddr palette_dma_len;
- uint8_t *palette_data;
- if (palette_dma_select) {
- palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len);
- } else {
- palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len);
- }
- assert(palette_offset < palette_dma_len);
- palette_data += palette_offset;
- hwaddr palette_vram_offset = palette_data - d->vram_ptr;
-
- NV2A_DPRINTF(" texture %d is format 0x%x, "
- "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s),"
- " filter %x %x, levels %d-%d %d bias %d\n",
- i, color_format, offset,
- rect_width, rect_height,
- 1 << log_width, 1 << log_height, 1 << log_depth,
- pitch,
- cubemap ? "; cubemap" : "",
- GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN),
- GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG),
- min_mipmap_level, max_mipmap_level, levels,
- lod_bias);
-
- assert(color_format < ARRAY_SIZE(kelvin_color_format_map));
- ColorFormatInfo f = kelvin_color_format_map[color_format];
- if (f.bytes_per_pixel == 0) {
- fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n",
- color_format);
- abort();
- }
-
- unsigned int width, height, depth;
- if (f.linear) {
- assert(dimensionality == 2);
- width = rect_width;
- height = rect_height;
- depth = 1;
- } else {
- width = 1 << log_width;
- height = 1 << log_height;
- depth = 1 << log_depth;
- pitch = 0;
-
- levels = MIN(levels, max_mipmap_level + 1);
-
- /* Discard mipmap levels that would be smaller than 1x1.
- * FIXME: Is this actually needed?
- *
- * >> Level 0: 32 x 4
- * Level 1: 16 x 2
- * Level 2: 8 x 1
- * Level 3: 4 x 1
- * Level 4: 2 x 1
- * Level 5: 1 x 1
- */
- levels = MIN(levels, MAX(log_width, log_height) + 1);
- assert(levels > 0);
-
- if (dimensionality == 3) {
- /* FIXME: What about 3D mipmaps? */
- if (log_width < 2 || log_height < 2) {
- /* Base level is smaller than 4x4... */
- levels = 1;
- } else {
- levels = MIN(levels, MIN(log_width, log_height) - 1);
- }
- }
- min_mipmap_level = MIN(levels-1, min_mipmap_level);
- max_mipmap_level = MIN(levels-1, max_mipmap_level);
- }
-
- size_t length = 0;
- if (f.linear) {
- assert(cubemap == false);
- assert(dimensionality == 2);
- length = height * pitch;
- } else {
- if (dimensionality >= 2) {
- unsigned int w = width, h = height;
- int level;
- if (f.gl_format != 0) {
- for (level = 0; level < levels; level++) {
- w = MAX(w, 1);
- h = MAX(h, 1);
- length += w * h * f.bytes_per_pixel;
- w /= 2;
- h /= 2;
- }
- } else {
- /* Compressed textures are a bit different */
- unsigned int block_size =
- f.gl_internal_format ==
- GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ?
- 8 : 16;
- for (level = 0; level < levels; level++) {
- w = MAX(w, 1);
- h = MAX(h, 1);
- unsigned int phys_w = (w + 3) & ~3,
- phys_h = (h + 3) & ~3;
- length += phys_w/4 * phys_h/4 * block_size;
- w /= 2;
- h /= 2;
- }
- }
- if (cubemap) {
- assert(dimensionality == 2);
- length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
- length *= 6;
- }
- if (dimensionality >= 3) {
- length *= depth;
- }
- }
- }
-
- bool is_bordered = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR;
-
- assert((texture_vram_offset + length) < memory_region_size(d->vram));
- assert((palette_vram_offset + palette_length)
- < memory_region_size(d->vram));
- bool is_indexed = (color_format ==
- NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8);
- bool possibly_dirty = false;
- bool possibly_dirty_checked = false;
-
- SurfaceBinding *surface = pgraph_surface_get(d, texture_vram_offset);
- TextureBinding *tbind = pg->texture_binding[i];
- if (!pg->texture_dirty[i] && tbind) {
- bool reusable = false;
- if (surface && tbind->draw_time == surface->draw_time) {
- reusable = true;
- } else if (!surface) {
- possibly_dirty = pgraph_check_texture_possibly_dirty(
- d,
- texture_vram_offset,
- length,
- palette_vram_offset,
- is_indexed ? palette_length : 0);
- possibly_dirty_checked = true;
- reusable = !possibly_dirty;
- }
-
- if (reusable) {
- glBindTexture(pg->texture_binding[i]->gl_target,
- pg->texture_binding[i]->gl_texture);
- apply_texture_parameters(pg->texture_binding[i],
- &f,
- dimensionality,
- filter,
- address,
- is_bordered,
- border_color);
- continue;
- }
- }
-
- TextureShape state;
- memset(&state, 0, sizeof(TextureShape));
- state.cubemap = cubemap;
- state.dimensionality = dimensionality;
- state.color_format = color_format;
- state.levels = levels;
- state.width = width;
- state.height = height;
- state.depth = depth;
- state.min_mipmap_level = min_mipmap_level;
- state.max_mipmap_level = max_mipmap_level;
- state.pitch = pitch;
- state.border = is_bordered;
-
- /*
- * Check active surfaces to see if this texture was a render target
- */
- bool surf_to_tex = false;
- if (surface != NULL) {
- surf_to_tex = pgraph_check_surface_to_texture_compatibility(
- surface, &state);
-
- if (surf_to_tex && surface->upload_pending) {
- pgraph_upload_surface_data(d, surface, false);
- }
- }
-
- if (!surf_to_tex) {
- // FIXME: Restructure to support rendering surfaces to cubemap faces
-
- // Writeback any surfaces which this texture may index
- hwaddr tex_vram_end = texture_vram_offset + length - 1;
- QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) {
- hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
- bool overlapping = !(surface->vram_addr >= tex_vram_end
- || texture_vram_offset >= surf_vram_end);
- if (overlapping) {
- pgraph_download_surface_data_if_dirty(d, surface);
- }
- }
- }
-
- TextureKey key;
- memset(&key, 0, sizeof(TextureKey));
- key.state = state;
- key.texture_vram_offset = texture_vram_offset;
- key.texture_length = length;
- if (is_indexed) {
- key.palette_vram_offset = palette_vram_offset;
- key.palette_length = palette_length;
- }
-
- // Search for existing texture binding in cache
- uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key));
- LruNode *found = lru_lookup(&pg->texture_cache,
- tex_binding_hash, &key);
- TextureLruNode *key_out = container_of(found, TextureLruNode, node);
- possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty;
-
- if (!surf_to_tex && !possibly_dirty_checked) {
- possibly_dirty |= pgraph_check_texture_possibly_dirty(
- d,
- texture_vram_offset,
- length,
- palette_vram_offset,
- is_indexed ? palette_length : 0);
- }
-
- // Calculate hash of texture data, if necessary
- uint64_t tex_data_hash = 0;
- if (!surf_to_tex && possibly_dirty) {
- tex_data_hash = fast_hash(texture_data, length);
- if (is_indexed) {
- tex_data_hash ^= fast_hash(palette_data, palette_length);
- }
- }
-
- // Free existing binding, if texture data has changed
- bool must_destroy = (key_out->binding != NULL)
- && possibly_dirty
- && (key_out->binding->data_hash != tex_data_hash);
- if (must_destroy) {
- texture_binding_destroy(key_out->binding);
- key_out->binding = NULL;
- }
-
- if (key_out->binding == NULL) {
- // Must create the texture
- key_out->binding = generate_texture(state, texture_data, palette_data);
- key_out->binding->data_hash = tex_data_hash;
- key_out->binding->scale = 1;
- } else {
- // Saved an upload! Reuse existing texture in graphics memory.
- glBindTexture(key_out->binding->gl_target,
- key_out->binding->gl_texture);
- }
-
- key_out->possibly_dirty = false;
- TextureBinding *binding = key_out->binding;
- binding->refcnt++;
-
- if (surf_to_tex && binding->draw_time < surface->draw_time) {
-
- trace_nv2a_pgraph_surface_render_to_texture(
- surface->vram_addr, surface->width, surface->height);
- pgraph_render_surface_to_texture(d, surface, binding, &state, i);
- binding->draw_time = surface->draw_time;
- if (binding->gl_target == GL_TEXTURE_RECTANGLE) {
- binding->scale = pg->surface_scale_factor;
- } else {
- binding->scale = 1;
- }
- }
-
- apply_texture_parameters(binding,
- &f,
- dimensionality,
- filter,
- address,
- is_bordered,
- border_color);
-
- if (pg->texture_binding[i]) {
- if (pg->texture_binding[i]->gl_target != binding->gl_target) {
- glBindTexture(pg->texture_binding[i]->gl_target, 0);
- }
- texture_binding_destroy(pg->texture_binding[i]);
- }
- pg->texture_binding[i] = binding;
- pg->texture_dirty[i] = false;
- }
- NV2A_GL_DGROUP_END();
-}
-
-static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg,
- unsigned int *width,
- unsigned int *height)
-{
- switch (pg->surface_shape.anti_aliasing) {
- case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1:
- break;
- case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2:
- if (width) { *width *= 2; }
- break;
- case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4:
- if (width) { *width *= 2; }
- if (height) { *height *= 2; }
- break;
- default:
- assert(false);
- break;
- }
-}
-
-static void pgraph_apply_scaling_factor(PGRAPHState *pg,
- unsigned int *width,
- unsigned int *height)
-{
- *width *= pg->surface_scale_factor;
- *height *= pg->surface_scale_factor;
-}
-
-static void pgraph_get_surface_dimensions(PGRAPHState *pg,
- unsigned int *width,
- unsigned int *height)
-{
- bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
- if (swizzle) {
- *width = 1 << pg->surface_shape.log_width;
- *height = 1 << pg->surface_shape.log_height;
- } else {
- *width = pg->surface_shape.clip_width;
- *height = pg->surface_shape.clip_height;
- }
-}
-
-static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size,
- bool quick)
-{
- glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer);
-
- hwaddr end = TARGET_PAGE_ALIGN(addr + size);
- addr &= TARGET_PAGE_MASK;
- assert(end < memory_region_size(d->vram));
-
- static hwaddr last_addr, last_end;
- if (quick && (addr >= last_addr) && (end <= last_end)) {
- return;
- }
- last_addr = addr;
- last_end = end;
-
- size = end - addr;
- if (memory_region_test_and_clear_dirty(d->vram, addr, size,
- DIRTY_MEMORY_NV2A)) {
- glBufferSubData(GL_ARRAY_BUFFER, addr, size,
- d->vram_ptr + addr);
- nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1);
- }
-}
-
-static void pgraph_update_inline_value(VertexAttribute *attr,
- const uint8_t *data)
-{
- assert(attr->count <= 4);
- attr->inline_value[0] = 0.0f;
- attr->inline_value[1] = 0.0f;
- attr->inline_value[2] = 0.0f;
- attr->inline_value[3] = 1.0f;
-
- switch (attr->format) {
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
- for (uint32_t i = 0; i < attr->count; ++i) {
- attr->inline_value[i] = (float)data[i] / 255.0f;
- }
- break;
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: {
- const int16_t *val = (const int16_t *) data;
- for (uint32_t i = 0; i < attr->count; ++i, ++val) {
- attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f);
- }
- break;
- }
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
- memcpy(attr->inline_value, data, attr->size * attr->count);
- break;
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: {
- const int16_t *val = (const int16_t *) data;
- for (uint32_t i = 0; i < attr->count; ++i, ++val) {
- attr->inline_value[i] = (float)*val;
- }
- break;
- }
- case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: {
- /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
- const int32_t val = *(const int32_t *)data;
- int32_t x = val & 0x7FF;
- if (x & 0x400) {
- x |= 0xFFFFF800;
- }
- int32_t y = (val >> 11) & 0x7FF;
- if (y & 0x400) {
- y |= 0xFFFFF800;
- }
- int32_t z = (val >> 22) & 0x7FF;
- if (z & 0x200) {
- z |= 0xFFFFFC00;
- }
-
- attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f);
- attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f);
- attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f);
- break;
- }
- default:
- fprintf(stderr, "Unknown vertex attribute type: 0x%x for format 0x%x\n",
- attr->gl_type, attr->format);
- assert(!"Unsupported attribute type");
- break;
- }
-}
-
-static void pgraph_bind_vertex_attributes(NV2AState *d,
- unsigned int min_element,
- unsigned int max_element,
- bool inline_data,
- unsigned int inline_stride,
- unsigned int provoking_element)
-{
- PGRAPHState *pg = &d->pgraph;
- bool updated_memory_buffer = false;
- unsigned int num_elements = max_element - min_element + 1;
-
- if (inline_data) {
- NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
- __func__, num_elements, inline_stride);
- } else {
- NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
- }
-
- pg->compressed_attrs = 0;
-
- for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
- VertexAttribute *attr = &pg->vertex_attributes[i];
-
- if (!attr->count) {
- glDisableVertexAttribArray(i);
- glVertexAttrib4fv(i, attr->inline_value);
- continue;
- }
-
- nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND);
- hwaddr attrib_data_addr;
- size_t stride;
-
- if (attr->needs_conversion) {
- pg->compressed_attrs |= (1 << i);
- }
-
- hwaddr start = 0;
- if (inline_data) {
- glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer);
- attrib_data_addr = attr->inline_array_offset;
- stride = inline_stride;
- } else {
- hwaddr dma_len;
- uint8_t *attr_data = (uint8_t *)nv_dma_map(
- d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a,
- &dma_len);
- assert(attr->offset < dma_len);
- attrib_data_addr = attr_data + attr->offset - d->vram_ptr;
- stride = attr->stride;
- start = attrib_data_addr + min_element * stride;
- pgraph_update_memory_buffer(d, start, num_elements * stride,
- updated_memory_buffer);
- updated_memory_buffer = true;
- }
-
- uint32_t provoking_element_index = provoking_element - min_element;
- size_t element_size = attr->size * attr->count;
- assert(element_size <= sizeof(attr->inline_value));
- const uint8_t *last_entry;
-
- if (inline_data) {
- last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset;
- } else {
- last_entry = d->vram_ptr + start;
- }
- if (!stride) {
- // Stride of 0 indicates that only the first element should be
- // used.
- pgraph_update_inline_value(attr, last_entry);
- glDisableVertexAttribArray(i);
- glVertexAttrib4fv(i, attr->inline_value);
- continue;
- }
-
- if (attr->needs_conversion) {
- glVertexAttribIPointer(i, attr->gl_count, attr->gl_type, stride,
- (void *)attrib_data_addr);
- } else {
- glVertexAttribPointer(i, attr->gl_count, attr->gl_type,
- attr->gl_normalize, stride,
- (void *)attrib_data_addr);
- }
-
- glEnableVertexAttribArray(i);
- last_entry += stride * provoking_element_index;
- pgraph_update_inline_value(attr, last_entry);
- }
-
- NV2A_GL_DGROUP_END();
-}
-
-static unsigned int pgraph_bind_inline_array(NV2AState *d)
-{
- PGRAPHState *pg = &d->pgraph;
-
- unsigned int offset = 0;
- for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
- VertexAttribute *attr = &pg->vertex_attributes[i];
- if (attr->count == 0) {
- continue;
- }
-
- /* FIXME: Double check */
- offset = ROUND_UP(offset, attr->size);
- attr->inline_array_offset = offset;
- NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n",
- i, attr->size, attr->count);
- offset += attr->size * attr->count;
- offset = ROUND_UP(offset, attr->size);
- }
-
- unsigned int vertex_size = offset;
- unsigned int index_count = pg->inline_array_length*4 / vertex_size;
-
- NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
-
- nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
- glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer);
- glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t),
- NULL, GL_STREAM_DRAW);
- glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array);
- pgraph_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size,
- index_count-1);
-
- return index_count;
-}
-
-/* 16 bit to [0.0, F16_MAX = 511.9375] */
-static float convert_f16_to_float(uint16_t f16) {
- if (f16 == 0x0000) { return 0.0; }
- uint32_t i = (f16 << 11) + 0x3C000000;
- return *(float*)&i;
-}
-
-/* 24 bit to [0.0, F24_MAX] */
-static float convert_f24_to_float(uint32_t f24) {
- assert(!(f24 >> 24));
- f24 &= 0xFFFFFF;
- if (f24 == 0x000000) { return 0.0; }
- uint32_t i = f24 << 7;
- return *(float*)&i;
-}
-
-static uint8_t cliptobyte(int x)
-{
- return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 255 : x));
-}
-
-static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix,
- uint8_t *r, uint8_t *g, uint8_t* b) {
- int c, d, e;
- c = (int)line[ix * 2] - 16;
- if (ix % 2) {
- d = (int)line[ix * 2 - 1] - 128;
- e = (int)line[ix * 2 + 1] - 128;
- } else {
- d = (int)line[ix * 2 + 1] - 128;
- e = (int)line[ix * 2 + 3] - 128;
- }
- *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
- *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
- *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
-}
-
-static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix,
- uint8_t *r, uint8_t *g, uint8_t* b) {
- int c, d, e;
- c = (int)line[ix * 2 + 1] - 16;
- if (ix % 2) {
- d = (int)line[ix * 2 - 2] - 128;
- e = (int)line[ix * 2 + 0] - 128;
- } else {
- d = (int)line[ix * 2 + 0] - 128;
- e = (int)line[ix * 2 + 2] - 128;
- }
- *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
- *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
- *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
-}
-
-static uint8_t* convert_texture_data(const TextureShape s,
- const uint8_t *data,
- const uint8_t *palette_data,
- unsigned int width,
- unsigned int height,
- unsigned int depth,
- unsigned int row_pitch,
- unsigned int slice_pitch)
-{
- if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) {
- uint8_t* converted_data = (uint8_t*)g_malloc(width * height * depth * 4);
- int x, y, z;
- const uint8_t* src = data;
- uint32_t* dst = (uint32_t*)converted_data;
- for (z = 0; z < depth; z++) {
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- uint8_t index = src[y * row_pitch + x];
- uint32_t color = *(uint32_t * )(palette_data + index * 4);
- *dst++ = color;
- }
- }
- src += slice_pitch;
- }
- return converted_data;
- } else if (s.color_format
- == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 ||
- s.color_format
- == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) {
- // TODO: Investigate whether a non-1 depth is possible.
- // Generally the hardware asserts when attempting to use volumetric
- // textures in linear formats.
- assert(depth == 1); /* FIXME */
- // FIXME: only valid if control0 register allows for colorspace conversion
- uint8_t* converted_data = (uint8_t*)g_malloc(width * height * 4);
- int x, y;
- uint8_t* pixel = converted_data;
- for (y = 0; y < height; y++) {
- const uint8_t* line = &data[y * row_pitch * depth];
- for (x = 0; x < width; x++, pixel += 4) {
- if (s.color_format
- == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) {
- convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]);
- } else {
- convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]);
- }
- pixel[3] = 255;
- }
- }
- return converted_data;
- } else if (s.color_format
- == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) {
- assert(depth == 1); /* FIXME */
- uint8_t *converted_data = (uint8_t*)g_malloc(width * height * 3);
- int x, y;
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- uint16_t rgb655 = *(uint16_t*)(data + y * row_pitch + x * 2);
- int8_t *pixel = (int8_t*)&converted_data[(y * width + x) * 3];
- /* Maps 5 bit G and B signed value range to 8 bit
- * signed values. R is probably unsigned.
- */
- rgb655 ^= (1 << 9) | (1 << 4);
- pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F;
- pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80;
- pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80;
- }
- }
- return converted_data;
- } else {
- return NULL;
- }
-}
-
-static void upload_gl_texture(GLenum gl_target,
- const TextureShape s,
- const uint8_t *texture_data,
- const uint8_t *palette_data)
-{
- ColorFormatInfo f = kelvin_color_format_map[s.color_format];
- nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD);
-
- unsigned int adjusted_width = s.width;
- unsigned int adjusted_height = s.height;
- unsigned int adjusted_pitch = s.pitch;
- unsigned int adjusted_depth = s.depth;
- if (!f.linear && s.border) {
- adjusted_width = MAX(16, adjusted_width * 2);
- adjusted_height = MAX(16, adjusted_height * 2);
- adjusted_pitch = adjusted_width * (s.pitch / s.width);
- adjusted_depth = MAX(16, s.depth * 2);
- }
-
- switch(gl_target) {
- case GL_TEXTURE_1D:
- assert(false);
- break;
- case GL_TEXTURE_RECTANGLE: {
- /* Can't handle strides unaligned to pixels */
- assert(s.pitch % f.bytes_per_pixel == 0);
-
- uint8_t *converted = convert_texture_data(s, texture_data,
- palette_data,
- adjusted_width,
- adjusted_height, 1,
- adjusted_pitch, 0);
- glPixelStorei(GL_UNPACK_ROW_LENGTH,
- converted ? 0 : adjusted_pitch / f.bytes_per_pixel);
- glTexImage2D(gl_target, 0, f.gl_internal_format,
- adjusted_width, adjusted_height, 0,
- f.gl_format, f.gl_type,
- converted ? converted : texture_data);
-
- if (converted) {
- g_free(converted);
- }
-
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- break;
- }
- case GL_TEXTURE_2D:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
- case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
- case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
-
- unsigned int width = adjusted_width, height = adjusted_height;
-
- int level;
- for (level = 0; level < s.levels; level++) {
- width = MAX(width, 1);
- height = MAX(height, 1);
-
- if (f.gl_format == 0) { /* compressed */
- // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size
- unsigned int block_size =
- f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ?
- 8 : 16;
- unsigned int physical_width = (width + 3) & ~3,
- physical_height = (height + 3) & ~3;
- if (physical_width != width) {
- glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width);
- }
- uint8_t *converted = decompress_2d_texture_data(
- f.gl_internal_format, texture_data, physical_width,
- physical_height);
- unsigned int tex_width = width;
- unsigned int tex_height = height;
-
- if (s.cubemap && adjusted_width != s.width) {
- // FIXME: Consider preserving the border.
- // There does not seem to be a way to reference the border
- // texels in a cubemap, so they are discarded.
- glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4);
- glPixelStorei(GL_UNPACK_SKIP_ROWS, 4);
- tex_width = s.width;
- tex_height = s.height;
- if (physical_width == width) {
- glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
- }
- }
-
- glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0,
- GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, converted);
- g_free(converted);
- if (physical_width != width) {
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- }
- if (s.cubemap && adjusted_width != s.width) {
- glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
- glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
- if (physical_width == width) {
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- }
- }
- texture_data +=
- physical_width / 4 * physical_height / 4 * block_size;
- } else {
- unsigned int pitch = width * f.bytes_per_pixel;
- uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch);
- unswizzle_rect(texture_data, width, height,
- unswizzled, pitch, f.bytes_per_pixel);
- uint8_t *converted = convert_texture_data(s, unswizzled,
- palette_data,
- width, height, 1,
- pitch, 0);
- uint8_t *pixel_data = converted ? converted : unswizzled;
- unsigned int tex_width = width;
- unsigned int tex_height = height;
-
- if (s.cubemap && adjusted_width != s.width) {
- // FIXME: Consider preserving the border.
- // There does not seem to be a way to reference the border
- // texels in a cubemap, so they are discarded.
- glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
- tex_width = s.width;
- tex_height = s.height;
- pixel_data += 4 * f.bytes_per_pixel + 4 * pitch;
- }
-
- glTexImage2D(gl_target, level, f.gl_internal_format, tex_width,
- tex_height, 0, f.gl_format, f.gl_type,
- pixel_data);
- if (s.cubemap && s.border) {
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- }
- if (converted) {
- g_free(converted);
- }
- g_free(unswizzled);
-
- texture_data += width * height * f.bytes_per_pixel;
- }
-
- width /= 2;
- height /= 2;
- }
-
- break;
- }
- case GL_TEXTURE_3D: {
-
- unsigned int width = adjusted_width;
- unsigned int height = adjusted_height;
- unsigned int depth = adjusted_depth;
-
- assert(f.linear == false);
-
- int level;
- for (level = 0; level < s.levels; level++) {
- if (f.gl_format == 0) { /* compressed */
- assert(width % 4 == 0 && height % 4 == 0 &&
- "Compressed 3D texture virtual size");
- width = MAX(width, 4);
- height = MAX(height, 4);
- depth = MAX(depth, 1);
-
- unsigned int block_size;
- if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
- block_size = 8;
- } else {
- block_size = 16;
- }
-
- size_t texture_size = width/4 * height/4 * depth * block_size;
-
- uint8_t *converted = decompress_3d_texture_data(f.gl_internal_format, texture_data, width, height, depth);
-
- glTexImage3D(gl_target, level, GL_RGBA8,
- width, height, depth, 0,
- GL_RGBA, GL_UNSIGNED_INT_8_8_8_8,
- converted);
-
- g_free(converted);
-
- texture_data += texture_size;
- } else {
- width = MAX(width, 1);
- height = MAX(height, 1);
- depth = MAX(depth, 1);
-
- unsigned int row_pitch = width * f.bytes_per_pixel;
- unsigned int slice_pitch = row_pitch * height;
- uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth);
- unswizzle_box(texture_data, width, height, depth, unswizzled,
- row_pitch, slice_pitch, f.bytes_per_pixel);
-
- uint8_t *converted = convert_texture_data(s, unswizzled,
- palette_data,
- width, height, depth,
- row_pitch, slice_pitch);
-
- glTexImage3D(gl_target, level, f.gl_internal_format,
- width, height, depth, 0,
- f.gl_format, f.gl_type,
- converted ? converted : unswizzled);
-
- if (converted) {
- g_free(converted);
- }
- g_free(unswizzled);
-
- texture_data += width * height * depth * f.bytes_per_pixel;
- }
-
- width /= 2;
- height /= 2;
- depth /= 2;
- }
- break;
- }
- default:
- assert(false);
- break;
- }
-}
-
-static TextureBinding* generate_texture(const TextureShape s,
- const uint8_t *texture_data,
- const uint8_t *palette_data)
-{
- ColorFormatInfo f = kelvin_color_format_map[s.color_format];
-
- /* Create a new opengl texture */
- GLuint gl_texture;
- glGenTextures(1, &gl_texture);
-
- GLenum gl_target;
- if (s.cubemap) {
- assert(f.linear == false);
- assert(s.dimensionality == 2);
- gl_target = GL_TEXTURE_CUBE_MAP;
- } else {
- if (f.linear) {
- /* linear textures use unnormalised texcoords.
- * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but
- * does not allow repeat and mirror wrap modes.
- * (or mipmapping, but xbox d3d says 'Non swizzled and non
- * compressed textures cannot be mip mapped.')
- * Not sure if that'll be an issue. */
-
- /* FIXME: GLSL 330 provides us with textureSize()! Use that? */
- gl_target = GL_TEXTURE_RECTANGLE;
- assert(s.dimensionality == 2);
- } else {
- switch(s.dimensionality) {
- case 1: gl_target = GL_TEXTURE_1D; break;
- case 2: gl_target = GL_TEXTURE_2D; break;
- case 3: gl_target = GL_TEXTURE_3D; break;
- default:
- assert(false);
- break;
- }
- }
- }
-
- glBindTexture(gl_target, gl_texture);
-
- NV2A_GL_DLABEL(GL_TEXTURE, gl_texture,
- "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, "
- "width: %d, height: %d, depth: %d",
- texture_data - g_nv2a->vram_ptr,
- s.color_format, f.linear ? "" : " (SZ)",
- s.dimensionality, s.cubemap ? " (Cubemap)" : "",
- s.width, s.height, s.depth);
-
- if (gl_target == GL_TEXTURE_CUBE_MAP) {
-
- ColorFormatInfo f = kelvin_color_format_map[s.color_format];
- unsigned int block_size;
- if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
- block_size = 8;
- } else {
- block_size = 16;
- }
-
- size_t length = 0;
- unsigned int w = s.width;
- unsigned int h = s.height;
- if (!f.linear && s.border) {
- w = MAX(16, w * 2);
- h = MAX(16, h * 2);
- }
-
- int level;
- for (level = 0; level < s.levels; level++) {
- if (f.gl_format == 0) {
- length += w/4 * h/4 * block_size;
- } else {
- length += w * h * f.bytes_per_pixel;
- }
-
- w /= 2;
- h /= 2;
- }
-
- length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
-
- upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X,
- s, texture_data + 0 * length, palette_data);
- upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
- s, texture_data + 1 * length, palette_data);
- upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
- s, texture_data + 2 * length, palette_data);
- upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
- s, texture_data + 3 * length, palette_data);
- upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
- s, texture_data + 4 * length, palette_data);
- upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
- s, texture_data + 5 * length, palette_data);
- } else {
- upload_gl_texture(gl_target, s, texture_data, palette_data);
- }
-
- /* Linear textures don't support mipmapping */
- if (!f.linear) {
- glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL,
- s.min_mipmap_level);
- glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL,
- s.levels - 1);
- }
-
- if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0
- || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) {
- glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA,
- (const GLint *)f.gl_swizzle_mask);
- }
-
- TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding));
- ret->gl_target = gl_target;
- ret->gl_texture = gl_texture;
- ret->refcnt = 1;
- ret->draw_time = 0;
- ret->data_hash = 0;
- ret->min_filter = 0xFFFFFFFF;
- ret->mag_filter = 0xFFFFFFFF;
- ret->addru = 0xFFFFFFFF;
- ret->addrv = 0xFFFFFFFF;
- ret->addrp = 0xFFFFFFFF;
- ret->border_color_set = false;
- return ret;
-}
-
-static void texture_binding_destroy(gpointer data)
-{
- TextureBinding *binding = (TextureBinding *)data;
- assert(binding->refcnt > 0);
- binding->refcnt--;
- if (binding->refcnt == 0) {
- glDeleteTextures(1, &binding->gl_texture);
- g_free(binding);
- }
-}
-
-/* functions for texture LRU cache */
-static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key)
-{
- TextureLruNode *tnode = container_of(node, TextureLruNode, node);
- memcpy(&tnode->key, key, sizeof(TextureKey));
-
- tnode->binding = NULL;
- tnode->possibly_dirty = false;
-}
-
-static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
-{
- TextureLruNode *tnode = container_of(node, TextureLruNode, node);
- if (tnode->binding) {
- texture_binding_destroy(tnode->binding);
- tnode->binding = NULL;
- tnode->possibly_dirty = false;
- }
-}
-
-static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
-{
- TextureLruNode *tnode = container_of(node, TextureLruNode, node);
- return memcmp(&tnode->key, key, sizeof(TextureKey));
-}
-
-static unsigned int kelvin_map_stencil_op(uint32_t parameter)
-{
- unsigned int op;
- switch (parameter) {
- case NV097_SET_STENCIL_OP_V_KEEP:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break;
- case NV097_SET_STENCIL_OP_V_ZERO:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break;
- case NV097_SET_STENCIL_OP_V_REPLACE:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break;
- case NV097_SET_STENCIL_OP_V_INCRSAT:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break;
- case NV097_SET_STENCIL_OP_V_DECRSAT:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break;
- case NV097_SET_STENCIL_OP_V_INVERT:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break;
- case NV097_SET_STENCIL_OP_V_INCR:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break;
- case NV097_SET_STENCIL_OP_V_DECR:
- op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break;
- default:
- assert(false);
- break;
- }
- return op;
-}
-
-static unsigned int kelvin_map_polygon_mode(uint32_t parameter)
-{
- unsigned int mode;
- switch (parameter) {
- case NV097_SET_FRONT_POLYGON_MODE_V_POINT:
- mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break;
- case NV097_SET_FRONT_POLYGON_MODE_V_LINE:
- mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break;
- case NV097_SET_FRONT_POLYGON_MODE_V_FILL:
- mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break;
- default:
- assert(false);
- break;
- }
- return mode;
-}
-
-static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel)
-{
- assert(channel < 4);
- unsigned int texgen;
- switch (parameter) {
- case NV097_SET_TEXGEN_S_DISABLE:
- texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break;
- case NV097_SET_TEXGEN_S_EYE_LINEAR:
- texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break;
- case NV097_SET_TEXGEN_S_OBJECT_LINEAR:
- texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break;
- case NV097_SET_TEXGEN_S_SPHERE_MAP:
- assert(channel < 2);
- texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break;
- case NV097_SET_TEXGEN_S_REFLECTION_MAP:
- assert(channel < 3);
- texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break;
- case NV097_SET_TEXGEN_S_NORMAL_MAP:
- assert(channel < 3);
- texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break;
- default:
- assert(false);
- break;
- }
- return texgen;
-}
diff --git a/hw/xbox/nv2a/pgraph/debug_renderdoc.c b/hw/xbox/nv2a/pgraph/debug_renderdoc.c
new file mode 100644
index 0000000000..ded339e23f
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/debug_renderdoc.c
@@ -0,0 +1,84 @@
+/*
+ * Geforce NV2A PGRAPH Renderdoc Helpers
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include "thirdparty/renderdoc_app.h"
+
+#include "hw/xbox/nv2a/debug.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+static RENDERDOC_API_1_6_0 *rdoc_api = NULL;
+
+int renderdoc_capture_frames = 0;
+
+void nv2a_dbg_renderdoc_init(void)
+{
+ if (rdoc_api) {
+ return;
+ }
+
+#ifdef _WIN32
+ HMODULE renderdoc = GetModuleHandleA("renderdoc.dll");
+ if (renderdoc) {
+ pRENDERDOC_GetAPI RENDERDOC_GetAPI =
+ (pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI");
+#else
+ void *renderdoc = dlopen(
+#ifdef __APPLE__
+ "librenderdoc.dylib",
+#else
+ "librenderdoc.so",
+#endif
+ RTLD_LAZY);
+ if (renderdoc) {
+ pRENDERDOC_GetAPI RENDERDOC_GetAPI =
+ (pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI");
+#endif // _WIN32
+ int ret =
+ RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api);
+ assert(ret == 1 && "Failed to retrieve RenderDoc API.");
+ } else {
+ fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", dlerror());
+ }
+}
+
+void *nv2a_dbg_renderdoc_get_api(void)
+{
+ return (void*)rdoc_api;
+}
+
+bool nv2a_dbg_renderdoc_available(void)
+{
+ return rdoc_api != NULL;
+}
+
+void nv2a_dbg_renderdoc_capture_frames(int num_frames)
+{
+ renderdoc_capture_frames += num_frames;
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/blit.c b/hw/xbox/nv2a/pgraph/gl/blit.c
new file mode 100644
index 0000000000..b4cce8a5ef
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/blit.c
@@ -0,0 +1,174 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "renderer.h"
+
+// TODO: Optimize. Ideally this should all be done via OpenGL.
+void pgraph_gl_image_blit(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d;
+ ImageBlitState *image_blit = &pg->image_blit;
+ BetaState *beta = &pg->beta;
+
+ pgraph_gl_surface_update(d, false, true, true);
+
+ assert(context_surfaces->object_instance == image_blit->context_surfaces);
+
+ unsigned int bytes_per_pixel;
+ switch (context_surfaces->color_format) {
+ case NV062_SET_COLOR_FORMAT_LE_Y8:
+ bytes_per_pixel = 1;
+ break;
+ case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
+ bytes_per_pixel = 2;
+ break;
+ case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
+ case NV062_SET_COLOR_FORMAT_LE_Y32:
+ bytes_per_pixel = 4;
+ break;
+ default:
+ fprintf(stderr, "Unknown blit surface format: 0x%x\n",
+ context_surfaces->color_format);
+ assert(false);
+ break;
+ }
+
+ hwaddr source_dma_len, dest_dma_len;
+
+ uint8_t *source = (uint8_t *)nv_dma_map(
+ d, context_surfaces->dma_image_source, &source_dma_len);
+ assert(context_surfaces->source_offset < source_dma_len);
+ source += context_surfaces->source_offset;
+
+ uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest,
+ &dest_dma_len);
+ assert(context_surfaces->dest_offset < dest_dma_len);
+ dest += context_surfaces->dest_offset;
+
+ hwaddr source_addr = source - d->vram_ptr;
+ hwaddr dest_addr = dest - d->vram_ptr;
+
+ SurfaceBinding *surf_src = pgraph_gl_surface_get(d, source_addr);
+ if (surf_src) {
+ pgraph_gl_surface_download_if_dirty(d, surf_src);
+ }
+
+ SurfaceBinding *surf_dest = pgraph_gl_surface_get(d, dest_addr);
+ if (surf_dest) {
+ if (image_blit->height < surf_dest->height ||
+ image_blit->width < surf_dest->width) {
+ pgraph_gl_surface_download_if_dirty(d, surf_dest);
+ } else {
+ // The blit will completely replace the surface so any pending
+ // download should be discarded.
+ surf_dest->download_pending = false;
+ surf_dest->draw_dirty = false;
+ }
+ surf_dest->upload_pending = true;
+ pg->draw_time++;
+ }
+
+ hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch +
+ image_blit->in_x * bytes_per_pixel;
+ hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch +
+ image_blit->out_x * bytes_per_pixel;
+
+ hwaddr source_size =
+ (image_blit->height - 1) * context_surfaces->source_pitch +
+ image_blit->width * bytes_per_pixel;
+ hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch +
+ image_blit->width * bytes_per_pixel;
+
+ /* FIXME: What does hardware do in this case? */
+ assert(source_addr + source_offset + source_size <=
+ memory_region_size(d->vram));
+ assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram));
+
+ uint8_t *source_row = source + source_offset;
+ uint8_t *dest_row = dest + dest_offset;
+
+ if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
+ // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY");
+ for (unsigned int y = 0; y < image_blit->height; y++) {
+ memmove(dest_row, source_row, image_blit->width * bytes_per_pixel);
+ source_row += context_surfaces->source_pitch;
+ dest_row += context_surfaces->dest_pitch;
+ }
+ } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) {
+ // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND");
+ uint32_t max_beta_mult = 0x7f80;
+ uint32_t beta_mult = beta->beta >> 16;
+ uint32_t inv_beta_mult = max_beta_mult - beta_mult;
+ for (unsigned int y = 0; y < image_blit->height; y++) {
+ for (unsigned int x = 0; x < image_blit->width; x++) {
+ for (unsigned int ch = 0; ch < 3; ch++) {
+ uint32_t a = source_row[x * 4 + ch] * beta_mult;
+ uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult;
+ dest_row[x * 4 + ch] = (a + b) / max_beta_mult;
+ }
+ }
+ source_row += context_surfaces->source_pitch;
+ dest_row += context_surfaces->dest_pitch;
+ }
+ } else {
+ fprintf(stderr, "Unknown blit operation: 0x%x\n",
+ image_blit->operation);
+ assert(false && "Unknown blit operation");
+ }
+
+ NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr);
+
+ bool needs_alpha_patching;
+ uint8_t alpha_override;
+ switch (context_surfaces->color_format) {
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
+ needs_alpha_patching = true;
+ alpha_override = 0xff;
+ break;
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
+ needs_alpha_patching = true;
+ alpha_override = 0;
+ break;
+ default:
+ needs_alpha_patching = false;
+ alpha_override = 0;
+ }
+
+ if (needs_alpha_patching) {
+ dest_row = dest + dest_offset;
+ for (unsigned int y = 0; y < image_blit->height; y++) {
+ for (unsigned int x = 0; x < image_blit->width; x++) {
+ dest_row[x * 4 + 3] = alpha_override;
+ }
+ dest_row += context_surfaces->dest_pitch;
+ }
+ }
+
+ dest_addr += dest_offset;
+ memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
+ DIRTY_MEMORY_VGA);
+ memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
+ DIRTY_MEMORY_NV2A_TEX);
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/constants.h b/hw/xbox/nv2a/pgraph/gl/constants.h
new file mode 100644
index 0000000000..d78b0054e3
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/constants.h
@@ -0,0 +1,322 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H
+#define HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H
+
+#include "qemu/osdep.h"
+#include "hw/xbox/nv2a/nv2a_regs.h"
+#include "gloffscreen.h"
+
+static const GLenum pgraph_texture_min_filter_gl_map[] = {
+ 0,
+ GL_NEAREST,
+ GL_LINEAR,
+ GL_NEAREST_MIPMAP_NEAREST,
+ GL_LINEAR_MIPMAP_NEAREST,
+ GL_NEAREST_MIPMAP_LINEAR,
+ GL_LINEAR_MIPMAP_LINEAR,
+ GL_LINEAR,
+};
+
+static const GLenum pgraph_texture_mag_filter_gl_map[] = {
+ 0,
+ GL_NEAREST,
+ GL_LINEAR,
+ 0,
+ GL_LINEAR /* TODO: Convolution filter... */
+};
+
+static const GLenum pgraph_texture_addr_gl_map[] = {
+ 0,
+ GL_REPEAT,
+ GL_MIRRORED_REPEAT,
+ GL_CLAMP_TO_EDGE,
+ GL_CLAMP_TO_BORDER,
+ GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */
+};
+
+static const GLenum pgraph_blend_factor_gl_map[] = {
+ GL_ZERO,
+ GL_ONE,
+ GL_SRC_COLOR,
+ GL_ONE_MINUS_SRC_COLOR,
+ GL_SRC_ALPHA,
+ GL_ONE_MINUS_SRC_ALPHA,
+ GL_DST_ALPHA,
+ GL_ONE_MINUS_DST_ALPHA,
+ GL_DST_COLOR,
+ GL_ONE_MINUS_DST_COLOR,
+ GL_SRC_ALPHA_SATURATE,
+ 0,
+ GL_CONSTANT_COLOR,
+ GL_ONE_MINUS_CONSTANT_COLOR,
+ GL_CONSTANT_ALPHA,
+ GL_ONE_MINUS_CONSTANT_ALPHA,
+};
+
+static const GLenum pgraph_blend_equation_gl_map[] = {
+ GL_FUNC_SUBTRACT,
+ GL_FUNC_REVERSE_SUBTRACT,
+ GL_FUNC_ADD,
+ GL_MIN,
+ GL_MAX,
+ GL_FUNC_REVERSE_SUBTRACT,
+ GL_FUNC_ADD,
+};
+
+/* FIXME
+static const GLenum pgraph_blend_logicop_map[] = {
+ GL_CLEAR,
+ GL_AND,
+ GL_AND_REVERSE,
+ GL_COPY,
+ GL_AND_INVERTED,
+ GL_NOOP,
+ GL_XOR,
+ GL_OR,
+ GL_NOR,
+ GL_EQUIV,
+ GL_INVERT,
+ GL_OR_REVERSE,
+ GL_COPY_INVERTED,
+ GL_OR_INVERTED,
+ GL_NAND,
+ GL_SET,
+};
+*/
+
+static const GLenum pgraph_cull_face_gl_map[] = {
+ 0,
+ GL_FRONT,
+ GL_BACK,
+ GL_FRONT_AND_BACK
+};
+
+static const GLenum pgraph_depth_func_gl_map[] = {
+ GL_NEVER,
+ GL_LESS,
+ GL_EQUAL,
+ GL_LEQUAL,
+ GL_GREATER,
+ GL_NOTEQUAL,
+ GL_GEQUAL,
+ GL_ALWAYS,
+};
+
+static const GLenum pgraph_stencil_func_gl_map[] = {
+ GL_NEVER,
+ GL_LESS,
+ GL_EQUAL,
+ GL_LEQUAL,
+ GL_GREATER,
+ GL_NOTEQUAL,
+ GL_GEQUAL,
+ GL_ALWAYS,
+};
+
+static const GLenum pgraph_stencil_op_gl_map[] = {
+ 0,
+ GL_KEEP,
+ GL_ZERO,
+ GL_REPLACE,
+ GL_INCR,
+ GL_DECR,
+ GL_INVERT,
+ GL_INCR_WRAP,
+ GL_DECR_WRAP,
+};
+
+typedef struct ColorFormatInfo {
+ unsigned int bytes_per_pixel;
+ bool linear;
+ GLint gl_internal_format;
+ GLenum gl_format;
+ GLenum gl_type;
+ GLenum gl_swizzle_mask[4];
+ bool depth;
+} ColorFormatInfo;
+
+static const ColorFormatInfo kelvin_color_format_gl_map[66] = {
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] =
+ {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_RED, GL_RED, GL_ONE}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] =
+ {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_RED, GL_RED, GL_RED}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] =
+ {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] =
+ {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] =
+ {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] =
+ {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] =
+ {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] =
+ {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+
+ /* paletted texture */
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] =
+ {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] =
+ {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] =
+ {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] =
+ {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] =
+ {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] =
+ {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] =
+ {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] =
+ {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_RED, GL_RED, GL_ONE}},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] =
+ {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] =
+ {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+ {GL_ONE, GL_ONE, GL_ONE, GL_RED}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] =
+ {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_RED, GL_RED, GL_GREEN}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] =
+ {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_RED, GL_RED, GL_RED}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] =
+ {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] =
+ {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] =
+ {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] =
+ {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+ {GL_ONE, GL_ONE, GL_ONE, GL_RED}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] =
+ {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_RED, GL_RED, GL_GREEN}},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] =
+ {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] =
+ {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
+ {GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] =
+ {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
+ {GL_GREEN, GL_RED, GL_RED, GL_GREEN}},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] =
+ {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] =
+ {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
+
+ /* Additional information is passed to the pixel shader via the swizzle:
+ * RED: The depth value.
+ * GREEN: 0 for 16-bit, 1 for 24 bit
+ * BLUE: 0 for fixed, 1 for float
+ */
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] =
+ {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
+ {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] =
+ {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
+ {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] =
+ /* FIXME: Uses fixed-point format to match surface format hack below. */
+ {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
+ {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] =
+ {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
+ {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] =
+ {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT,
+ {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] =
+ {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT,
+ {GL_RED, GL_RED, GL_RED, GL_ONE}},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] =
+ {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] =
+ {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] =
+ {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] =
+ {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] =
+ {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] =
+ {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}
+};
+
+typedef struct SurfaceFormatInfo {
+ unsigned int bytes_per_pixel;
+ GLint gl_internal_format;
+ GLenum gl_format;
+ GLenum gl_type;
+ GLenum gl_attachment;
+} SurfaceFormatInfo;
+
+static const SurfaceFormatInfo kelvin_surface_color_format_gl_map[] = {
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
+ {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0},
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
+ {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0},
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
+ {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
+ {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
+
+ // FIXME: Map channel color
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
+ {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0},
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
+ {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0},
+};
+
+static const SurfaceFormatInfo kelvin_surface_zeta_float_format_gl_map[] = {
+ [NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
+ {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT},
+ [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
+ /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for
+ * now just emulate this with fixed-point Z24S8. Possible compat
+ * improvement with custom conversion.
+ */
+ {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
+};
+
+static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_gl_map[] = {
+ [NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
+ {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT},
+ [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
+ {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
+};
+
+#endif
diff --git a/hw/xbox/nv2a/debug.c b/hw/xbox/nv2a/pgraph/gl/debug.c
similarity index 77%
rename from hw/xbox/nv2a/debug.c
rename to hw/xbox/nv2a/pgraph/gl/debug.c
index def94cdba1..8e7f49e47c 100644
--- a/hw/xbox/nv2a/debug.c
+++ b/hw/xbox/nv2a/pgraph/gl/debug.c
@@ -1,5 +1,5 @@
/*
- * QEMU Geforce NV2A debug helpers
+ * Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2012 espes
@@ -18,6 +18,7 @@
* License along with this library; if not, see .
*/
+#include "renderer.h"
#include "debug.h"
#ifdef DEBUG_NV2A_GL
@@ -28,15 +29,8 @@
#include
#ifdef CONFIG_RENDERDOC
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"
-#ifdef _WIN32
-#include
-#else
-#include
-#endif
-
-static RENDERDOC_API_1_1_2 *rdoc_api = NULL;
-static int32_t renderdoc_capture_frames = 0;
#endif
#define CHECK_GL_ERROR() do { \
@@ -74,31 +68,7 @@ void gl_debug_initialize(void)
}
#ifdef CONFIG_RENDERDOC
- const char *renderdoc_lib;
- void* renderdoc;
-#ifdef __APPLE__
- renderdoc_lib = "librenderdoc.dylib";
-#elif _WIN32
- renderdoc_lib = "renderdoc.dll";
-#else
- renderdoc_lib = "librenderdoc.so";
-#endif
-
-#ifdef _WIN32
- renderdoc = GetModuleHandleA(renderdoc_lib);
- if (renderdoc) {
- pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress(
- renderdoc, "RENDERDOC_GetAPI");
-#else
- renderdoc = dlopen(renderdoc_lib, RTLD_NOW | RTLD_NOLOAD);
- if (renderdoc) {
- pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym(
- renderdoc, "RENDERDOC_GetAPI");
-#endif
- int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2,
- (void **)&rdoc_api);
- assert(ret == 1 && "Failed to retrieve RenderDoc API.");
- }
+ nv2a_dbg_renderdoc_init();
#endif
}
@@ -179,7 +149,10 @@ void gl_debug_frame_terminator(void)
CHECK_GL_ERROR();
#ifdef CONFIG_RENDERDOC
- if (rdoc_api) {
+ if (nv2a_dbg_renderdoc_available()) {
+
+ RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api();
+
if (rdoc_api->IsTargetControlConnected()) {
if (rdoc_api->IsFrameCapturing()) {
rdoc_api->EndFrameCapture(NULL, NULL);
@@ -190,7 +163,7 @@ void gl_debug_frame_terminator(void)
error);
}
}
- if (renderdoc_capture_frames) {
+ if (renderdoc_capture_frames > 0) {
rdoc_api->StartFrameCapture(NULL, NULL);
GLenum error = glGetError();
if (error != GL_NO_ERROR) {
@@ -203,22 +176,10 @@ void gl_debug_frame_terminator(void)
}
}
#endif
- if (!has_GL_GREMEDY_frame_terminator) {
- return;
+ if (has_GL_GREMEDY_frame_terminator) {
+ glFrameTerminatorGREMEDY();
+ CHECK_GL_ERROR();
}
-
- glFrameTerminatorGREMEDY();
- CHECK_GL_ERROR();
}
-#ifdef CONFIG_RENDERDOC
-bool nv2a_dbg_renderdoc_available(void) {
- return rdoc_api != NULL;
-}
-
-void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames) {
- renderdoc_capture_frames = num_frames;
-}
-#endif
-
#endif // DEBUG_NV2A_GL
diff --git a/hw/xbox/nv2a/pgraph/gl/debug.h b/hw/xbox/nv2a/pgraph/gl/debug.h
new file mode 100644
index 0000000000..c242e1f384
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/debug.h
@@ -0,0 +1,60 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2012 espes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H
+#define HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H
+
+// #define DEBUG_NV2A_GL
+#ifdef DEBUG_NV2A_GL
+
+#include
+#include "gloffscreen.h"
+#include "config-host.h"
+
+void gl_debug_initialize(void);
+void gl_debug_message(bool cc, const char *fmt, ...);
+void gl_debug_group_begin(const char *fmt, ...);
+void gl_debug_group_end(void);
+void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
+void gl_debug_frame_terminator(void);
+
+# define NV2A_GL_DPRINTF(cc, format, ...) \
+ gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__)
+# define NV2A_GL_DGROUP_BEGIN(format, ...) \
+ gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__)
+# define NV2A_GL_DGROUP_END() \
+ gl_debug_group_end()
+# define NV2A_GL_DLABEL(target, name, format, ...) \
+ gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__)
+#define NV2A_GL_DFRAME_TERMINATOR() \
+ gl_debug_frame_terminator()
+
+#else
+
+# define NV2A_GL_DPRINTF(cc, format, ...) do { \
+ if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \
+ } while (0)
+# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0)
+# define NV2A_GL_DGROUP_END() do { } while (0)
+# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0)
+# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0)
+#endif
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c
new file mode 100644
index 0000000000..804fec2c2d
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/display.c
@@ -0,0 +1,407 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/pgraph/util.h"
+#include "renderer.h"
+
+#include
+
+void pgraph_gl_init_display_renderer(NV2AState *d)
+{
+ struct PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ glGenTextures(1, &r->gl_display_buffer);
+ r->gl_display_buffer_internal_format = 0;
+ r->gl_display_buffer_width = 0;
+ r->gl_display_buffer_height = 0;
+ r->gl_display_buffer_format = 0;
+ r->gl_display_buffer_type = 0;
+
+ const char *vs =
+ "#version 330\n"
+ "void main()\n"
+ "{\n"
+ " float x = -1.0 + float((gl_VertexID & 1) << 2);\n"
+ " float y = -1.0 + float((gl_VertexID & 2) << 1);\n"
+ " gl_Position = vec4(x, y, 0, 1);\n"
+ "}\n";
+ /* FIXME: improve interlace handling, pvideo */
+
+ const char *fs =
+ "#version 330\n"
+ "uniform sampler2D tex;\n"
+ "uniform bool pvideo_enable;\n"
+ "uniform sampler2D pvideo_tex;\n"
+ "uniform vec2 pvideo_in_pos;\n"
+ "uniform vec4 pvideo_pos;\n"
+ "uniform vec3 pvideo_scale;\n"
+ "uniform bool pvideo_color_key_enable;\n"
+ "uniform vec4 pvideo_color_key;\n"
+ "uniform vec2 display_size;\n"
+ "uniform float line_offset;\n"
+ "layout(location = 0) out vec4 out_Color;\n"
+ "void main()\n"
+ "{\n"
+ " vec2 texCoord = gl_FragCoord.xy/display_size;\n"
+ " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
+ " texCoord.y = 1 + rel*(texCoord.y - 1);"
+ " out_Color.rgba = texture(tex, texCoord);\n"
+ " if (pvideo_enable) {\n"
+ " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
+ " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
+ " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
+ " greaterThan(screenCoord, output_region.zw));\n"
+ " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
+ " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
+ " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
+ " in_st.y *= -1.0;\n"
+ " out_Color.rgba = texture(pvideo_tex, in_st);\n"
+ " }\n"
+ " }\n"
+ "}\n";
+
+ r->disp_rndr.prog = pgraph_gl_compile_shader(vs, fs);
+ r->disp_rndr.tex_loc = glGetUniformLocation(r->disp_rndr.prog, "tex");
+ r->disp_rndr.pvideo_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_enable");
+ r->disp_rndr.pvideo_tex_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_tex");
+ r->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_in_pos");
+ r->disp_rndr.pvideo_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_pos");
+ r->disp_rndr.pvideo_scale_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_scale");
+ r->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key_enable");
+ r->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key");
+ r->disp_rndr.display_size_loc = glGetUniformLocation(r->disp_rndr.prog, "display_size");
+ r->disp_rndr.line_offset_loc = glGetUniformLocation(r->disp_rndr.prog, "line_offset");
+
+ glGenVertexArrays(1, &r->disp_rndr.vao);
+ glBindVertexArray(r->disp_rndr.vao);
+ glGenBuffers(1, &r->disp_rndr.vbo);
+ glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
+ glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
+ glGenFramebuffers(1, &r->disp_rndr.fbo);
+ glGenTextures(1, &r->disp_rndr.pvideo_tex);
+ assert(glGetError() == GL_NO_ERROR);
+}
+
+static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data,
+ unsigned int width,
+ unsigned int height,
+ unsigned int pitch)
+{
+ uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4);
+ int x, y;
+ for (y = 0; y < height; y++) {
+ const uint8_t *line = &data[y * pitch];
+ const uint32_t row_offset = y * width;
+ for (x = 0; x < width; x++) {
+ uint8_t *pixel = &converted_data[(row_offset + x) * 4];
+ convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]);
+ pixel[3] = 255;
+ }
+ }
+ return converted_data;
+}
+
+static float pvideo_calculate_scale(unsigned int din_dout,
+ unsigned int output_size)
+{
+ float calculated_in = din_dout * (output_size - 1);
+ calculated_in = floorf(calculated_in / (1 << 20) + 0.5f);
+ return (calculated_in + 1.0f) / output_size;
+}
+
+static void render_display_pvideo_overlay(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
+ // Many games seem to pass this value when initializing or tearing down
+ // PVIDEO. On its own, this generally does not result in the overlay being
+ // hidden, however there are certain games (e.g., Ultimate Beach Soccer)
+ // that use an unknown mechanism to hide the overlay without explicitly
+ // stopping it.
+ // Since the value seems to be set to 0xFFFFFFFF only in cases where the
+ // content is not valid, it is probably good enough to treat it as an
+ // implicit stop.
+ bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
+ && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
+ glUniform1ui(r->disp_rndr.pvideo_enable_loc, enabled);
+ if (!enabled) {
+ return;
+ }
+
+ hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
+ hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
+ hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
+
+ int in_width =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
+ int in_height =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
+
+ int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+ NV_PVIDEO_POINT_IN_S);
+ int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+ NV_PVIDEO_POINT_IN_T);
+
+ int in_pitch =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
+ int in_color =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
+
+ unsigned int out_width =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
+ unsigned int out_height =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
+
+ float scale_x = 1.0f;
+ float scale_y = 1.0f;
+ unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
+ unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
+ if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
+ scale_x = pvideo_calculate_scale(ds_dx, out_width);
+ }
+ if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
+ scale_y = pvideo_calculate_scale(dt_dy, out_height);
+ }
+
+ // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
+ // in them being capped to the output size, content is not scaled. This is
+ // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
+ // during initialization or teardown.
+ if (in_width > out_width) {
+ in_width = floorf((float)out_width * scale_x + 0.5f);
+ }
+ if (in_height > out_height) {
+ in_height = floorf((float)out_height * scale_y + 0.5f);
+ }
+
+ /* TODO: support other color formats */
+ assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
+
+ unsigned int out_x =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
+ unsigned int out_y =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
+
+ unsigned int color_key_enabled =
+ GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
+ glUniform1ui(r->disp_rndr.pvideo_color_key_enable_loc,
+ color_key_enabled);
+
+ // TODO: Verify that masking off the top byte is correct.
+ // SeaBlade sets a color key of 0x80000000 but the texture passed into the
+ // shader is cleared to 0 alpha.
+ unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
+ glUniform4f(r->disp_rndr.pvideo_color_key_loc,
+ GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
+ GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
+ GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
+ GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
+
+ assert(offset + in_pitch * in_height <= limit);
+ hwaddr end = base + offset + in_pitch * in_height;
+ assert(end <= memory_region_size(d->vram));
+
+ pgraph_apply_scaling_factor(pg, &out_x, &out_y);
+ pgraph_apply_scaling_factor(pg, &out_width, &out_height);
+
+ // Translate for the GL viewport origin.
+ out_y = MAX(r->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
+
+ glActiveTexture(GL_TEXTURE0 + 1);
+ glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
+ d->vram_ptr + base + offset, in_width, in_height, in_pitch);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
+ GL_UNSIGNED_BYTE, tex_rgba);
+ g_free(tex_rgba);
+ glUniform1i(r->disp_rndr.pvideo_tex_loc, 1);
+ glUniform2f(r->disp_rndr.pvideo_in_pos_loc, in_s, in_t);
+ glUniform4f(r->disp_rndr.pvideo_pos_loc,
+ out_x, out_y, out_width, out_height);
+ glUniform3f(r->disp_rndr.pvideo_scale_loc,
+ scale_x, scale_y, 1.0f / pg->surface_scale_factor);
+}
+
+static void render_display(NV2AState *d, SurfaceBinding *surface)
+{
+ struct PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ unsigned int width, height;
+ uint32_t pline_offset, pstart_addr, pline_compare;
+ d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
+ d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+ int line_offset = surface->pitch / pline_offset;
+
+ /* Adjust viewport height for interlaced mode, used only in 1080i */
+ if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
+ height *= 2;
+ }
+
+ pgraph_apply_scaling_factor(pg, &width, &height);
+
+ glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo);
+ glActiveTexture(GL_TEXTURE0);
+ glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer);
+ bool recreate = (
+ surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format
+ || width != r->gl_display_buffer_width
+ || height != r->gl_display_buffer_height
+ || surface->fmt.gl_format != r->gl_display_buffer_format
+ || surface->fmt.gl_type != r->gl_display_buffer_type
+ );
+
+ if (recreate) {
+ /* XXX: There's apparently a bug in some Intel OpenGL drivers for
+ * Windows that will leak this texture when its orphaned after use in
+ * another context, apparently regardless of which thread it's created
+ * or released on.
+ *
+ * Driver: 27.20.100.8729 9/11/2020 W10 x64
+ * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423
+ */
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format;
+ r->gl_display_buffer_width = width;
+ r->gl_display_buffer_height = height;
+ r->gl_display_buffer_format = surface->fmt.gl_format;
+ r->gl_display_buffer_type = surface->fmt.gl_type;
+ glTexImage2D(GL_TEXTURE_2D, 0,
+ r->gl_display_buffer_internal_format,
+ r->gl_display_buffer_width,
+ r->gl_display_buffer_height,
+ 0,
+ r->gl_display_buffer_format,
+ r->gl_display_buffer_type,
+ NULL);
+ }
+
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_TEXTURE_2D, r->gl_display_buffer, 0);
+ GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
+ glDrawBuffers(1, DrawBuffers);
+ assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
+
+ glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
+ glBindVertexArray(r->disp_rndr.vao);
+ glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
+ glUseProgram(r->disp_rndr.prog);
+ glProgramUniform1i(r->disp_rndr.prog, r->disp_rndr.tex_loc, 0);
+ glUniform2f(r->disp_rndr.display_size_loc, width, height);
+ glUniform1f(r->disp_rndr.line_offset_loc, line_offset);
+ render_display_pvideo_overlay(d);
+
+ glViewport(0, 0, width, height);
+ glColorMask(true, true, true, true);
+ glDisable(GL_SCISSOR_TEST);
+ glDisable(GL_BLEND);
+ glDisable(GL_STENCIL_TEST);
+ glDisable(GL_CULL_FACE);
+ glDisable(GL_DEPTH_TEST);
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+ glClear(GL_COLOR_BUFFER_BIT);
+ glDrawArrays(GL_TRIANGLES, 0, 3);
+
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+ GL_TEXTURE_2D, 0, 0);
+}
+
+static void gl_fence(void)
+{
+ GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT,
+ (GLuint64)(5000000000));
+ assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED);
+ glDeleteSync(fence);
+}
+
+void pgraph_gl_sync(NV2AState *d)
+{
+ uint32_t pline_offset, pstart_addr, pline_compare;
+ d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+ SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset);
+ if (surface == NULL) {
+ qemu_event_set(&d->pgraph.sync_complete);
+ return;
+ }
+
+ /* FIXME: Sanity check surface dimensions */
+
+ /* Wait for queued commands to complete */
+ pgraph_gl_upload_surface_data(d, surface, !tcg_enabled());
+ gl_fence();
+ assert(glGetError() == GL_NO_ERROR);
+
+ /* Render framebuffer in display context */
+ glo_set_current(g_nv2a_context_display);
+ render_display(d, surface);
+ gl_fence();
+ assert(glGetError() == GL_NO_ERROR);
+
+ /* Switch back to original context */
+ glo_set_current(g_nv2a_context_render);
+
+ qatomic_set(&d->pgraph.sync_pending, false);
+ qemu_event_set(&d->pgraph.sync_complete);
+}
+
+int pgraph_gl_get_framebuffer_surface(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ qemu_mutex_lock(&d->pfifo.lock);
+ // FIXME: Possible race condition with pgraph, consider lock
+ uint32_t pline_offset, pstart_addr, pline_compare;
+ d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+ SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset);
+ if (surface == NULL || !surface->color) {
+ qemu_mutex_unlock(&d->pfifo.lock);
+ return 0;
+ }
+
+ assert(surface->color);
+ assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
+ assert(surface->fmt.gl_format == GL_RGBA
+ || surface->fmt.gl_format == GL_RGB
+ || surface->fmt.gl_format == GL_BGR
+ || surface->fmt.gl_format == GL_BGRA
+ );
+
+ surface->frame_time = pg->frame_time;
+ qemu_event_reset(&d->pgraph.sync_complete);
+ qatomic_set(&pg->sync_pending, true);
+ pfifo_kick(d);
+ qemu_mutex_unlock(&d->pfifo.lock);
+ qemu_event_wait(&d->pgraph.sync_complete);
+
+ return r->gl_display_buffer;
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c
new file mode 100644
index 0000000000..94e9beb50b
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/draw.c
@@ -0,0 +1,528 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "qemu/fast-hash.h"
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "debug.h"
+#include "renderer.h"
+
+void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ NV2A_DPRINTF("---------PRE CLEAR ------\n");
+ pg->clearing = true;
+
+ GLbitfield gl_mask = 0;
+
+ bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
+ bool write_zeta =
+ (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
+
+ if (write_zeta) {
+ GLint gl_clear_stencil;
+ GLfloat gl_clear_depth;
+ pgraph_get_clear_depth_stencil_value(pg, &gl_clear_depth,
+ &gl_clear_stencil);
+
+ if (parameter & NV097_CLEAR_SURFACE_Z) {
+ gl_mask |= GL_DEPTH_BUFFER_BIT;
+ glDepthMask(GL_TRUE);
+ glClearDepth(gl_clear_depth);
+ }
+ if (parameter & NV097_CLEAR_SURFACE_STENCIL) {
+ gl_mask |= GL_STENCIL_BUFFER_BIT;
+ glStencilMask(0xff);
+ glClearStencil(gl_clear_stencil);
+ }
+ }
+ if (write_color) {
+ gl_mask |= GL_COLOR_BUFFER_BIT;
+ glColorMask((parameter & NV097_CLEAR_SURFACE_R)
+ ? GL_TRUE : GL_FALSE,
+ (parameter & NV097_CLEAR_SURFACE_G)
+ ? GL_TRUE : GL_FALSE,
+ (parameter & NV097_CLEAR_SURFACE_B)
+ ? GL_TRUE : GL_FALSE,
+ (parameter & NV097_CLEAR_SURFACE_A)
+ ? GL_TRUE : GL_FALSE);
+
+ GLfloat rgba[4];
+ pgraph_get_clear_color(pg, rgba);
+ glClearColor(rgba[0], rgba[1], rgba[2], rgba[3]);
+ }
+
+ pgraph_gl_surface_update(d, true, write_color, write_zeta);
+
+ /* FIXME: Needs confirmation */
+ unsigned int xmin =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN);
+ unsigned int xmax =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX);
+ unsigned int ymin =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN);
+ unsigned int ymax =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX);
+
+ NV2A_DPRINTF(
+ "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n",
+ parameter, xmin, ymin, xmax, ymax,
+ d->pgraph.regs_[NV_PGRAPH_COLORCLEARVALUE]);
+
+ unsigned int scissor_width = xmax - xmin + 1,
+ scissor_height = ymax - ymin + 1;
+ pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
+ pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
+ ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
+
+ NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin,
+ xmin + scissor_width - 1, ymin + scissor_height - 1);
+
+ bool full_clear = !xmin && !ymin &&
+ scissor_width >= pg->surface_binding_dim.width &&
+ scissor_height >= pg->surface_binding_dim.height;
+
+ pgraph_apply_scaling_factor(pg, &xmin, &ymin);
+ pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
+
+ /* FIXME: Respect window clip?!?! */
+ glEnable(GL_SCISSOR_TEST);
+ glScissor(xmin, ymin, scissor_width, scissor_height);
+
+ /* Dither */
+ /* FIXME: Maybe also disable it here? + GL implementation dependent */
+ if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_DITHERENABLE) {
+ glEnable(GL_DITHER);
+ } else {
+ glDisable(GL_DITHER);
+ }
+
+ glClear(gl_mask);
+
+ glDisable(GL_SCISSOR_TEST);
+
+ pgraph_gl_set_surface_dirty(pg, write_color, write_zeta);
+
+ if (r->color_binding) {
+ r->color_binding->cleared = full_clear && write_color;
+ }
+ if (r->zeta_binding) {
+ r->zeta_binding->cleared = full_clear && write_zeta;
+ }
+
+ pg->clearing = false;
+}
+
+void pgraph_gl_draw_begin(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", pg->primitive_mode);
+
+ uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+ bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+ bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+ bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+ bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+ bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
+ bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+ bool stencil_test =
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+ bool is_nop_draw = !(color_write || depth_test || stencil_test);
+
+ pgraph_gl_surface_update(d, true, true, depth_test || stencil_test);
+
+ if (is_nop_draw) {
+ return;
+ }
+
+ assert(r->color_binding || r->zeta_binding);
+
+ pgraph_gl_bind_textures(d);
+ pgraph_gl_bind_shaders(pg);
+
+ glColorMask(mask_red, mask_green, mask_blue, mask_alpha);
+ glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE));
+ glStencilMask(GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE));
+
+ if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
+ glEnable(GL_BLEND);
+ uint32_t sfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
+ NV_PGRAPH_BLEND_SFACTOR);
+ uint32_t dfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
+ NV_PGRAPH_BLEND_DFACTOR);
+ assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
+ assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
+ glBlendFunc(pgraph_blend_factor_gl_map[sfactor],
+ pgraph_blend_factor_gl_map[dfactor]);
+
+ uint32_t equation = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
+ NV_PGRAPH_BLEND_EQN);
+ assert(equation < ARRAY_SIZE(pgraph_blend_equation_gl_map));
+ glBlendEquation(pgraph_blend_equation_gl_map[equation]);
+
+ uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
+ float gl_blend_color[4];
+ pgraph_argb_pack32_to_rgba_float(blend_color, gl_blend_color);
+ glBlendColor(gl_blend_color[0], gl_blend_color[1], gl_blend_color[2],
+ gl_blend_color[3]);
+ } else {
+ glDisable(GL_BLEND);
+ }
+
+ /* Face culling */
+ if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
+ & NV_PGRAPH_SETUPRASTER_CULLENABLE) {
+ uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
+ NV_PGRAPH_SETUPRASTER_CULLCTRL);
+ assert(cull_face < ARRAY_SIZE(pgraph_cull_face_gl_map));
+ glCullFace(pgraph_cull_face_gl_map[cull_face]);
+ glEnable(GL_CULL_FACE);
+ } else {
+ glDisable(GL_CULL_FACE);
+ }
+
+ /* Clipping */
+ glEnable(GL_CLIP_DISTANCE0);
+ glEnable(GL_CLIP_DISTANCE1);
+
+ /* Front-face select */
+ glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
+ & NV_PGRAPH_SETUPRASTER_FRONTFACE
+ ? GL_CCW : GL_CW);
+
+ /* Polygon offset */
+ /* FIXME: GL implementation-specific, maybe do this in VS? */
+ if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) {
+ glEnable(GL_POLYGON_OFFSET_FILL);
+ } else {
+ glDisable(GL_POLYGON_OFFSET_FILL);
+ }
+ if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) {
+ glEnable(GL_POLYGON_OFFSET_LINE);
+ } else {
+ glDisable(GL_POLYGON_OFFSET_LINE);
+ }
+ if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) {
+ glEnable(GL_POLYGON_OFFSET_POINT);
+ } else {
+ glDisable(GL_POLYGON_OFFSET_POINT);
+ }
+ if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
+ NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
+ NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
+ uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
+ GLfloat zfactor = *(float*)&zfactor_u32;
+ uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
+ GLfloat zbias = *(float*)&zbias_u32;
+ glPolygonOffset(zfactor, zbias);
+ }
+
+ /* Depth testing */
+ if (depth_test) {
+ glEnable(GL_DEPTH_TEST);
+
+ uint32_t depth_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
+ NV_PGRAPH_CONTROL_0_ZFUNC);
+ assert(depth_func < ARRAY_SIZE(pgraph_depth_func_gl_map));
+ glDepthFunc(pgraph_depth_func_gl_map[depth_func]);
+ } else {
+ glDisable(GL_DEPTH_TEST);
+ }
+
+ if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
+ NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
+ NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
+ glEnable(GL_DEPTH_CLAMP);
+ } else {
+ glDisable(GL_DEPTH_CLAMP);
+ }
+
+ if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
+ NV_PGRAPH_CONTROL_3_SHADEMODE) ==
+ NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) {
+ glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
+ }
+
+ if (stencil_test) {
+ glEnable(GL_STENCIL_TEST);
+
+ uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
+ uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_REF);
+ uint32_t func_mask = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
+ uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+ NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
+ uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+ NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
+ uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+ NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
+
+ assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_gl_map));
+ assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
+ assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
+ assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_gl_map));
+
+ glStencilFunc(
+ pgraph_stencil_func_gl_map[stencil_func],
+ stencil_ref,
+ func_mask);
+
+ glStencilOp(
+ pgraph_stencil_op_gl_map[op_fail],
+ pgraph_stencil_op_gl_map[op_zfail],
+ pgraph_stencil_op_gl_map[op_zpass]);
+
+ } else {
+ glDisable(GL_STENCIL_TEST);
+ }
+
+ /* Dither */
+ /* FIXME: GL implementation dependent */
+ if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
+ NV_PGRAPH_CONTROL_0_DITHERENABLE) {
+ glEnable(GL_DITHER);
+ } else {
+ glDisable(GL_DITHER);
+ }
+
+ glEnable(GL_PROGRAM_POINT_SIZE);
+
+ bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), NV_PGRAPH_ANTIALIASING_ENABLE);
+
+ /* Edge Antialiasing */
+ if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
+ glEnable(GL_LINE_SMOOTH);
+ } else {
+ glDisable(GL_LINE_SMOOTH);
+ }
+ if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
+ glEnable(GL_POLYGON_SMOOTH);
+ } else {
+ glDisable(GL_POLYGON_SMOOTH);
+ }
+
+ unsigned int vp_width = pg->surface_binding_dim.width,
+ vp_height = pg->surface_binding_dim.height;
+ pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
+ glViewport(0, 0, vp_width, vp_height);
+
+ /* Surface clip */
+ /* FIXME: Consider moving to PSH w/ window clip */
+ unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x,
+ ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y;
+ unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
+ ymax = ymin + pg->surface_shape.clip_height - 1;
+
+ unsigned int scissor_width = xmax - xmin + 1,
+ scissor_height = ymax - ymin + 1;
+ pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
+ pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
+ ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
+ pgraph_apply_scaling_factor(pg, &xmin, &ymin);
+ pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
+
+ glEnable(GL_SCISSOR_TEST);
+ glScissor(xmin, ymin, scissor_width, scissor_height);
+
+ /* Visibility testing */
+ if (pg->zpass_pixel_count_enable) {
+ r->gl_zpass_pixel_count_query_count++;
+ r->gl_zpass_pixel_count_queries = (GLuint*)g_realloc(
+ r->gl_zpass_pixel_count_queries,
+ sizeof(GLuint) * r->gl_zpass_pixel_count_query_count);
+
+ GLuint gl_query;
+ glGenQueries(1, &gl_query);
+ r->gl_zpass_pixel_count_queries[
+ r->gl_zpass_pixel_count_query_count - 1] = gl_query;
+ glBeginQuery(GL_SAMPLES_PASSED, gl_query);
+ }
+}
+
+void pgraph_gl_draw_end(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+ bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+ bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+ bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+ bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+ bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
+ bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+ bool stencil_test =
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+ bool is_nop_draw = !(color_write || depth_test || stencil_test);
+
+ if (is_nop_draw) {
+ // FIXME: Check PGRAPH register 0x880.
+ // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
+ // check that will raise an exception in the case that a draw should
+ // modify the color and/or zeta buffer but the target(s) are masked
+ // off. This check only seems to trigger during the fragment
+ // processing, it is legal to attempt a draw that is entirely
+ // clipped regardless of 0x880. See xemu#635 for context.
+ return;
+ }
+
+ pgraph_gl_flush_draw(d);
+
+ /* End of visibility testing */
+ if (pg->zpass_pixel_count_enable) {
+ nv2a_profile_inc_counter(NV2A_PROF_QUERY);
+ glEndQuery(GL_SAMPLES_PASSED);
+ }
+
+ pg->draw_time++;
+ if (r->color_binding && pgraph_color_write_enabled(pg)) {
+ r->color_binding->draw_time = pg->draw_time;
+ }
+ if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
+ r->zeta_binding->draw_time = pg->draw_time;
+ }
+
+ pgraph_gl_set_surface_dirty(pg, color_write, depth_test || stencil_test);
+ NV2A_GL_DGROUP_END();
+}
+
+void pgraph_gl_flush_draw(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ if (!(r->color_binding || r->zeta_binding)) {
+ return;
+ }
+ assert(r->shader_binding);
+
+ if (pg->draw_arrays_length) {
+ NV2A_GL_DPRINTF(false, "Draw Arrays");
+ nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);
+ assert(pg->inline_elements_length == 0);
+ assert(pg->inline_buffer_length == 0);
+ assert(pg->inline_array_length == 0);
+
+ pgraph_gl_bind_vertex_attributes(d, pg->draw_arrays_min_start,
+ pg->draw_arrays_max_count - 1,
+ false, 0,
+ pg->draw_arrays_max_count - 1);
+ glMultiDrawArrays(r->shader_binding->gl_primitive_mode,
+ pg->draw_arrays_start,
+ pg->draw_arrays_count,
+ pg->draw_arrays_length);
+ } else if (pg->inline_elements_length) {
+ NV2A_GL_DPRINTF(false, "Inline Elements");
+ nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);
+ assert(pg->inline_buffer_length == 0);
+ assert(pg->inline_array_length == 0);
+
+ uint32_t min_element = (uint32_t)-1;
+ uint32_t max_element = 0;
+ for (int i=0; i < pg->inline_elements_length; i++) {
+ max_element = MAX(pg->inline_elements[i], max_element);
+ min_element = MIN(pg->inline_elements[i], min_element);
+ }
+
+ pgraph_gl_bind_vertex_attributes(
+ d, min_element, max_element, false, 0,
+ pg->inline_elements[pg->inline_elements_length - 1]);
+
+ VertexKey k;
+ memset(&k, 0, sizeof(VertexKey));
+ k.count = pg->inline_elements_length;
+ k.gl_type = GL_UNSIGNED_INT;
+ k.gl_normalize = GL_FALSE;
+ k.stride = sizeof(uint32_t);
+ uint64_t h = fast_hash((uint8_t*)pg->inline_elements,
+ pg->inline_elements_length * 4);
+
+ LruNode *node = lru_lookup(&r->element_cache, h, &k);
+ VertexLruNode *found = container_of(node, VertexLruNode, node);
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer);
+ if (!found->initialized) {
+ nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4);
+ glBufferData(GL_ELEMENT_ARRAY_BUFFER,
+ pg->inline_elements_length * 4,
+ pg->inline_elements, GL_STATIC_DRAW);
+ found->initialized = true;
+ } else {
+ nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY);
+ }
+ glDrawElements(r->shader_binding->gl_primitive_mode,
+ pg->inline_elements_length, GL_UNSIGNED_INT,
+ (void *)0);
+ } else if (pg->inline_buffer_length) {
+ NV2A_GL_DPRINTF(false, "Inline Buffer");
+ nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
+ assert(pg->inline_array_length == 0);
+
+ if (pg->compressed_attrs) {
+ pg->compressed_attrs = 0;
+ pgraph_gl_bind_shaders(pg);
+ }
+
+ for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+ VertexAttribute *attr = &pg->vertex_attributes[i];
+ if (attr->inline_buffer_populated) {
+ nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
+ glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_buffer[i]);
+ glBufferData(GL_ARRAY_BUFFER,
+ pg->inline_buffer_length * sizeof(float) * 4,
+ attr->inline_buffer, GL_STREAM_DRAW);
+ glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
+ glEnableVertexAttribArray(i);
+ attr->inline_buffer_populated = false;
+ memcpy(attr->inline_value,
+ attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
+ sizeof(attr->inline_value));
+ } else {
+ glDisableVertexAttribArray(i);
+ glVertexAttrib4fv(i, attr->inline_value);
+ }
+ }
+
+ glDrawArrays(r->shader_binding->gl_primitive_mode,
+ 0, pg->inline_buffer_length);
+ } else if (pg->inline_array_length) {
+ NV2A_GL_DPRINTF(false, "Inline Array");
+ nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);
+
+ unsigned int index_count = pgraph_gl_bind_inline_array(d);
+ glDrawArrays(r->shader_binding->gl_primitive_mode,
+ 0, index_count);
+ } else {
+ NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END");
+ NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
+ }
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build
new file mode 100644
index 0000000000..ab25eacb7d
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/meson.build
@@ -0,0 +1,12 @@
+specific_ss.add([sdl, gloffscreen, files(
+ 'blit.c',
+ 'debug.c',
+ 'display.c',
+ 'draw.c',
+ 'renderer.c',
+ 'reports.c',
+ 'shaders.c',
+ 'surface.c',
+ 'texture.c',
+ 'vertex.c',
+ )])
diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c
new file mode 100644
index 0000000000..2114608683
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/renderer.c
@@ -0,0 +1,201 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/pgraph/pgraph.h"
+#include "debug.h"
+#include "renderer.h"
+
+GloContext *g_nv2a_context_render;
+GloContext *g_nv2a_context_display;
+
+static void nv2a_gl_context_init(void)
+{
+ g_nv2a_context_render = glo_context_create();
+ g_nv2a_context_display = glo_context_create();
+}
+
+static void pgraph_gl_init_thread(NV2AState *d)
+{
+ glo_set_current(g_nv2a_context_render);
+}
+
+static void pgraph_gl_deinit(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+
+ glo_set_current(g_nv2a_context_render);
+
+ pgraph_gl_deinit_surfaces(pg);
+ pgraph_gl_deinit_shader_cache(pg);
+ pgraph_gl_deinit_texture_cache(pg);
+
+ glo_set_current(NULL);
+ glo_context_destroy(g_nv2a_context_render);
+ glo_context_destroy(g_nv2a_context_display);
+}
+
+static void pgraph_gl_flip_stall(NV2AState *d)
+{
+ NV2A_GL_DFRAME_TERMINATOR();
+ glFinish();
+}
+
+static void pgraph_gl_flush(NV2AState *d)
+{
+ pgraph_gl_surface_flush(d);
+ pgraph_gl_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
+ pgraph_gl_update_entire_memory_buffer(d);
+ /* FIXME: Flush more? */
+
+ qatomic_set(&d->pgraph.flush_pending, false);
+ qemu_event_set(&d->pgraph.flush_complete);
+}
+
+static void pgraph_gl_process_pending(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ if (qatomic_read(&r->downloads_pending) ||
+ qatomic_read(&r->download_dirty_surfaces_pending) ||
+ qatomic_read(&d->pgraph.sync_pending) ||
+ qatomic_read(&d->pgraph.flush_pending) ||
+ qatomic_read(&r->shader_cache_writeback_pending)) {
+ qemu_mutex_unlock(&d->pfifo.lock);
+ qemu_mutex_lock(&d->pgraph.lock);
+ if (qatomic_read(&r->downloads_pending)) {
+ pgraph_gl_process_pending_downloads(d);
+ }
+ if (qatomic_read(&r->download_dirty_surfaces_pending)) {
+ pgraph_gl_download_dirty_surfaces(d);
+ }
+ if (qatomic_read(&d->pgraph.sync_pending)) {
+ pgraph_gl_sync(d);
+ }
+ if (qatomic_read(&d->pgraph.flush_pending)) {
+ pgraph_gl_flush(d);
+ }
+ if (qatomic_read(&r->shader_cache_writeback_pending)) {
+ pgraph_gl_shader_write_cache_reload_list(&d->pgraph);
+ }
+ qemu_mutex_unlock(&d->pgraph.lock);
+ qemu_mutex_lock(&d->pfifo.lock);
+ }
+}
+
+static void pgraph_gl_pre_savevm_trigger(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ qatomic_set(&r->download_dirty_surfaces_pending, true);
+ qemu_event_reset(&r->dirty_surfaces_download_complete);
+}
+
+static void pgraph_gl_pre_savevm_wait(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ qemu_event_wait(&r->dirty_surfaces_download_complete);
+}
+
+static void pgraph_gl_pre_shutdown_trigger(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ qatomic_set(&r->shader_cache_writeback_pending, true);
+ qemu_event_reset(&r->shader_cache_writeback_complete);
+}
+
+static void pgraph_gl_pre_shutdown_wait(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ qemu_event_wait(&r->shader_cache_writeback_complete);
+}
+
+static void pgraph_gl_init(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+
+ pg->gl_renderer_state = g_malloc(sizeof(PGRAPHGLState));
+
+ /* fire up opengl */
+ glo_set_current(g_nv2a_context_render);
+
+#ifdef DEBUG_NV2A_GL
+ gl_debug_initialize();
+#endif
+
+ /* DXT textures */
+ assert(glo_check_extension("GL_EXT_texture_compression_s3tc"));
+ /* Internal RGB565 texture format */
+ assert(glo_check_extension("GL_ARB_ES2_compatibility"));
+
+ pgraph_gl_init_surfaces(pg);
+ pgraph_gl_init_reports(d);
+ pgraph_gl_init_texture_cache(d);
+ pgraph_gl_init_vertex_cache(d);
+ pgraph_gl_init_shader_cache(pg);
+
+ glo_set_current(g_nv2a_context_display);
+ pgraph_gl_init_display_renderer(d);
+
+ glo_set_current(NULL);
+}
+
+static PGRAPHRenderer pgraph_gl_renderer = {
+ .type = CONFIG_DISPLAY_RENDERER_OPENGL,
+ .name = "OpenGL",
+ .ops = {
+ .init = pgraph_gl_init,
+ .early_context_init = nv2a_gl_context_init,
+ .init_thread = pgraph_gl_init_thread,
+ .finalize = pgraph_gl_deinit,
+ .clear_report_value = pgraph_gl_clear_report_value,
+ .clear_surface = pgraph_gl_clear_surface,
+ .draw_begin = pgraph_gl_draw_begin,
+ .draw_end = pgraph_gl_draw_end,
+ .flip_stall = pgraph_gl_flip_stall,
+ .flush_draw = pgraph_gl_flush_draw,
+ .get_report = pgraph_gl_get_report,
+ .image_blit = pgraph_gl_image_blit,
+ .pre_savevm_trigger = pgraph_gl_pre_savevm_trigger,
+ .pre_savevm_wait = pgraph_gl_pre_savevm_wait,
+ .pre_shutdown_trigger = pgraph_gl_pre_shutdown_trigger,
+ .pre_shutdown_wait = pgraph_gl_pre_shutdown_wait,
+ .process_pending = pgraph_gl_process_pending,
+ .process_pending_reports = pgraph_gl_process_pending_reports,
+ .surface_update = pgraph_gl_surface_update,
+ .set_surface_scale_factor = pgraph_gl_set_surface_scale_factor,
+ .get_surface_scale_factor = pgraph_gl_get_surface_scale_factor,
+ .get_framebuffer_surface = pgraph_gl_get_framebuffer_surface,
+ }
+};
+
+static void __attribute__((constructor)) register_renderer(void)
+{
+ pgraph_renderer_register(&pgraph_gl_renderer);
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h
new file mode 100644
index 0000000000..fff4ac7d53
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/renderer.h
@@ -0,0 +1,283 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H
+#define HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/queue.h"
+#include "qemu/lru.h"
+
+#include "hw/hw.h"
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/nv2a_regs.h"
+#include "hw/xbox/nv2a/pgraph/surface.h"
+#include "hw/xbox/nv2a/pgraph/texture.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+
+#include "gloffscreen.h"
+#include "constants.h"
+
+typedef struct SurfaceBinding {
+ QTAILQ_ENTRY(SurfaceBinding) entry;
+ MemAccessCallback *access_cb;
+
+ hwaddr vram_addr;
+
+ SurfaceShape shape;
+ uintptr_t dma_addr;
+ uintptr_t dma_len;
+ bool color;
+ bool swizzle;
+
+ unsigned int width;
+ unsigned int height;
+ unsigned int pitch;
+ size_t size;
+
+ bool cleared;
+ int frame_time;
+ int draw_time;
+ bool draw_dirty;
+ bool download_pending;
+ bool upload_pending;
+
+ GLuint gl_buffer;
+ SurfaceFormatInfo fmt;
+} SurfaceBinding;
+
+typedef struct TextureBinding {
+ unsigned int refcnt;
+ int draw_time;
+ uint64_t data_hash;
+ unsigned int scale;
+ unsigned int min_filter;
+ unsigned int mag_filter;
+ unsigned int addru;
+ unsigned int addrv;
+ unsigned int addrp;
+ uint32_t border_color;
+ bool border_color_set;
+ GLenum gl_target;
+ GLuint gl_texture;
+} TextureBinding;
+
+typedef struct ShaderBinding {
+ GLuint gl_program;
+ GLenum gl_primitive_mode;
+
+ GLint psh_constant_loc[9][2];
+ GLint alpha_ref_loc;
+
+ GLint bump_mat_loc[NV2A_MAX_TEXTURES];
+ GLint bump_scale_loc[NV2A_MAX_TEXTURES];
+ GLint bump_offset_loc[NV2A_MAX_TEXTURES];
+ GLint tex_scale_loc[NV2A_MAX_TEXTURES];
+
+ GLint surface_size_loc;
+ GLint clip_range_loc;
+
+ GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
+ uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
+
+ GLint inv_viewport_loc;
+ GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
+ GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
+ GLint ltc1_loc[NV2A_LTC1_COUNT];
+
+ GLint fog_color_loc;
+ GLint fog_param_loc;
+ GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
+ GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS];
+ GLint light_local_position_loc[NV2A_MAX_LIGHTS];
+ GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
+
+ GLint clip_region_loc[8];
+
+ GLint material_alpha_loc;
+} ShaderBinding;
+
+typedef struct ShaderLruNode {
+ LruNode node;
+ bool cached;
+ void *program;
+ size_t program_size;
+ GLenum program_format;
+ ShaderState state;
+ ShaderBinding *binding;
+ QemuThread *save_thread;
+} ShaderLruNode;
+
+typedef struct VertexKey {
+ size_t count;
+ size_t stride;
+ hwaddr addr;
+
+ GLboolean gl_normalize;
+ GLuint gl_type;
+} VertexKey;
+
+typedef struct VertexLruNode {
+ LruNode node;
+ VertexKey key;
+ bool initialized;
+
+ GLuint gl_buffer;
+} VertexLruNode;
+
+typedef struct TextureKey {
+ TextureShape state;
+ hwaddr texture_vram_offset;
+ hwaddr texture_length;
+ hwaddr palette_vram_offset;
+ hwaddr palette_length;
+} TextureKey;
+
+typedef struct TextureLruNode {
+ LruNode node;
+ TextureKey key;
+ TextureBinding *binding;
+ bool possibly_dirty;
+} TextureLruNode;
+
+typedef struct QueryReport {
+ QSIMPLEQ_ENTRY(QueryReport) entry;
+ bool clear;
+ uint32_t parameter;
+ unsigned int query_count;
+ GLuint *queries;
+} QueryReport;
+
+typedef struct PGRAPHGLState {
+ GLuint gl_framebuffer;
+ GLuint gl_display_buffer;
+ GLint gl_display_buffer_internal_format;
+ GLsizei gl_display_buffer_width;
+ GLsizei gl_display_buffer_height;
+ GLenum gl_display_buffer_format;
+ GLenum gl_display_buffer_type;
+
+ Lru element_cache;
+ VertexLruNode *element_cache_entries;
+ GLuint gl_inline_array_buffer;
+ GLuint gl_memory_buffer;
+ GLuint gl_vertex_array;
+ GLuint gl_inline_buffer[NV2A_VERTEXSHADER_ATTRIBUTES];
+
+ QTAILQ_HEAD(, SurfaceBinding) surfaces;
+ SurfaceBinding *color_binding, *zeta_binding;
+ bool downloads_pending;
+ QemuEvent downloads_complete;
+ bool download_dirty_surfaces_pending;
+ QemuEvent dirty_surfaces_download_complete; // common
+
+ TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
+ Lru texture_cache;
+ TextureLruNode *texture_cache_entries;
+
+ Lru shader_cache;
+ ShaderLruNode *shader_cache_entries;
+ ShaderBinding *shader_binding;
+ QemuMutex shader_cache_lock;
+ QemuThread shader_disk_thread;
+
+ unsigned int zpass_pixel_count_result;
+ unsigned int gl_zpass_pixel_count_query_count;
+ GLuint *gl_zpass_pixel_count_queries;
+ QSIMPLEQ_HEAD(, QueryReport) report_queue;
+
+ bool shader_cache_writeback_pending;
+ QemuEvent shader_cache_writeback_complete;
+
+ struct s2t_rndr {
+ GLuint fbo, vao, vbo, prog;
+ GLuint tex_loc, surface_size_loc;
+ } s2t_rndr;
+
+ struct disp_rndr {
+ GLuint fbo, vao, vbo, prog;
+ GLuint display_size_loc;
+ GLuint line_offset_loc;
+ GLuint tex_loc;
+ GLuint pvideo_tex;
+ GLint pvideo_enable_loc;
+ GLint pvideo_tex_loc;
+ GLint pvideo_in_pos_loc;
+ GLint pvideo_pos_loc;
+ GLint pvideo_scale_loc;
+ GLint pvideo_color_key_enable_loc;
+ GLint pvideo_color_key_loc;
+ GLint palette_loc[256];
+ } disp_rndr;
+} PGRAPHGLState;
+
+extern GloContext *g_nv2a_context_render;
+extern GloContext *g_nv2a_context_display;
+
+unsigned int pgraph_gl_bind_inline_array(NV2AState *d);
+void pgraph_gl_bind_shaders(PGRAPHState *pg);
+void pgraph_gl_bind_textures(NV2AState *d);
+void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element);
+bool pgraph_gl_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape);
+GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src);
+void pgraph_gl_deinit_shader_cache(PGRAPHState *pg);
+void pgraph_gl_deinit_surfaces(PGRAPHState *pg);
+void pgraph_gl_deinit_texture_cache(PGRAPHState *pg);
+void pgraph_gl_download_dirty_surfaces(NV2AState *d);
+void pgraph_gl_clear_report_value(NV2AState *d);
+void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter);
+void pgraph_gl_draw_begin(NV2AState *d);
+void pgraph_gl_draw_end(NV2AState *d);
+void pgraph_gl_flush_draw(NV2AState *d);
+void pgraph_gl_get_report(NV2AState *d, uint32_t parameter);
+void pgraph_gl_image_blit(NV2AState *d);
+void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size);
+void pgraph_gl_process_pending_reports(NV2AState *d);
+void pgraph_gl_surface_flush(NV2AState *d);
+void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write);
+void pgraph_gl_sync(NV2AState *d);
+void pgraph_gl_update_entire_memory_buffer(NV2AState *d);
+void pgraph_gl_init_display_renderer(NV2AState *d);
+void pgraph_gl_init_reports(NV2AState *d);
+void pgraph_gl_init_shader_cache(PGRAPHState *pg);
+void pgraph_gl_init_surfaces(PGRAPHState *pg);
+void pgraph_gl_init_texture_cache(NV2AState *d);
+void pgraph_gl_init_vertex_cache(NV2AState *d);
+void pgraph_gl_process_pending_downloads(NV2AState *d);
+void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg);
+void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit);
+void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta);
+void pgraph_gl_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface);
+SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr);
+SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr);
+void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *e);
+void pgraph_gl_unbind_surface(NV2AState *d, bool color);
+void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force);
+void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode);
+bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode);
+void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg);
+void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale);
+unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d);
+int pgraph_gl_get_framebuffer_surface(NV2AState *d);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/gl/reports.c b/hw/xbox/nv2a/pgraph/gl/reports.c
new file mode 100644
index 0000000000..0673c37e0c
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/reports.c
@@ -0,0 +1,111 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include <assert.h>
+#include "renderer.h"
+
+static void process_pending_report(NV2AState *d, QueryReport *report)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ if (report->clear) {
+ r->zpass_pixel_count_result = 0;
+ return;
+ }
+
+ uint8_t type = GET_MASK(report->parameter, NV097_GET_REPORT_TYPE);
+ assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
+
+ /* FIXME: Multisampling affects this (both: OGL and Xbox GPU),
+ * not sure if CLEARs also count
+ */
+ /* FIXME: What about clipping regions etc? */
+ for (int i = 0; i < report->query_count; i++) {
+ GLuint gl_query_result = 0;
+ glGetQueryObjectuiv(report->queries[i], GL_QUERY_RESULT, &gl_query_result);
+ gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor;
+ r->zpass_pixel_count_result += gl_query_result;
+ }
+
+ if (report->query_count) {
+ glDeleteQueries(report->query_count, report->queries);
+ g_free(report->queries);
+ }
+
+ pgraph_write_zpass_pixel_cnt_report(d, report->parameter, r->zpass_pixel_count_result);
+}
+
+void pgraph_gl_process_pending_reports(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+ QueryReport *report, *next;
+
+ QSIMPLEQ_FOREACH_SAFE(report, &r->report_queue, entry, next) {
+ process_pending_report(d, report);
+ QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry);
+ g_free(report);
+ }
+}
+
+void pgraph_gl_clear_report_value(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ /* FIXME: Does this have a value in parameter? Also does this (also?) modify
+ * the report memory block?
+ */
+ if (r->gl_zpass_pixel_count_query_count) {
+ glDeleteQueries(r->gl_zpass_pixel_count_query_count,
+ r->gl_zpass_pixel_count_queries);
+ r->gl_zpass_pixel_count_query_count = 0;
+ }
+
+ QueryReport *report = g_malloc(sizeof(QueryReport));
+ report->clear = true;
+ QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry);
+}
+
+void pgraph_gl_init_reports(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ QSIMPLEQ_INIT(&r->report_queue);
+}
+
+void pgraph_gl_get_report(NV2AState *d, uint32_t parameter)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ QueryReport *report = g_malloc(sizeof(QueryReport));
+ report->clear = false;
+ report->parameter = parameter;
+ report->query_count = r->gl_zpass_pixel_count_query_count;
+ report->queries = r->gl_zpass_pixel_count_queries;
+ QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry);
+
+ r->gl_zpass_pixel_count_query_count = 0;
+ r->gl_zpass_pixel_count_queries = NULL;
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c
new file mode 100644
index 0000000000..0bb4eaa598
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/shaders.c
@@ -0,0 +1,1102 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/fast-hash.h"
+#include "qemu/mstring.h"
+#include <locale.h>
+
+#include "xemu-version.h"
+#include "ui/xemu-settings.h"
+#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
+#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
+#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+#include "hw/xbox/nv2a/pgraph/util.h"
+#include "debug.h"
+#include "renderer.h"
+
+static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function);
+
+/* Map an NV2A primitive/polygon mode pair onto the GL primitive used to
+ * draw it. Quads and quad strips are fed to a geometry shader as adjacency
+ * primitives; point polygon mode forces GL_POINTS regardless of primitive. */
+static GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode)
+{
+    if (polygon_mode == POLY_MODE_POINT) {
+        return GL_POINTS;
+    }
+
+    switch (primitive_mode) {
+    case PRIM_TYPE_POINTS:
+        return GL_POINTS;
+    case PRIM_TYPE_LINES:
+        return GL_LINES;
+    case PRIM_TYPE_LINE_LOOP:
+        return GL_LINE_LOOP;
+    case PRIM_TYPE_LINE_STRIP:
+        return GL_LINE_STRIP;
+    case PRIM_TYPE_TRIANGLES:
+        return GL_TRIANGLES;
+    case PRIM_TYPE_TRIANGLE_STRIP:
+        return GL_TRIANGLE_STRIP;
+    case PRIM_TYPE_TRIANGLE_FAN:
+        return GL_TRIANGLE_FAN;
+    case PRIM_TYPE_QUADS:
+        return GL_LINES_ADJACENCY;
+    case PRIM_TYPE_QUAD_STRIP:
+        return GL_LINE_STRIP_ADJACENCY;
+    case PRIM_TYPE_POLYGON:
+        switch (polygon_mode) {
+        case POLY_MODE_LINE:
+            return GL_LINE_LOOP;
+        case POLY_MODE_FILL:
+            return GL_TRIANGLE_FAN;
+        default:
+            assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode");
+            return 0;
+        }
+    default:
+        assert(!"Invalid primitive_mode");
+        return 0;
+    }
+}
+
+/* Compile a single GL shader object of the given type from `code`.
+ * `name` is used only for diagnostics. Aborts on compile failure: the
+ * source is generated by us, so failure indicates an emulator bug. */
+static GLuint create_gl_shader(GLenum gl_shader_type,
+                               const char *code,
+                               const char *name)
+{
+    NV2A_GL_DGROUP_BEGIN("Creating new %s", name);
+    NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code);
+
+    GLuint shader = glCreateShader(gl_shader_type);
+    glShaderSource(shader, 1, &code, 0);
+    glCompileShader(shader);
+
+    GLint compiled = 0;
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
+    if (!compiled) {
+        /* Dump the offending source plus the driver's info log, then die. */
+        GLint log_length = 0;
+        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+        GLchar *log = g_malloc(log_length * sizeof(GLchar));
+        glGetShaderInfoLog(shader, log_length, NULL, log);
+        fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log);
+        g_free(log);
+
+        NV2A_GL_DGROUP_END();
+        abort();
+    }
+
+    NV2A_GL_DGROUP_END();
+    return shader;
+}
+
+/* Resolve and cache the uniform locations of a freshly linked program in
+ * `binding`, and bind each texture sampler uniform to its texture unit.
+ * Called once per program (both for newly generated and disk-loaded
+ * binaries); later per-draw updates use the cached locations only. */
+static void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state)
+{
+    int i, j;
+    char tmp[64];
+
+    /* set texture samplers */
+    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        char samplerName[16];
+        snprintf(samplerName, sizeof(samplerName), "texSamp%d", i);
+        GLint texSampLoc = glGetUniformLocation(binding->gl_program, samplerName);
+        if (texSampLoc >= 0) {
+            glUniform1i(texSampLoc, i);
+        }
+    }
+
+    /* validate the program */
+    glValidateProgram(binding->gl_program);
+    GLint valid = 0;
+    glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid);
+    if (!valid) {
+        GLchar log[1024];
+        glGetProgramInfoLog(binding->gl_program, 1024, NULL, log);
+        fprintf(stderr, "nv2a: shader validation failed: %s\n", log);
+        abort();
+    }
+
+    /* lookup fragment shader uniforms */
+    /* 9 = 8 combiner stages plus the final combiner; two constants each. */
+    for (i = 0; i < 9; i++) {
+        for (j = 0; j < 2; j++) {
+            snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
+            binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp);
+        }
+    }
+    binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef");
+    /* Bump-env uniforms only exist for texture stages 1..3, hence i = 1. */
+    for (i = 1; i < NV2A_MAX_TEXTURES; i++) {
+        snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
+        binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+        snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
+        binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+        snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
+        binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+    }
+
+    for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        snprintf(tmp, sizeof(tmp), "texScale%d", i);
+        binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+    }
+
+    /* lookup vertex shader uniforms */
+    for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) {
+        snprintf(tmp, sizeof(tmp), "c[%d]", i);
+        binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+    }
+    binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize");
+    binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange");
+    binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor");
+    binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam");
+
+    binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport");
+    /* Fixed-function lighting context arrays (ltctxa/ltctxb/ltc1). */
+    for (i = 0; i < NV2A_LTCTXA_COUNT; i++) {
+        snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i);
+        binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+    }
+    for (i = 0; i < NV2A_LTCTXB_COUNT; i++) {
+        snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i);
+        binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+    }
+    for (i = 0; i < NV2A_LTC1_COUNT; i++) {
+        snprintf(tmp, sizeof(tmp), "ltc1[%d]", i);
+        binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+    }
+    /* Per-light parameters for both infinite and local lights. */
+    for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+        snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
+        binding->light_infinite_half_vector_loc[i] =
+            glGetUniformLocation(binding->gl_program, tmp);
+        snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
+        binding->light_infinite_direction_loc[i] =
+            glGetUniformLocation(binding->gl_program, tmp);
+
+        snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
+        binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+        snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
+        binding->light_local_attenuation_loc[i] =
+            glGetUniformLocation(binding->gl_program, tmp);
+    }
+    /* Eight hardware window-clip rectangles. */
+    for (i = 0; i < 8; i++) {
+        snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i);
+        binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
+    }
+
+    if (state->fixed_function) {
+        binding->material_alpha_loc =
+            glGetUniformLocation(binding->gl_program, "material_alpha");
+    } else {
+        binding->material_alpha_loc = -1;
+    }
+}
+
+/* Generate, compile and link the full GL program (optional geometry shader,
+ * vertex shader, fragment shader) for the given shader state, returning a
+ * new heap-allocated binding with its uniform locations resolved.
+ * Aborts on link failure. */
+static ShaderBinding *generate_shaders(const ShaderState *state)
+{
+    /* Save the caller's numeric locale so it can be restored afterwards. */
+    char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
+    if (previous_numeric_locale) {
+        previous_numeric_locale = g_strdup(previous_numeric_locale);
+    }
+
+    /* Ensure numeric values are printed with '.' radix, no grouping */
+    setlocale(LC_NUMERIC, "C");
+    GLuint program = glCreateProgram();
+
+    /* Create an optional geometry shader and find primitive type */
+    GLenum gl_primitive_mode =
+        get_gl_primitive_mode(state->polygon_front_mode, state->primitive_mode);
+    MString* geometry_shader_code =
+        pgraph_gen_geom_glsl(state->polygon_front_mode,
+                             state->polygon_back_mode,
+                             state->primitive_mode,
+                             state->smooth_shading,
+                             false);
+    if (geometry_shader_code) {
+        const char* geometry_shader_code_str =
+             mstring_get_str(geometry_shader_code);
+        GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER,
+                                                  geometry_shader_code_str,
+                                                  "geometry shader");
+        glAttachShader(program, geometry_shader);
+        mstring_unref(geometry_shader_code);
+    }
+
+    /* create the vertex shader */
+    /* The vertex shader needs to know whether a geometry stage follows. */
+    MString *vertex_shader_code =
+        pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL);
+    GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER,
+                                            mstring_get_str(vertex_shader_code),
+                                            "vertex shader");
+    glAttachShader(program, vertex_shader);
+    mstring_unref(vertex_shader_code);
+
+    /* generate a fragment shader from register combiners */
+    MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh);
+    const char *fragment_shader_code_str =
+        mstring_get_str(fragment_shader_code);
+    GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER,
+                                              fragment_shader_code_str,
+                                              "fragment shader");
+    glAttachShader(program, fragment_shader);
+    mstring_unref(fragment_shader_code);
+
+    /* link the program */
+    glLinkProgram(program);
+    GLint linked = 0;
+    glGetProgramiv(program, GL_LINK_STATUS, &linked);
+    if(!linked) {
+        GLchar log[2048];
+        glGetProgramInfoLog(program, 2048, NULL, log);
+        fprintf(stderr, "nv2a: shader linking failed: %s\n", log);
+        abort();
+    }
+
+    glUseProgram(program);
+
+    ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding));
+    ret->gl_program = program;
+    ret->gl_primitive_mode = gl_primitive_mode;
+
+    update_shader_constant_locations(ret, state);
+
+    /* Restore the locale captured on entry. */
+    if (previous_numeric_locale) {
+        setlocale(LC_NUMERIC, previous_numeric_locale);
+        g_free(previous_numeric_locale);
+    }
+
+    return ret;
+}
+
+static const char *shader_gl_vendor = NULL;
+
+/* Ensure the on-disk shader cache directory (<base>shaders) exists. */
+static void shader_create_cache_folder(void)
+{
+    char *path = g_strdup_printf("%sshaders", xemu_settings_get_base_path());
+    qemu_mkdir(path);
+    g_free(path);
+}
+
+/* Return a newly allocated path of the file that lists the hashes of the
+ * most-recently-used shaders (used to warm the cache on startup).
+ * Caller frees with g_free(). */
+static char *shader_get_lru_cache_path(void)
+{
+    return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path());
+}
+
+/* lru_visit_active() callback: append one shader hash to the LRU list file
+ * passed via `opaque`. Failures are logged but not fatal. */
+static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque)
+{
+    FILE *lru_list_file = opaque;
+
+    if (fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file) != 1) {
+        fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n",
+                (unsigned long long) node->hash);
+    }
+}
+
+/* Persist the list of active shader hashes so the next run can preload them,
+ * then flush the in-memory cache. Always signals
+ * shader_cache_writeback_complete so a waiting thread is never stranded. */
+void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    if (!g_config.perf.cache_shaders) {
+        qatomic_set(&r->shader_cache_writeback_pending, false);
+        qemu_event_set(&r->shader_cache_writeback_complete);
+        return;
+    }
+
+    char *shader_lru_path = shader_get_lru_cache_path();
+    /* Wait for the startup disk-reload thread before touching the cache. */
+    qemu_thread_join(&r->shader_disk_thread);
+
+    FILE *lru_list = qemu_fopen(shader_lru_path, "wb");
+    g_free(shader_lru_path);
+    if (!lru_list) {
+        fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n");
+        /* NOTE(review): early return skips the writeback_pending/event
+         * updates below — confirm callers tolerate this on open failure. */
+        return;
+    }
+
+    lru_visit_active(&r->shader_cache, shader_write_lru_list_entry_to_disk, lru_list);
+    fclose(lru_list);
+
+    /* Evict everything; post_evict joins any outstanding save threads. */
+    lru_flush(&r->shader_cache);
+
+    qatomic_set(&r->shader_cache_writeback_pending, false);
+    qemu_event_set(&r->shader_cache_writeback_complete);
+}
+
+/* Try to build a ShaderBinding from a program binary previously read off
+ * disk into snode->program. Returns true and installs snode->binding on
+ * success; returns false (leaving the caller to regenerate from source) if
+ * no binary is present or the driver rejects it. */
+bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode)
+{
+    assert(glGetError() == GL_NO_ERROR);
+
+    if (!snode->program) {
+        return false;
+    }
+
+    GLuint gl_program = glCreateProgram();
+    glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size);
+    /* glProgramBinary may fail if the driver/hardware changed since caching. */
+    GLint gl_error = glGetError();
+    if (gl_error != GL_NO_ERROR) {
+        NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error);
+        glDeleteProgram(gl_program);
+        return false;
+    }
+
+    glValidateProgram(gl_program);
+    GLint valid = 0;
+    glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid);
+    if (!valid) {
+        GLchar log[1024];
+        glGetProgramInfoLog(gl_program, 1024, NULL, log);
+        NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log);
+        glDeleteProgram(gl_program);
+        return false;
+    }
+
+    glUseProgram(gl_program);
+
+    ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding));
+    binding->gl_program = gl_program;
+    binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode,
+                                                       snode->state.primitive_mode);
+    snode->binding = binding;
+
+    /* The raw binary is no longer needed once the program is live. */
+    g_free(snode->program);
+    snode->program = NULL;
+
+    update_shader_constant_locations(binding, &snode->state);
+
+    return true;
+}
+
+/* Return the cache bucket directory for a shader hash:
+ * <base>/shaders/<top 16 bits of hash, hex>. Caller frees with g_free(). */
+static char *shader_get_bin_directory(uint64_t hash)
+{
+    const char *cfg_dir = xemu_settings_get_base_path();
+    /* 0xffffULL: shifting a 32-bit `unsigned long` (32-bit/LLP64 targets)
+     * left by 48 would be undefined behavior. */
+    uint64_t bin_mask = 0xffffULL << 48;
+    /* Cast for %llx so the format matches uint64_t on LLP64 as well. */
+    char *shader_bin_dir =
+        g_strdup_printf("%s/shaders/%04llx", cfg_dir,
+                        (unsigned long long)((hash & bin_mask) >> 48));
+    return shader_bin_dir;
+}
+
+/* Return the file path of a cached shader binary inside its bucket
+ * directory: the low 48 bits of the hash, zero-padded hex. Caller frees. */
+static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash)
+{
+    /* 0xffffULL: see shader_get_bin_directory() — a 32-bit unsigned long
+     * shifted by 48 is undefined behavior. */
+    uint64_t bin_mask = 0xffffULL << 48;
+    return g_strdup_printf("%s/%012llx", shader_bin_dir,
+                           (unsigned long long)(hash & (~bin_mask)));
+}
+
+/* Load one cached shader binary (identified by hash) from disk into the
+ * in-memory LRU cache. Validates the recorded xemu version and GL vendor;
+ * any mismatch or short read deletes the stale cache file. Runs on the
+ * shader_disk_thread, so cache access is guarded by shader_cache_lock. */
+static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    char *shader_bin_dir = shader_get_bin_directory(hash);
+    char *shader_path = shader_get_binary_path(shader_bin_dir, hash);
+    char *cached_xemu_version = NULL;
+    char *cached_gl_vendor = NULL;
+    void *program_buffer = NULL;
+
+    uint64_t cached_xemu_version_len;
+    uint64_t gl_vendor_len;
+    GLenum program_binary_format;
+    ShaderState state;
+    size_t shader_size;
+
+    g_free(shader_bin_dir);
+
+    qemu_mutex_lock(&r->shader_cache_lock);
+    if (lru_contains_hash(&r->shader_cache, hash)) {
+        qemu_mutex_unlock(&r->shader_cache_lock);
+        g_free(shader_path); /* was leaked on this early return */
+        return;
+    }
+    qemu_mutex_unlock(&r->shader_cache_lock);
+
+    FILE *shader_file = qemu_fopen(shader_path, "rb");
+    if (!shader_file) {
+        goto error;
+    }
+
+    size_t nread;
+    /* Read exactly one item or bail out to the stale-file cleanup path. */
+    #define READ_OR_ERR(data, data_len) \
+        do { \
+            nread = fread(data, data_len, 1, shader_file); \
+            if (nread != 1) { \
+                fclose(shader_file); \
+                goto error; \
+            } \
+        } while (0)
+
+    READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len));
+
+    cached_xemu_version = g_malloc(cached_xemu_version_len +1);
+    READ_OR_ERR(cached_xemu_version, cached_xemu_version_len);
+    if (strcmp(cached_xemu_version, xemu_version) != 0) {
+        fclose(shader_file);
+        goto error;
+    }
+
+    READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len));
+
+    cached_gl_vendor = g_malloc(gl_vendor_len);
+    READ_OR_ERR(cached_gl_vendor, gl_vendor_len);
+    if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) {
+        fclose(shader_file);
+        goto error;
+    }
+
+    READ_OR_ERR(&program_binary_format, sizeof(program_binary_format));
+    READ_OR_ERR(&state, sizeof(state));
+    READ_OR_ERR(&shader_size, sizeof(shader_size));
+
+    program_buffer = g_malloc(shader_size);
+    READ_OR_ERR(program_buffer, shader_size);
+
+    #undef READ_OR_ERR
+
+    fclose(shader_file);
+    g_free(shader_path);
+    g_free(cached_xemu_version);
+    g_free(cached_gl_vendor);
+
+    qemu_mutex_lock(&r->shader_cache_lock);
+    LruNode *node = lru_lookup(&r->shader_cache, hash, &state);
+    ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
+
+    /* If we happened to regenerate this shader already, then we may as well use the new one */
+    if (snode->binding) {
+        qemu_mutex_unlock(&r->shader_cache_lock);
+        g_free(program_buffer); /* was leaked on this early return */
+        return;
+    }
+
+    snode->program_format = program_binary_format;
+    snode->program_size = shader_size;
+    snode->program = program_buffer; /* ownership moves to the cache node */
+    snode->cached = true;
+    qemu_mutex_unlock(&r->shader_cache_lock);
+    return;
+
+error:
+    /* Delete the shader so it won't be loaded again */
+    qemu_unlink(shader_path);
+    g_free(shader_path);
+    g_free(program_buffer);
+    g_free(cached_xemu_version);
+    g_free(cached_gl_vendor);
+}
+
+/* Thread entry point: preload every shader hash recorded in the LRU list
+ * file back into the in-memory cache. `arg` is the PGRAPHState. */
+static void *shader_reload_lru_from_disk(void *arg)
+{
+    if (!g_config.perf.cache_shaders) {
+        return NULL;
+    }
+
+    PGRAPHState *pg = (PGRAPHState*) arg;
+    char *shader_lru_path = shader_get_lru_cache_path();
+
+    FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb");
+    g_free(shader_lru_path);
+    if (!lru_shaders_list) {
+        return NULL;
+    }
+
+    uint64_t hash;
+    while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) {
+        shader_load_from_disk(pg, hash);
+    }
+
+    /* The file handle was previously leaked here. */
+    fclose(lru_shaders_list);
+
+    return NULL;
+}
+
+/* Lru init_node callback: prepare a freshly claimed node for `state`,
+ * clearing every derived field. */
+static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
+{
+    ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
+
+    snode->cached = false;
+    snode->binding = NULL;
+    snode->program = NULL;
+    snode->save_thread = NULL;
+    memcpy(&snode->state, state, sizeof(ShaderState));
+}
+
+/* Lru post-evict callback: release everything an evicted node owns —
+ * pending save thread, GL program, serialized binary — and reset it. */
+static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
+{
+    ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
+
+    /* A disk-save worker may still be using this node; wait for it first. */
+    if (snode->save_thread) {
+        qemu_thread_join(snode->save_thread);
+        g_free(snode->save_thread);
+        snode->save_thread = NULL;
+    }
+
+    if (snode->binding) {
+        glDeleteProgram(snode->binding->gl_program);
+        g_free(snode->binding);
+        snode->binding = NULL;
+    }
+
+    g_free(snode->program); /* g_free(NULL) is a no-op */
+    snode->program = NULL;
+
+    snode->cached = false;
+    memset(&snode->state, 0, sizeof(ShaderState));
+}
+
+/* Lru compare callback: true when the node's state does NOT match `key`
+ * (mismatch semantics, per the lru compare_nodes contract used here). */
+static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
+{
+    ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
+    return memcmp(&snode->state, key, sizeof(ShaderState)) != 0;
+}
+
+/* Initialize the shader LRU cache (fixed pool of nodes plus callbacks) and
+ * kick off a background thread that preloads cached binaries from disk. */
+void pgraph_gl_init_shader_cache(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    qemu_mutex_init(&r->shader_cache_lock);
+    qemu_event_init(&r->shader_cache_writeback_complete, false);
+
+    /* Captured once; used to invalidate cache files written by a different
+     * GL driver/vendor. */
+    if (!shader_gl_vendor) {
+        shader_gl_vendor = (const char *) glGetString(GL_VENDOR);
+    }
+
+    shader_create_cache_folder();
+
+    /* FIXME: Make this configurable */
+    const size_t shader_cache_size = 50*1024;
+    lru_init(&r->shader_cache);
+    r->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode));
+    assert(r->shader_cache_entries != NULL);
+    for (int i = 0; i < shader_cache_size; i++) {
+        lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node);
+    }
+
+    r->shader_cache.init_node = shader_cache_entry_init;
+    r->shader_cache.compare_nodes = shader_cache_entry_compare;
+    r->shader_cache.post_node_evict = shader_cache_entry_post_evict;
+
+    /* Joined later by pgraph_gl_shader_write_cache_reload_list(). */
+    qemu_thread_create(&r->shader_disk_thread, "pgraph.renderer_state->shader_cache",
+                       shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE);
+}
+
+/* Tear down the shader cache. The writeback (which also joins the disk
+ * thread and flushes/evicts all nodes) must complete before the node pool
+ * is freed; entries were allocated with malloc(), hence free(). */
+void pgraph_gl_deinit_shader_cache(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    // Clear out shader cache
+    pgraph_gl_shader_write_cache_reload_list(pg);
+    free(r->shader_cache_entries);
+    qemu_mutex_destroy(&r->shader_cache_lock);
+}
+
+/* Worker thread entry point: serialize one cached shader (version header,
+ * GL vendor, binary format, state and program binary) to its cache file.
+ * On any write failure the partial file is deleted so it is never loaded. */
+static void *shader_write_to_disk(void *arg)
+{
+    ShaderLruNode *snode = (ShaderLruNode*) arg;
+
+    char *shader_bin = shader_get_bin_directory(snode->node.hash);
+    char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash);
+
+    /* Computed locally each call: the previous lazily-initialized `static`
+     * lengths were written unsynchronized from concurrent save threads,
+     * which is a data race. The strings themselves never change. */
+    uint64_t gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1);
+    uint64_t xemu_version_len = (uint64_t) (strlen(xemu_version) + 1);
+
+    qemu_mkdir(shader_bin);
+    g_free(shader_bin);
+
+    FILE *shader_file = qemu_fopen(shader_path, "wb");
+    if (!shader_file) {
+        goto error;
+    }
+
+    size_t written;
+    /* Write exactly one item or bail out to the cleanup path. */
+    #define WRITE_OR_ERR(data, data_size) \
+        do { \
+            written = fwrite(data, data_size, 1, shader_file); \
+            if (written != 1) { \
+                fclose(shader_file); \
+                goto error; \
+            } \
+        } while (0)
+
+    WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len));
+    WRITE_OR_ERR(xemu_version, xemu_version_len);
+
+    WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len));
+    WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len);
+
+    WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format));
+    WRITE_OR_ERR(&snode->state, sizeof(snode->state));
+
+    WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size));
+    WRITE_OR_ERR(snode->program, snode->program_size);
+
+    #undef WRITE_OR_ERR
+
+    fclose(shader_file);
+
+    g_free(shader_path);
+    g_free(snode->program);
+    snode->program = NULL;
+
+    return NULL;
+
+error:
+    fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path);
+    qemu_unlink(shader_path);
+    g_free(shader_path);
+    g_free(snode->program);
+    snode->program = NULL;
+    return NULL;
+}
+
+/* Snapshot the linked program's driver binary into snode->program and spawn
+ * a joinable worker thread that persists it to the on-disk cache. No-op if
+ * there is nothing to save or the node was already cached. */
+void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode)
+{
+    if (!snode->binding || snode->cached) {
+        return;
+    }
+
+    GLint program_size;
+    glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size);
+
+    g_free(snode->program); /* g_free(NULL) is a no-op */
+    snode->program = NULL;
+
+    /* program_size might be zero on some systems, if no binary formats are supported */
+    if (program_size == 0) {
+        return;
+    }
+
+    snode->program = g_malloc(program_size);
+    GLsizei copied_size;
+    glGetProgramBinary(snode->binding->gl_program, program_size, &copied_size,
+                       &snode->program_format, snode->program);
+    assert(glGetError() == GL_NO_ERROR);
+
+    snode->program_size = copied_size;
+    snode->cached = true;
+
+    char name[24];
+    snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash);
+    snode->save_thread = g_malloc0(sizeof(QemuThread));
+    qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE);
+}
+
+/* Push all per-draw uniform state (combiner constants, bump-env matrices,
+ * fog, lighting, vertex-shader constants, viewport/clip data) into the
+ * currently bound program. Dirty tracking is bypassed when the binding
+ * itself changed, since a new program has stale uniform storage.
+ *
+ * NOTE(review): the two loops marked "reconstructed" below were corrupted
+ * in this hunk (text between '<' and '>' was stripped); they have been
+ * restored from the surrounding structure — verify against upstream. */
+static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
+                                    bool binding_changed,
+
+                                    // FIXME: Remove these... We already know it from binding.state
+                                    bool vertex_program,
+                                    bool fixed_function)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+    int i, j;
+
+    /* update combiner constants */
+    for (i = 0; i < 9; i++) {
+        uint32_t constant[2];
+        if (i == 8) {
+            /* final combiner */
+            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
+            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
+        } else {
+            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
+            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
+        }
+
+        for (j = 0; j < 2; j++) {
+            GLint loc = binding->psh_constant_loc[i][j];
+            if (loc != -1) {
+                float value[4];
+                pgraph_argb_pack32_to_rgba_float(constant[j], value);
+                glUniform4fv(loc, 1, value);
+            }
+        }
+    }
+    if (binding->alpha_ref_loc != -1) {
+        float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
+                                   NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0;
+        glUniform1f(binding->alpha_ref_loc, alpha_ref);
+    }
+
+    /* For each texture stage */
+    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        GLint loc;
+
+        /* Bump luminance only during stages 1 - 3 */
+        if (i > 0) {
+            loc = binding->bump_mat_loc[i];
+            if (loc != -1) {
+                uint32_t m_u32[4];
+                m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
+                m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
+                m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
+                m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
+                float m[4];
+                m[0] = *(float*)&m_u32[0];
+                m[1] = *(float*)&m_u32[1];
+                m[2] = *(float*)&m_u32[2];
+                m[3] = *(float*)&m_u32[3];
+                glUniformMatrix2fv(loc, 1, GL_FALSE, m);
+            }
+            loc = binding->bump_scale_loc[i];
+            if (loc != -1) {
+                uint32_t v =
+                    pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
+                glUniform1f(loc, *(float*)&v);
+            }
+            loc = binding->bump_offset_loc[i];
+            if (loc != -1) {
+                uint32_t v =
+                    pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
+                glUniform1f(loc, *(float*)&v);
+            }
+        }
+
+        loc = r->shader_binding->tex_scale_loc[i];
+        if (loc != -1) {
+            assert(r->texture_binding[i] != NULL);
+            glUniform1f(loc, (float)r->texture_binding[i]->scale);
+        }
+    }
+
+    if (binding->fog_color_loc != -1) {
+        uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
+        glUniform4f(binding->fog_color_loc,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
+    }
+    if (binding->fog_param_loc != -1) {
+        uint32_t v[2];
+        v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
+        v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
+        glUniform2f(binding->fog_param_loc, *(float *)&v[0], *(float *)&v[1]);
+    }
+
+    /* Maximum representable depth value for the current zeta format. */
+    float zmax;
+    switch (pg->surface_shape.zeta_format) {
+    case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
+        zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
+        break;
+    case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
+        zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
+        break;
+    default:
+        assert(0);
+    }
+
+    if (fixed_function) {
+        /* update lighting constants */
+        struct {
+            uint32_t* v;
+            bool* dirty;
+            GLint* locs;
+            size_t len;
+        } lighting_arrays[] = {
+            {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT},
+            {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT},
+            {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT},
+        };
+
+        /* reconstructed: upload each dirty vec4 of the lighting contexts */
+        for (i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
+            uint32_t *lighting_v = lighting_arrays[i].v;
+            bool *lighting_dirty = lighting_arrays[i].dirty;
+            GLint *lighting_locs = lighting_arrays[i].locs;
+            size_t lighting_len = lighting_arrays[i].len;
+            for (j = 0; j < lighting_len; j++) {
+                if (!lighting_dirty[j] && !binding_changed) {
+                    continue;
+                }
+                GLint loc = lighting_locs[j];
+                if (loc != -1) {
+                    glUniform4fv(loc, 1, (const GLfloat *)&lighting_v[j * 4]);
+                }
+                lighting_dirty[j] = false;
+            }
+        }
+
+        /* reconstructed: per-light vectors */
+        for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+            GLint loc;
+            loc = binding->light_infinite_half_vector_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]);
+            }
+            loc = binding->light_infinite_direction_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_infinite_direction[i]);
+            }
+
+            loc = binding->light_local_position_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_local_position[i]);
+            }
+            loc = binding->light_local_attenuation_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_local_attenuation[i]);
+            }
+        }
+
+        /* estimate the viewport by assuming it matches the surface ... */
+        unsigned int aa_width = 1, aa_height = 1;
+        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
+
+        float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width);
+        float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height);
+        float m33 = zmax;
+        float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
+        float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
+
+        float invViewport[16] = {
+            1.0/m11, 0, 0, 0,
+            0, 1.0/m22, 0, 0,
+            0, 0, 1.0/m33, 0,
+            -1.0+m41/m11, 1.0+m42/m22, 0, 1.0
+        };
+
+        if (binding->inv_viewport_loc != -1) {
+            glUniformMatrix4fv(binding->inv_viewport_loc,
+                               1, GL_FALSE, &invViewport[0]);
+        }
+    }
+
+    /* update vertex program constants */
+    /* reconstructed loop header */
+    for (i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) {
+        if (!pg->vsh_constants_dirty[i] && !binding_changed) continue;
+
+        GLint loc = binding->vsh_constant_loc[i];
+        if ((loc != -1) &&
+            memcmp(binding->vsh_constants[i], pg->vsh_constants[i],
+                   sizeof(pg->vsh_constants[1]))) {
+            glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]);
+            memcpy(binding->vsh_constants[i], pg->vsh_constants[i],
+                   sizeof(pg->vsh_constants[i]));
+        }
+
+        pg->vsh_constants_dirty[i] = false;
+    }
+
+    if (binding->surface_size_loc != -1) {
+        unsigned int aa_width = 1, aa_height = 1;
+        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
+        glUniform2f(binding->surface_size_loc,
+                    pg->surface_binding_dim.width / aa_width,
+                    pg->surface_binding_dim.height / aa_height);
+    }
+
+    if (binding->clip_range_loc != -1) {
+        uint32_t v[2];
+        v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
+        v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
+        float zclip_min = *(float*)&v[0] / zmax * 2.0 - 1.0;
+        float zclip_max = *(float*)&v[1] / zmax * 2.0 - 1.0;
+        glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max);
+    }
+
+    /* Clipping regions */
+    unsigned int max_gl_width = pg->surface_binding_dim.width;
+    unsigned int max_gl_height = pg->surface_binding_dim.height;
+    pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
+
+    for (i = 0; i < 8; i++) {
+        uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
+        unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
+        unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
+        uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
+        unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
+        unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
+        pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
+        pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
+
+        pgraph_apply_scaling_factor(pg, &x_min, &y_min);
+        pgraph_apply_scaling_factor(pg, &x_max, &y_max);
+
+        /* Translate for the GL viewport origin */
+        int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0);
+        int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height);
+
+        glUniform4i(r->shader_binding->clip_region_loc[i],
+                    x_min, y_min_xlat, x_max, y_max_xlat);
+    }
+
+    if (binding->material_alpha_loc != -1) {
+        glUniform1f(binding->material_alpha_loc, pg->material_alpha);
+    }
+}
+
+/* Return true when any register or field that feeds shader generation has
+ * changed since the previous call, updating the snapshot as a side effect.
+ * Implemented with an X-macro table (DIRTY_REGS) expanded three times:
+ * once to declare static snapshot storage, once to compare, once to update.
+ * Note: the snapshot is function-local static state, so this assumes a
+ * single PGRAPHState instance calls it. */
+static bool test_shaders_dirty(PGRAPHState *pg)
+{
+    /* Helper macros: CR_x tracks x consecutive 32-bit PGRAPH registers,
+     * CF/CFA track arbitrary fields/arrays of PGRAPHState. */
+    #define CR_1(reg) CR_x(reg, 1)
+    #define CR_4(reg) CR_x(reg, 4)
+    #define CR_8(reg) CR_x(reg, 8)
+    #define CF(src, name)  CF_x(typeof(src), (&src), name, 1)
+    #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src))
+    #define CNAME(name) reg_check__ ## name
+    #define CX_x__define(type, name, x) static type CNAME(name)[x];
+    #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x)
+    #define CF_x__define(type, src, name, x) CX_x__define(type, name, x)
+    #define CR_x__check(reg, x) \
+        for (int i = 0; i < x; i++) { if (pgraph_reg_r(pg, reg+i*4) != CNAME(reg)[i]) goto dirty; }
+    #define CF_x__check(type, src, name, x) \
+        for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; }
+    #define CR_x__update(reg, x) \
+        for (int i = 0; i < x; i++) { CNAME(reg)[i] = pgraph_reg_r(pg, reg+i*4); }
+    #define CF_x__update(type, src, name, x) \
+        for (int i = 0; i < x; i++) { CNAME(name)[i] = src[i]; }
+
+    /* The full set of state that influences generated shader code. */
+    #define DIRTY_REGS \
+        CR_1(NV_PGRAPH_COMBINECTL) \
+        CR_1(NV_PGRAPH_SHADERCTL) \
+        CR_1(NV_PGRAPH_SHADOWCTL) \
+        CR_1(NV_PGRAPH_COMBINESPECFOG0) \
+        CR_1(NV_PGRAPH_COMBINESPECFOG1) \
+        CR_1(NV_PGRAPH_CONTROL_0) \
+        CR_1(NV_PGRAPH_CONTROL_3) \
+        CR_1(NV_PGRAPH_CSV0_C) \
+        CR_1(NV_PGRAPH_CSV0_D) \
+        CR_1(NV_PGRAPH_CSV1_A) \
+        CR_1(NV_PGRAPH_CSV1_B) \
+        CR_1(NV_PGRAPH_SETUPRASTER) \
+        CR_1(NV_PGRAPH_SHADERPROG) \
+        CR_8(NV_PGRAPH_COMBINECOLORI0) \
+        CR_8(NV_PGRAPH_COMBINECOLORO0) \
+        CR_8(NV_PGRAPH_COMBINEALPHAI0) \
+        CR_8(NV_PGRAPH_COMBINEALPHAO0) \
+        CR_8(NV_PGRAPH_COMBINEFACTOR0) \
+        CR_8(NV_PGRAPH_COMBINEFACTOR1) \
+        CR_1(NV_PGRAPH_SHADERCLIPMODE) \
+        CR_4(NV_PGRAPH_TEXCTL0_0) \
+        CR_4(NV_PGRAPH_TEXFMT0) \
+        CR_4(NV_PGRAPH_TEXFILTER0) \
+        CR_8(NV_PGRAPH_WINDOWCLIPX0) \
+        CR_8(NV_PGRAPH_WINDOWCLIPY0) \
+        CF(pg->primitive_mode, primitive_mode) \
+        CF(pg->surface_scale_factor, surface_scale_factor) \
+        CF(pg->compressed_attrs, compressed_attrs) \
+        CFA(pg->texture_matrix_enable, texture_matrix_enable)
+
+    /* Expansion 1: declare one static snapshot array per tracked item. */
+    #define CR_x(reg, x) CR_x__define(reg, x)
+    #define CF_x(type, src, name, x) CF_x__define(type, src, name, x)
+    DIRTY_REGS
+    #undef CR_x
+    #undef CF_x
+
+    /* Expansion 2: compare live state against the snapshot. */
+    #define CR_x(reg, x) CR_x__check(reg, x)
+    #define CF_x(type, src, name, x) CF_x__check(type, src, name, x)
+    DIRTY_REGS
+    #undef CR_x
+    #undef CF_x
+    return false;
+
+dirty:
+    /* Expansion 3: something changed — refresh the whole snapshot. */
+    #define CR_x(reg, x) CR_x__update(reg, x)
+    #define CF_x(type, src, name, x) CF_x__update(type, src, name, x)
+    DIRTY_REGS
+    #undef CR_x
+    #undef CF_x
+    return true;
+}
+
+/* Select (from cache, disk, or fresh generation) and bind the GL program
+ * matching the current PGRAPH state, then refresh its uniforms. */
+void pgraph_gl_bind_shaders(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    /* These locals were referenced below but never declared in this hunk.
+     * NV_PGRAPH_CSV0_D_MODE: 2 selects the programmable vertex shader,
+     * 0 the fixed-function pipeline — TODO confirm against pgraph docs. */
+    bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
+                                   NV_PGRAPH_CSV0_D_MODE) == 2;
+    bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
+                                   NV_PGRAPH_CSV0_D_MODE) == 0;
+
+    NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__,
+                         vertex_program ? "yes" : "no",
+                         fixed_function ? "yes" : "no");
+
+    bool binding_changed = false;
+    if (!test_shaders_dirty(pg) && !pg->program_data_dirty) {
+        nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
+        goto update_constants;
+    }
+
+    pg->program_data_dirty = false;
+
+    {
+        ShaderBinding* old_binding = r->shader_binding;
+
+        ShaderState state = pgraph_get_shader_state(pg);
+
+        uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState));
+        qemu_mutex_lock(&r->shader_cache_lock);
+        LruNode *node = lru_lookup(&r->shader_cache, shader_state_hash, &state);
+        ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
+        if (snode->binding || pgraph_gl_shader_load_from_memory(snode)) {
+            r->shader_binding = snode->binding;
+        } else {
+            r->shader_binding = generate_shaders(&state);
+            nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
+
+            /* cache it */
+            snode->binding = r->shader_binding;
+            if (g_config.perf.cache_shaders) {
+                pgraph_gl_shader_cache_to_disk(snode);
+            }
+        }
+
+        qemu_mutex_unlock(&r->shader_cache_lock);
+
+        binding_changed = (r->shader_binding != old_binding);
+        if (binding_changed) {
+            nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);
+            glUseProgram(r->shader_binding->gl_program);
+        }
+    }
+
+update_constants:
+    /* Pass the locals, not state.*: `state` lives in the block above and is
+     * never initialized when the not-dirty path jumps straight here, so the
+     * previous state.vertex_program read was an uninitialized access. */
+    shader_update_constants(pg, r->shader_binding, binding_changed,
+                            vertex_program, fixed_function);
+
+    NV2A_GL_DGROUP_END();
+}
+
+/* Compile and link a simple VS+FS program from source strings, bind it, and
+ * return the program handle. Exits the process on compile or link failure
+ * (used only for internal blit/display shaders, so failure is fatal). */
+GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src)
+{
+    GLint status;
+    char err_buf[512];
+
+    // Compile vertex shader
+    GLuint vs = glCreateShader(GL_VERTEX_SHADER);
+    glShaderSource(vs, 1, &vs_src, NULL);
+    glCompileShader(vs);
+    glGetShaderiv(vs, GL_COMPILE_STATUS, &status);
+    if (status != GL_TRUE) {
+        glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf);
+        err_buf[sizeof(err_buf)-1] = '\0';
+        fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf);
+        exit(1);
+    }
+
+    // Compile fragment shader
+    GLuint fs = glCreateShader(GL_FRAGMENT_SHADER);
+    glShaderSource(fs, 1, &fs_src, NULL);
+    glCompileShader(fs);
+    glGetShaderiv(fs, GL_COMPILE_STATUS, &status);
+    if (status != GL_TRUE) {
+        glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf);
+        err_buf[sizeof(err_buf)-1] = '\0';
+        fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf);
+        exit(1);
+    }
+
+    // Link vertex and fragment shaders
+    GLuint prog = glCreateProgram();
+    glAttachShader(prog, vs);
+    glAttachShader(prog, fs);
+    glLinkProgram(prog);
+    /* Link status was previously unchecked; a failed link would silently
+     * bind an unusable program. */
+    glGetProgramiv(prog, GL_LINK_STATUS, &status);
+    if (status != GL_TRUE) {
+        glGetProgramInfoLog(prog, sizeof(err_buf), NULL, err_buf);
+        err_buf[sizeof(err_buf)-1] = '\0';
+        fprintf(stderr, "Shader linking failed: %s\n", err_buf);
+        exit(1);
+    }
+    glUseProgram(prog);
+
+    // Flag shaders for deletion (will still be retained for lifetime of prog)
+    glDeleteShader(vs);
+    glDeleteShader(fs);
+
+    return prog;
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c
new file mode 100644
index 0000000000..332ca7199e
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/surface.c
@@ -0,0 +1,1400 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "hw/xbox/nv2a/pgraph/pgraph.h"
+#include "ui/xemu-settings.h"
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/pgraph/swizzle.h"
+#include "debug.h"
+#include "renderer.h"
+
+static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force);
+static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface,
+ bool swizzle, bool flip, bool downscale,
+ uint8_t *pixels);
+static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height);
+
+/* Change the surface upscaling factor at runtime.
+ *
+ * The sequence below is order-sensitive: PFIFO is halted first, then all
+ * dirty surfaces are downloaded to guest RAM, then the renderer is flushed
+ * (which re-reads the new config value), and finally PFIFO is resumed.
+ * Called with the iothread lock held; it is dropped for the duration so
+ * the FIFO/render threads can make progress while we wait on events.
+ */
+void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    /* Clamp to a minimum of 1x; the new value takes effect at flush time. */
+    g_config.display.quality.surface_scale = scale < 1 ? 1 : scale;
+
+    qemu_mutex_unlock_iothread();
+
+    /* Halt PFIFO so no further commands are processed during the switch. */
+    qemu_mutex_lock(&d->pfifo.lock);
+    qatomic_set(&d->pfifo.halt, true);
+    qemu_mutex_unlock(&d->pfifo.lock);
+
+    /* Request a download of all dirty surfaces and wait for completion. */
+    qemu_mutex_lock(&d->pgraph.lock);
+    qemu_event_reset(&r->dirty_surfaces_download_complete);
+    qatomic_set(&r->download_dirty_surfaces_pending, true);
+    qemu_mutex_unlock(&d->pgraph.lock);
+    qemu_mutex_lock(&d->pfifo.lock);
+    pfifo_kick(d);
+    qemu_mutex_unlock(&d->pfifo.lock);
+    qemu_event_wait(&r->dirty_surfaces_download_complete);
+
+    /* Request a full renderer flush and wait for it to finish. */
+    qemu_mutex_lock(&d->pgraph.lock);
+    qemu_event_reset(&d->pgraph.flush_complete);
+    qatomic_set(&d->pgraph.flush_pending, true);
+    qemu_mutex_unlock(&d->pgraph.lock);
+    qemu_mutex_lock(&d->pfifo.lock);
+    pfifo_kick(d);
+    qemu_mutex_unlock(&d->pfifo.lock);
+    qemu_event_wait(&d->pgraph.flush_complete);
+
+    /* Resume command processing. */
+    qemu_mutex_lock(&d->pfifo.lock);
+    qatomic_set(&d->pfifo.halt, false);
+    pfifo_kick(d);
+    qemu_mutex_unlock(&d->pfifo.lock);
+
+    qemu_mutex_lock_iothread();
+}
+
+/* Report the surface scale factor currently applied by the renderer. */
+unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    return pg->surface_scale_factor;
+}
+
+/* Refresh the cached surface scale factor from user config, clamped to a
+ * minimum of 1x. */
+void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg)
+{
+    int configured = g_config.display.quality.surface_scale;
+    if (configured < 1) {
+        configured = 1;
+    }
+    pg->surface_scale_factor = configured;
+}
+
+// FIXME: Move to common
+/* The framebuffer is considered dirty when the surface shape has changed
+ * since the last check and at least one of the color/zeta formats is set. */
+static bool framebuffer_dirty(PGRAPHState *pg)
+{
+    if (memcmp(&pg->surface_shape, &pg->last_surface_shape,
+               sizeof(SurfaceShape)) == 0) {
+        return false;
+    }
+    return pg->surface_shape.color_format || pg->surface_shape.zeta_format;
+}
+
+/* Mark the color and/or zeta surfaces as containing un-downloaded draws.
+ * A surface only becomes dirty if its write path is currently enabled. */
+void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n",
+        color, zeta,
+        pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg));
+
+    /* FIXME: Does this apply to CLEARs too? */
+    color = color && pgraph_color_write_enabled(pg);
+    zeta = zeta && pgraph_zeta_write_enabled(pg);
+    pg->surface_color.draw_dirty |= color;
+    pg->surface_zeta.draw_dirty |= zeta;
+
+    /* Propagate dirtiness to the bound surface objects, if any. */
+    SurfaceBinding *bindings[2] = { r->color_binding, r->zeta_binding };
+    bool dirty[2] = { color, zeta };
+
+    for (int i = 0; i < 2; i++) {
+        if (bindings[i] == NULL) {
+            continue;
+        }
+        bindings[i]->draw_dirty |= dirty[i];
+        bindings[i]->frame_time = pg->frame_time;
+        bindings[i]->cleared = false;
+    }
+}
+
+/* Build the GL objects used to render a surface into a texture: a
+ * full-screen-triangle shader program, a (dataless) VAO/VBO pair, and an
+ * FBO to attach the destination texture to.
+ */
+static void init_render_to_texture(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    /* Vertex shader: generates a screen-covering triangle from gl_VertexID
+     * alone, so no vertex attributes are needed. */
+    const char *vs =
+        "#version 330\n"
+        "void main()\n"
+        "{\n"
+        "    float x = -1.0 + float((gl_VertexID & 1) << 2);\n"
+        "    float y = -1.0 + float((gl_VertexID & 2) << 1);\n"
+        "    gl_Position = vec4(x, y, 0, 1);\n"
+        "}\n";
+    /* Fragment shader: samples the source surface, flipping vertically and
+     * compensating for the source texture being taller than the surface. */
+    const char *fs =
+        "#version 330\n"
+        "uniform sampler2D tex;\n"
+        "uniform vec2 surface_size;\n"
+        "layout(location = 0) out vec4 out_Color;\n"
+        "void main()\n"
+        "{\n"
+        "    vec2 texCoord;\n"
+        "    texCoord.x = gl_FragCoord.x;\n"
+        "    texCoord.y = (surface_size.y - gl_FragCoord.y)\n"
+        "                 + (textureSize(tex,0).y - surface_size.y);\n"
+        "    texCoord /= textureSize(tex,0).xy;\n"
+        "    out_Color.rgba = texture(tex, texCoord);\n"
+        "}\n";
+
+    r->s2t_rndr.prog = pgraph_gl_compile_shader(vs, fs);
+    r->s2t_rndr.tex_loc = glGetUniformLocation(r->s2t_rndr.prog, "tex");
+    r->s2t_rndr.surface_size_loc = glGetUniformLocation(r->s2t_rndr.prog,
+                                                    "surface_size");
+
+    /* Empty VBO: the triangle is synthesized in the vertex shader, but a
+     * bound VAO/VBO is still required by core profile. */
+    glGenVertexArrays(1, &r->s2t_rndr.vao);
+    glBindVertexArray(r->s2t_rndr.vao);
+    glGenBuffers(1, &r->s2t_rndr.vbo);
+    glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo);
+    glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
+    glGenFramebuffers(1, &r->s2t_rndr.fbo);
+}
+
+/* Decide whether a surface can be copied into a texture entirely on the
+ * GPU (fast path), based solely on format compatibility.  Returns false
+ * (and traces the failure) for any unhandled combination. */
+static bool surface_to_texture_can_fastpath(SurfaceBinding *surface,
+                                            TextureShape *shape)
+{
+    // FIXME: Better checks/handling on formats and surface-texture compat
+
+    int surface_fmt = surface->shape.color_format;
+    int texture_fmt = shape->color_format;
+
+    if (!surface->color) {
+        // FIXME: Support zeta to color
+        return false;
+    }
+
+    switch (surface_fmt) {
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5:
+            return true;
+        default:
+            break;
+        }
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5:
+            return true;
+        default:
+            break;
+        }
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8:
+            return true;
+        default:
+            break;
+        }
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8:
+            return true;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+
+    trace_nv2a_pgraph_surface_texture_compat_failed(
+        surface_fmt, texture_fmt);
+    return false;
+}
+
+/* Draw `surface` into `gl_texture` using the surface-to-texture shader.
+ *
+ * Configures a large amount of GL state for the blit and, on exit, restores
+ * only the framebuffer, VAO, texture binding and program — callers are
+ * expected to reconfigure the remaining raster state before drawing.
+ */
+static void render_surface_to(NV2AState *d, SurfaceBinding *surface,
+                              int texture_unit, GLuint gl_target,
+                              GLuint gl_texture, unsigned int width,
+                              unsigned int height)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    /* Attach the destination texture to the helper FBO. */
+    glActiveTexture(GL_TEXTURE0 + texture_unit);
+    glBindFramebuffer(GL_FRAMEBUFFER, r->s2t_rndr.fbo);
+
+    GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 };
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target,
+                           gl_texture, 0);
+    glDrawBuffers(1, draw_buffers);
+    assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
+    assert(glGetError() == GL_NO_ERROR);
+
+    /* Sample the source surface with transparent-black border so reads
+     * outside the surface area return zeros. */
+    float color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+    glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER);
+    glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color);
+
+    glBindVertexArray(r->s2t_rndr.vao);
+    glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo);
+    glUseProgram(r->s2t_rndr.prog);
+    glProgramUniform1i(r->s2t_rndr.prog, r->s2t_rndr.tex_loc,
+                       texture_unit);
+    glProgramUniform2f(r->s2t_rndr.prog,
+                       r->s2t_rndr.surface_size_loc, width, height);
+
+    /* Neutral raster state for a plain full-target blit. */
+    glViewport(0, 0, width, height);
+    glColorMask(true, true, true, true);
+    glDisable(GL_DITHER);
+    glDisable(GL_SCISSOR_TEST);
+    glDisable(GL_BLEND);
+    glDisable(GL_STENCIL_TEST);
+    glDisable(GL_CULL_FACE);
+    glDisable(GL_DEPTH_TEST);
+    glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+    glClearColor(0.0f, 0.0f, 1.0f, 1.0f);
+    glClear(GL_COLOR_BUFFER_BIT);
+    /* Single screen-covering triangle generated in the vertex shader. */
+    glDrawArrays(GL_TRIANGLES, 0, 3);
+
+    /* Detach and restore the bindings the caller relies on. */
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0,
+                           0);
+    glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer);
+    glBindVertexArray(r->gl_vertex_array);
+    glBindTexture(gl_target, gl_texture);
+    glUseProgram(
+        r->shader_binding ? r->shader_binding->gl_program : 0);
+}
+
+/* Fallback surface-to-texture path: read the surface back to a CPU buffer
+ * (flipped, at scaled size) and re-upload it as texture data in the
+ * requested texture format.  Used when the GPU fast path cannot handle the
+ * format combination.
+ */
+static void render_surface_to_texture_slow(NV2AState *d,
+                                           SurfaceBinding *surface,
+                                           TextureBinding *texture,
+                                           TextureShape *texture_shape,
+                                           int texture_unit)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    /* Validate the format index before using it to address the table
+     * (previously the table was indexed before the assert). */
+    assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map));
+    const ColorFormatInfo *f = &kelvin_color_format_gl_map[texture_shape->color_format];
+    nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK);
+
+    glActiveTexture(GL_TEXTURE0 + texture_unit);
+    glBindTexture(texture->gl_target, texture->gl_texture);
+
+    /* Download at the scaled surface dimensions... */
+    unsigned int width = surface->width,
+                 height = surface->height;
+    pgraph_apply_scaling_factor(pg, &width, &height);
+
+    size_t bufsize = width * height * surface->fmt.bytes_per_pixel;
+
+    uint8_t *buf = g_malloc(bufsize);
+    surface_download_to_buffer(d, surface, false, true, false, buf);
+
+    /* ...and upload at the scaled texture dimensions.
+     * FIXME: if the texture is larger than the surface this reads past the
+     * downloaded data; see upstream FIXME on format/compat handling. */
+    width = texture_shape->width;
+    height = texture_shape->height;
+    pgraph_apply_scaling_factor(pg, &width, &height);
+
+    glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0,
+                 f->gl_format, f->gl_type, buf);
+    g_free(buf);
+    /* Note: the texture is still bound here; no rebind is necessary. */
+}
+
+/* Note: This function is intended to be called before PGRAPH configures GL
+ * state for rendering; it will configure GL state here but only restore a
+ * couple of items.
+ */
+void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface,
+                                         TextureBinding *texture,
+                                         TextureShape *texture_shape,
+                                         int texture_unit)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    /* Validate the format index before it is used to address the format
+     * table (previously the table was indexed before the assert). */
+    assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map));
+    const ColorFormatInfo *f =
+        &kelvin_color_format_gl_map[texture_shape->color_format];
+
+    nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX);
+
+    if (!surface_to_texture_can_fastpath(surface, texture_shape)) {
+        render_surface_to_texture_slow(d, surface, texture,
+                                       texture_shape, texture_unit);
+        return;
+    }
+
+    /* Fast path: allocate texture storage at the scaled texture dimensions
+     * and draw the surface into it on the GPU. */
+    unsigned int width = texture_shape->width, height = texture_shape->height;
+    pgraph_apply_scaling_factor(pg, &width, &height);
+
+    glActiveTexture(GL_TEXTURE0 + texture_unit);
+    glBindTexture(texture->gl_target, texture->gl_texture);
+    glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0);
+    glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0);
+    glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0,
+                 f->gl_format, f->gl_type, NULL);
+    glBindTexture(texture->gl_target, 0);
+    render_surface_to(d, surface, texture_unit, texture->gl_target,
+                      texture->gl_texture, width, height);
+    /* Restore the texture binding and shader program for the caller. */
+    glBindTexture(texture->gl_target, texture->gl_texture);
+    glUseProgram(
+        r->shader_binding ? r->shader_binding->gl_program : 0);
+}
+
+/* Check whether a cached surface may be used directly as the backing data
+ * for a texture of the given shape.  Requires matching geometry (pitch for
+ * linear surfaces, width, height), no cubemaps or mipmaps, and a known
+ * compatible color-format pairing. */
+bool pgraph_gl_check_surface_to_texture_compatibility(
+    const SurfaceBinding *surface,
+    const TextureShape *shape)
+{
+    // FIXME: Better checks/handling on formats and surface-texture compat
+
+    /* Geometry must match; pitch only matters for linear surfaces. */
+    if ((!surface->swizzle && surface->pitch != shape->pitch) ||
+        surface->width != shape->width ||
+        surface->height != shape->height) {
+        return false;
+    }
+
+    int surface_fmt = surface->shape.color_format;
+    int texture_fmt = shape->color_format;
+
+    if (!surface->color) {
+        // FIXME: Support zeta to color
+        return false;
+    }
+
+    if (shape->cubemap) {
+        // FIXME: Support rendering surface to cubemap face
+        return false;
+    }
+
+    if (shape->levels > 1) {
+        // FIXME: Support rendering surface to mip levels
+        return false;
+    }
+
+    switch (surface_fmt) {
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5:
+            return true;
+        default:
+            break;
+        }
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5:
+            return true;
+        default:
+            break;
+        }
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8:
+            return true;
+        default:
+            break;
+        }
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
+        switch (texture_fmt) {
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8:
+        case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8:
+            return true;
+        default:
+            break;
+        }
+        break;
+    default:
+        break;
+    }
+
+    trace_nv2a_pgraph_surface_texture_compat_failed(
+        surface_fmt, texture_fmt);
+    return false;
+}
+
+/* Block the calling (CPU) thread until the surface's pending draws have
+ * been downloaded to guest RAM by the render thread.  No-op if the surface
+ * has no dirty draws. */
+static void wait_for_surface_download(SurfaceBinding *e)
+{
+    NV2AState *d = g_nv2a;
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    if (qatomic_read(&e->draw_dirty)) {
+        /* Flag the download, kick PFIFO so the render thread services it,
+         * then wait for completion. */
+        qemu_mutex_lock(&d->pfifo.lock);
+        qemu_event_reset(&r->downloads_complete);
+        qatomic_set(&e->download_pending, true);
+        qatomic_set(&r->downloads_pending, true);
+        pfifo_kick(d);
+        qemu_mutex_unlock(&d->pfifo.lock);
+        qemu_event_wait(&r->downloads_complete);
+    }
+}
+
+/* Memory-access callback invoked when the CPU touches VRAM backing a
+ * cached surface.  Reads force a download of pending GPU draws; writes
+ * mark the surface for re-upload before its next use. */
+static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
+                                    hwaddr len, bool write)
+{
+    SurfaceBinding *e = opaque;
+    assert(addr >= e->vram_addr);
+    hwaddr offset = addr - e->vram_addr;
+    assert(offset < e->size);
+
+    /* CPU is about to observe surface memory: flush GPU-side draws. */
+    if (qatomic_read(&e->draw_dirty)) {
+        trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
+        wait_for_surface_download(e);
+    }
+
+    /* CPU modified surface memory: GL copy is now stale. */
+    if (write && !qatomic_read(&e->upload_pending)) {
+        trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
+        qatomic_set(&e->upload_pending, true);
+    }
+}
+
+/* Insert a new surface binding into the cache.
+ *
+ * Any cached surface whose VRAM range overlaps the new one is downloaded
+ * (if dirty) and evicted first.  Under TCG, a memory-access callback is
+ * installed so CPU reads/writes of the backing VRAM stay coherent; the
+ * pgraph lock is temporarily dropped in favor of the iothread lock for
+ * that registration (lock-ordering requirement).
+ *
+ * Returns the heap-allocated copy of surface_in that now lives in the
+ * cache (owned by the cache; freed in pgraph_gl_surface_invalidate).
+ */
+static SurfaceBinding *surface_put(NV2AState *d, hwaddr addr,
+                                   SurfaceBinding *surface_in)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    assert(pgraph_gl_surface_get(d, addr) == NULL);
+
+    /* Evict any surface overlapping [vram_addr, vram_addr + size). */
+    SurfaceBinding *surface, *next;
+    uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1;
+    QTAILQ_FOREACH_SAFE(surface, &r->surfaces, entry, next) {
+        uintptr_t s_end = surface->vram_addr + surface->size - 1;
+        bool overlapping = !(surface->vram_addr > e_end
+                             || surface_in->vram_addr > s_end);
+        if (overlapping) {
+            trace_nv2a_pgraph_surface_evict_overlapping(
+                surface->vram_addr, surface->width, surface->height,
+                surface->pitch);
+            pgraph_gl_surface_download_if_dirty(d, surface);
+            pgraph_gl_surface_invalidate(d, surface);
+        }
+    }
+
+    SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding));
+    assert(surface_out != NULL); /* g_malloc aborts on OOM; informational */
+    *surface_out = *surface_in;
+
+    if (tcg_enabled()) {
+        /* Register the CPU access callback under the iothread lock. */
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock_iothread();
+        mem_access_callback_insert(qemu_get_cpu(0),
+            d->vram, surface_out->vram_addr, surface_out->size,
+            &surface_out->access_cb, &surface_access_callback,
+            surface_out);
+        qemu_mutex_unlock_iothread();
+        qemu_mutex_lock(&d->pgraph.lock);
+    }
+
+    QTAILQ_INSERT_TAIL(&r->surfaces, surface_out, entry);
+
+    return surface_out;
+}
+
+/* Look up the cached surface whose base VRAM address equals addr.
+ * Returns NULL when no such surface exists. */
+SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr)
+{
+    PGRAPHGLState *r = d->pgraph.gl_renderer_state;
+    SurfaceBinding *s;
+
+    QTAILQ_FOREACH (s, &r->surfaces, entry) {
+        if (s->vram_addr == addr) {
+            return s;
+        }
+    }
+
+    return NULL;
+}
+
+/* Look up the cached surface whose VRAM range [vram_addr, vram_addr+size)
+ * contains addr.  Returns NULL when no surface covers that address. */
+SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr)
+{
+    PGRAPHGLState *r = d->pgraph.gl_renderer_state;
+    SurfaceBinding *s;
+
+    QTAILQ_FOREACH (s, &r->surfaces, entry) {
+        if (addr >= s->vram_addr &&
+            addr < (s->vram_addr + s->size)) {
+            return s;
+        }
+    }
+
+    return NULL;
+}
+
+/* Remove a surface from the cache: unbind it if it is the current color or
+ * zeta target, remove its CPU access callback (TCG only), delete its GL
+ * texture, and free it.  Any dirty contents are discarded — callers must
+ * download first if the data matters. */
+void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *surface)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    trace_nv2a_pgraph_surface_invalidated(surface->vram_addr);
+
+    if (surface == r->color_binding) {
+        assert(d->pgraph.surface_color.buffer_dirty);
+        pgraph_gl_unbind_surface(d, true);
+    }
+    if (surface == r->zeta_binding) {
+        assert(d->pgraph.surface_zeta.buffer_dirty);
+        pgraph_gl_unbind_surface(d, false);
+    }
+
+    if (tcg_enabled()) {
+        /* Callback removal requires the iothread lock; drop the pgraph
+         * lock first to respect lock ordering. */
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock_iothread();
+        mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
+        qemu_mutex_unlock_iothread();
+        qemu_mutex_lock(&d->pgraph.lock);
+    }
+
+    glDeleteTextures(1, &surface->gl_buffer);
+
+    QTAILQ_REMOVE(&r->surfaces, surface, entry);
+    g_free(surface);
+}
+
+/* Evict surfaces that have not been used for several frames, downloading
+ * any dirty contents to guest RAM before invalidating them. */
+static void surface_evict_old(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    /* Frames of inactivity after which a cached surface is dropped. */
+    const int surface_age_limit = 5;
+
+    SurfaceBinding *s, *next;
+    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
+        int last_used = d->pgraph.frame_time - s->frame_time;
+        if (last_used >= surface_age_limit) {
+            trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr);
+            pgraph_gl_surface_download_if_dirty(d, s);
+            pgraph_gl_surface_invalidate(d, s);
+        }
+    }
+}
+
+/* Decide whether surface s1 can stand in for s2.  Formats, attachment and
+ * pitch must match, and s1's clip origin must not exceed s2's.  In strict
+ * mode the dimensions must be equal; otherwise s1 merely needs to be at
+ * least as large as s2. */
+static bool check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2,
+                                        bool strict)
+{
+    if (s1->color != s2->color ||
+        s1->fmt.gl_attachment != s2->fmt.gl_attachment ||
+        s1->fmt.gl_internal_format != s2->fmt.gl_internal_format ||
+        s1->pitch != s2->pitch ||
+        s1->shape.clip_x > s2->shape.clip_x ||
+        s1->shape.clip_y > s2->shape.clip_y) {
+        return false;
+    }
+
+    if (strict) {
+        return (s1->width == s2->width) && (s1->height == s2->height);
+    }
+
+    return (s1->width >= s2->width) && (s1->height >= s2->height);
+}
+
+/* Download a surface to guest RAM, but only if it has draws that have not
+ * yet been written back. */
+void pgraph_gl_surface_download_if_dirty(NV2AState *d,
+                                         SurfaceBinding *surface)
+{
+    if (!surface->draw_dirty) {
+        return;
+    }
+    surface_download(d, surface, true);
+}
+
+/* Re-attach the currently bound color and zeta surfaces to the main
+ * framebuffer.  Used to restore FBO attachments after a helper routine has
+ * temporarily repointed them. */
+static void bind_current_surface(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    if (r->color_binding) {
+        glFramebufferTexture2D(GL_FRAMEBUFFER, r->color_binding->fmt.gl_attachment,
+                               GL_TEXTURE_2D, r->color_binding->gl_buffer, 0);
+    }
+
+    if (r->zeta_binding) {
+        glFramebufferTexture2D(GL_FRAMEBUFFER, r->zeta_binding->fmt.gl_attachment,
+                               GL_TEXTURE_2D, r->zeta_binding->gl_buffer, 0);
+    }
+
+    /* Only validate when at least one attachment exists; an FBO with no
+     * attachments is legitimately incomplete. */
+    if (r->color_binding || r->zeta_binding) {
+        assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) ==
+               GL_FRAMEBUFFER_COMPLETE);
+    }
+}
+
+/* Horizontally downscale one row by point sampling: copy one pixel out of
+ * every `factor` source pixels into `out`.  The 4- and 2-byte cases use
+ * direct word copies; other pixel sizes fall back to memcpy. */
+static void surface_copy_shrink_row(uint8_t *out, uint8_t *in,
+                                    unsigned int width,
+                                    unsigned int bytes_per_pixel,
+                                    unsigned int factor)
+{
+    unsigned int in_stride = bytes_per_pixel * factor;
+
+    switch (bytes_per_pixel) {
+    case 4:
+        for (unsigned int x = 0; x < width; x++) {
+            *(uint32_t *)out = *(uint32_t *)in;
+            out += 4;
+            in += in_stride;
+        }
+        break;
+    case 2:
+        for (unsigned int x = 0; x < width; x++) {
+            *(uint16_t *)out = *(uint16_t *)in;
+            out += 2;
+            in += in_stride;
+        }
+        break;
+    default:
+        for (unsigned int x = 0; x < width; x++) {
+            memcpy(out, in, bytes_per_pixel);
+            out += bytes_per_pixel;
+            in += in_stride;
+        }
+        break;
+    }
+}
+
+/* Read a surface's contents from the GPU into `pixels`.
+ *
+ * swizzle:   re-swizzle the output (only if the surface is swizzled).
+ * flip:      flip vertically during readback.
+ * downscale: shrink from the scaled render size back to native size
+ *            (only if a scale factor is active).
+ * pixels:    destination; must hold at least surface->size bytes.
+ *
+ * Data flows gl_read_buf -> (downscale) -> swizzle_buf -> (swizzle) ->
+ * pixels, where the intermediate buffers alias `pixels` when a stage is
+ * disabled.  Temporarily repoints the FBO attachments; restores them via
+ * bind_current_surface() before returning.
+ */
+static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface,
+                                       bool swizzle, bool flip, bool downscale,
+                                       uint8_t *pixels)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    /* Stages are no-ops unless actually applicable to this surface. */
+    swizzle &= surface->swizzle;
+    downscale &= (pg->surface_scale_factor != 1);
+
+    trace_nv2a_pgraph_surface_download(
+        surface->color ? "COLOR" : "ZETA",
+        surface->swizzle ? "sz" : "lin", surface->vram_addr,
+        surface->width, surface->height, surface->pitch,
+        surface->fmt.bytes_per_pixel);
+
+    /* Bind destination surface to framebuffer */
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                           0, 0);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                           0, 0);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                           GL_TEXTURE_2D, 0, 0);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment,
+                           GL_TEXTURE_2D, surface->gl_buffer, 0);
+
+    assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
+
+    /* Read surface into memory */
+    uint8_t *gl_read_buf = pixels;
+
+    uint8_t *swizzle_buf = pixels;
+    if (swizzle) {
+        /* FIXME: Allocate big buffer up front and re-alloc if necessary.
+         * FIXME: Consider swizzle in shader
+         */
+        assert(pg->surface_scale_factor == 1 || downscale);
+        swizzle_buf = (uint8_t *)g_malloc(surface->size);
+        gl_read_buf = swizzle_buf;
+    }
+
+    if (downscale) {
+        /* Readback happens at scaled size; reuse/grow the scratch buffer. */
+        pg->scale_buf = (uint8_t *)g_realloc(
+            pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor *
+                               surface->size);
+        gl_read_buf = pg->scale_buf;
+    }
+
+    glo_readpixels(
+        surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel,
+        pg->surface_scale_factor * surface->pitch,
+        pg->surface_scale_factor * surface->width,
+        pg->surface_scale_factor * surface->height, flip, gl_read_buf);
+
+    /* FIXME: Replace this with a hw accelerated version */
+    if (downscale) {
+        assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel));
+        uint8_t *out = swizzle_buf, *in = pg->scale_buf;
+        for (unsigned int y = 0; y < surface->height; y++) {
+            surface_copy_shrink_row(out, in, surface->width,
+                                    surface->fmt.bytes_per_pixel,
+                                    pg->surface_scale_factor);
+            /* Skip factor input rows per output row (each scaled row is
+             * pitch*factor bytes wide). */
+            in += surface->pitch * pg->surface_scale_factor *
+                pg->surface_scale_factor;
+            out += surface->pitch;
+        }
+    }
+
+    if (swizzle) {
+        swizzle_rect(swizzle_buf, surface->width, surface->height, pixels,
+                     surface->pitch, surface->fmt.bytes_per_pixel);
+        g_free(swizzle_buf);
+    }
+
+    /* Re-bind original framebuffer target */
+    glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment,
+                           GL_TEXTURE_2D, 0, 0);
+    bind_current_surface(d);
+}
+
+/* Write a surface back to guest VRAM (swizzled, flipped and downscaled as
+ * needed) and mark the affected range dirty for the display and texture
+ * subsystems.  Skipped unless a download is pending or `force` is set. */
+static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force)
+{
+    if (!(surface->download_pending || force)) {
+        return;
+    }
+
+    /* FIXME: Respect write enable at last TOU? */
+
+    nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD);
+
+    surface_download_to_buffer(d, surface, true, true, true,
+                               d->vram_ptr + surface->vram_addr);
+
+    /* Notify consumers of the freshly written VRAM range. */
+    memory_region_set_client_dirty(d->vram, surface->vram_addr,
+                                   surface->pitch * surface->height,
+                                   DIRTY_MEMORY_VGA);
+    memory_region_set_client_dirty(d->vram, surface->vram_addr,
+                                   surface->pitch * surface->height,
+                                   DIRTY_MEMORY_NV2A_TEX);
+
+    surface->download_pending = false;
+    surface->draw_dirty = false;
+}
+
+/* Service all queued surface downloads on the render thread, then signal
+ * any threads blocked in wait_for_surface_download(). */
+void pgraph_gl_process_pending_downloads(NV2AState *d)
+{
+    PGRAPHGLState *r = d->pgraph.gl_renderer_state;
+    SurfaceBinding *s;
+
+    QTAILQ_FOREACH (s, &r->surfaces, entry) {
+        surface_download(d, s, false);
+    }
+
+    qatomic_set(&r->downloads_pending, false);
+    qemu_event_set(&r->downloads_complete);
+}
+
+/* Force-download every cached surface with dirty draws, then signal the
+ * waiter (used e.g. when changing the surface scale factor). */
+void pgraph_gl_download_dirty_surfaces(NV2AState *d)
+{
+    PGRAPHGLState *r = d->pgraph.gl_renderer_state;
+    SurfaceBinding *s;
+
+    QTAILQ_FOREACH (s, &r->surfaces, entry) {
+        pgraph_gl_surface_download_if_dirty(d, s);
+    }
+
+    qatomic_set(&r->download_dirty_surfaces_pending, false);
+    qemu_event_set(&r->dirty_surfaces_download_complete);
+}
+
+/* Horizontally upscale one row: write each source pixel `factor` times to
+ * the destination.  The 4- and 2-byte cases use direct word copies; other
+ * pixel sizes fall back to memcpy. */
+static void surface_copy_expand_row(uint8_t *out, uint8_t *in,
+                                    unsigned int width,
+                                    unsigned int bytes_per_pixel,
+                                    unsigned int factor)
+{
+    switch (bytes_per_pixel) {
+    case 4:
+        for (unsigned int x = 0; x < width; x++) {
+            for (unsigned int i = 0; i < factor; i++) {
+                *(uint32_t *)out = *(uint32_t *)in;
+                out += 4;
+            }
+            in += 4;
+        }
+        break;
+    case 2:
+        for (unsigned int x = 0; x < width; x++) {
+            for (unsigned int i = 0; i < factor; i++) {
+                *(uint16_t *)out = *(uint16_t *)in;
+                out += 2;
+            }
+            in += 2;
+        }
+        break;
+    default:
+        for (unsigned int x = 0; x < width; x++) {
+            for (unsigned int i = 0; i < factor; i++) {
+                memcpy(out, in, bytes_per_pixel);
+                out += bytes_per_pixel;
+            }
+            in += bytes_per_pixel;
+        }
+        break;
+    }
+}
+
+/* Upscale a width x height image by `factor` in both dimensions.
+ * Each source row is expanded horizontally once, then the expanded row is
+ * duplicated vertically factor-1 times with memcpy.  `out` must hold
+ * width*height*bytes_per_pixel*factor^2 bytes. */
+static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width,
+                                unsigned int height,
+                                unsigned int bytes_per_pixel,
+                                unsigned int factor)
+{
+    size_t out_pitch = width * bytes_per_pixel * factor;
+
+    for (unsigned int y = 0; y < height; y++) {
+        surface_copy_expand_row(out, in, width, bytes_per_pixel, factor);
+        /* Replicate the freshly expanded row for the remaining factor-1
+         * output rows; `out` ends up advanced by factor * out_pitch. */
+        uint8_t *row_in = out;
+        for (unsigned int i = 1; i < factor; i++) {
+            out += out_pitch;
+            memcpy(out, row_in, out_pitch);
+        }
+        in += width * bytes_per_pixel;
+        out += out_pitch;
+    }
+}
+
+/* Upload a surface's backing data from guest VRAM into its GL texture.
+ *
+ * Handles unswizzling, vertical flipping, and expansion to the scaled
+ * render size.  Skipped unless an upload is pending or `force` is set.
+ * Temporarily detaches FBO attachments and the 2D texture binding; both
+ * are restored before returning.
+ */
+void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
+                                   bool force)
+{
+    if (!(surface->upload_pending || force)) {
+        return;
+    }
+
+    nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD);
+
+    trace_nv2a_pgraph_surface_upload(
+        surface->color ? "COLOR" : "ZETA",
+        surface->swizzle ? "sz" : "lin", surface->vram_addr,
+        surface->width, surface->height, surface->pitch,
+        surface->fmt.bytes_per_pixel);
+
+    PGRAPHState *pg = &d->pgraph;
+
+    surface->upload_pending = false;
+    surface->draw_time = pg->draw_time;
+
+    // FIXME: Don't query GL for texture binding
+    GLint last_texture_binding;
+    glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding);
+
+    // FIXME: Replace with FBO to not disturb current state
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                           0, 0);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                           0, 0);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                           GL_TEXTURE_2D, 0, 0);
+
+    uint8_t *data = d->vram_ptr;
+    uint8_t *buf = data + surface->vram_addr;
+
+    /* Convert swizzled source data to linear before further processing. */
+    if (surface->swizzle) {
+        buf = (uint8_t*)g_malloc(surface->size);
+        unswizzle_rect(data + surface->vram_addr,
+                       surface->width, surface->height,
+                       buf,
+                       surface->pitch,
+                       surface->fmt.bytes_per_pixel);
+    }
+
+    /* FIXME: Replace this flip/scaling */
+
+    // This is VRAM so we can't do this inplace!
+    uint8_t *flipped_buf = (uint8_t *)g_malloc(
+        surface->height * surface->width * surface->fmt.bytes_per_pixel);
+    unsigned int irow;
+    for (irow = 0; irow < surface->height; irow++) {
+        memcpy(&flipped_buf[surface->width * (surface->height - irow - 1)
+                                 * surface->fmt.bytes_per_pixel],
+               &buf[surface->pitch * irow],
+               surface->width * surface->fmt.bytes_per_pixel);
+    }
+
+    uint8_t *gl_read_buf = flipped_buf;
+    unsigned int width = surface->width, height = surface->height;
+
+    /* Expand to the scaled render size when upscaling is active. */
+    if (pg->surface_scale_factor > 1) {
+        pgraph_apply_scaling_factor(pg, &width, &height);
+        pg->scale_buf = (uint8_t *)g_realloc(
+            pg->scale_buf, width * height * surface->fmt.bytes_per_pixel);
+        gl_read_buf = pg->scale_buf;
+        uint8_t *out = gl_read_buf, *in = flipped_buf;
+        surface_copy_expand(out, in, surface->width, surface->height,
+                            surface->fmt.bytes_per_pixel,
+                            d->pgraph.surface_scale_factor);
+    }
+
+    /* Rows may not be 4-byte aligned; relax unpack alignment if needed. */
+    int prev_unpack_alignment;
+    glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment);
+    if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) {
+        glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
+    } else {
+        glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
+    }
+
+    glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
+    glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width,
+                 height, 0, surface->fmt.gl_format, surface->fmt.gl_type,
+                 gl_read_buf);
+    glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment);
+    g_free(flipped_buf);
+    if (surface->swizzle) {
+        g_free(buf);
+    }
+
+    // Rebind previous framebuffer binding
+    glBindTexture(GL_TEXTURE_2D, last_texture_binding);
+
+    bind_current_surface(d);
+}
+
+/* Debug helper: emit a trace event for every field that differs between
+ * two surface bindings.  Used to diagnose unexpected cache mismatches. */
+static void compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2)
+{
+    #define DO_CMP(fld) \
+        if (s1->fld != s2->fld) \
+            trace_nv2a_pgraph_surface_compare_mismatch( \
+                #fld, (long int)s1->fld, (long int)s2->fld);
+    DO_CMP(shape.clip_x)
+    DO_CMP(shape.clip_width)
+    DO_CMP(shape.clip_y)
+    DO_CMP(shape.clip_height)
+    DO_CMP(gl_buffer)
+    DO_CMP(fmt.bytes_per_pixel)
+    DO_CMP(fmt.gl_attachment)
+    DO_CMP(fmt.gl_internal_format)
+    DO_CMP(fmt.gl_format)
+    DO_CMP(fmt.gl_type)
+    DO_CMP(color)
+    DO_CMP(swizzle)
+    DO_CMP(vram_addr)
+    DO_CMP(width)
+    DO_CMP(height)
+    DO_CMP(pitch)
+    DO_CMP(size)
+    DO_CMP(dma_addr)
+    DO_CMP(dma_len)
+    DO_CMP(frame_time)
+    DO_CMP(draw_time)
+    #undef DO_CMP
+}
+
+/* Fill in a SurfaceBinding describing the current color or zeta render
+ * target at the given dimensions.
+ *
+ * Resolves the surface format from the current surface shape, loads and
+ * sanity-checks the DMA object that backs the surface, and initializes all
+ * bookkeeping fields.  Does not allocate any GL resources (gl_buffer is
+ * left 0) and does not insert the entry into the cache.
+ */
+static void populate_surface_binding_entry_sized(NV2AState *d, bool color,
+                                                 unsigned int width,
+                                                 unsigned int height,
+                                                 SurfaceBinding *entry)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    Surface *surface;
+    hwaddr dma_address;
+    SurfaceFormatInfo fmt;
+
+    if (color) {
+        surface = &pg->surface_color;
+        dma_address = pg->dma_color;
+        assert(pg->surface_shape.color_format != 0);
+        assert(pg->surface_shape.color_format <
+               ARRAY_SIZE(kelvin_surface_color_format_gl_map));
+        fmt = kelvin_surface_color_format_gl_map[pg->surface_shape.color_format];
+        if (fmt.bytes_per_pixel == 0) {
+            fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n",
+                    pg->surface_shape.color_format);
+            abort();
+        }
+    } else {
+        surface = &pg->surface_zeta;
+        dma_address = pg->dma_zeta;
+        assert(pg->surface_shape.zeta_format != 0);
+        assert(pg->surface_shape.zeta_format <
+               ARRAY_SIZE(kelvin_surface_zeta_float_format_gl_map));
+        /* z_format selects between float and fixed-point depth tables. */
+        const SurfaceFormatInfo *map =
+            pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_gl_map :
+                                         kelvin_surface_zeta_fixed_format_gl_map;
+        fmt = map[pg->surface_shape.zeta_format];
+    }
+
+    DMAObject dma = nv_dma_load(d, dma_address);
+    /* There's a bunch of bugs that could cause us to hit this function
+     * at the wrong time and get a invalid dma object.
+     * Check that it's sane. */
+    assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS);
+    // assert(dma.address + surface->offset != 0);
+    assert(surface->offset <= dma.limit);
+    assert(surface->offset + surface->pitch * height <= dma.limit + 1);
+    assert(surface->pitch % fmt.bytes_per_pixel == 0);
+    assert((dma.address & ~0x07FFFFFF) == 0);
+
+    /* A zeta entry inherits the bound color surface's shape, if present. */
+    entry->shape = (color || !r->color_binding) ? pg->surface_shape :
+                                                  r->color_binding->shape;
+    entry->gl_buffer = 0;
+    entry->fmt = fmt;
+    entry->color = color;
+    entry->swizzle =
+        (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
+    entry->vram_addr = dma.address + surface->offset;
+    entry->width = width;
+    entry->height = height;
+    entry->pitch = surface->pitch;
+    entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel);
+    entry->upload_pending = true;
+    entry->download_pending = false;
+    entry->draw_dirty = false;
+    entry->dma_addr = dma.address;
+    entry->dma_len = dma.limit;
+    entry->frame_time = pg->frame_time;
+    entry->draw_time = pg->draw_time;
+    entry->cleared = false;
+}
+
+/* Fill in a SurfaceBinding for the current render target, deriving the
+ * dimensions from the clipping rectangle (for color, or for zeta with no
+ * color binding) or from the bound color surface (for zeta). */
+static void populate_surface_binding_entry(NV2AState *d, bool color,
+                                           SurfaceBinding *entry)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    unsigned int width, height;
+
+    if (color || !r->color_binding) {
+        surface_get_dimensions(pg, &width, &height);
+        pgraph_apply_anti_aliasing_factor(pg, &width, &height);
+
+        /* Since we determine surface dimensions based on the clipping
+         * rectangle, make sure to include the surface offset as well.
+         */
+        if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
+            width += pg->surface_shape.clip_x;
+            height += pg->surface_shape.clip_y;
+        }
+    } else {
+        /* Zeta must match the bound color surface's dimensions. */
+        width = r->color_binding->width;
+        height = r->color_binding->height;
+    }
+
+    populate_surface_binding_entry_sized(d, color, width, height, entry);
+}
+
+/* Bind (upload=true) or flush (upload=false) one surface target.
+ *
+ * `color` selects the color target, otherwise zeta. On upload, if the
+ * surface definition is dirty (or guest RAM under it changed), the current
+ * binding is dropped and either a compatible cached surface is rebound or
+ * a fresh GL texture is created and attached to the framebuffer. On flush,
+ * a draw-dirty surface is written back to guest RAM when dirty tracking of
+ * CPU accesses is unavailable.
+ */
+static void update_surface_part(NV2AState *d, bool upload, bool color)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    SurfaceBinding entry;
+    populate_surface_binding_entry(d, color, &entry);
+
+    Surface *surface = color ? &pg->surface_color : &pg->surface_zeta;
+
+    /* When not under TCG, detect guest CPU writes to the surface's backing
+     * memory via the dirty bitmap. */
+    bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty(
+                                           d->vram, entry.vram_addr, entry.size,
+                                           DIRTY_MEMORY_NV2A);
+
+    if (upload && (surface->buffer_dirty || mem_dirty)) {
+        pgraph_gl_unbind_surface(d, color);
+
+        SurfaceBinding *found = pgraph_gl_surface_get(d, entry.vram_addr);
+        if (found != NULL) {
+            /* FIXME: Support same color/zeta surface target? In the mean time,
+             * if the surface we just found is currently bound, just unbind it.
+             */
+            SurfaceBinding *other = (color ? r->zeta_binding
+                                           : r->color_binding);
+            if (found == other) {
+                NV2A_UNIMPLEMENTED("Same color & zeta surface offset");
+                pgraph_gl_unbind_surface(d, !color);
+            }
+        }
+
+        trace_nv2a_pgraph_surface_target(
+            color ? "COLOR" : "ZETA", entry.vram_addr,
+            entry.swizzle ? "sz" : "ln",
+            pg->surface_shape.anti_aliasing,
+            pg->surface_shape.clip_x,
+            pg->surface_shape.clip_width, pg->surface_shape.clip_y,
+            pg->surface_shape.clip_height);
+
+        bool should_create = true;
+
+        /* Decide whether the cached surface at this address can be reused. */
+        if (found != NULL) {
+            bool is_compatible =
+                check_surface_compatibility(found, &entry, false);
+
+#define TRACE_ARGS found->vram_addr, found->width, found->height, \
+                   found->swizzle ? "sz" : "ln", \
+                   found->shape.anti_aliasing, found->shape.clip_x, \
+                   found->shape.clip_width, found->shape.clip_y, \
+                   found->shape.clip_height, found->pitch
+            if (found->color) {
+                trace_nv2a_pgraph_surface_match_color(TRACE_ARGS);
+            } else {
+                trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS);
+            }
+#undef TRACE_ARGS
+
+            assert(!(entry.swizzle && pg->clearing));
+
+            if (found->swizzle != entry.swizzle) {
+                /* Clears should only be done on linear surfaces. Avoid
+                 * synchronization by allowing (1) a surface marked swizzled to
+                 * be cleared under the assumption the entire surface is
+                 * destined to be cleared and (2) a fully cleared linear surface
+                 * to be marked swizzled. Strictly match size to avoid
+                 * pathological cases.
+                 */
+                is_compatible &= (pg->clearing || found->cleared) &&
+                    check_surface_compatibility(found, &entry, true);
+                if (is_compatible) {
+                    trace_nv2a_pgraph_surface_migrate_type(
+                        entry.swizzle ? "swizzled" : "linear");
+                }
+            }
+
+            /* A partially-matching color surface is only reusable if it does
+             * not overlap the zeta surface that would be paired with it. */
+            if (is_compatible && color &&
+                !check_surface_compatibility(found, &entry, true)) {
+                SurfaceBinding zeta_entry;
+                populate_surface_binding_entry_sized(
+                    d, !color, found->width, found->height, &zeta_entry);
+                hwaddr color_end = found->vram_addr + found->size;
+                hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size;
+                is_compatible &= found->vram_addr >= zeta_end ||
+                                 zeta_entry.vram_addr >= color_end;
+            }
+
+            /* Zeta must match the bound color surface's dimensions. */
+            if (is_compatible && !color && r->color_binding) {
+                is_compatible &= (found->width == r->color_binding->width) &&
+                                 (found->height == r->color_binding->height);
+            }
+
+            if (is_compatible) {
+                /* FIXME: Refactor */
+                pg->surface_binding_dim.width = found->width;
+                pg->surface_binding_dim.clip_x = found->shape.clip_x;
+                pg->surface_binding_dim.clip_width = found->shape.clip_width;
+                pg->surface_binding_dim.height = found->height;
+                pg->surface_binding_dim.clip_y = found->shape.clip_y;
+                pg->surface_binding_dim.clip_height = found->shape.clip_height;
+                found->upload_pending |= mem_dirty;
+                pg->surface_zeta.buffer_dirty |= color;
+                should_create = false;
+            } else {
+                trace_nv2a_pgraph_surface_evict_reason(
+                    "incompatible", found->vram_addr);
+                compare_surfaces(found, &entry);
+                pgraph_gl_surface_download_if_dirty(d, found);
+                pgraph_gl_surface_invalidate(d, found);
+            }
+        }
+
+        /* No reusable cached surface: allocate a new GL texture for it and
+         * register it in the surface cache. */
+        if (should_create) {
+            glGenTextures(1, &entry.gl_buffer);
+            glBindTexture(GL_TEXTURE_2D, entry.gl_buffer);
+            NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer,
+                           "%s format: %0X, width: %d, height: %d "
+                           "(addr %" HWADDR_PRIx ")",
+                           color ? "color" : "zeta",
+                           color ? pg->surface_shape.color_format
+                                 : pg->surface_shape.zeta_format,
+                           entry.width, entry.height, surface->offset);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+            glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+            unsigned int width = entry.width, height = entry.height;
+            pgraph_apply_scaling_factor(pg, &width, &height);
+            glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width,
+                         height, 0, entry.fmt.gl_format, entry.fmt.gl_type,
+                         NULL);
+            found = surface_put(d, entry.vram_addr, &entry);
+
+            /* FIXME: Refactor */
+            pg->surface_binding_dim.width = entry.width;
+            pg->surface_binding_dim.clip_x = entry.shape.clip_x;
+            pg->surface_binding_dim.clip_width = entry.shape.clip_width;
+            pg->surface_binding_dim.height = entry.height;
+            pg->surface_binding_dim.clip_y = entry.shape.clip_y;
+            pg->surface_binding_dim.clip_height = entry.shape.clip_height;
+
+            if (color && r->zeta_binding && (r->zeta_binding->width != entry.width || r->zeta_binding->height != entry.height)) {
+                pg->surface_zeta.buffer_dirty = true;
+            }
+        }
+
+#define TRACE_ARGS found->vram_addr, found->width, found->height, \
+                   found->swizzle ? "sz" : "ln", found->shape.anti_aliasing, \
+                   found->shape.clip_x, found->shape.clip_width, \
+                   found->shape.clip_y, found->shape.clip_height, found->pitch
+
+        if (color) {
+            if (should_create) {
+                trace_nv2a_pgraph_surface_create_color(TRACE_ARGS);
+            } else {
+                trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS);
+            }
+
+            r->color_binding = found;
+        } else {
+            if (should_create) {
+                trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS);
+            } else {
+                trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS);
+            }
+            r->zeta_binding = found;
+        }
+#undef TRACE_ARGS
+
+        glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment,
+                               GL_TEXTURE_2D, found->gl_buffer, 0);
+        assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) ==
+               GL_FRAMEBUFFER_COMPLETE);
+
+        surface->buffer_dirty = false;
+    }
+
+    /* Flush path: write draw results back to guest RAM when CPU reads
+     * cannot be intercepted. */
+    if (!upload && surface->draw_dirty) {
+        if (!tcg_enabled()) {
+            /* FIXME: Cannot monitor for reads/writes; flush now */
+            surface_download(d,
+                             color ? r->color_binding :
+                                     r->zeta_binding,
+                             true);
+        }
+
+        surface->write_enabled_cache = false;
+        surface->draw_dirty = false;
+    }
+}
+
+/* Detach the color (or zeta) surface from the active framebuffer and drop
+ * the renderer's binding pointer. No-op if nothing is bound.
+ */
+void pgraph_gl_unbind_surface(NV2AState *d, bool color)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    if (color) {
+        if (r->color_binding == NULL) {
+            return;
+        }
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                               GL_TEXTURE_2D, 0, 0);
+        r->color_binding = NULL;
+    } else {
+        if (r->zeta_binding == NULL) {
+            return;
+        }
+        /* Zeta may be attached as depth-only or combined depth/stencil;
+         * clear both attachment points.
+         */
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT,
+                               GL_TEXTURE_2D, 0, 0);
+        glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
+                               GL_TEXTURE_2D, 0, 0);
+        r->zeta_binding = NULL;
+    }
+}
+
+/* Top-level surface synchronization entry point.
+ *
+ * With upload=true, (re)binds the color and zeta render targets (recreating
+ * them if the framebuffer shape changed) and uploads pending guest data.
+ * With upload=false, flushes draw-dirty surfaces back toward guest memory.
+ * `color_write` / `zeta_write` gate each target; they are further masked by
+ * the current write-enable state unless a clear is in progress.
+ */
+void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write,
+                              bool zeta_write)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    pg->surface_shape.z_format =
+        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
+                 NV_PGRAPH_SETUPRASTER_Z_FORMAT);
+
+    color_write = color_write &&
+            (pg->clearing || pgraph_color_write_enabled(pg));
+    zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg));
+
+    if (upload) {
+        /* A framebuffer shape change invalidates both targets. */
+        bool fb_dirty = framebuffer_dirty(pg);
+        if (fb_dirty) {
+            memcpy(&pg->last_surface_shape, &pg->surface_shape,
+                   sizeof(SurfaceShape));
+            pg->surface_color.buffer_dirty = true;
+            pg->surface_zeta.buffer_dirty = true;
+        }
+
+        if (pg->surface_color.buffer_dirty) {
+            pgraph_gl_unbind_surface(d, true);
+        }
+
+        if (color_write) {
+            update_surface_part(d, true, true);
+        }
+
+        if (pg->surface_zeta.buffer_dirty) {
+            pgraph_gl_unbind_surface(d, false);
+        }
+
+        if (zeta_write) {
+            update_surface_part(d, true, false);
+        }
+    } else {
+        /* Flush: also consider targets that were written earlier in the
+         * frame (write_enabled_cache) even if writes are now disabled. */
+        if ((color_write || pg->surface_color.write_enabled_cache)
+            && pg->surface_color.draw_dirty) {
+            update_surface_part(d, false, true);
+        }
+        if ((zeta_write || pg->surface_zeta.write_enabled_cache)
+            && pg->surface_zeta.draw_dirty) {
+            update_surface_part(d, false, false);
+        }
+    }
+
+    if (upload) {
+        pg->draw_time++;
+    }
+
+    bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
+
+    /* Refresh binding timestamps and push pending guest data to the GPU. */
+    if (r->color_binding) {
+        r->color_binding->frame_time = pg->frame_time;
+        if (upload) {
+            pgraph_gl_upload_surface_data(d, r->color_binding, false);
+            r->color_binding->draw_time = pg->draw_time;
+            r->color_binding->swizzle = swizzle;
+        }
+    }
+
+    if (r->zeta_binding) {
+        r->zeta_binding->frame_time = pg->frame_time;
+        if (upload) {
+            pgraph_gl_upload_surface_data(d, r->zeta_binding, false);
+            r->zeta_binding->draw_time = pg->draw_time;
+            r->zeta_binding->swizzle = swizzle;
+        }
+    }
+
+    // Sanity check color and zeta dimensions match
+    if (r->color_binding && r->zeta_binding) {
+        assert((r->color_binding->width == r->zeta_binding->width)
+               && (r->color_binding->height == r->zeta_binding->height));
+    }
+
+    surface_evict_old(d);
+}
+
+// FIXME: Move to common
+/* Report the surface size: swizzled surfaces are power-of-two sized from
+ * the log2 shape fields, linear surfaces use the clip rectangle extents.
+ */
+static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width,
+                                   unsigned int *height)
+{
+    if (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
+        *width = 1 << pg->surface_shape.log_width;
+        *height = 1 << pg->surface_shape.log_height;
+    } else {
+        *width = pg->surface_shape.clip_width;
+        *height = pg->surface_shape.clip_height;
+    }
+}
+
+/* One-time GL surface state setup: load the configured scale factor,
+ * create and bind the offscreen framebuffer, initialize the surface cache
+ * list and download-synchronization events, and set up render-to-texture
+ * resources.
+ */
+void pgraph_gl_init_surfaces(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    pgraph_gl_reload_surface_scale_factor(pg);
+    glGenFramebuffers(1, &r->gl_framebuffer);
+    glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer);
+    QTAILQ_INIT(&r->surfaces);
+    r->downloads_pending = false;
+    qemu_event_init(&r->downloads_complete, false);
+    qemu_event_init(&r->dirty_surfaces_download_complete, false);
+
+    init_render_to_texture(pg);
+}
+
+/* Tear down GL surface state. Note: cached surface textures are not yet
+ * released here (see TODO below). */
+void pgraph_gl_deinit_surfaces(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    glDeleteFramebuffers(1, &r->gl_framebuffer);
+    // TODO: clear out surfaces
+}
+
+/* Drop all surface state: unbind both targets, invalidate every cached
+ * surface, and reset the last surface shape so buffers are recreated on
+ * the next draw (used e.g. when the surface scale factor changes). If a
+ * target was bound on entry, it is rebound immediately afterwards.
+ */
+void pgraph_gl_surface_flush(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    bool update_surface = (r->color_binding || r->zeta_binding);
+
+    /* Clear last surface shape to force recreation of buffers at next draw */
+    pg->surface_color.draw_dirty = false;
+    pg->surface_zeta.draw_dirty = false;
+    memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape));
+    pgraph_gl_unbind_surface(d, true);
+    pgraph_gl_unbind_surface(d, false);
+
+    SurfaceBinding *s, *next;
+    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
+        pgraph_gl_surface_invalidate(d, s);
+    }
+
+    pgraph_gl_reload_surface_scale_factor(pg);
+
+    if (update_surface) {
+        pgraph_gl_surface_update(d, true, true, true);
+    }
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c
new file mode 100644
index 0000000000..bf072f44d6
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/texture.c
@@ -0,0 +1,819 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/fast-hash.h"
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/pgraph/swizzle.h"
+#include "hw/xbox/nv2a/pgraph/s3tc.h"
+#include "hw/xbox/nv2a/pgraph/texture.h"
+#include "debug.h"
+#include "renderer.h"
+
+static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
+static void texture_binding_destroy(gpointer data);
+
+/* Inclusive address range [addr, end] passed to the LRU visitor when
+ * scanning the texture cache for entries overlapping dirtied VRAM. */
+struct pgraph_texture_possibly_dirty_struct {
+    hwaddr addr, end;
+};
+
+/* LRU visitor: flag a cached texture as possibly dirty when its data (or
+ * palette) range intersects the dirtied range carried in `opaque`.
+ */
+static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque)
+{
+    struct pgraph_texture_possibly_dirty_struct *range =
+        (struct pgraph_texture_possibly_dirty_struct *)opaque;
+
+    struct TextureLruNode *tnode = container_of(node, TextureLruNode, node);
+    if (tnode->binding == NULL || tnode->possibly_dirty) {
+        return; /* Nothing cached, or already flagged. */
+    }
+
+    uintptr_t tex_start = tnode->key.texture_vram_offset;
+    uintptr_t tex_end = tex_start + tnode->key.texture_length - 1;
+    bool hit = (range->addr <= tex_end) && (tex_start <= range->end);
+
+    if (tnode->key.palette_length > 0) {
+        uintptr_t pal_start = tnode->key.palette_vram_offset;
+        uintptr_t pal_end = pal_start + tnode->key.palette_length - 1;
+        hit = hit || ((range->addr <= pal_end) && (pal_start <= range->end));
+    }
+
+    tnode->possibly_dirty |= hit;
+}
+
+/* Expand [addr, addr+size) to page granularity and flag every cached
+ * texture whose backing data or palette overlaps it as possibly dirty.
+ */
+void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d,
+                                            hwaddr addr, hwaddr size)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; /* inclusive bound */
+    addr &= TARGET_PAGE_MASK;
+    assert(end <= memory_region_size(d->vram));
+
+    struct pgraph_texture_possibly_dirty_struct test;
+    test.addr = addr;
+    test.end = end;
+
+    lru_visit_active(&r->texture_cache,
+                     mark_textures_possibly_dirty_visitor, &test);
+}
+
+/* Test-and-clear the NV2A_TEX dirty bitmap over the page-aligned span
+ * covering [addr, addr + size). Returns true if any page was dirty.
+ */
+static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size)
+{
+    hwaddr end = TARGET_PAGE_ALIGN(addr + size);
+    addr &= TARGET_PAGE_MASK;
+    /* `end` is an exclusive bound, so end == region size is valid for a
+     * texture ending on the last VRAM page (the previous `<` comparison
+     * falsely tripped in that case; compare the inclusive-bound `<=`
+     * check in pgraph_gl_mark_textures_possibly_dirty). */
+    assert(end <= memory_region_size(d->vram));
+    return memory_region_test_and_clear_dirty(d->vram, addr, end - addr,
+                                              DIRTY_MEMORY_NV2A_TEX);
+}
+
+// Check whether any page spanned by the texture (or its palette) is dirty.
+static bool check_texture_possibly_dirty(NV2AState *d,
+                                         hwaddr texture_vram_offset,
+                                         unsigned int length,
+                                         hwaddr palette_vram_offset,
+                                         unsigned int palette_length)
+{
+    /* Both ranges are always tested so each one's dirty bits get cleared,
+     * even if the first already reported dirty.
+     */
+    bool dirty = false;
+
+    if (check_texture_dirty(d, texture_vram_offset, length)) {
+        dirty = true;
+        pgraph_gl_mark_textures_possibly_dirty(d, texture_vram_offset, length);
+    }
+
+    if (palette_length &&
+        check_texture_dirty(d, palette_vram_offset, palette_length)) {
+        dirty = true;
+        pgraph_gl_mark_textures_possibly_dirty(d, palette_vram_offset,
+                                               palette_length);
+    }
+
+    return dirty;
+}
+
+/* Program sampler state (min/mag filters, wrap modes, border color) on
+ * `binding`, issuing GL calls only for values that differ from what the
+ * binding was last configured with.
+ */
+static void apply_texture_parameters(TextureBinding *binding,
+                                     const BasicColorFormatInfo *f,
+                                     unsigned int dimensionality,
+                                     unsigned int filter,
+                                     unsigned int address,
+                                     bool is_bordered,
+                                     uint32_t border_color)
+{
+    unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
+    unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG);
+    unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU);
+    unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV);
+    unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP);
+
+    if (f->linear) {
+        /* sometimes games try to set mipmap min filters on linear textures.
+         * this could indicate a bug... Downgrade to the base-level filter. */
+        switch (min_filter) {
+        case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD:
+        case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD:
+            min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0;
+            break;
+        case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD:
+        case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD:
+            min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0;
+            break;
+        }
+    }
+
+    if (min_filter != binding->min_filter) {
+        glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER,
+                        pgraph_texture_min_filter_gl_map[min_filter]);
+        binding->min_filter = min_filter;
+    }
+    if (mag_filter != binding->mag_filter) {
+        glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER,
+                        pgraph_texture_mag_filter_gl_map[mag_filter]);
+        binding->mag_filter = mag_filter;
+    }
+
+    /* Texture wrapping */
+    assert(addru < ARRAY_SIZE(pgraph_texture_addr_gl_map));
+    if (addru != binding->addru) {
+        glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S,
+                        pgraph_texture_addr_gl_map[addru]);
+        binding->addru = addru;
+    }
+    bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
+    if (dimensionality > 1) {
+        if (addrv != binding->addrv) {
+            assert(addrv < ARRAY_SIZE(pgraph_texture_addr_gl_map));
+            glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T,
+                            pgraph_texture_addr_gl_map[addrv]);
+            binding->addrv = addrv;
+        }
+        needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
+    }
+    if (dimensionality > 2) {
+        if (addrp != binding->addrp) {
+            assert(addrp < ARRAY_SIZE(pgraph_texture_addr_gl_map));
+            glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R,
+                            pgraph_texture_addr_gl_map[addrp]);
+            binding->addrp = addrp;
+        }
+        needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
+    }
+
+    /* Only textures without embedded border texels use the GL border color. */
+    if (!is_bordered && needs_border_color) {
+        if (!binding->border_color_set || binding->border_color != border_color) {
+            /* FIXME: Color channels might be wrong order */
+            GLfloat gl_border_color[4];
+            pgraph_argb_pack32_to_rgba_float(border_color, gl_border_color);
+            glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR,
+                             gl_border_color);
+
+            binding->border_color_set = true;
+            binding->border_color = border_color;
+        }
+    }
+}
+
+/* Bind/refresh the GL texture for each texture stage: reuse the cached
+ * binding when the backing data is unchanged, handle render-to-texture
+ * from active surfaces, and (re)generate textures whose data hash changed.
+ */
+void pgraph_gl_bind_textures(NV2AState *d)
+{
+    int i;
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    NV2A_GL_DGROUP_BEGIN("%s", __func__);
+
+    /* NOTE(review): the next line appears garbled in this patch — text
+     * between '<' and '>' seems to have been stripped, collapsing the
+     * per-stage loop header and several per-texture setup statements
+     * (state/filter/address/palette derivation) into one line. Recover
+     * the missing code from the original source before applying. */
+    for (i=0; ivram));
+    assert((palette_vram_offset + palette_length)
+           < memory_region_size(d->vram));
+    bool is_indexed = (state.color_format ==
+            NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8);
+    bool possibly_dirty = false;
+    bool possibly_dirty_checked = false;
+
+    SurfaceBinding *surface = pgraph_gl_surface_get(d, texture_vram_offset);
+    TextureBinding *tbind = r->texture_binding[i];
+    /* Fast path: stage not dirtied — reuse the current binding if the
+     * underlying memory is provably unchanged. */
+    if (!pg->texture_dirty[i] && tbind) {
+        bool reusable = false;
+        if (surface && tbind->draw_time == surface->draw_time) {
+            reusable = true;
+        } else if (!surface) {
+            possibly_dirty = check_texture_possibly_dirty(
+                    d,
+                    texture_vram_offset,
+                    length,
+                    palette_vram_offset,
+                    is_indexed ? palette_length : 0);
+            possibly_dirty_checked = true;
+            reusable = !possibly_dirty;
+        }
+
+        if (reusable) {
+            glBindTexture(r->texture_binding[i]->gl_target,
+                          r->texture_binding[i]->gl_texture);
+            apply_texture_parameters(r->texture_binding[i],
+                                     &kelvin_color_format_info_map[state.color_format],
+                                     state.dimensionality,
+                                     filter,
+                                     address,
+                                     state.border,
+                                     border_color);
+            continue;
+        }
+    }
+
+    /*
+     * Check active surfaces to see if this texture was a render target
+     */
+    bool surf_to_tex = false;
+    if (surface != NULL) {
+        surf_to_tex = pgraph_gl_check_surface_to_texture_compatibility(
+                surface, &state);
+
+        if (surf_to_tex && surface->upload_pending) {
+            pgraph_gl_upload_surface_data(d, surface, false);
+        }
+    }
+
+    if (!surf_to_tex) {
+        // FIXME: Restructure to support rendering surfaces to cubemap faces
+
+        // Writeback any surfaces which this texture may index
+        hwaddr tex_vram_end = texture_vram_offset + length - 1;
+        QTAILQ_FOREACH(surface, &r->surfaces, entry) {
+            hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
+            bool overlapping = !(surface->vram_addr >= tex_vram_end
+                                 || texture_vram_offset >= surf_vram_end);
+            if (overlapping) {
+                pgraph_gl_surface_download_if_dirty(d, surface);
+            }
+        }
+    }
+
+    TextureKey key;
+    memset(&key, 0, sizeof(TextureKey));
+    key.state = state;
+    key.texture_vram_offset = texture_vram_offset;
+    key.texture_length = length;
+    if (is_indexed) {
+        key.palette_vram_offset = palette_vram_offset;
+        key.palette_length = palette_length;
+    }
+
+    // Search for existing texture binding in cache
+    uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key));
+    LruNode *found = lru_lookup(&r->texture_cache,
+                                 tex_binding_hash, &key);
+    TextureLruNode *key_out = container_of(found, TextureLruNode, node);
+    possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty;
+
+    if (!surf_to_tex && !possibly_dirty_checked) {
+        possibly_dirty |= check_texture_possibly_dirty(
+                d,
+                texture_vram_offset,
+                length,
+                palette_vram_offset,
+                is_indexed ? palette_length : 0);
+    }
+
+    // Calculate hash of texture data, if necessary
+    void *texture_data = (char*)d->vram_ptr + texture_vram_offset;
+    void *palette_data = (char*)d->vram_ptr + palette_vram_offset;
+
+    uint64_t tex_data_hash = 0;
+    if (!surf_to_tex && possibly_dirty) {
+        tex_data_hash = fast_hash(texture_data, length);
+        if (is_indexed) {
+            tex_data_hash ^= fast_hash(palette_data, palette_length);
+        }
+    }
+
+    // Free existing binding, if texture data has changed
+    bool must_destroy = (key_out->binding != NULL)
+                        && possibly_dirty
+                        && (key_out->binding->data_hash != tex_data_hash);
+    if (must_destroy) {
+        texture_binding_destroy(key_out->binding);
+        key_out->binding = NULL;
+    }
+
+    if (key_out->binding == NULL) {
+        // Must create the texture
+        key_out->binding = generate_texture(state, texture_data, palette_data);
+        key_out->binding->data_hash = tex_data_hash;
+        key_out->binding->scale = 1;
+    } else {
+        // Saved an upload! Reuse existing texture in graphics memory.
+        glBindTexture(key_out->binding->gl_target,
+                      key_out->binding->gl_texture);
+    }
+
+    key_out->possibly_dirty = false;
+    TextureBinding *binding = key_out->binding;
+    binding->refcnt++;
+
+    /* Surface rendered more recently than this texture was captured:
+     * blit the surface contents into the texture. */
+    if (surf_to_tex && binding->draw_time < surface->draw_time) {
+
+        trace_nv2a_pgraph_surface_render_to_texture(
+            surface->vram_addr, surface->width, surface->height);
+        pgraph_gl_render_surface_to_texture(d, surface, binding, &state, i);
+        binding->draw_time = surface->draw_time;
+        if (binding->gl_target == GL_TEXTURE_RECTANGLE) {
+            binding->scale = pg->surface_scale_factor;
+        } else {
+            binding->scale = 1;
+        }
+    }
+
+    apply_texture_parameters(binding,
+                             &kelvin_color_format_info_map[state.color_format],
+                             state.dimensionality,
+                             filter,
+                             address,
+                             state.border,
+                             border_color);
+
+    /* Release the stage's previous binding. */
+    if (r->texture_binding[i]) {
+        if (r->texture_binding[i]->gl_target != binding->gl_target) {
+            glBindTexture(r->texture_binding[i]->gl_target, 0);
+        }
+        texture_binding_destroy(r->texture_binding[i]);
+    }
+    r->texture_binding[i] = binding;
+    pg->texture_dirty[i] = false;
+    }
+    NV2A_GL_DGROUP_END();
+}
+
+/* Map a GL compressed internal format to the S3TC decompressor's format
+ * enum. Only the three DXT formats are valid inputs.
+ */
+static enum S3TC_DECOMPRESS_FORMAT
+gl_internal_format_to_s3tc_enum(GLint gl_internal_format)
+{
+    switch (gl_internal_format) {
+    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
+        return S3TC_DECOMPRESS_FORMAT_DXT1;
+    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
+        return S3TC_DECOMPRESS_FORMAT_DXT3;
+    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
+        return S3TC_DECOMPRESS_FORMAT_DXT5;
+    default:
+        /* Fix: with NDEBUG the assert compiles away and control fell off
+         * the end of a value-returning function (undefined behavior). */
+        assert(!"Invalid format");
+        return S3TC_DECOMPRESS_FORMAT_DXT1; /* unreachable */
+    }
+}
+
+/* Upload one texture image (all mip levels for 2D/3D targets) to the
+ * currently bound GL texture object, performing CPU-side unswizzling,
+ * palette/format conversion, and S3TC decompression as needed. Bordered
+ * non-linear textures have doubled ("adjusted") physical extents in VRAM.
+ */
+static void upload_gl_texture(GLenum gl_target,
+                              const TextureShape s,
+                              const uint8_t *texture_data,
+                              const uint8_t *palette_data)
+{
+    ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
+    nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD);
+
+    unsigned int adjusted_width = s.width;
+    unsigned int adjusted_height = s.height;
+    unsigned int adjusted_pitch = s.pitch;
+    unsigned int adjusted_depth = s.depth;
+    if (!f.linear && s.border) {
+        adjusted_width = MAX(16, adjusted_width * 2);
+        adjusted_height = MAX(16, adjusted_height * 2);
+        adjusted_pitch = adjusted_width * (s.pitch / s.width);
+        adjusted_depth = MAX(16, s.depth * 2);
+    }
+
+    switch(gl_target) {
+    case GL_TEXTURE_1D:
+        assert(false);
+        break;
+    case GL_TEXTURE_RECTANGLE: {
+        /* Can't handle strides unaligned to pixels */
+        assert(s.pitch % f.bytes_per_pixel == 0);
+
+        uint8_t *converted = pgraph_convert_texture_data(
+            s, texture_data, palette_data, adjusted_width, adjusted_height, 1,
+            adjusted_pitch, 0, NULL);
+        glPixelStorei(GL_UNPACK_ROW_LENGTH,
+                      converted ? 0 : adjusted_pitch / f.bytes_per_pixel);
+        glTexImage2D(gl_target, 0, f.gl_internal_format,
+                     adjusted_width, adjusted_height, 0,
+                     f.gl_format, f.gl_type,
+                     converted ? converted : texture_data);
+
+        if (converted) {
+            g_free(converted);
+        }
+
+        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+        break;
+    }
+    case GL_TEXTURE_2D:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
+
+        unsigned int width = adjusted_width, height = adjusted_height;
+
+        /* Upload the full mip chain; `texture_data` advances past each
+         * level's physical payload as we go. */
+        int level;
+        for (level = 0; level < s.levels; level++) {
+            width = MAX(width, 1);
+            height = MAX(height, 1);
+
+            if (f.gl_format == 0) { /* compressed */
+                // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size
+                unsigned int block_size =
+                    f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ?
+                        8 : 16;
+                unsigned int physical_width = (width + 3) & ~3,
+                             physical_height = (height + 3) & ~3;
+                if (physical_width != width) {
+                    glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width);
+                }
+                uint8_t *converted = s3tc_decompress_2d(
+                    gl_internal_format_to_s3tc_enum(f.gl_internal_format),
+                    texture_data, physical_width, physical_height);
+                unsigned int tex_width = width;
+                unsigned int tex_height = height;
+
+                if (s.cubemap && adjusted_width != s.width) {
+                    // FIXME: Consider preserving the border.
+                    // There does not seem to be a way to reference the border
+                    // texels in a cubemap, so they are discarded.
+                    glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4);
+                    glPixelStorei(GL_UNPACK_SKIP_ROWS, 4);
+                    tex_width = s.width;
+                    tex_height = s.height;
+                    if (physical_width == width) {
+                        glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
+                    }
+                }
+
+                glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0,
+                             GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, converted);
+                g_free(converted);
+                /* Restore default pixel-store state before the next level. */
+                if (physical_width != width) {
+                    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+                }
+                if (s.cubemap && adjusted_width != s.width) {
+                    glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
+                    glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
+                    if (physical_width == width) {
+                        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+                    }
+                }
+                texture_data +=
+                    physical_width / 4 * physical_height / 4 * block_size;
+            } else {
+                unsigned int pitch = width * f.bytes_per_pixel;
+                uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch);
+                unswizzle_rect(texture_data, width, height,
+                               unswizzled, pitch, f.bytes_per_pixel);
+                uint8_t *converted = pgraph_convert_texture_data(
+                    s, unswizzled, palette_data, width, height, 1, pitch, 0,
+                    NULL);
+                uint8_t *pixel_data = converted ? converted : unswizzled;
+                unsigned int tex_width = width;
+                unsigned int tex_height = height;
+
+                if (s.cubemap && adjusted_width != s.width) {
+                    // FIXME: Consider preserving the border.
+                    // There does not seem to be a way to reference the border
+                    // texels in a cubemap, so they are discarded.
+                    glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
+                    tex_width = s.width;
+                    tex_height = s.height;
+                    pixel_data += 4 * f.bytes_per_pixel + 4 * pitch;
+                }
+
+                glTexImage2D(gl_target, level, f.gl_internal_format, tex_width,
+                             tex_height, 0, f.gl_format, f.gl_type,
+                             pixel_data);
+                if (s.cubemap && s.border) {
+                    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+                }
+                if (converted) {
+                    g_free(converted);
+                }
+                g_free(unswizzled);
+
+                texture_data += width * height * f.bytes_per_pixel;
+            }
+
+            width /= 2;
+            height /= 2;
+        }
+
+        break;
+    }
+    case GL_TEXTURE_3D: {
+
+        unsigned int width = adjusted_width;
+        unsigned int height = adjusted_height;
+        unsigned int depth = adjusted_depth;
+
+        assert(f.linear == false);
+
+        int level;
+        for (level = 0; level < s.levels; level++) {
+            if (f.gl_format == 0) { /* compressed */
+                assert(width % 4 == 0 && height % 4 == 0 &&
+                       "Compressed 3D texture virtual size");
+                width = MAX(width, 4);
+                height = MAX(height, 4);
+                depth = MAX(depth, 1);
+
+                unsigned int block_size;
+                if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
+                    block_size = 8;
+                } else {
+                    block_size = 16;
+                }
+
+                size_t texture_size = width/4 * height/4 * depth * block_size;
+
+                uint8_t *converted = s3tc_decompress_3d(
+                    gl_internal_format_to_s3tc_enum(f.gl_internal_format),
+                    texture_data, width, height, depth);
+
+                glTexImage3D(gl_target, level, GL_RGBA8,
+                             width, height, depth, 0,
+                             GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
+                             converted);
+
+                g_free(converted);
+
+                texture_data += texture_size;
+            } else {
+                width = MAX(width, 1);
+                height = MAX(height, 1);
+                depth = MAX(depth, 1);
+
+                unsigned int row_pitch = width * f.bytes_per_pixel;
+                unsigned int slice_pitch = row_pitch * height;
+                uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth);
+                unswizzle_box(texture_data, width, height, depth, unswizzled,
+                               row_pitch, slice_pitch, f.bytes_per_pixel);
+
+                uint8_t *converted = pgraph_convert_texture_data(
+                    s, unswizzled, palette_data, width, height, depth,
+                    row_pitch, slice_pitch, NULL);
+
+                glTexImage3D(gl_target, level, f.gl_internal_format,
+                             width, height, depth, 0,
+                             f.gl_format, f.gl_type,
+                             converted ? converted : unswizzled);
+
+                if (converted) {
+                    g_free(converted);
+                }
+                g_free(unswizzled);
+
+                texture_data += width * height * depth * f.bytes_per_pixel;
+            }
+
+            width /= 2;
+            height /= 2;
+            depth /= 2;
+        }
+        break;
+    }
+    default:
+        assert(false);
+        break;
+    }
+}
+
+/* Create and populate a new GL texture object for texture shape `s`.
+ *
+ * Chooses the GL target from the shape (cubemap; rectangle for linear;
+ * otherwise 1D/2D/3D), uploads all faces and mip levels from
+ * `texture_data` (and `palette_data` for indexed formats), and returns a
+ * fresh TextureBinding with refcnt 1 and sampler fields set to sentinel
+ * values so the first apply_texture_parameters() call programs them.
+ */
+static TextureBinding* generate_texture(const TextureShape s,
+                                        const uint8_t *texture_data,
+                                        const uint8_t *palette_data)
+{
+    ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
+
+    /* Create a new opengl texture */
+    GLuint gl_texture;
+    glGenTextures(1, &gl_texture);
+
+    GLenum gl_target;
+    if (s.cubemap) {
+        assert(f.linear == false);
+        assert(s.dimensionality == 2);
+        gl_target = GL_TEXTURE_CUBE_MAP;
+    } else {
+        if (f.linear) {
+            /* linear textures use unnormalised texcoords.
+             * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but
+             * does not allow repeat and mirror wrap modes.
+             * (or mipmapping, but xbox d3d says 'Non swizzled and non
+             * compressed textures cannot be mip mapped.')
+             * Not sure if that'll be an issue. */
+
+            /* FIXME: GLSL 330 provides us with textureSize()! Use that? */
+            gl_target = GL_TEXTURE_RECTANGLE;
+            assert(s.dimensionality == 2);
+        } else {
+            switch(s.dimensionality) {
+            case 1: gl_target = GL_TEXTURE_1D; break;
+            case 2: gl_target = GL_TEXTURE_2D; break;
+            case 3: gl_target = GL_TEXTURE_3D; break;
+            default:
+                assert(false);
+                break;
+            }
+        }
+    }
+
+    glBindTexture(gl_target, gl_texture);
+
+    NV2A_GL_DLABEL(GL_TEXTURE, gl_texture,
+                   "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, "
+                   "width: %d, height: %d, depth: %d",
+                   texture_data - g_nv2a->vram_ptr,
+                   s.color_format, f.linear ? "" : " (SZ)",
+                   s.dimensionality, s.cubemap ? " (Cubemap)" : "",
+                   s.width, s.height, s.depth);
+
+    if (gl_target == GL_TEXTURE_CUBE_MAP) {
+        /* (A redundant re-declaration of `f` that shadowed the identical
+         * outer value was removed here.) */
+        unsigned int block_size;
+        if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
+            block_size = 8;
+        } else {
+            block_size = 16;
+        }
+
+        /* Compute the byte length of one face (all mip levels); bordered
+         * faces have doubled physical extents. */
+        size_t length = 0;
+        unsigned int w = s.width;
+        unsigned int h = s.height;
+        if (!f.linear && s.border) {
+            w = MAX(16, w * 2);
+            h = MAX(16, h * 2);
+        }
+
+        int level;
+        for (level = 0; level < s.levels; level++) {
+            if (f.gl_format == 0) {
+                length += w/4 * h/4 * block_size;
+            } else {
+                length += w * h * f.bytes_per_pixel;
+            }
+
+            w /= 2;
+            h /= 2;
+        }
+
+        /* Faces are aligned in VRAM. */
+        length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
+
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X,
+                          s, texture_data + 0 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
+                          s, texture_data + 1 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
+                          s, texture_data + 2 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
+                          s, texture_data + 3 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
+                          s, texture_data + 4 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
+                          s, texture_data + 5 * length, palette_data);
+    } else {
+        upload_gl_texture(gl_target, s, texture_data, palette_data);
+    }
+
+    /* Linear textures don't support mipmapping */
+    if (!f.linear) {
+        glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL,
+                        s.min_mipmap_level);
+        glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL,
+                        s.levels - 1);
+    }
+
+    if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0
+        || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) {
+        glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA,
+                         (const GLint *)f.gl_swizzle_mask);
+    }
+
+    /* Sentinel sampler values force the first parameter application. */
+    TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding));
+    ret->gl_target = gl_target;
+    ret->gl_texture = gl_texture;
+    ret->refcnt = 1;
+    ret->draw_time = 0;
+    ret->data_hash = 0;
+    ret->min_filter = 0xFFFFFFFF;
+    ret->mag_filter = 0xFFFFFFFF;
+    ret->addru = 0xFFFFFFFF;
+    ret->addrv = 0xFFFFFFFF;
+    ret->addrp = 0xFFFFFFFF;
+    ret->border_color_set = false;
+    return ret;
+}
+
+/* Drop one reference; delete the GL texture and free the binding when the
+ * last reference goes away.
+ */
+static void texture_binding_destroy(gpointer data)
+{
+    TextureBinding *binding = (TextureBinding *)data;
+    assert(binding->refcnt > 0);
+    if (--binding->refcnt == 0) {
+        glDeleteTextures(1, &binding->gl_texture);
+        g_free(binding);
+    }
+}
+
+/* functions for texture LRU cache */
+static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key)
+{
+    /* Fresh cache slot: copy in the lookup key and mark it empty/clean. */
+    TextureLruNode *tnode = container_of(node, TextureLruNode, node);
+    memcpy(&tnode->key, key, sizeof(TextureKey));
+    tnode->binding = NULL;
+    tnode->possibly_dirty = false;
+}
+
+static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
+{
+    /* Eviction hook: release the cached GL texture, if any. */
+    TextureLruNode *tnode = container_of(node, TextureLruNode, node);
+    if (tnode->binding == NULL) {
+        return;
+    }
+    texture_binding_destroy(tnode->binding);
+    tnode->binding = NULL;
+    tnode->possibly_dirty = false;
+}
+
+/* memcmp semantics: returns true (nonzero) when the keys differ. */
+static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
+{
+    TextureLruNode *tnode = container_of(node, TextureLruNode, node);
+    return memcmp(&tnode->key, key, sizeof(TextureKey)) != 0;
+}
+
+/* Allocate the fixed-size texture LRU cache and wire up its callbacks. */
+void pgraph_gl_init_texture_cache(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    const size_t texture_cache_size = 512;
+    lru_init(&r->texture_cache);
+    r->texture_cache_entries =
+        malloc(texture_cache_size * sizeof(TextureLruNode));
+    assert(r->texture_cache_entries != NULL);
+    for (size_t i = 0; i < texture_cache_size; i++) {
+        lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node);
+    }
+
+    r->texture_cache.init_node = texture_cache_entry_init;
+    r->texture_cache.compare_nodes = texture_cache_entry_compare;
+    r->texture_cache.post_node_evict = texture_cache_entry_post_evict;
+}
+
+/* Release all cached texture bindings and free the cache entry array. */
+void pgraph_gl_deinit_texture_cache(PGRAPHState *pg)
+{
+    PGRAPHGLState *r = pg->gl_renderer_state;
+
+    // Clear out texture cache
+    lru_flush(&r->texture_cache);
+    free(r->texture_cache_entries);
+}
diff --git a/hw/xbox/nv2a/pgraph/gl/vertex.c b/hw/xbox/nv2a/pgraph/gl/vertex.c
new file mode 100644
index 0000000000..21f42b647c
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/gl/vertex.c
@@ -0,0 +1,283 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_regs.h"
+#include
+#include "debug.h"
+#include "renderer.h"
+
+/*
+ * Upload any dirty guest VRAM pages overlapping [addr, addr+size) into the
+ * GL buffer that mirrors VRAM. The range is widened to page granularity
+ * before the dirty-bitmap check. When 'quick' is set, the call returns
+ * immediately if the range is contained in the one handled by the previous
+ * call (callers set 'quick' after the first attribute of a draw).
+ */
+static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size,
+ bool quick)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
+
+ hwaddr end = TARGET_PAGE_ALIGN(addr + size);
+ addr &= TARGET_PAGE_MASK;
+ assert(end < memory_region_size(d->vram));
+
+ /* NOTE(review): function-local statics make this memoization shared
+  * across all NV2AState instances and non-reentrant — fine only if a
+  * single device instance drives this from one thread; confirm. */
+ static hwaddr last_addr, last_end;
+ if (quick && (addr >= last_addr) && (end <= last_end)) {
+ return;
+ }
+ last_addr = addr;
+ last_end = end;
+
+ size = end - addr;
+ if (memory_region_test_and_clear_dirty(d->vram, addr, size,
+ DIRTY_MEMORY_NV2A)) {
+ glBufferSubData(GL_ARRAY_BUFFER, addr, size,
+ d->vram_ptr + addr);
+ nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1);
+ }
+}
+
+/* Re-upload the whole of guest VRAM into the GL mirror buffer,
+ * regardless of dirty state. */
+void pgraph_gl_update_entire_memory_buffer(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
+ glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr);
+}
+
+/*
+ * Configure GL vertex attribute arrays for the current draw.
+ *
+ * min_element/max_element bound the vertex indices that will be fetched.
+ * inline_data selects the inline-array buffer (with inline_stride) as the
+ * attribute source instead of guest DMA vertex data staged in the VRAM
+ * mirror buffer. provoking_element selects which vertex's data is used to
+ * refresh each attribute's inline_value.
+ *
+ * Side effects: rebuilds pg->compressed_attrs, binds GL array buffers and
+ * enables/disables attribute arrays per attribute.
+ */
+void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
+ unsigned int max_element, bool inline_data,
+ unsigned int inline_stride,
+ unsigned int provoking_element)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ bool updated_memory_buffer = false;
+ unsigned int num_elements = max_element - min_element + 1;
+
+ if (inline_data) {
+ NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
+ __func__, num_elements, inline_stride);
+ } else {
+ NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
+ }
+
+ pg->compressed_attrs = 0;
+
+ for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+ VertexAttribute *attr = &pg->vertex_attributes[i];
+
+ /* Disabled attribute: supply the constant inline value instead. */
+ if (!attr->count) {
+ glDisableVertexAttribArray(i);
+ glVertexAttrib4fv(i, attr->inline_value);
+ continue;
+ }
+
+ NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n",
+ attr->format, attr->count, attr->stride);
+
+ /* Map the NV097 attribute format onto GL type/normalization. */
+ GLint gl_count = attr->count;
+ GLenum gl_type;
+ GLboolean gl_normalize;
+ bool needs_conversion = false;
+
+ switch (attr->format) {
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
+ gl_type = GL_UNSIGNED_BYTE;
+ gl_normalize = GL_TRUE;
+ // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt
+ gl_count = GL_BGRA;
+ break;
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
+ gl_type = GL_UNSIGNED_BYTE;
+ gl_normalize = GL_TRUE;
+ break;
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
+ gl_type = GL_SHORT;
+ gl_normalize = GL_TRUE;
+ break;
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
+ gl_type = GL_FLOAT;
+ gl_normalize = GL_FALSE;
+ break;
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
+ gl_type = GL_SHORT;
+ gl_normalize = GL_FALSE;
+ break;
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
+ /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
+ gl_type = GL_INT;
+ assert(attr->count == 1);
+ needs_conversion = true;
+ break;
+ default:
+ fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
+ assert(false);
+ break;
+ }
+
+ nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND);
+ hwaddr attrib_data_addr;
+ size_t stride;
+
+ /* Compressed formats are unpacked in the vertex shader. */
+ if (needs_conversion) {
+ pg->compressed_attrs |= (1 << i);
+ }
+
+ /* Choose the attribute source: the inline-array buffer, or guest DMA
+  * data in the VRAM mirror buffer (uploading dirty pages on demand). */
+ hwaddr start = 0;
+ if (inline_data) {
+ glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer);
+ attrib_data_addr = attr->inline_array_offset;
+ stride = inline_stride;
+ } else {
+ hwaddr dma_len;
+ uint8_t *attr_data = (uint8_t *)nv_dma_map(
+ d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a,
+ &dma_len);
+ assert(attr->offset < dma_len);
+ attrib_data_addr = attr_data + attr->offset - d->vram_ptr;
+ stride = attr->stride;
+ start = attrib_data_addr + min_element * stride;
+ update_memory_buffer(d, start, num_elements * stride,
+ updated_memory_buffer);
+ updated_memory_buffer = true;
+ }
+
+ uint32_t provoking_element_index = provoking_element - min_element;
+ size_t element_size = attr->size * attr->count;
+ assert(element_size <= sizeof(attr->inline_value));
+ const uint8_t *last_entry;
+
+ if (inline_data) {
+ last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset;
+ } else {
+ last_entry = d->vram_ptr + start;
+ }
+ if (!stride) {
+ // Stride of 0 indicates that only the first element should be
+ // used.
+ pgraph_update_inline_value(attr, last_entry);
+ glDisableVertexAttribArray(i);
+ glVertexAttrib4fv(i, attr->inline_value);
+ continue;
+ }
+
+ /* Integer pointer keeps raw bits for shader-side unpacking. */
+ if (needs_conversion) {
+ glVertexAttribIPointer(i, gl_count, gl_type, stride,
+ (void *)attrib_data_addr);
+ } else {
+ glVertexAttribPointer(i, gl_count, gl_type, gl_normalize, stride,
+ (void *)attrib_data_addr);
+ }
+
+ glEnableVertexAttribArray(i);
+ /* Refresh inline_value from the provoking vertex's data. */
+ last_entry += stride * provoking_element_index;
+ pgraph_update_inline_value(attr, last_entry);
+ }
+
+ NV2A_GL_DGROUP_END();
+}
+
+/*
+ * Lay out the active attributes of the inline vertex array (computing each
+ * attribute's aligned offset and the total vertex size), upload the array
+ * into the inline-array GL buffer, and bind the attributes.
+ *
+ * Returns the number of vertices contained in the inline array.
+ *
+ * NOTE(review): if no attribute has count != 0, vertex_size is 0 and the
+ * division below is undefined — presumably callers guarantee at least one
+ * active attribute; confirm.
+ */
+unsigned int pgraph_gl_bind_inline_array(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ unsigned int offset = 0;
+ for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+ VertexAttribute *attr = &pg->vertex_attributes[i];
+ if (attr->count == 0) {
+ continue;
+ }
+
+ /* FIXME: Double check */
+ offset = ROUND_UP(offset, attr->size);
+ attr->inline_array_offset = offset;
+ NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n",
+ i, attr->size, attr->count);
+ offset += attr->size * attr->count;
+ offset = ROUND_UP(offset, attr->size);
+ }
+
+ /* inline_array_length counts 32-bit words, hence the *4 to bytes. */
+ unsigned int vertex_size = offset;
+ unsigned int index_count = pg->inline_array_length*4 / vertex_size;
+
+ NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
+
+ nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
+ glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer);
+ glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t),
+ NULL, GL_STREAM_DRAW);
+ glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array);
+ pgraph_gl_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size,
+ index_count-1);
+
+ return index_count;
+}
+
+/* LRU callback: set up an element-cache node for a new vertex key. */
+static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key)
+{
+ VertexLruNode *entry = container_of(node, VertexLruNode, node);
+ entry->initialized = false;
+ memcpy(&entry->key, key, sizeof(struct VertexKey));
+}
+
+/* LRU callback: non-zero (true) when the node's key differs from *key. */
+static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key)
+{
+ VertexLruNode *entry = container_of(node, VertexLruNode, node);
+ int diff = memcmp(&entry->key, key, sizeof(VertexKey));
+ return diff;
+}
+
+/*
+ * Create the element (vertex index) LRU cache and the persistent GL buffer
+ * objects used by the vertex pipeline: per-attribute inline buffers, the
+ * inline-array streaming buffer, a buffer mirroring all of guest VRAM,
+ * and the shared vertex array object.
+ */
+void pgraph_gl_init_vertex_cache(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHGLState *r = pg->gl_renderer_state;
+
+ const size_t element_cache_size = 50*1024;
+ lru_init(&r->element_cache);
+ r->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode));
+ assert(r->element_cache_entries != NULL);
+
+ /* Generate the buffer names into a temporary heap allocation: a stack
+  * VLA here would be ~200 KiB (50*1024 GLuints), large enough to risk
+  * stack overflow. */
+ GLuint *element_cache_buffers = malloc(element_cache_size * sizeof(GLuint));
+ assert(element_cache_buffers != NULL);
+ glGenBuffers(element_cache_size, element_cache_buffers);
+ for (size_t i = 0; i < element_cache_size; i++) {
+ r->element_cache_entries[i].gl_buffer = element_cache_buffers[i];
+ lru_add_free(&r->element_cache, &r->element_cache_entries[i].node);
+ }
+ free(element_cache_buffers);
+
+ r->element_cache.init_node = vertex_cache_entry_init;
+ r->element_cache.compare_nodes = vertex_cache_entry_compare;
+
+ GLint max_vertex_attributes;
+ glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes);
+ assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES);
+
+ glGenBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer);
+ glGenBuffers(1, &r->gl_inline_array_buffer);
+
+ /* Mirror of guest VRAM used as vertex attribute source data. */
+ glGenBuffers(1, &r->gl_memory_buffer);
+ glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
+ glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram),
+ NULL, GL_DYNAMIC_DRAW);
+
+ glGenVertexArrays(1, &r->gl_vertex_array);
+ glBindVertexArray(r->gl_vertex_array);
+
+ assert(glGetError() == GL_NO_ERROR);
+}
diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c
new file mode 100644
index 0000000000..7059880373
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/common.c
@@ -0,0 +1,58 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "common.h"
+
+
+/*
+ * Emit GLSL declarations for the 11 vertex varyings shared between shader
+ * stages (vtx_inv_w .. vtxT3), appending them to 'out'.
+ *
+ * location: emit explicit layout(location = N) qualifiers.
+ * smooth:   smooth shading; selects noperspective over flat for the color
+ *           varyings (vtxD0/D1, vtxB0/B1).
+ * in:       declare as stage inputs rather than outputs.
+ * prefix:   prepend "v_" to each name (geometry-shader input naming).
+ * array:    declare as unsized arrays (geometry-shader inputs).
+ *
+ * Returns 'out' for call chaining.
+ */
+MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array)
+{
+ const char *flat_s = "flat";
+ const char *noperspective_s = "noperspective";
+ const char *qualifier_s = smooth ? noperspective_s : flat_s;
+ const char *qualifiers[11] = {
+ noperspective_s, flat_s, qualifier_s, qualifier_s,
+ qualifier_s, qualifier_s, noperspective_s, noperspective_s,
+ noperspective_s, noperspective_s, noperspective_s
+ };
+
+ const char *in_out_s = in ? "in" : "out";
+
+ const char *float_s = "float";
+ const char *vec4_s = "vec4";
+ const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s,
+ float_s, vec4_s, vec4_s, vec4_s, vec4_s };
+
+ const char *prefix_s = prefix ? "v_" : "";
+ const char *names[11] = {
+ "vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1",
+ "vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3",
+ };
+ const char *suffix_s = array ? "[]" : "";
+
+ for (int i = 0; i < 11; i++) {
+ if (location) {
+ mstring_append_fmt(out, "layout(location = %d) ", i);
+ }
+ mstring_append_fmt(out, "%s %s %s %s%s%s;\n",
+ qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s);
+ }
+
+ return out;
+}
diff --git a/hw/xbox/nv2a/pgraph/glsl/common.h b/hw/xbox/nv2a/pgraph/glsl/common.h
new file mode 100644
index 0000000000..6820a1dcb1
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/common.h
@@ -0,0 +1,38 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NV2A_SHADERS_COMMON_H
+#define HW_NV2A_SHADERS_COMMON_H
+
+#include "qemu/mstring.h"
+#include
+
+/* Helpers that expand to GLSL source fragments referencing the shader
+ * constant register arrays (c[] and ltctxa[]). */
+#define GLSL_C(idx) "c[" stringify(idx) "]"
+#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]"
+
+/* A mat4 built from four consecutive c[] constant registers. */
+#define GLSL_C_MAT4(idx) \
+ "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \
+ GLSL_C(idx+2) ", " GLSL_C(idx+3) ")"
+
+/* Emits a GLSL "#define a b" line into generated shader source. */
+#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n"
+
+MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c
new file mode 100644
index 0000000000..0e738f0280
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/geom.c
@@ -0,0 +1,228 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+#include "common.h"
+#include "geom.h"
+
+/*
+ * Generate a geometry shader for primitive/polygon-mode combinations the
+ * host GL/Vulkan cannot draw directly (quads, quad strips, polygons, and
+ * line polygon mode), and to route flat-shaded attributes from the
+ * provoking vertex. Returns NULL when no geometry shader is needed for
+ * the combination; asserts on unsupported ones.
+ */
+MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
+ enum ShaderPolygonMode polygon_back_mode,
+ enum ShaderPrimitiveMode primitive_mode,
+ bool smooth_shading,
+ bool vulkan)
+{
+ /* FIXME: Missing support for 2-sided-poly mode */
+ assert(polygon_front_mode == polygon_back_mode);
+ enum ShaderPolygonMode polygon_mode = polygon_front_mode;
+
+ /* POINT mode shouldn't require any special work */
+ if (polygon_mode == POLY_MODE_POINT) {
+ return NULL;
+ }
+
+ /* Handle LINE and FILL mode */
+ const char *layout_in = NULL;
+ const char *layout_out = NULL;
+ const char *body = NULL;
+ switch (primitive_mode) {
+ case PRIM_TYPE_POINTS: return NULL;
+ case PRIM_TYPE_LINES: return NULL;
+ case PRIM_TYPE_LINE_LOOP: return NULL;
+ case PRIM_TYPE_LINE_STRIP: return NULL;
+ case PRIM_TYPE_TRIANGLES:
+ if (polygon_mode == POLY_MODE_FILL) { return NULL; }
+ assert(polygon_mode == POLY_MODE_LINE);
+ layout_in = "layout(triangles) in;\n";
+ layout_out = "layout(line_strip, max_vertices = 4) out;\n";
+ body = " emit_vertex(0, 0);\n"
+ " emit_vertex(1, 0);\n"
+ " emit_vertex(2, 0);\n"
+ " emit_vertex(0, 0);\n"
+ " EndPrimitive();\n";
+ break;
+ case PRIM_TYPE_TRIANGLE_STRIP:
+ if (polygon_mode == POLY_MODE_FILL) { return NULL; }
+ assert(polygon_mode == POLY_MODE_LINE);
+ layout_in = "layout(triangles) in;\n";
+ layout_out = "layout(line_strip, max_vertices = 4) out;\n";
+ /* Imagine a quad made of a tristrip, the comments tell you which
+ * vertex we are using */
+ body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n"
+ " if (gl_PrimitiveIDIn == 0) {\n"
+ " emit_vertex(0, 0);\n" /* bottom right */
+ " }\n"
+ " emit_vertex(1, 0);\n" /* top right */
+ " emit_vertex(2, 0);\n" /* bottom left */
+ " emit_vertex(0, 0);\n" /* bottom right */
+ " } else {\n"
+ " emit_vertex(2, 0);\n" /* bottom left */
+ " emit_vertex(1, 0);\n" /* top left */
+ " emit_vertex(0, 0);\n" /* top right */
+ " }\n"
+ " EndPrimitive();\n";
+ break;
+ case PRIM_TYPE_TRIANGLE_FAN:
+ if (polygon_mode == POLY_MODE_FILL) { return NULL; }
+ assert(polygon_mode == POLY_MODE_LINE);
+ layout_in = "layout(triangles) in;\n";
+ layout_out = "layout(line_strip, max_vertices = 4) out;\n";
+ body = " if (gl_PrimitiveIDIn == 0) {\n"
+ " emit_vertex(0, 0);\n"
+ " }\n"
+ " emit_vertex(1, 0);\n"
+ " emit_vertex(2, 0);\n"
+ " emit_vertex(0, 0);\n"
+ " EndPrimitive();\n";
+ break;
+ case PRIM_TYPE_QUADS:
+ layout_in = "layout(lines_adjacency) in;\n";
+ if (polygon_mode == POLY_MODE_LINE) {
+ layout_out = "layout(line_strip, max_vertices = 5) out;\n";
+ body = " emit_vertex(0, 3);\n"
+ " emit_vertex(1, 3);\n"
+ " emit_vertex(2, 3);\n"
+ " emit_vertex(3, 3);\n"
+ " emit_vertex(0, 3);\n"
+ " EndPrimitive();\n";
+ } else if (polygon_mode == POLY_MODE_FILL) {
+ layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
+ body = " emit_vertex(3, 3);\n"
+ " emit_vertex(0, 3);\n"
+ " emit_vertex(2, 3);\n"
+ " emit_vertex(1, 3);\n"
+ " EndPrimitive();\n";
+ } else {
+ assert(false);
+ return NULL;
+ }
+ break;
+ case PRIM_TYPE_QUAD_STRIP:
+ layout_in = "layout(lines_adjacency) in;\n";
+ if (polygon_mode == POLY_MODE_LINE) {
+ layout_out = "layout(line_strip, max_vertices = 5) out;\n";
+ body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
+ " if (gl_PrimitiveIDIn == 0) {\n"
+ " emit_vertex(0, 3);\n"
+ " }\n"
+ " emit_vertex(1, 3);\n"
+ " emit_vertex(3, 3);\n"
+ " emit_vertex(2, 3);\n"
+ " emit_vertex(0, 3);\n"
+ " EndPrimitive();\n";
+ } else if (polygon_mode == POLY_MODE_FILL) {
+ layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
+ body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
+ " emit_vertex(0, 3);\n"
+ " emit_vertex(1, 3);\n"
+ " emit_vertex(2, 3);\n"
+ " emit_vertex(3, 3);\n"
+ " EndPrimitive();\n";
+ } else {
+ assert(false);
+ return NULL;
+ }
+ break;
+ case PRIM_TYPE_POLYGON:
+ if (polygon_mode == POLY_MODE_LINE) {
+ return NULL;
+ }
+ if (polygon_mode == POLY_MODE_FILL) {
+ if (smooth_shading) {
+ return NULL;
+ }
+ layout_in = "layout(triangles) in;\n";
+ layout_out = "layout(triangle_strip, max_vertices = 3) out;\n";
+ body = " emit_vertex(0, 2);\n"
+ " emit_vertex(1, 2);\n"
+ " emit_vertex(2, 2);\n"
+ " EndPrimitive();\n";
+ } else {
+ assert(false);
+ return NULL;
+ }
+ break;
+
+ default:
+ assert(false);
+ return NULL;
+ }
+
+ /* generate a geometry shader to support deprecated primitive types */
+ assert(layout_in);
+ assert(layout_out);
+ assert(body);
+ MString *s = mstring_new();
+ mstring_append_fmt(s, "#version %d\n\n", vulkan ? 450 : 400);
+ mstring_append(s, layout_in);
+ mstring_append(s, layout_out);
+ mstring_append(s, "\n");
+ pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, true, true, true);
+ pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, false, false, false);
+
+ /* emit_vertex copies varyings through; the flat-shaded variant reads
+  * color/specular varyings from the provoking vertex instead. */
+ if (smooth_shading) {
+ mstring_append(s,
+ "void emit_vertex(int index, int _unused) {\n"
+ " gl_Position = gl_in[index].gl_Position;\n"
+ " gl_PointSize = gl_in[index].gl_PointSize;\n"
+ // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
+ // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
+ " vtx_inv_w = v_vtx_inv_w[index];\n"
+ " vtx_inv_w_flat = v_vtx_inv_w[index];\n"
+ " vtxD0 = v_vtxD0[index];\n"
+ " vtxD1 = v_vtxD1[index];\n"
+ " vtxB0 = v_vtxB0[index];\n"
+ " vtxB1 = v_vtxB1[index];\n"
+ " vtxFog = v_vtxFog[index];\n"
+ " vtxT0 = v_vtxT0[index];\n"
+ " vtxT1 = v_vtxT1[index];\n"
+ " vtxT2 = v_vtxT2[index];\n"
+ " vtxT3 = v_vtxT3[index];\n"
+ " EmitVertex();\n"
+ "}\n");
+ } else {
+ mstring_append(s,
+ "void emit_vertex(int index, int provoking_index) {\n"
+ " gl_Position = gl_in[index].gl_Position;\n"
+ " gl_PointSize = gl_in[index].gl_PointSize;\n"
+ // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
+ // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
+ " vtx_inv_w = v_vtx_inv_w[index];\n"
+ " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n"
+ " vtxD0 = v_vtxD0[provoking_index];\n"
+ " vtxD1 = v_vtxD1[provoking_index];\n"
+ " vtxB0 = v_vtxB0[provoking_index];\n"
+ " vtxB1 = v_vtxB1[provoking_index];\n"
+ " vtxFog = v_vtxFog[index];\n"
+ " vtxT0 = v_vtxT0[index];\n"
+ " vtxT1 = v_vtxT1[index];\n"
+ " vtxT2 = v_vtxT2[index];\n"
+ " vtxT3 = v_vtxT3[index];\n"
+ " EmitVertex();\n"
+ "}\n");
+ }
+
+ mstring_append(s, "\n"
+ "void main() {\n");
+ mstring_append(s, body);
+ mstring_append(s, "}\n");
+
+ return s;
+}
diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h
new file mode 100644
index 0000000000..9ca605be71
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/geom.h
@@ -0,0 +1,34 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H
+#define HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H
+
+#include "qemu/mstring.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+
+MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
+ enum ShaderPolygonMode polygon_back_mode,
+ enum ShaderPrimitiveMode primitive_mode,
+ bool smooth_shading,
+ bool vulkan);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/glsl/meson.build b/hw/xbox/nv2a/pgraph/glsl/meson.build
new file mode 100644
index 0000000000..82df3f7ede
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/meson.build
@@ -0,0 +1,8 @@
+# GLSL shader generator sources for the NV2A PGRAPH renderer.
+specific_ss.add([files(
+ 'common.c',
+ 'geom.c',
+ 'psh.c',
+ 'vsh.c',
+ 'vsh-ff.c',
+ 'vsh-prog.c',
+ )])
diff --git a/hw/xbox/nv2a/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c
similarity index 90%
rename from hw/xbox/nv2a/psh.c
rename to hw/xbox/nv2a/pgraph/glsl/psh.c
index ca9bffe79d..58ad5cf7ac 100644
--- a/hw/xbox/nv2a/psh.c
+++ b/hw/xbox/nv2a/pgraph/glsl/psh.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2013 espes
* Copyright (c) 2015 Jannik Vogel
- * Copyright (c) 2020-2021 Matt Borgerson
+ * Copyright (c) 2020-2024 Matt Borgerson
*
* Based on:
* Cxbx, PixelShader.cpp
@@ -34,9 +34,9 @@
#include
#include
-#include "qapi/qmp/qstring.h"
-
-#include "shaders_common.h"
+#include "common.h"
+#include "hw/xbox/nv2a/debug.h"
+#include "hw/xbox/nv2a/pgraph/psh.h"
#include "psh.h"
/*
@@ -575,7 +575,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s
return NULL;
case PS_TEXTUREMODES_PROJECT2D:
- return state->rect_tex[i] ? sampler2DRect : sampler2D;
+ return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
case PS_TEXTUREMODES_BUMPENVMAP:
case PS_TEXTUREMODES_BUMPENVMAP_LUM:
@@ -584,12 +584,15 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s
fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode);
assert(!"Shadow map support not implemented for this mode");
}
- return state->rect_tex[i] ? sampler2DRect : sampler2D;
+ return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
case PS_TEXTUREMODES_PROJECT3D:
case PS_TEXTUREMODES_DOT_STR_3D:
+ if (state->tex_x8y24[i] && state->vulkan) {
+ return "usampler2D";
+ }
if (state->shadow_map[i]) {
- return state->rect_tex[i] ? sampler2DRect : sampler2D;
+ return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
}
return sampler3D;
@@ -634,12 +637,28 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa
return;
}
- mstring_append_fmt(vars,
- "pT%d.xy *= texScale%d;\n"
- "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n",
- i, i, i, i, i);
-
+ mstring_append_fmt(vars, "pT%d.xy *= texScale%d;\n", i, i);
const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func];
+ if (ps->state.rect_tex[i] && ps->state.vulkan) {
+ if (ps->state.tex_x8y24[i]) {
+ mstring_append_fmt(
+ vars,
+ "uvec4 t%d_depth_raw = texture(texSamp%d, pT%d.xy/pT%d.w);\n", i, i, i, i);
+ mstring_append_fmt(
+ vars,
+ "vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);",
+ i, i);
+ } else {
+ mstring_append_fmt(
+ vars,
+ "vec4 t%d_depth = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i,
+ i, i, i);
+ }
+ } else {
+ mstring_append_fmt(
+ vars, "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", i, i,
+ i);
+ }
// Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float.
if (compare_z) {
@@ -685,18 +704,69 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars,
var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1], ps->state.border_inv_real_size[i][2]);
}
+/*
+ * Emit GLSL that samples texture 'tex' with a 3x3 gaussian convolution
+ * (the convolution3x3 offsets and gaussian3x3 weights declared in the
+ * shader preflight). Rectangle textures only. The Vulkan path divides by
+ * w manually and samples with explicit LOD; the GL path keeps textureProj
+ * on the rectangle sampler.
+ */
+static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex)
+{
+ // FIXME: Convolution for 2D textures
+ // FIXME: Quincunx
+ assert(ps->state.rect_tex[tex]);
+
+ if (ps->state.vulkan) {
+ mstring_append_fmt(vars,
+ "vec4 t%d = vec4(0.0);\n"
+ "for (int i = 0; i < 9; i++) {\n"
+ " vec2 texCoord = pT%d.xy/pT%d.w + convolution3x3[i];\n"
+ " t%d += textureLod(texSamp%d, texCoord, 0) * gaussian3x3[i];\n"
+ "}\n", tex, tex, tex, tex, tex);
+ } else {
+ mstring_append_fmt(vars,
+ "vec4 t%d = vec4(0.0);\n"
+ "for (int i = 0; i < 9; i++) {\n"
+ " vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i], 0);\n"
+ " t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n"
+ "}\n", tex, tex, tex, tex, tex);
+
+ }
+}
+
static MString* psh_convert(struct PixelShader *ps)
{
int i;
+ const char *u = ps->state.vulkan ? "" : "uniform "; // FIXME: Remove
+
MString *preflight = mstring_new();
- mstring_append(preflight, ps->state.smooth_shading ?
- STRUCT_VERTEX_DATA_IN_SMOOTH :
- STRUCT_VERTEX_DATA_IN_FLAT);
- mstring_append(preflight, "\n");
- mstring_append(preflight, "out vec4 fragColor;\n");
- mstring_append(preflight, "\n");
- mstring_append(preflight, "uniform vec4 fogColor;\n");
+ pgraph_get_glsl_vtx_header(preflight, ps->state.vulkan,
+ ps->state.smooth_shading, true, false, false);
+
+ if (ps->state.vulkan) {
+ mstring_append_fmt(preflight,
+ "layout(location = 0) out vec4 fragColor;\n"
+ "layout(binding = %d, std140) uniform PshUniforms {\n", PSH_UBO_BINDING);
+ } else {
+ mstring_append_fmt(preflight,
+ "layout(location = 0) out vec4 fragColor;\n");
+ }
+
+ mstring_append_fmt(preflight, "%sfloat alphaRef;\n"
+ "%svec4 fogColor;\n"
+ "%sivec4 clipRegion[8];\n",
+ u, u, u);
+ for (int i = 0; i < 4; i++) {
+ mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n"
+ "%sfloat bumpScale%d;\n"
+ "%sfloat bumpOffset%d;\n"
+ "%sfloat texScale%d;\n",
+ u, i, u, i, u, i, u, i);
+ }
+ for (int i = 0; i < 9; i++) {
+ for (int j = 0; j < 2; j++) {
+ mstring_append_fmt(preflight, "%svec4 c%d_%d;\n", u, j, i);
+ }
+ }
+
+ if (ps->state.vulkan) {
+ mstring_append(preflight, "};\n");
+ }
const char *dotmap_funcs[] = {
"dotmap_zero_to_one",
@@ -766,22 +836,12 @@ static MString* psh_convert(struct PixelShader *ps)
" vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0),\n"
" vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0),\n"
" vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0));\n"
- "vec4 gaussianFilter2DRectProj(sampler2DRect sampler, vec3 texCoord) {\n"
- " vec4 sum = vec4(0.0);\n"
- " for (int i = 0; i < 9; i++) {\n"
- " sum += gaussian3x3[i]*textureProj(sampler,\n"
- " texCoord + vec3(convolution3x3[i], 0.0));\n"
- " }\n"
- " return sum;\n"
- "}\n"
);
/* Window Clipping */
MString *clip = mstring_new();
- mstring_append(preflight, "uniform ivec4 clipRegion[8];\n");
- mstring_append_fmt(clip, "/* Window-clip (%s) */\n",
- ps->state.window_clip_exclusive ?
- "Exclusive" : "Inclusive");
+ mstring_append_fmt(clip, "/* Window-clip (%slusive) */\n",
+ ps->state.window_clip_exclusive ? "Exc" : "Inc");
if (!ps->state.window_clip_exclusive) {
mstring_append(clip, "bool clipContained = false;\n");
}
@@ -856,23 +916,27 @@ static MString* psh_convert(struct PixelShader *ps)
if (ps->state.shadow_map[i]) {
psh_append_shadowmap(ps, i, false, vars);
} else {
- const char *lookup = "textureProj";
- if ((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN)
- || (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX)) {
- /* FIXME: Quincunx looks better than Linear and costs less than
- * Gaussian, but Gaussian should be plenty fast so use it for
- * now.
- */
- if (ps->state.rect_tex[i]) {
- lookup = "gaussianFilter2DRectProj";
- } else {
- NV2A_UNIMPLEMENTED("Convolution for 2D textures");
- }
- }
apply_border_adjustment(ps, vars, i, "pT%d");
mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i);
- mstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, pT%d.xyw);\n",
- i, lookup, i, i);
+ if (ps->state.rect_tex[i]) {
+ if ((ps->state.conv_tex[i] ==
+ CONVOLUTION_FILTER_GAUSSIAN) ||
+ (ps->state.conv_tex[i] ==
+ CONVOLUTION_FILTER_QUINCUNX)) {
+ apply_convolution_filter(ps, vars, i);
+ } else {
+ if (ps->state.vulkan) {
+ mstring_append_fmt(vars, "vec4 t%d = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n",
+ i, i, i, i);
+ } else {
+ mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n",
+ i, i, i);
+ }
+ }
+ } else {
+ mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n",
+ i, i, i);
+ }
}
break;
}
@@ -880,6 +944,7 @@ static MString* psh_convert(struct PixelShader *ps)
if (ps->state.shadow_map[i]) {
psh_append_shadowmap(ps, i, true, vars);
} else {
+ assert(!ps->state.rect_tex[i]);
apply_border_adjustment(ps, vars, i, "pT%d");
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n",
i, i, i);
@@ -906,7 +971,6 @@ static MString* psh_convert(struct PixelShader *ps)
}
case PS_TEXTUREMODES_BUMPENVMAP:
assert(i >= 1);
- mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
if (ps->state.snorm_tex[ps->input_tex[i]]) {
/* Input color channels already signed (FIXME: May not always want signed textures in this case) */
@@ -925,9 +989,6 @@ static MString* psh_convert(struct PixelShader *ps)
break;
case PS_TEXTUREMODES_BUMPENVMAP_LUM:
assert(i >= 1);
- mstring_append_fmt(preflight, "uniform float bumpScale%d;\n", i);
- mstring_append_fmt(preflight, "uniform float bumpOffset%d;\n", i);
- mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
if (ps->state.snorm_tex[ps->input_tex[i]]) {
/* Input color channels already signed (FIXME: May not always want signed textures in this case) */
@@ -1060,8 +1121,10 @@ static MString* psh_convert(struct PixelShader *ps)
break;
}
- mstring_append_fmt(preflight, "uniform float texScale%d;\n", i);
if (sampler_type != NULL) {
+ if (ps->state.vulkan) {
+ mstring_append_fmt(preflight, "layout(binding = %d) ", PSH_TEX_BINDING + i);
+ }
mstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i);
/* As this means a texture fetch does happen, do alphakill */
@@ -1091,7 +1154,6 @@ static MString* psh_convert(struct PixelShader *ps)
}
if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) {
- mstring_append_fmt(preflight, "uniform float alphaRef;\n");
if (ps->state.alpha_func == ALPHA_FUNC_NEVER) {
mstring_append(ps->code, "discard;\n");
} else {
@@ -1112,10 +1174,6 @@ static MString* psh_convert(struct PixelShader *ps)
}
}
- for (i = 0; i < ps->num_const_refs; i++) {
- mstring_append_fmt(preflight, "uniform vec4 %s;\n", ps->const_refs[i]);
- }
-
for (i = 0; i < ps->num_var_refs; i++) {
mstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]);
if (strcmp(ps->var_refs[i], "r0") == 0) {
@@ -1128,7 +1186,7 @@ static MString* psh_convert(struct PixelShader *ps)
}
MString *final = mstring_new();
- mstring_append(final, "#version 330\n\n");
+ mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 450 : 400);
mstring_append(final, mstring_get_str(preflight));
mstring_append(final, "void main() {\n");
mstring_append(final, mstring_get_str(clip));
@@ -1175,7 +1233,7 @@ static void parse_combiner_output(uint32_t value, struct OutputInfo *out)
out->cd_alphablue = flags & 0x40;
}
-MString *psh_translate(const PshState state)
+MString *pgraph_gen_psh_glsl(const PshState state)
{
int i;
struct PixelShader ps;
diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.h b/hw/xbox/nv2a/pgraph/glsl/psh.h
new file mode 100644
index 0000000000..1ae0b0db7e
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/psh.h
@@ -0,0 +1,41 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2013 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * Based on:
+ * Cxbx, PixelShader.cpp
+ * Copyright (c) 2004 Aaron Robinson
+ *     Kingofc <kingofc@vogelfrei.de>
+ * Xeon, XBD3DPixelShader.cpp
+ * Copyright (c) 2003 _SF_
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H
+#define HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H
+
+#include "qemu/mstring.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+
+// FIXME: Move to struct
+#define PSH_UBO_BINDING 1
+#define PSH_TEX_BINDING 2
+
+MString *pgraph_gen_psh_glsl(const PshState state);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c
new file mode 100644
index 0000000000..59749003cd
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c
@@ -0,0 +1,497 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+#include "common.h"
+#include "vsh-ff.h"
+
+static void append_skinning_code(MString* str, bool mix,
+ unsigned int count, const char* type,
+ const char* output, const char* input,
+ const char* matrix, const char* swizzle);
+
+void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
+ MString *body, MString *uniforms)
+{
+ int i, j;
+ const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
+
+ /* generate vertex shader mimicking fixed function */
+ mstring_append(header,
+"#define position v0\n"
+"#define weight v1\n"
+"#define normal v2.xyz\n"
+"#define diffuse v3\n"
+"#define specular v4\n"
+"#define fogCoord v5.x\n"
+"#define pointSize v6\n"
+"#define backDiffuse v7\n"
+"#define backSpecular v8\n"
+"#define texture0 v9\n"
+"#define texture1 v10\n"
+"#define texture2 v11\n"
+"#define texture3 v12\n"
+"#define reserved1 v13\n"
+"#define reserved2 v14\n"
+"#define reserved3 v15\n"
+"\n");
+ mstring_append_fmt(uniforms,
+"%svec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n"
+"%svec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n"
+"%svec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n", u, u, u
+);
+ mstring_append(header,
+"\n"
+GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0))
+GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0))
+"\n"
+GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0))
+GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1))
+GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2))
+GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3))
+"\n"
+GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0))
+GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1))
+GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2))
+GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3))
+"\n"
+GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0))
+GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1))
+GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2))
+GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3))
+"\n"
+GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0))
+GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1))
+GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2))
+GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3))
+"\n"
+GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0))
+GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1))
+GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2))
+GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3))
+"\n"
+GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0))
+GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1))
+GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2))
+GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3))
+"\n"
+GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP))
+"\n"
+"#define lightAmbientColor(i) "
+ "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n"
+"#define lightDiffuseColor(i) "
+ "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n"
+"#define lightSpecularColor(i) "
+ "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n"
+"\n"
+"#define lightSpotFalloff(i) "
+ "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n"
+"#define lightSpotDirection(i) "
+ "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n"
+"\n"
+"#define lightLocalRange(i) "
+ "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n"
+"\n"
+GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
+GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz")
+"\n"
+);
+ mstring_append_fmt(uniforms,
+"%smat4 invViewport;\n", u);
+
+ /* Skinning */
+ unsigned int count;
+ bool mix;
+ switch (state->skinning) {
+ case SKINNING_OFF:
+ mix = false; count = 0; break;
+ case SKINNING_1WEIGHTS:
+ mix = true; count = 2; break;
+ case SKINNING_2WEIGHTS2MATRICES:
+ mix = false; count = 2; break;
+ case SKINNING_2WEIGHTS:
+ mix = true; count = 3; break;
+ case SKINNING_3WEIGHTS3MATRICES:
+ mix = false; count = 3; break;
+ case SKINNING_3WEIGHTS:
+ mix = true; count = 4; break;
+ case SKINNING_4WEIGHTS4MATRICES:
+ mix = false; count = 4; break;
+ default:
+ assert(false);
+ break;
+ }
+ mstring_append_fmt(body, "/* Skinning mode %d */\n",
+ state->skinning);
+
+ append_skinning_code(body, mix, count, "vec4",
+ "tPosition", "position",
+ "modelViewMat", "xyzw");
+ append_skinning_code(body, mix, count, "vec3",
+ "tNormal", "vec4(normal, 0.0)",
+ "invModelViewMat", "xyz");
+
+ /* Normalization */
+ if (state->normalization) {
+ mstring_append(body, "tNormal = normalize(tNormal);\n");
+ }
+
+ /* Texgen */
+ for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+ mstring_append_fmt(body, "/* Texgen for stage %d */\n",
+ i);
+ /* Set each component individually */
+ /* FIXME: could be nicer if some channels share the same texgen */
+ for (j = 0; j < 4; j++) {
+ /* TODO: TexGen View Model missing! */
+ char c = "xyzw"[j];
+ char cSuffix = "STRQ"[j];
+ switch (state->texgen[i][j]) {
+ case TEXGEN_DISABLE:
+ mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n",
+ i, c, i, c);
+ break;
+ case TEXGEN_EYE_LINEAR:
+ mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n",
+ i, c, cSuffix, i);
+ break;
+ case TEXGEN_OBJECT_LINEAR:
+ mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n",
+ i, c, cSuffix, i);
+ break;
+ case TEXGEN_SPHERE_MAP:
+ assert(j < 2); /* Channels S,T only! */
+ mstring_append(body, "{\n");
+ /* FIXME: u, r and m only have to be calculated once */
+ mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
+ //FIXME: tNormal before or after normalization? Always normalize?
+ mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
+
+ /* FIXME: This would consume 1 division fewer and *might* be
+ * faster than length:
+ * // [z=1/(2*x) => z=1/x*0.5]
+ * vec3 ro = r + vec3(0.0, 0.0, 1.0);
+ * float m = inversesqrt(dot(ro,ro))*0.5;
+ */
+
+ mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n");
+ mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n",
+ i, c, c);
+ mstring_append(body, "}\n");
+ break;
+ case TEXGEN_REFLECTION_MAP:
+ assert(j < 3); /* Channels S,T,R only! */
+ mstring_append(body, "{\n");
+ /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */
+ mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
+ mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
+ mstring_append_fmt(body, " oT%d.%c = r.%c;\n",
+ i, c, c);
+ mstring_append(body, "}\n");
+ break;
+ case TEXGEN_NORMAL_MAP:
+ assert(j < 3); /* Channels S,T,R only! */
+ mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n",
+ i, c, c);
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ }
+ }
+
+ /* Apply texture matrices */
+ for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+ if (state->texture_matrix_enable[i]) {
+ mstring_append_fmt(body,
+ "oT%d = oT%d * texMat%d;\n",
+ i, i, i);
+ }
+ }
+
+ /* Lighting */
+ if (state->lighting) {
+
+ //FIXME: Do 2 passes if we want 2 sided-lighting?
+
+ static char alpha_source_diffuse[] = "diffuse.a";
+ static char alpha_source_specular[] = "specular.a";
+ static char alpha_source_material[] = "material_alpha";
+ const char *alpha_source = alpha_source_diffuse;
+ if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) {
+ mstring_append_fmt(uniforms, "%sfloat material_alpha;\n", u);
+ alpha_source = alpha_source_material;
+ } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) {
+ alpha_source = alpha_source_specular;
+ }
+
+ if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) {
+ mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source);
+ } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) {
+ mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source);
+ } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) {
+ mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source);
+ }
+
+ mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n");
+ if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) {
+ mstring_append(body, "oD0.rgb += sceneAmbientColor;\n");
+ } else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) {
+ mstring_append(body, "oD0.rgb += diffuse.rgb;\n");
+ } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) {
+ mstring_append(body, "oD0.rgb += specular.rgb;\n");
+ }
+
+ mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n");
+
+ for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+ if (state->light[i] == LIGHT_OFF) {
+ continue;
+ }
+
+ /* FIXME: It seems that we only have to handle the surface colors if
+ * they are not part of the material [= vertex colors].
+ * If they are material the cpu will premultiply light
+ * colors
+ */
+
+ mstring_append_fmt(body, "/* Light %d */ {\n", i);
+
+ if (state->light[i] == LIGHT_LOCAL
+ || state->light[i] == LIGHT_SPOT) {
+
+ mstring_append_fmt(uniforms,
+ "%svec3 lightLocalPosition%d;\n"
+ "%svec3 lightLocalAttenuation%d;\n",
+ u, i, u, i);
+ mstring_append_fmt(body,
+ " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n"
+ " float d = length(VP);\n"
+//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights?
+ " VP = normalize(VP);\n"
+ " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n"
+ " + lightLocalAttenuation%d.y * d\n"
+ " + lightLocalAttenuation%d.z * d * d);\n"
+ " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */
+ " float nDotVP = max(0.0, dot(tNormal, VP));\n"
+ " float nDotHV = max(0.0, dot(tNormal, halfVector));\n",
+ i, i, i, i);
+
+ }
+
+ switch(state->light[i]) {
+ case LIGHT_INFINITE:
+
+ /* lightLocalRange will be 1e+30 here */
+
+ mstring_append_fmt(uniforms,
+ "%svec3 lightInfiniteHalfVector%d;\n"
+ "%svec3 lightInfiniteDirection%d;\n",
+ u, i, u, i);
+ mstring_append_fmt(body,
+ " float attenuation = 1.0;\n"
+ " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n"
+ " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n",
+ i, i);
+
+ /* FIXME: Do specular */
+
+ /* FIXME: tBackDiffuse */
+
+ break;
+ case LIGHT_LOCAL:
+ /* Everything done already */
+ break;
+ case LIGHT_SPOT:
+ /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */
+ mstring_append_fmt(body,
+ " vec4 spotDir = lightSpotDirection(%d);\n"
+ " float invScale = 1/length(spotDir.xyz);\n"
+ " float cosHalfPhi = -invScale*spotDir.w;\n"
+ " float cosHalfTheta = invScale + cosHalfPhi;\n"
+ " float spotDirDotVP = dot(spotDir.xyz, VP);\n"
+ " float rho = invScale*spotDirDotVP;\n"
+ " if (rho > cosHalfTheta) {\n"
+ " } else if (rho <= cosHalfPhi) {\n"
+ " attenuation = 0.0;\n"
+ " } else {\n"
+ " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */
+ " }\n",
+ i);
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ mstring_append_fmt(body,
+ " float pf;\n"
+ " if (nDotVP == 0.0) {\n"
+ " pf = 0.0;\n"
+ " } else {\n"
+ " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n"
+ " }\n"
+ " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n"
+ " vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n"
+ " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n",
+ i, i, i);
+
+ mstring_append(body,
+ " oD0.xyz += lightAmbient;\n");
+
+ switch (state->diffuse_src) {
+ case MATERIAL_COLOR_SRC_MATERIAL:
+ mstring_append(body,
+ " oD0.xyz += lightDiffuse;\n");
+ break;
+ case MATERIAL_COLOR_SRC_DIFFUSE:
+ mstring_append(body,
+ " oD0.xyz += diffuse.xyz * lightDiffuse;\n");
+ break;
+ case MATERIAL_COLOR_SRC_SPECULAR:
+ mstring_append(body,
+ " oD0.xyz += specular.xyz * lightDiffuse;\n");
+ break;
+ }
+
+ mstring_append(body,
+ " oD1.xyz += specular.xyz * lightSpecular;\n");
+
+ mstring_append(body, "}\n");
+ }
+ } else {
+ mstring_append(body, " oD0 = diffuse;\n");
+ mstring_append(body, " oD1 = specular;\n");
+ }
+ mstring_append(body, " oB0 = backDiffuse;\n");
+ mstring_append(body, " oB1 = backSpecular;\n");
+
+ /* Fog */
+ if (state->fog_enable) {
+
+ /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */
+ switch(state->foggen) {
+ case FOGGEN_SPEC_ALPHA:
+ /* FIXME: Do we have to clamp here? */
+ mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n");
+ break;
+ case FOGGEN_RADIAL:
+ mstring_append(body, " float fogDistance = length(tPosition.xyz);\n");
+ break;
+ case FOGGEN_PLANAR:
+ case FOGGEN_ABS_PLANAR:
+ mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n");
+ if (state->foggen == FOGGEN_ABS_PLANAR) {
+ mstring_append(body, " fogDistance = abs(fogDistance);\n");
+ }
+ break;
+ case FOGGEN_FOG_X:
+ mstring_append(body, " float fogDistance = fogCoord;\n");
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ }
+
+ /* If skinning is off the composite matrix already includes the MV matrix */
+ if (state->skinning == SKINNING_OFF) {
+ mstring_append(body, " tPosition = position;\n");
+ }
+
+ mstring_append(body,
+ " oPos = invViewport * (tPosition * compositeMat);\n"
+ );
+
+ if (state->vulkan) {
+ mstring_append(body, " oPos.y *= -1;\n");
+ } else {
+ mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n");
+ }
+
+ /* FIXME: Testing */
+ if (state->point_params_enable) {
+ mstring_append_fmt(
+ body,
+ " float d_e = length(position * modelViewMat0);\n"
+ " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n",
+ state->point_params[0], state->point_params[1], state->point_params[2],
+ state->point_params[6]);
+ mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n",
+ state->point_params[3], state->point_params[7],
+ state->surface_scale_factor);
+ } else {
+ mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size,
+ state->surface_scale_factor);
+ }
+
+ mstring_append(body,
+ " if (oPos.w == 0.0 || isinf(oPos.w)) {\n"
+ " vtx_inv_w = 1.0;\n"
+ " } else {\n"
+ " vtx_inv_w = 1.0 / oPos.w;\n"
+ " }\n"
+ " vtx_inv_w_flat = vtx_inv_w;\n");
+}
+
+static void append_skinning_code(MString* str, bool mix,
+ unsigned int count, const char* type,
+ const char* output, const char* input,
+ const char* matrix, const char* swizzle)
+{
+ if (count == 0) {
+ mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n",
+ type, output, input, matrix, swizzle);
+ } else {
+ mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type);
+ if (mix) {
+ /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */
+ mstring_append(str, "{\n"
+ " float weight_i;\n"
+ " float weight_n = 1.0;\n");
+ int i;
+ for (i = 0; i < count; i++) {
+ if (i < (count - 1)) {
+ char c = "xyzw"[i];
+ mstring_append_fmt(str, " weight_i = weight.%c;\n"
+ " weight_n -= weight_i;\n",
+ c);
+ } else {
+ mstring_append(str, " weight_i = weight_n;\n");
+ }
+ mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n",
+ output, input, matrix, i, swizzle);
+ }
+ mstring_append(str, "}\n");
+ } else {
+ /* Individual weights */
+ int i;
+ for (i = 0; i < count; i++) {
+ char c = "xyzw"[i];
+ mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n",
+ output, input, matrix, i, swizzle, c);
+ }
+ }
+ }
+}
diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h
new file mode 100644
index 0000000000..949bf54252
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h
@@ -0,0 +1,31 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H
+#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H
+
+#include "qemu/mstring.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+
+void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
+ MString *body, MString *uniforms);
+
+#endif
diff --git a/hw/xbox/nv2a/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c
similarity index 97%
rename from hw/xbox/nv2a/vsh.c
rename to hw/xbox/nv2a/pgraph/glsl/vsh-prog.c
index 0e4cf314bc..7bebed71e8 100644
--- a/hw/xbox/nv2a/vsh.c
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c
@@ -1,5 +1,5 @@
/*
- * QEMU Geforce NV2A vertex shader translation
+ * Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2014 Jannik Vogel
* Copyright (c) 2012 espes
@@ -32,8 +32,9 @@
#include
#include
-#include "shaders_common.h"
-#include "vsh.h"
+#include "hw/xbox/nv2a/pgraph/vsh.h"
+#include "common.h"
+#include "vsh-prog.h"
#define VSH_D3DSCM_CORRECTION 96
@@ -794,10 +795,11 @@ static const char* vsh_header =
" return t;\n"
"}\n";
-void vsh_translate(uint16_t version,
+void pgraph_gen_vsh_prog_glsl(uint16_t version,
const uint32_t *tokens,
unsigned int length,
bool z_perspective,
+ bool vulkan,
MString *header, MString *body)
{
@@ -843,14 +845,30 @@ void vsh_translate(uint16_t version,
* TODO: the pixel-center co-ordinate differences should handled
*/
" oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n"
- " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;\n"
- );
+ );
+
+ if (vulkan) {
+ mstring_append(body,
+ " oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n");
+ } else {
+ mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) "
+ "/ surfaceSize.y;\n");
+ }
+
if (z_perspective) {
mstring_append(body, " oPos.z = oPos.w;\n");
}
+
+ mstring_append(body,
+ " if (clipRange.y != clipRange.x) {\n");
+ if (vulkan) {
+ mstring_append(body, " oPos.z /= clipRange.y;\n");
+ } else {
+ mstring_append(body,
+ " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y "
+ "- clipRange.x)) - 1;\n");
+ }
mstring_append(body,
- " if (clipRange.y != clipRange.x) {\n"
- " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y - clipRange.x)) - 1;\n"
" }\n"
/* Correct for the perspective divide */
diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h
new file mode 100644
index 0000000000..84d8141c5e
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h
@@ -0,0 +1,35 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2014 Jannik Vogel
+ * Copyright (c) 2012 espes
+ *
+ * Based on:
+ * Cxbx, VertexShader.cpp
+ * Copyright (c) 2004 Aaron Robinson
+ *     Kingofc <kingofc@vogelfrei.de>
+ * Dxbx, uPushBuffer.pas
+ * Copyright (c) 2007 Shadow_tj, PatrickvL
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H
+#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H
+
+void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens,
+ unsigned int length, bool z_perspective,
+ bool vulkan, MString *header, MString *body);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c
new file mode 100644
index 0000000000..4fcc09cac5
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c
@@ -0,0 +1,274 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+#include "common.h"
+#include "vsh.h"
+#include "vsh-ff.h"
+#include "vsh-prog.h"
+#include
+
+MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs)
+{
+ int i;
+ MString *output = mstring_new();
+ mstring_append_fmt(output, "#version %d\n\n", state->vulkan ? 450 : 400);
+
+ MString *header = mstring_from_str("");
+
+ MString *uniforms = mstring_from_str("");
+
+ const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
+
+ mstring_append_fmt(uniforms,
+ "%svec4 clipRange;\n"
+ "%svec2 surfaceSize;\n"
+ "%svec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n"
+ "%svec2 fogParam;\n",
+ u, u, u, u
+ );
+
+ mstring_append(header,
+ GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG))
+ GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT))
+ GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT))
+ GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT))
+ GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT))
+
+ "\n"
+ "vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n"
+ "vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n"
+ "\n"
+ "vec4 decompress_11_11_10(int cmp) {\n"
+ " float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n"
+ " float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n"
+ " float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n"
+ " return vec4(x, y, z, 1);\n"
+ "}\n");
+
+ pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading,
+ false, prefix_outputs, false);
+
+ if (prefix_outputs) {
+ mstring_append(header,
+ "#define vtx_inv_w v_vtx_inv_w\n"
+ "#define vtx_inv_w_flat v_vtx_inv_w_flat\n"
+ "#define vtxD0 v_vtxD0\n"
+ "#define vtxD1 v_vtxD1\n"
+ "#define vtxB0 v_vtxB0\n"
+ "#define vtxB1 v_vtxB1\n"
+ "#define vtxFog v_vtxFog\n"
+ "#define vtxT0 v_vtxT0\n"
+ "#define vtxT1 v_vtxT1\n"
+ "#define vtxT2 v_vtxT2\n"
+ "#define vtxT3 v_vtxT3\n"
+ );
+ }
+ mstring_append(header, "\n");
+ for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+
+ bool is_uniform = state->uniform_attrs & (1 << i);
+ bool is_compressed = state->compressed_attrs & (1 << i);
+
+ assert(!(is_uniform && is_compressed));
+
+ if (is_uniform) {
+ mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i);
+ } else {
+ if (state->compressed_attrs & (1 << i)) {
+ mstring_append_fmt(header,
+ "layout(location = %d) in int v%d_cmp;\n", i, i);
+ } else if (state->swizzle_attrs & (1 << i)) {
+ mstring_append_fmt(header, "layout(location = %d) in vec4 v%d_sw;\n",
+ i, i);
+ } else {
+ mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n",
+ i, i);
+ }
+ }
+ }
+ mstring_append(header, "\n");
+
+ MString *body = mstring_from_str("void main() {\n");
+
+ for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+ if (state->compressed_attrs & (1 << i)) {
+ mstring_append_fmt(
+ body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i);
+ }
+
+ if (state->swizzle_attrs & (1 << i)) {
+ mstring_append_fmt(body, "vec4 v%d = v%d_sw.bgra;\n", i, i);
+ }
+
+ }
+
+ if (state->fixed_function) {
+ pgraph_gen_vsh_ff_glsl(state, header, body, uniforms);
+ } else if (state->vertex_program) {
+ pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS,
+ (uint32_t *)state->program_data,
+ state->program_length, state->z_perspective,
+ state->vulkan, header, body);
+ } else {
+ assert(false);
+ }
+
+
+ /* Fog */
+
+ if (state->fog_enable) {
+
+ if (state->vertex_program) {
+ /* FIXME: Does foggen do something here? Let's do some tracking..
+ *
+ * "RollerCoaster Tycoon" has
+ * state->vertex_program = true; state->foggen == FOGGEN_PLANAR
+ * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z
+ */
+ mstring_append(body, " float fogDistance = oFog.x;\n");
+ }
+
+ /* FIXME: Do this per pixel? */
+
+ switch (state->fog_mode) {
+ case FOG_MODE_LINEAR:
+ case FOG_MODE_LINEAR_ABS:
+
+ /* f = (end - d) / (end - start)
+ * fogParam.y = -1 / (end - start)
+ * fogParam.x = 1 - end * fogParam.y;
+ */
+
+ mstring_append(body,
+ " if (isinf(fogDistance)) {\n"
+ " fogDistance = 0.0;\n"
+ " }\n"
+ );
+ mstring_append(body, " float fogFactor = fogParam.x + fogDistance * fogParam.y;\n");
+ mstring_append(body, " fogFactor -= 1.0;\n");
+ break;
+ case FOG_MODE_EXP:
+ mstring_append(body,
+ " if (isinf(fogDistance)) {\n"
+ " fogDistance = 0.0;\n"
+ " }\n"
+ );
+ /* fallthru */
+ case FOG_MODE_EXP_ABS:
+
+ /* f = 1 / (e^(d * density))
+ * fogParam.y = -density / (2 * ln(256))
+ * fogParam.x = 1.5
+ */
+
+ mstring_append(body, " float fogFactor = fogParam.x + exp2(fogDistance * fogParam.y * 16.0);\n");
+ mstring_append(body, " fogFactor -= 1.5;\n");
+ break;
+ case FOG_MODE_EXP2:
+ case FOG_MODE_EXP2_ABS:
+
+ /* f = 1 / (e^((d * density)^2))
+ * fogParam.y = -density / (2 * sqrt(ln(256)))
+ * fogParam.x = 1.5
+ */
+
+ mstring_append(body, " float fogFactor = fogParam.x + exp2(-fogDistance * fogDistance * fogParam.y * fogParam.y * 32.0);\n");
+ mstring_append(body, " fogFactor -= 1.5;\n");
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ /* Calculate absolute for the modes which need it */
+ switch (state->fog_mode) {
+ case FOG_MODE_LINEAR_ABS:
+ case FOG_MODE_EXP_ABS:
+ case FOG_MODE_EXP2_ABS:
+ mstring_append(body, " fogFactor = abs(fogFactor);\n");
+ break;
+ default:
+ break;
+ }
+
+ mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n");
+ } else {
+ /* FIXME: Is the fog still calculated / passed somehow?!
+ */
+ mstring_append(body, " oFog.xyzw = vec4(1.0);\n");
+ }
+
+ /* Set outputs */
+ const char *shade_model_mult = state->smooth_shading ? "vtx_inv_w" : "vtx_inv_w_flat";
+ mstring_append_fmt(body, "\n"
+ " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n"
+ " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n"
+ " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n"
+ " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n"
+ " vtxFog = oFog.x * vtx_inv_w;\n"
+ " vtxT0 = oT0 * vtx_inv_w;\n"
+ " vtxT1 = oT1 * vtx_inv_w;\n"
+ " vtxT2 = oT2 * vtx_inv_w;\n"
+ " vtxT3 = oT3 * vtx_inv_w;\n"
+ " gl_Position = oPos;\n"
+ " gl_PointSize = oPts.x;\n"
+ // " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near
+ // " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far
+ "\n"
+ "}\n",
+ shade_model_mult,
+ shade_model_mult,
+ shade_model_mult,
+ shade_model_mult);
+
+
+ /* Return combined header + source */
+ if (state->vulkan) {
+ mstring_append_fmt(
+ output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n",
+ VSH_UBO_BINDING, mstring_get_str(uniforms));
+ // FIXME: Only needed for vk, for gl we use glVertexAttrib
+ mstring_append_fmt(output,
+ "layout(push_constant) uniform PushConstants {\n"
+ "vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n"
+ "};\n\n");
+ } else {
+ mstring_append(
+ output, mstring_get_str(uniforms));
+ }
+
+ mstring_append(output, mstring_get_str(header));
+ mstring_unref(header);
+
+ mstring_append(output, mstring_get_str(body));
+ mstring_unref(body);
+ return output;
+}
diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.h b/hw/xbox/nv2a/pgraph/glsl/vsh.h
new file mode 100644
index 0000000000..584e1997e3
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/glsl/vsh.h
@@ -0,0 +1,33 @@
+/*
+ * Geforce NV2A PGRAPH GLSL Shader Generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H
+#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H
+
+#include "qemu/mstring.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+
+// FIXME: Move to struct
+#define VSH_UBO_BINDING 0
+
+MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/meson.build b/hw/xbox/nv2a/pgraph/meson.build
new file mode 100644
index 0000000000..5b8bc181c3
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/meson.build
@@ -0,0 +1,19 @@
+specific_ss.add(files(
+ 'pgraph.c',
+ 'profile.c',
+ 'rdi.c',
+ 's3tc.c',
+ 'shaders.c',
+ 'swizzle.c',
+ 'texture.c',
+ 'vertex.c',
+ ))
+if have_renderdoc
+ specific_ss.add(files('debug_renderdoc.c'))
+endif
+subdir('thirdparty')
+subdir('null')
+subdir('gl')
+subdir('glsl')
+subdir('vk')
+specific_ss.add(nv2a_vsh_cpu)
diff --git a/hw/xbox/nv2a/pgraph_methods.h b/hw/xbox/nv2a/pgraph/methods.h
similarity index 100%
rename from hw/xbox/nv2a/pgraph_methods.h
rename to hw/xbox/nv2a/pgraph/methods.h
diff --git a/hw/xbox/nv2a/pgraph/null/meson.build b/hw/xbox/nv2a/pgraph/null/meson.build
new file mode 100644
index 0000000000..e2731a13d9
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/null/meson.build
@@ -0,0 +1,3 @@
+specific_ss.add([sdl, files(
+ 'renderer.c',
+ )])
diff --git a/hw/xbox/nv2a/pgraph/null/renderer.c b/hw/xbox/nv2a/pgraph/null/renderer.c
new file mode 100644
index 0000000000..9a9c2512cc
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/null/renderer.c
@@ -0,0 +1,146 @@
+/*
+ * Geforce NV2A PGRAPH Null Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "hw/hw.h"
+#include "hw/xbox/nv2a/nv2a_int.h"
+
+static void pgraph_null_sync(NV2AState *d)
+{
+ qatomic_set(&d->pgraph.sync_pending, false);
+ qemu_event_set(&d->pgraph.sync_complete);
+}
+
+static void pgraph_null_flush(NV2AState *d)
+{
+ qatomic_set(&d->pgraph.flush_pending, false);
+ qemu_event_set(&d->pgraph.flush_complete);
+}
+
+static void pgraph_null_process_pending(NV2AState *d)
+{
+ if (
+ qatomic_read(&d->pgraph.sync_pending) ||
+ qatomic_read(&d->pgraph.flush_pending)
+ ) {
+ qemu_mutex_unlock(&d->pfifo.lock);
+ qemu_mutex_lock(&d->pgraph.lock);
+ if (qatomic_read(&d->pgraph.sync_pending)) {
+ pgraph_null_sync(d);
+ }
+ if (qatomic_read(&d->pgraph.flush_pending)) {
+ pgraph_null_flush(d);
+ }
+ qemu_mutex_unlock(&d->pgraph.lock);
+ qemu_mutex_lock(&d->pfifo.lock);
+ }
+}
+
+static void pgraph_null_clear_report_value(NV2AState *d)
+{
+}
+
+static void pgraph_null_clear_surface(NV2AState *d, uint32_t parameter)
+{
+}
+
+static void pgraph_null_draw_begin(NV2AState *d)
+{
+}
+
+static void pgraph_null_draw_end(NV2AState *d)
+{
+}
+
+static void pgraph_null_flip_stall(NV2AState *d)
+{
+}
+
+static void pgraph_null_flush_draw(NV2AState *d)
+{
+}
+
+static void pgraph_null_get_report(NV2AState *d, uint32_t parameter)
+{
+ pgraph_write_zpass_pixel_cnt_report(d, parameter, 0);
+}
+
+static void pgraph_null_image_blit(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_savevm_trigger(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_savevm_wait(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_shutdown_trigger(NV2AState *d)
+{
+}
+
+static void pgraph_null_pre_shutdown_wait(NV2AState *d)
+{
+}
+
+static void pgraph_null_process_pending_reports(NV2AState *d)
+{
+}
+
+static void pgraph_null_surface_update(NV2AState *d, bool upload,
+ bool color_write, bool zeta_write)
+{
+}
+
+static void pgraph_null_init(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ pg->null_renderer_state = NULL;
+}
+
+static PGRAPHRenderer pgraph_null_renderer = {
+ .type = CONFIG_DISPLAY_RENDERER_NULL,
+ .name = "Null",
+ .ops = {
+ .init = pgraph_null_init,
+ .clear_report_value = pgraph_null_clear_report_value,
+ .clear_surface = pgraph_null_clear_surface,
+ .draw_begin = pgraph_null_draw_begin,
+ .draw_end = pgraph_null_draw_end,
+ .flip_stall = pgraph_null_flip_stall,
+ .flush_draw = pgraph_null_flush_draw,
+ .get_report = pgraph_null_get_report,
+ .image_blit = pgraph_null_image_blit,
+ .pre_savevm_trigger = pgraph_null_pre_savevm_trigger,
+ .pre_savevm_wait = pgraph_null_pre_savevm_wait,
+ .pre_shutdown_trigger = pgraph_null_pre_shutdown_trigger,
+ .pre_shutdown_wait = pgraph_null_pre_shutdown_wait,
+ .process_pending = pgraph_null_process_pending,
+ .process_pending_reports = pgraph_null_process_pending_reports,
+ .surface_update = pgraph_null_surface_update,
+ }
+};
+
+static void __attribute__((constructor)) register_renderer(void)
+{
+ pgraph_renderer_register(&pgraph_null_renderer);
+}
diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c
new file mode 100644
index 0000000000..0062efa15f
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/pgraph.c
@@ -0,0 +1,2874 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../nv2a_int.h"
+#include "ui/xemu-settings.h"
+#include "util.h"
+#include "swizzle.h"
+#include "nv2a_vsh_emulator.h"
+
+#define PG_GET_MASK(reg, mask) GET_MASK(pgraph_reg_r(pg, reg), mask)
+#define PG_SET_MASK(reg, mask, value) \
+ do { \
+ uint32_t rv = pgraph_reg_r(pg, reg); \
+ SET_MASK(rv, mask, value); \
+ pgraph_reg_w(pg, reg, rv); \
+ } while (0)
+
+
+NV2AState *g_nv2a;
+
+uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size)
+{
+ NV2AState *d = (NV2AState *)opaque;
+ PGRAPHState *pg = &d->pgraph;
+
+ qemu_mutex_lock(&pg->lock);
+
+ uint64_t r = 0;
+ switch (addr) {
+ case NV_PGRAPH_INTR:
+ r = pg->pending_interrupts;
+ break;
+ case NV_PGRAPH_INTR_EN:
+ r = pg->enabled_interrupts;
+ break;
+ case NV_PGRAPH_RDI_DATA: {
+ unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX,
+ NV_PGRAPH_RDI_INDEX_SELECT);
+ unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX,
+ NV_PGRAPH_RDI_INDEX_ADDRESS);
+
+ r = pgraph_rdi_read(pg, select, address);
+
+ /* FIXME: Overflow into select? */
+ assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS,
+ NV_PGRAPH_RDI_INDEX_ADDRESS));
+ PG_SET_MASK(NV_PGRAPH_RDI_INDEX,
+ NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1);
+ break;
+ }
+ default:
+ r = pgraph_reg_r(pg, addr);
+ break;
+ }
+
+ qemu_mutex_unlock(&pg->lock);
+
+ nv2a_reg_log_read(NV_PGRAPH, addr, size, r);
+ return r;
+}
+
+void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+ NV2AState *d = (NV2AState *)opaque;
+ PGRAPHState *pg = &d->pgraph;
+
+ nv2a_reg_log_write(NV_PGRAPH, addr, size, val);
+
+ qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here
+ qemu_mutex_lock(&pg->lock);
+
+ switch (addr) {
+ case NV_PGRAPH_INTR:
+ pg->pending_interrupts &= ~val;
+
+ if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) {
+ pg->waiting_for_nop = false;
+ }
+ if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) {
+ pg->waiting_for_context_switch = false;
+ }
+ pfifo_kick(d);
+ break;
+ case NV_PGRAPH_INTR_EN:
+ pg->enabled_interrupts = val;
+ break;
+ case NV_PGRAPH_INCREMENT:
+ if (val & NV_PGRAPH_INCREMENT_READ_3D) {
+ PG_SET_MASK(NV_PGRAPH_SURFACE,
+ NV_PGRAPH_SURFACE_READ_3D,
+ (PG_GET_MASK(NV_PGRAPH_SURFACE,
+ NV_PGRAPH_SURFACE_READ_3D)+1)
+ % PG_GET_MASK(NV_PGRAPH_SURFACE,
+ NV_PGRAPH_SURFACE_MODULO_3D) );
+ nv2a_profile_increment();
+ pfifo_kick(d);
+ }
+ break;
+ case NV_PGRAPH_RDI_DATA: {
+ unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX,
+ NV_PGRAPH_RDI_INDEX_SELECT);
+ unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX,
+ NV_PGRAPH_RDI_INDEX_ADDRESS);
+
+ pgraph_rdi_write(pg, select, address, val);
+
+ /* FIXME: Overflow into select? */
+ assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS,
+ NV_PGRAPH_RDI_INDEX_ADDRESS));
+ PG_SET_MASK(NV_PGRAPH_RDI_INDEX,
+ NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1);
+ break;
+ }
+ case NV_PGRAPH_CHANNEL_CTX_TRIGGER: {
+ hwaddr context_address =
+ PG_GET_MASK(NV_PGRAPH_CHANNEL_CTX_POINTER,
+ NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4;
+
+ if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) {
+#ifdef DEBUG_NV2A
+ unsigned pgraph_channel_id =
+ PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID);
+#endif
+ NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n",
+ pgraph_channel_id, context_address);
+
+ assert(context_address < memory_region_size(&d->ramin));
+
+ uint8_t *context_ptr = d->ramin_ptr + context_address;
+ uint32_t context_user = ldl_le_p((uint32_t*)context_ptr);
+
+ NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user);
+
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_USER, context_user);
+ // pgraph_set_context_user(d, context_user);
+ }
+ if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) {
+ /* do stuff ... */
+ }
+
+ break;
+ }
+ default:
+ pgraph_reg_w(pg, addr, val);
+ break;
+ }
+
+ // events
+ switch (addr) {
+ case NV_PGRAPH_FIFO:
+ pfifo_kick(d);
+ break;
+ }
+
+ qemu_mutex_unlock(&pg->lock);
+ qemu_mutex_unlock(&d->pfifo.lock);
+}
+
+void pgraph_context_switch(NV2AState *d, unsigned int channel_id)
+{
+ PGRAPHState *pg = &d->pgraph;
+
+ bool channel_valid =
+ pgraph_reg_r(pg, NV_PGRAPH_CTX_CONTROL) & NV_PGRAPH_CTX_CONTROL_CHID;
+ unsigned pgraph_channel_id =
+ PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID);
+
+ bool valid = channel_valid && pgraph_channel_id == channel_id;
+ if (!valid) {
+ PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR,
+ NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id);
+
+ NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id);
+
+ /* TODO: hardware context switching */
+ assert(!PG_GET_MASK(NV_PGRAPH_DEBUG_3,
+ NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH));
+
+ pg->waiting_for_context_switch = true;
+ qemu_mutex_unlock(&pg->lock);
+ qemu_mutex_lock_iothread();
+ pg->pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH;
+ nv2a_update_irq(d);
+ qemu_mutex_unlock_iothread();
+ qemu_mutex_lock(&pg->lock);
+ }
+}
+
+static const PGRAPHRenderer *renderers[CONFIG_DISPLAY_RENDERER__COUNT];
+
+void pgraph_renderer_register(const PGRAPHRenderer *renderer)
+{
+ assert(renderer->type < CONFIG_DISPLAY_RENDERER__COUNT);
+ renderers[renderer->type] = renderer;
+}
+
+void pgraph_init(NV2AState *d)
+{
+ g_nv2a = d;
+
+ PGRAPHState *pg = &d->pgraph;
+ qemu_mutex_init(&pg->lock);
+ qemu_event_init(&pg->sync_complete, false);
+ qemu_event_init(&pg->flush_complete, false);
+
+ pg->frame_time = 0;
+ pg->draw_time = 0;
+
+ pg->material_alpha = 0.0f;
+ PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE,
+ NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH);
+ pg->primitive_mode = PRIM_TYPE_INVALID;
+
+ for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+ VertexAttribute *attribute = &pg->vertex_attributes[i];
+ attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH
+ * sizeof(float) * 4);
+ attribute->inline_buffer_populated = false;
+ }
+
+ pgraph_clear_dirty_reg_map(pg);
+
+ pg->renderer = renderers[g_config.display.renderer];
+ pg->renderer->ops.init(d);
+}
+
+void pgraph_clear_dirty_reg_map(PGRAPHState *pg)
+{
+ memset(pg->regs_dirty, 0, sizeof(pg->regs_dirty));
+}
+
+void pgraph_init_thread(NV2AState *d)
+{
+ if (d->pgraph.renderer->ops.init_thread) {
+ d->pgraph.renderer->ops.init_thread(d);
+ }
+}
+
+static CONFIG_DISPLAY_RENDERER get_default_renderer(void)
+{
+#ifdef CONFIG_OPENGL
+ if (renderers[CONFIG_DISPLAY_RENDERER_OPENGL]) {
+ return CONFIG_DISPLAY_RENDERER_OPENGL;
+ }
+#endif
+#ifdef CONFIG_VULKAN
+ if (renderers[CONFIG_DISPLAY_RENDERER_VULKAN]) {
+ return CONFIG_DISPLAY_RENDERER_VULKAN;
+ }
+#endif
+ fprintf(stderr, "Warning: No available renderer\n");
+ return CONFIG_DISPLAY_RENDERER_NULL;
+}
+
+void nv2a_context_init(void)
+{
+ if (!renderers[g_config.display.renderer]) {
+ g_config.display.renderer = get_default_renderer();
+ fprintf(stderr,
+ "Warning: Configured renderer unavailable. Switching to %s.\n",
+ renderers[g_config.display.renderer]->name);
+ }
+
+ if (renderers[g_config.display.renderer]->ops.early_context_init) {
+ renderers[g_config.display.renderer]->ops.early_context_init();
+ }
+}
+
+void pgraph_destroy(PGRAPHState *pg)
+{
+ NV2AState *d = container_of(pg, NV2AState, pgraph);
+
+ if (pg->renderer->ops.finalize) {
+ pg->renderer->ops.finalize(d);
+ }
+
+ qemu_mutex_destroy(&pg->lock);
+}
+
+int nv2a_get_framebuffer_surface(void)
+{
+ NV2AState *d = g_nv2a;
+
+ if (d->pgraph.renderer->ops.get_framebuffer_surface) {
+ return d->pgraph.renderer->ops.get_framebuffer_surface(d);
+ }
+
+ return 0;
+}
+
+void nv2a_set_surface_scale_factor(unsigned int scale)
+{
+ NV2AState *d = g_nv2a;
+
+ if (d->pgraph.renderer->ops.set_surface_scale_factor) {
+ d->pgraph.renderer->ops.set_surface_scale_factor(d, scale);
+ }
+}
+
+unsigned int nv2a_get_surface_scale_factor(void)
+{
+ NV2AState *d = g_nv2a;
+
+ if (d->pgraph.renderer->ops.get_surface_scale_factor) {
+ return d->pgraph.renderer->ops.get_surface_scale_factor(d);
+ }
+
+ return 1;
+}
+
+#define METHOD_ADDR(gclass, name) \
+ gclass ## _ ## name
+#define METHOD_ADDR_TO_INDEX(x) ((x)>>2)
+#define METHOD_NAME_STR(gclass, name) \
+ tostring(gclass ## _ ## name)
+#define METHOD_FUNC_NAME(gclass, name) \
+ pgraph_ ## gclass ## _ ## name ## _handler
+#define METHOD_HANDLER_ARG_DECL \
+ NV2AState *d, PGRAPHState *pg, \
+ unsigned int subchannel, unsigned int method, \
+ uint32_t parameter, uint32_t *parameters, \
+ size_t num_words_available, size_t *num_words_consumed, bool inc
+#define METHOD_HANDLER_ARGS \
+ d, pg, subchannel, method, parameter, parameters, \
+ num_words_available, num_words_consumed, inc
+#define DEF_METHOD_PROTO(gclass, name) \
+ static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL)
+
+#define DEF_METHOD(gclass, name) \
+ DEF_METHOD_PROTO(gclass, name);
+#define DEF_METHOD_RANGE(gclass, name, range) \
+ DEF_METHOD_PROTO(gclass, name);
+#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* Drop */
+#define DEF_METHOD_CASE_4(gclass, name, stride) \
+ DEF_METHOD_PROTO(gclass, name);
+#include "methods.h"
+#undef DEF_METHOD
+#undef DEF_METHOD_RANGE
+#undef DEF_METHOD_CASE_4_OFFSET
+#undef DEF_METHOD_CASE_4
+
+typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL);
+static const struct {
+ uint32_t base;
+ const char *name;
+ MethodFunc handler;
+} pgraph_kelvin_methods[0x800] = {
+#define DEF_METHOD(gclass, name) \
+ [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \
+ { \
+ METHOD_ADDR(gclass, name), \
+ METHOD_NAME_STR(gclass, name), \
+ METHOD_FUNC_NAME(gclass, name), \
+ },
+#define DEF_METHOD_RANGE(gclass, name, range) \
+ [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \
+ ... METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \
+ { \
+ METHOD_ADDR(gclass, name), \
+ METHOD_NAME_STR(gclass, name), \
+ METHOD_FUNC_NAME(gclass, name), \
+ },
+#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \
+ [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \
+ { \
+ METHOD_ADDR(gclass, name), \
+ METHOD_NAME_STR(gclass, name), \
+ METHOD_FUNC_NAME(gclass, name), \
+ }, \
+ [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \
+ { \
+ METHOD_ADDR(gclass, name), \
+ METHOD_NAME_STR(gclass, name), \
+ METHOD_FUNC_NAME(gclass, name), \
+ }, \
+ [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \
+ { \
+ METHOD_ADDR(gclass, name), \
+ METHOD_NAME_STR(gclass, name), \
+ METHOD_FUNC_NAME(gclass, name), \
+ }, \
+ [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \
+ { \
+ METHOD_ADDR(gclass, name), \
+ METHOD_NAME_STR(gclass, name), \
+ METHOD_FUNC_NAME(gclass, name), \
+ },
+#define DEF_METHOD_CASE_4(gclass, name, stride) \
+ DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride)
+#include "methods.h"
+#undef DEF_METHOD
+#undef DEF_METHOD_RANGE
+#undef DEF_METHOD_CASE_4_OFFSET
+#undef DEF_METHOD_CASE_4
+};
+
+#define METHOD_RANGE_END_NAME(gclass, name) \
+ pgraph_ ## gclass ## _ ## name ## __END
+#define DEF_METHOD(gclass, name) \
+ static const size_t METHOD_RANGE_END_NAME(gclass, name) = \
+ METHOD_ADDR(gclass, name) + 4;
+#define DEF_METHOD_RANGE(gclass, name, range) \
+ static const size_t METHOD_RANGE_END_NAME(gclass, name) = \
+ METHOD_ADDR(gclass, name) + 4*range;
+#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */
+#define DEF_METHOD_CASE_4(gclass, name, stride) \
+ static const size_t METHOD_RANGE_END_NAME(gclass, name) = \
+ METHOD_ADDR(gclass, name) + 4*stride;
+#include "methods.h"
+#undef DEF_METHOD
+#undef DEF_METHOD_RANGE
+#undef DEF_METHOD_CASE_4_OFFSET
+#undef DEF_METHOD_CASE_4
+
+static void pgraph_method_log(unsigned int subchannel,
+ unsigned int graphics_class,
+ unsigned int method, uint32_t parameter)
+{
+ const char *method_name = "?";
+ static unsigned int last = 0;
+ static unsigned int count = 0;
+
+ if (last == NV097_ARRAY_ELEMENT16 && method != last) {
+ method_name = "NV097_ARRAY_ELEMENT16";
+ trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last,
+ method_name, count);
+ }
+
+ if (method != NV097_ARRAY_ELEMENT16) {
+ uint32_t base = method;
+ switch (graphics_class) {
+ case NV_KELVIN_PRIMITIVE: {
+ int idx = METHOD_ADDR_TO_INDEX(method);
+ if (idx < ARRAY_SIZE(pgraph_kelvin_methods) &&
+ pgraph_kelvin_methods[idx].handler) {
+ method_name = pgraph_kelvin_methods[idx].name;
+ base = pgraph_kelvin_methods[idx].base;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ uint32_t offset = method - base;
+ trace_nv2a_pgraph_method(subchannel, graphics_class, method,
+ method_name, offset, parameter);
+ }
+
+ if (method == last) {
+ count++;
+ } else {
+ count = 0;
+ }
+ last = method;
+}
+
+static void pgraph_method_inc(MethodFunc handler, uint32_t end,
+ METHOD_HANDLER_ARG_DECL)
+{
+ if (!inc) {
+ handler(METHOD_HANDLER_ARGS);
+ return;
+ }
+ size_t count = MIN(num_words_available, (end - method) / 4);
+ for (size_t i = 0; i < count; i++) {
+ parameter = ldl_le_p(parameters + i);
+ if (i) {
+ pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method,
+ parameter);
+ }
+ handler(METHOD_HANDLER_ARGS);
+ method += 4;
+ }
+ *num_words_consumed = count;
+}
+
+static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL)
+{
+ if (inc) {
+ handler(METHOD_HANDLER_ARGS);
+ return;
+ }
+
+ for (size_t i = 0; i < num_words_available; i++) {
+ parameter = ldl_le_p(parameters + i);
+ if (i) {
+ pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method,
+ parameter);
+ }
+ handler(METHOD_HANDLER_ARGS);
+ }
+ *num_words_consumed = num_words_available;
+}
+
+#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int)
+#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int)
+#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name)
+
+#define DEF_METHOD_INC(gclass, name) \
+ DEF_METHOD_INT(gclass, name); \
+ DEF_METHOD(gclass, name) \
+ { \
+ pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \
+ METHOD_RANGE_END_NAME(gclass, name), \
+ METHOD_HANDLER_ARGS); \
+ } \
+ DEF_METHOD_INT(gclass, name)
+
+#define DEF_METHOD_NON_INC(gclass, name) \
+ DEF_METHOD_INT(gclass, name); \
+ DEF_METHOD(gclass, name) \
+ { \
+ pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \
+ METHOD_HANDLER_ARGS); \
+ } \
+ DEF_METHOD_INT(gclass, name)
+
+int pgraph_method(NV2AState *d, unsigned int subchannel,
+ unsigned int method, uint32_t parameter,
+ uint32_t *parameters, size_t num_words_available,
+ size_t max_lookahead_words, bool inc)
+{
+ int num_processed = 1;
+
+ PGRAPHState *pg = &d->pgraph;
+
+ bool channel_valid =
+ PG_GET_MASK(NV_PGRAPH_CTX_CONTROL, NV_PGRAPH_CTX_CONTROL_CHID);
+ assert(channel_valid);
+
+ ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d;
+ ImageBlitState *image_blit = &pg->image_blit;
+ BetaState *beta = &pg->beta;
+
+ assert(subchannel < 8);
+
+ if (method == NV_SET_OBJECT) {
+ assert(parameter < memory_region_size(&d->ramin));
+ uint8_t *obj_ptr = d->ramin_ptr + parameter;
+
+ uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr);
+ uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4));
+ uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8));
+ uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12));
+ uint32_t ctx_5 = parameter;
+
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4, ctx_1);
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4, ctx_2);
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4, ctx_3);
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4, ctx_4);
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4, ctx_5);
+ }
+
+ // is this right?
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH1,
+ pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4));
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH2,
+ pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4));
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH3,
+ pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4));
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH4,
+ pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4));
+ pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH5,
+ pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4));
+
+ uint32_t graphics_class = PG_GET_MASK(NV_PGRAPH_CTX_SWITCH1,
+ NV_PGRAPH_CTX_SWITCH1_GRCLASS);
+
+ pgraph_method_log(subchannel, graphics_class, method, parameter);
+
+ if (subchannel != 0) {
+ // catches context switching issues on xbox d3d
+ assert(graphics_class != 0x97);
+ }
+
+ /* ugly switch for now */
+ switch (graphics_class) {
+ case NV_BETA: {
+ switch (method) {
+ case NV012_SET_OBJECT:
+ beta->object_instance = parameter;
+ break;
+ case NV012_SET_BETA:
+ if (parameter & 0x80000000) {
+ beta->beta = 0;
+ } else {
+ // The parameter is a signed fixed-point number with a sign bit
+ // and 31 fractional bits. Note that negative values are clamped
+ // to 0, and only 8 fractional bits are actually implemented in
+ // hardware.
+ beta->beta = parameter & 0x7f800000;
+ }
+ break;
+ default:
+ goto unhandled;
+ }
+ break;
+ }
+ case NV_CONTEXT_PATTERN: {
+ switch (method) {
+ case NV044_SET_MONOCHROME_COLOR0:
+ pgraph_reg_w(pg, NV_PGRAPH_PATT_COLOR0, parameter);
+ break;
+ default:
+ goto unhandled;
+ }
+ break;
+ }
+ case NV_CONTEXT_SURFACES_2D: {
+ switch (method) {
+ case NV062_SET_OBJECT:
+ context_surfaces_2d->object_instance = parameter;
+ break;
+ case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE:
+ context_surfaces_2d->dma_image_source = parameter;
+ break;
+ case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN:
+ context_surfaces_2d->dma_image_dest = parameter;
+ break;
+ case NV062_SET_COLOR_FORMAT:
+ context_surfaces_2d->color_format = parameter;
+ break;
+ case NV062_SET_PITCH:
+ context_surfaces_2d->source_pitch = parameter & 0xFFFF;
+ context_surfaces_2d->dest_pitch = parameter >> 16;
+ break;
+ case NV062_SET_OFFSET_SOURCE:
+ context_surfaces_2d->source_offset = parameter & 0x07FFFFFF;
+ break;
+ case NV062_SET_OFFSET_DESTIN:
+ context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF;
+ break;
+ default:
+ goto unhandled;
+ }
+ break;
+ }
+ case NV_IMAGE_BLIT: {
+ switch (method) {
+ case NV09F_SET_OBJECT:
+ image_blit->object_instance = parameter;
+ break;
+ case NV09F_SET_CONTEXT_SURFACES:
+ image_blit->context_surfaces = parameter;
+ break;
+ case NV09F_SET_OPERATION:
+ image_blit->operation = parameter;
+ break;
+ case NV09F_CONTROL_POINT_IN:
+ image_blit->in_x = parameter & 0xFFFF;
+ image_blit->in_y = parameter >> 16;
+ break;
+ case NV09F_CONTROL_POINT_OUT:
+ image_blit->out_x = parameter & 0xFFFF;
+ image_blit->out_y = parameter >> 16;
+ break;
+ case NV09F_SIZE:
+ image_blit->width = parameter & 0xFFFF;
+ image_blit->height = parameter >> 16;
+
+ if (image_blit->width && image_blit->height) {
+ d->pgraph.renderer->ops.image_blit(d);
+ }
+ break;
+ default:
+ goto unhandled;
+ }
+ break;
+ }
+ case NV_KELVIN_PRIMITIVE: {
+ MethodFunc handler =
+ pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler;
+ if (handler == NULL) {
+ goto unhandled;
+ }
+ size_t num_words_consumed = 1;
+ handler(d, pg, subchannel, method, parameter, parameters,
+ num_words_available, &num_words_consumed, inc);
+
+ /* Squash repeated BEGIN,DRAW_ARRAYS,END */
+ #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd))
+ #define LAP(i, prm) (parameters[i*2+2] == (prm))
+ #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm))
+
+ if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) &&
+ pg->inline_elements_length == 0 &&
+ pg->draw_arrays_length <
+ (ARRAY_SIZE(pg->draw_arrays_start) - 1) &&
+ LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) &&
+ LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) &&
+ LAM(2, NV097_DRAW_ARRAYS)) {
+ num_words_consumed += 4;
+ pg->draw_arrays_prevent_connect = true;
+ }
+
+ #undef LAM
+ #undef LAP
+ #undef LAMP
+
+ num_processed = num_words_consumed;
+ break;
+ }
+ default:
+ goto unhandled;
+ }
+
+ return num_processed;
+
+unhandled:
+ trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class,
+ method, parameter);
+ return num_processed;
+}
+
+DEF_METHOD(NV097, SET_OBJECT)
+{
+ pg->kelvin.object_instance = parameter;
+}
+
+DEF_METHOD(NV097, NO_OPERATION)
+{
+ /* The bios uses nop as a software method call -
+ * it seems to expect a notify interrupt if the parameter isn't 0.
+ * According to a nouveau guy it should still be a nop regardless
+ * of the parameter. It's possible a debug register enables this,
+ * but nothing obvious sticks out. Weird.
+ */
+ if (parameter == 0) {
+ return;
+ }
+
+ unsigned channel_id =
+ PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID);
+
+ assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR));
+
+ PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_CHID,
+ channel_id);
+ PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_SUBCH,
+ subchannel);
+ PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_MTHD,
+ method);
+ pgraph_reg_w(pg, NV_PGRAPH_TRAPPED_DATA_LOW, parameter);
+ pgraph_reg_w(pg, NV_PGRAPH_NSOURCE,
+ NV_PGRAPH_NSOURCE_NOTIFICATION); /* TODO: check this */
+ pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR;
+ pg->waiting_for_nop = true;
+
+ qemu_mutex_unlock(&pg->lock);
+ qemu_mutex_lock_iothread();
+ nv2a_update_irq(d);
+ qemu_mutex_unlock_iothread();
+ qemu_mutex_lock(&pg->lock);
+}
+
+DEF_METHOD(NV097, WAIT_FOR_IDLE)
+{
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+}
+
+DEF_METHOD(NV097, SET_FLIP_READ)
+{
+ PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_READ_3D,
+ parameter);
+}
+
+DEF_METHOD(NV097, SET_FLIP_WRITE)
+{
+ PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D,
+ parameter);
+}
+
+DEF_METHOD(NV097, SET_FLIP_MODULO)
+{
+ PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_MODULO_3D,
+ parameter);
+}
+
+DEF_METHOD(NV097, FLIP_INCREMENT_WRITE)
+{
+ uint32_t old =
+ PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D);
+
+ PG_SET_MASK(NV_PGRAPH_SURFACE,
+ NV_PGRAPH_SURFACE_WRITE_3D,
+ (PG_GET_MASK(NV_PGRAPH_SURFACE,
+ NV_PGRAPH_SURFACE_WRITE_3D)+1)
+ % PG_GET_MASK(NV_PGRAPH_SURFACE,
+ NV_PGRAPH_SURFACE_MODULO_3D) );
+
+ uint32_t new =
+ PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D);
+
+ trace_nv2a_pgraph_flip_increment_write(old, new);
+ pg->frame_time++;
+}
+
+DEF_METHOD(NV097, FLIP_STALL)
+{
+ trace_nv2a_pgraph_flip_stall();
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+ d->pgraph.renderer->ops.flip_stall(d);
+ nv2a_profile_flip_stall();
+ pg->waiting_for_flip = true;
+}
+
+// TODO: these should be loading the dma objects from ramin here?
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES)
+{
+ pg->dma_notifies = parameter;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_A)
+{
+ pg->dma_a = parameter;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_B)
+{
+ pg->dma_b = parameter;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE)
+{
+ pg->dma_state = parameter;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR)
+{
+ /* try to get any straggling draws in before the surface's changed :/ */
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+
+ pg->dma_color = parameter;
+ pg->surface_color.buffer_dirty = true;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA)
+{
+ pg->dma_zeta = parameter;
+ pg->surface_zeta.buffer_dirty = true;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A)
+{
+ pg->dma_vertex_a = parameter;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B)
+{
+ pg->dma_vertex_b = parameter;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE)
+{
+ pg->dma_semaphore = parameter;
+}
+
+DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT)
+{
+ d->pgraph.renderer->ops.process_pending_reports(d);
+
+ pg->dma_report = parameter;
+}
+
+DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL)
+{
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+
+ pg->surface_shape.clip_x =
+ GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X);
+ pg->surface_shape.clip_width =
+ GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH);
+}
+
+DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL)
+{
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+
+ pg->surface_shape.clip_y =
+ GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y);
+ pg->surface_shape.clip_height =
+ GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT);
+}
+
+DEF_METHOD(NV097, SET_SURFACE_FORMAT)
+{
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+
+ pg->surface_shape.color_format =
+ GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR);
+ pg->surface_shape.zeta_format =
+ GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA);
+ pg->surface_shape.anti_aliasing =
+ GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING);
+ pg->surface_shape.log_width =
+ GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH);
+ pg->surface_shape.log_height =
+ GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT);
+
+ int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE);
+ if (surface_type != pg->surface_type) {
+ pg->surface_type = surface_type;
+ pg->surface_color.buffer_dirty = true;
+ pg->surface_zeta.buffer_dirty = true;
+ }
+}
+
+DEF_METHOD(NV097, SET_SURFACE_PITCH)
+{
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+ unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR);
+ unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA);
+
+ pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch);
+ pg->surface_color.pitch = color_pitch;
+
+ pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch);
+ pg->surface_zeta.pitch = zeta_pitch;
+}
+
+DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET)
+{
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+ pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter);
+ pg->surface_color.offset = parameter;
+}
+
+DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET)
+{
+ d->pgraph.renderer->ops.surface_update(d, false, true, true);
+ pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter);
+ pg->surface_zeta.offset = parameter;
+}
+
+/* Store a combiner alpha input-control word; slot index is derived from the
+ * method offset (one register per combiner stage). */
+DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW)
+{
+    int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAI0 + slot * 4, parameter);
+}
+
+/* Store the first combiner specular/fog control word. */
+DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG0, parameter);
+}
+
+/* Store the second combiner specular/fog control word. */
+DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG1, parameter);
+}
+
+/* Store a texture address-mode word; methods are spaced 64 bytes apart
+ * (one per texture stage), registers 4 bytes apart. */
+DEF_METHOD(NV097, SET_TEXTURE_ADDRESS)
+{
+    int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64;
+    pgraph_reg_w(pg, NV_PGRAPH_TEXADDRESS0 + slot * 4, parameter);
+}
+
+/* Decode CONTROL0: stencil write enable, depth-buffer format and
+ * perspective-correct Z, and fan them out to the PGRAPH control
+ * registers. Surface state is flushed first since the Z format affects
+ * surface interpretation. */
+DEF_METHOD(NV097, SET_CONTROL0)
+{
+    d->pgraph.renderer->ops.surface_update(d, false, true, true);
+
+    bool stencil_write_enable =
+        parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE;
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE,
+                stencil_write_enable);
+
+    uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT);
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format);
+
+    bool z_perspective =
+        parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE;
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE,
+                z_perspective);
+}
+
+/* Unpack the color-material word: four 2-bit source selectors packed at
+ * bits 0/2/4/6 (emission, ambient, diffuse, specular) into CSV0_C. */
+DEF_METHOD(NV097, SET_COLOR_MATERIAL)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_EMISSION,
+                (parameter >> 0) & 3);
+    PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_AMBIENT,
+                (parameter >> 2) & 3);
+    PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_DIFFUSE,
+                (parameter >> 4) & 3);
+    PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_SPECULAR,
+                (parameter >> 6) & 3);
+}
+
+/* Translate the NV097 fog mode value to its NV_PGRAPH_CONTROL_3 encoding.
+ * Unknown values are discarded (previously `mode` was read uninitialized
+ * in NDEBUG builds when the assert compiled away). */
+DEF_METHOD(NV097, SET_FOG_MODE)
+{
+    /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */
+    unsigned int mode;
+    switch (parameter) {
+    case NV097_SET_FOG_MODE_V_LINEAR:
+        mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break;
+    case NV097_SET_FOG_MODE_V_EXP:
+        mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break;
+    case NV097_SET_FOG_MODE_V_EXP2:
+        mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break;
+    case NV097_SET_FOG_MODE_V_EXP_ABS:
+        mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break;
+    case NV097_SET_FOG_MODE_V_EXP2_ABS:
+        mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break;
+    case NV097_SET_FOG_MODE_V_LINEAR_ABS:
+        mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break;
+    default:
+        assert(false);
+        return; /* discard; avoids reading uninitialized mode with NDEBUG */
+    }
+    PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOG_MODE,
+                mode);
+}
+
+/* Translate the NV097 fog generation mode to its CSV0_D encoding.
+ * Unknown values are discarded (previously `mode` was read uninitialized
+ * in NDEBUG builds when the assert compiled away). */
+DEF_METHOD(NV097, SET_FOG_GEN_MODE)
+{
+    unsigned int mode;
+    switch (parameter) {
+    case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA:
+        mode = NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break;
+    case NV097_SET_FOG_GEN_MODE_V_RADIAL:
+        mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break;
+    case NV097_SET_FOG_GEN_MODE_V_PLANAR:
+        mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break;
+    case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR:
+        mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break;
+    case NV097_SET_FOG_GEN_MODE_V_FOG_X:
+        mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break;
+    default:
+        assert(false);
+        return; /* discard; avoids reading uninitialized mode with NDEBUG */
+    }
+    PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGGENMODE, mode);
+}
+
+/* Toggle fog in the CONTROL_3 register. */
+DEF_METHOD(NV097, SET_FOG_ENABLE)
+{
+    /*
+      FIXME: There is also:
+        PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGENABLE,
+             parameter);
+    */
+    PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOGENABLE,
+                parameter);
+}
+
+/* Store the fog color, swizzling ABGR (method) to ARGB (PGRAPH). */
+DEF_METHOD(NV097, SET_FOG_COLOR)
+{
+    /* PGRAPH channels are ARGB, parameter channels are ABGR */
+    uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED);
+    uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN);
+    uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE);
+    uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA);
+    PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_RED, red);
+    PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_GREEN, green);
+    PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_BLUE, blue);
+    PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_ALPHA, alpha);
+}
+
+/* Select the window-clip mode (inclusive/exclusive). */
+DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter);
+}
+
+/* Store a horizontal window-clip extent. NOTE(review): the value is
+ * written to the addressed slot AND all higher slots (broadcast to the
+ * remaining clip rectangles) — presumably mirrors hardware behavior;
+ * confirm against hardware docs. */
+DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL)
+{
+    int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4;
+    for (; slot < 8; ++slot) {
+        pgraph_reg_w(pg, NV_PGRAPH_WINDOWCLIPX0 + slot * 4, parameter);
+    }
+}
+
+/* Vertical counterpart of SET_WINDOW_CLIP_HORIZONTAL; same broadcast. */
+DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL)
+{
+    int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4;
+    for (; slot < 8; ++slot) {
+        pgraph_reg_w(pg, NV_PGRAPH_WINDOWCLIPY0 + slot * 4, parameter);
+    }
+}
+
+/* The following handlers each mirror a single on/off (or small enum)
+ * method value into the corresponding PGRAPH register field. */
+
+DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_BLEND_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EN, parameter);
+}
+
+DEF_METHOD(NV097, SET_CULL_FACE_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_CULLENABLE,
+                parameter);
+}
+
+DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZENABLE,
+                parameter);
+}
+
+DEF_METHOD(NV097, SET_DITHER_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_LIGHTING_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_LIGHTING,
+                parameter);
+}
+
+/* Point params affect both the vertex path (CSV0_D) and the raster path
+ * (CONTROL_3), so both fields are updated. */
+DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_POINTPARAMSENABLE,
+                parameter);
+    PG_SET_MASK(NV_PGRAPH_CONTROL_3,
+                NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_SKIN_MODE)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_SKIN,
+                parameter);
+}
+
+DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_1,
+                NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, parameter);
+}
+
+DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter);
+}
+
+/* Alpha test comparison function (low 4 bits are the function select). */
+DEF_METHOD(NV097, SET_ALPHA_FUNC)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF);
+}
+
+/* Alpha test reference value. */
+DEF_METHOD(NV097, SET_ALPHA_REF)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_ALPHAREF, parameter);
+}
+
+/* Map the NV097 blend source factor to the NV_PGRAPH_BLEND encoding;
+ * unknown values are logged and discarded. */
+DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR)
+{
+    unsigned int factor;
+    switch (parameter) {
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR:
+        factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA:
+        factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA:
+        factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR:
+        factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE:
+        factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR:
+        factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA:
+        factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA:
+        factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break;
+    default:
+        NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter);
+        return; /* discard */
+    }
+    PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_SFACTOR, factor);
+}
+
+/* Map the NV097 blend destination factor to the NV_PGRAPH_BLEND encoding;
+ * unknown values are logged and discarded. */
+DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR)
+{
+    unsigned int factor;
+    switch (parameter) {
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR:
+        factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA:
+        factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA:
+        factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR:
+        factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE:
+        factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR:
+        factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA:
+        factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break;
+    case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA:
+        factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break;
+    default:
+        NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter);
+        return; /* discard */
+    }
+    PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_DFACTOR, factor);
+}
+
+/* Constant blend color, stored verbatim. */
+DEF_METHOD(NV097, SET_BLEND_COLOR)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_BLENDCOLOR, parameter);
+}
+
+/* Map the NV097 blend equation to the NV_PGRAPH_BLEND_EQN encoding
+ * (0..6); unknown values are logged and discarded. */
+DEF_METHOD(NV097, SET_BLEND_EQUATION)
+{
+    unsigned int equation;
+    switch (parameter) {
+    case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT:
+        equation = 0; break;
+    case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT:
+        equation = 1; break;
+    case NV097_SET_BLEND_EQUATION_V_FUNC_ADD:
+        equation = 2; break;
+    case NV097_SET_BLEND_EQUATION_V_MIN:
+        equation = 3; break;
+    case NV097_SET_BLEND_EQUATION_V_MAX:
+        equation = 4; break;
+    case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED:
+        equation = 5; break;
+    case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED:
+        equation = 6; break;
+    default:
+        NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter);
+        return; /* discard */
+    }
+    PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EQN, equation);
+}
+
+/* Depth comparison function (low 4 bits are the function select). */
+DEF_METHOD(NV097, SET_DEPTH_FUNC)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZFUNC,
+                parameter & 0xF);
+}
+
+/* Update the per-channel color write mask. The previous write-enable
+ * state is latched into write_enabled_cache before the mask changes. */
+DEF_METHOD(NV097, SET_COLOR_MASK)
+{
+    pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg);
+
+    bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE;
+    bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE;
+    bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE;
+    bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE;
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha);
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red);
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green);
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue);
+}
+
+/* Update the depth write mask; latch the previous zeta write-enable
+ * state first, mirroring SET_COLOR_MASK. */
+DEF_METHOD(NV097, SET_DEPTH_MASK)
+{
+    pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg);
+
+    PG_SET_MASK(NV_PGRAPH_CONTROL_0,
+                NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter);
+}
+
+/* Stencil write mask. */
+DEF_METHOD(NV097, SET_STENCIL_MASK)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_1,
+                NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter);
+}
+
+/* Stencil comparison function (low 4 bits). */
+DEF_METHOD(NV097, SET_STENCIL_FUNC)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_1,
+                NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF);
+}
+
+/* Stencil reference value. */
+DEF_METHOD(NV097, SET_STENCIL_FUNC_REF)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_1,
+                NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter);
+}
+
+/* Stencil read (compare) mask. */
+DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_1,
+                NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter);
+}
+
+/* Map an NV097 stencil op value to its NV_PGRAPH_CONTROL_2 encoding.
+ * Unknown values assert in debug builds and fall back to KEEP in NDEBUG
+ * builds (previously `op` was returned uninitialized — UB). */
+static unsigned int kelvin_map_stencil_op(uint32_t parameter)
+{
+    unsigned int op;
+    switch (parameter) {
+    case NV097_SET_STENCIL_OP_V_KEEP:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break;
+    case NV097_SET_STENCIL_OP_V_ZERO:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break;
+    case NV097_SET_STENCIL_OP_V_REPLACE:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break;
+    case NV097_SET_STENCIL_OP_V_INCRSAT:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break;
+    case NV097_SET_STENCIL_OP_V_DECRSAT:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break;
+    case NV097_SET_STENCIL_OP_V_INVERT:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break;
+    case NV097_SET_STENCIL_OP_V_INCR:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break;
+    case NV097_SET_STENCIL_OP_V_DECR:
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break;
+    default:
+        assert(false);
+        op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; /* NDEBUG fallback */
+        break;
+    }
+    return op;
+}
+
+/* Stencil op for the stencil-test-fail case. */
+DEF_METHOD(NV097, SET_STENCIL_OP_FAIL)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_2,
+                NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL,
+                kelvin_map_stencil_op(parameter));
+}
+
+/* Stencil op for the depth-test-fail case. */
+DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_2,
+                NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL,
+                kelvin_map_stencil_op(parameter));
+}
+
+/* Stencil op for the depth-test-pass case. */
+DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS)
+{
+    PG_SET_MASK(NV_PGRAPH_CONTROL_2,
+                NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS,
+                kelvin_map_stencil_op(parameter));
+}
+
+/* Select flat or smooth shading in CONTROL_3; values other than FLAT and
+ * SMOOTH are silently discarded. */
+DEF_METHOD(NV097, SET_SHADE_MODE)
+{
+    unsigned int shade_mode;
+    if (parameter == NV097_SET_SHADE_MODE_V_FLAT) {
+        shade_mode = NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT;
+    } else if (parameter == NV097_SET_SHADE_MODE_V_SMOOTH) {
+        shade_mode = NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;
+    } else {
+        /* Discard */
+        return;
+    }
+    PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE,
+                shade_mode);
+}
+
+/* Polygon offset slope scale factor, stored verbatim (raw float bits). */
+DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETFACTOR, parameter);
+}
+
+/* Polygon offset constant bias, stored verbatim (raw float bits). */
+DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETBIAS, parameter);
+}
+
+/* Map an NV097 polygon mode to its SETUPRASTER face-mode encoding.
+ * Unknown values assert in debug builds and fall back to FILL in NDEBUG
+ * builds (previously `mode` was returned uninitialized — UB). */
+static unsigned int kelvin_map_polygon_mode(uint32_t parameter)
+{
+    unsigned int mode;
+    switch (parameter) {
+    case NV097_SET_FRONT_POLYGON_MODE_V_POINT:
+        mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break;
+    case NV097_SET_FRONT_POLYGON_MODE_V_LINE:
+        mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break;
+    case NV097_SET_FRONT_POLYGON_MODE_V_FILL:
+        mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break;
+    default:
+        assert(false);
+        mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; /* NDEBUG fallback */
+        break;
+    }
+    return mode;
+}
+
+/* Front-face polygon rasterization mode (point/line/fill). */
+DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_FRONTFACEMODE,
+                kelvin_map_polygon_mode(parameter));
+}
+
+/* Back-face polygon rasterization mode (point/line/fill). */
+DEF_METHOD(NV097, SET_BACK_POLYGON_MODE)
+{
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER,
+                NV_PGRAPH_SETUPRASTER_BACKFACEMODE,
+                kelvin_map_polygon_mode(parameter));
+}
+
+/* Near depth-clip bound, stored verbatim. */
+DEF_METHOD(NV097, SET_CLIP_MIN)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMIN, parameter);
+}
+
+/* Far depth-clip bound, stored verbatim. */
+DEF_METHOD(NV097, SET_CLIP_MAX)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMAX, parameter);
+}
+
+/* Select which faces are culled. Unknown values assert in debug builds
+ * and are discarded in NDEBUG builds (previously `face` was read
+ * uninitialized when the assert compiled away). */
+DEF_METHOD(NV097, SET_CULL_FACE)
+{
+    unsigned int face;
+    switch (parameter) {
+    case NV097_SET_CULL_FACE_V_FRONT:
+        face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break;
+    case NV097_SET_CULL_FACE_V_BACK:
+        face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break;
+    case NV097_SET_CULL_FACE_V_FRONT_AND_BACK:
+        face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break;
+    default:
+        assert(false);
+        return; /* discard; avoids reading uninitialized face with NDEBUG */
+    }
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_CULLCTRL, face);
+}
+
+/* Select the front-face winding order; unknown values are logged and
+ * discarded. */
+DEF_METHOD(NV097, SET_FRONT_FACE)
+{
+    bool ccw;
+    switch (parameter) {
+    case NV097_SET_FRONT_FACE_V_CW:
+        ccw = false; break;
+    case NV097_SET_FRONT_FACE_V_CCW:
+        ccw = true; break;
+    default:
+        NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter);
+        return; /* discard */
+    }
+    PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_FRONTFACE,
+                ccw ? 1 : 0);
+}
+
+/* Toggle normal renormalization in the vertex path. */
+DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE,
+                parameter);
+}
+
+/* Store one component of the material emission color into lighting
+ * context A and mark that row dirty. */
+DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION)
+{
+    int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4;
+    // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct
+    pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter;
+    pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true;
+}
+
+/* Store the material alpha. NOTE(review): reinterprets the raw method
+ * word as a float via a pointer cast — strict-aliasing UB by the letter
+ * of the standard, but this is the pattern used throughout this file. */
+DEF_METHOD(NV097, SET_MATERIAL_ALPHA)
+{
+    pg->material_alpha = *(float*)&parameter;
+}
+
+/* Bitmask of enabled light sources. */
+DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_LIGHTS, parameter);
+}
+
+/* Map an NV097 texgen mode to its CSV1 encoding, asserting the mode is
+ * legal for the given coordinate channel (0=S, 1=T, 2=R, 3=Q): sphere
+ * map only applies to S/T, reflection and normal map not to Q. Unknown
+ * values assert in debug builds and fall back to DISABLE in NDEBUG
+ * builds (previously `texgen` was returned uninitialized — UB). */
+static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel)
+{
+    assert(channel < 4);
+    unsigned int texgen;
+    switch (parameter) {
+    case NV097_SET_TEXGEN_S_DISABLE:
+        texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break;
+    case NV097_SET_TEXGEN_S_EYE_LINEAR:
+        texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break;
+    case NV097_SET_TEXGEN_S_OBJECT_LINEAR:
+        texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break;
+    case NV097_SET_TEXGEN_S_SPHERE_MAP:
+        assert(channel < 2);
+        texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break;
+    case NV097_SET_TEXGEN_S_REFLECTION_MAP:
+        assert(channel < 3);
+        texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break;
+    case NV097_SET_TEXGEN_S_NORMAL_MAP:
+        assert(channel < 3);
+        texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break;
+    default:
+        assert(false);
+        texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; /* NDEBUG fallback */
+        break;
+    }
+    return texgen;
+}
+
+/* The four SET_TEXGEN_{S,T,R,Q} handlers share one layout: methods are
+ * 16 bytes apart per texture stage; stages 0/1 live in CSV1_A, stages
+ * 2/3 in CSV1_B, with odd stages using the T1 field masks. */
+DEF_METHOD(NV097, SET_TEXGEN_S)
+{
+    int slot = (method - NV097_SET_TEXGEN_S) / 16;
+    unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                  : NV_PGRAPH_CSV1_B;
+    unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S
+                                   : NV_PGRAPH_CSV1_A_T0_S;
+    PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 0));
+}
+
+DEF_METHOD(NV097, SET_TEXGEN_T)
+{
+    int slot = (method - NV097_SET_TEXGEN_T) / 16;
+    unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                  : NV_PGRAPH_CSV1_B;
+    unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T
+                                   : NV_PGRAPH_CSV1_A_T0_T;
+    PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 1));
+}
+
+DEF_METHOD(NV097, SET_TEXGEN_R)
+{
+    int slot = (method - NV097_SET_TEXGEN_R) / 16;
+    unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                  : NV_PGRAPH_CSV1_B;
+    unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R
+                                   : NV_PGRAPH_CSV1_A_T0_R;
+    PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 2));
+}
+
+DEF_METHOD(NV097, SET_TEXGEN_Q)
+{
+    int slot = (method - NV097_SET_TEXGEN_Q) / 16;
+    unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                  : NV_PGRAPH_CSV1_B;
+    unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_Q
+                                   : NV_PGRAPH_CSV1_A_T0_Q;
+    PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 3));
+}
+
+/* Per-stage texture matrix enable flag. */
+DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE)
+{
+    int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4;
+    pg->texture_matrix_enable[slot] = parameter;
+}
+
+/* Point size, stored into the POINTSIZE register field. */
+DEF_METHOD(NV097, SET_POINT_SIZE)
+{
+    PG_SET_MASK(NV_PGRAPH_POINTSIZE, NV097_SET_POINT_SIZE_V, parameter);
+}
+
+/* Matrix upload handlers: each stores one 32-bit element into the vertex
+ * shader constant array at the matrix's XFCTX base row and marks the row
+ * dirty. 4x4 matrices occupy 4 consecutive rows of 4 elements. */
+DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX)
+{
+    int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4;
+    // pg->projection_matrix[slot] = *(float*)&parameter;
+    unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4;
+    pg->vsh_constants[row][slot%4] = parameter;
+    pg->vsh_constants_dirty[row] = true;
+}
+
+/* Model-view matrices: several matrices (skinning), each 8 rows apart. */
+DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX)
+{
+    int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4;
+    unsigned int matnum = slot / 16;
+    unsigned int entry = slot % 16;
+    unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4;
+    pg->vsh_constants[row][entry % 4] = parameter;
+    pg->vsh_constants_dirty[row] = true;
+}
+
+/* Inverse model-view matrices; same layout as SET_MODEL_VIEW_MATRIX. */
+DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX)
+{
+    int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4;
+    unsigned int matnum = slot / 16;
+    unsigned int entry = slot % 16;
+    unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4;
+    pg->vsh_constants[row][entry % 4] = parameter;
+    pg->vsh_constants_dirty[row] = true;
+}
+
+/* Composite (model-view-projection) matrix. */
+DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX)
+{
+    int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4;
+    unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4;
+    pg->vsh_constants[row][slot%4] = parameter;
+    pg->vsh_constants_dirty[row] = true;
+}
+
+/* Per-stage texture matrices, 8 rows apart per texture unit. */
+DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX)
+{
+    int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4;
+    unsigned int tex = slot / 16;
+    unsigned int entry = slot % 16;
+    unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4;
+    pg->vsh_constants[row][entry%4] = parameter;
+    pg->vsh_constants_dirty[row] = true;
+}
+
+/* Fog parameters: the first two go to FOGPARAM registers; all three are
+ * mirrored into the lighting-context fog row. */
+DEF_METHOD_INC(NV097, SET_FOG_PARAMS)
+{
+    int slot = (method - NV097_SET_FOG_PARAMS) / 4;
+    if (slot < 2) {
+        pgraph_reg_w(pg, NV_PGRAPH_FOGPARAM0 + slot*4, parameter);
+    } else {
+        /* FIXME: No idea where slot = 2 is */
+    }
+
+    pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter;
+    pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true;
+}
+
+/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */
+/* Stores one texgen plane coefficient into the vertex shader constants
+ * (per-texture-unit TG matrix rows, 8 rows apart). */
+DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S)
+{
+    int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4;
+    unsigned int tex = slot / 16;
+    unsigned int entry = slot % 16;
+    unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4;
+    pg->vsh_constants[row][entry%4] = parameter;
+    pg->vsh_constants_dirty[row] = true;
+}
+
+/* Texgen reference frame (view model) selector. */
+DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_TEXGEN_REF,
+                parameter);
+}
+
+/* Fog plane coefficients, stored into the FOG constant row. */
+DEF_METHOD_INC(NV097, SET_FOG_PLANE)
+{
+    int slot = (method - NV097_SET_FOG_PLANE) / 4;
+    pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter;
+    pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true;
+}
+
+/* Scene ambient color into lighting context A (row choice unverified,
+ * see existing `??` marker). */
+DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR)
+{
+    int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4;
+    // ??
+    pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter;
+    pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true;
+}
+
+/* Viewport offset vector, stored into the VPOFF constant row. */
+DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET)
+{
+    int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4;
+    pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter;
+    pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true;
+}
+
+/* Point attenuation parameters, kept as floats in a side array. */
+DEF_METHOD_INC(NV097, SET_POINT_PARAMS)
+{
+    int slot = (method - NV097_SET_POINT_PARAMS) / 4;
+    pg->point_params[slot] = *(float *)&parameter; /* FIXME: Where? */
+}
+
+/* Eye position, stored into the EYEP constant row. */
+DEF_METHOD_INC(NV097, SET_EYE_POSITION)
+{
+    int slot = (method - NV097_SET_EYE_POSITION) / 4;
+    pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter;
+    pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true;
+}
+
+/* Per-stage combiner constant color 0. */
+DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0)
+{
+    int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR0 + slot*4, parameter);
+}
+
+/* Per-stage combiner constant color 1. */
+DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1)
+{
+    int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR1 + slot*4, parameter);
+}
+
+/* Per-stage combiner alpha output-control word. */
+DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW)
+{
+    int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAO0 + slot*4, parameter);
+}
+
+/* Per-stage combiner color input-control word. */
+DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW)
+{
+    int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORI0 + slot*4, parameter);
+}
+
+/* Viewport scale vector, stored into the VPSCL constant row. */
+DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE)
+{
+    int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4;
+    pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter;
+    pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true;
+}
+
+/* Upload one 32-bit word of a vertex program instruction. Instructions
+ * are 4 words; the hardware load pointer advances after the 4th word. */
+DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM)
+{
+    int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4;
+
+    int program_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET,
+                                   NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR);
+
+    assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
+    pg->program_data[program_load][slot%4] = parameter;
+    pg->program_data_dirty = true;
+
+    /* Advance the load pointer once a full instruction has arrived */
+    if (slot % 4 == 3) {
+        PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET,
+                    NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1);
+    }
+}
+
+/* Upload one component of a vertex shader constant (4 components per
+ * constant); marks the constant dirty only on an actual value change and
+ * advances the constant load pointer after the 4th component. */
+DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT)
+{
+    int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4;
+    int const_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET,
+                                 NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR);
+
+    assert(const_load < NV2A_VERTEXSHADER_CONSTANTS);
+    // VertexShaderConstant *constant = &pg->constants[const_load];
+    pg->vsh_constants_dirty[const_load] |=
+        (parameter != pg->vsh_constants[const_load][slot%4]);
+    pg->vsh_constants[const_load][slot%4] = parameter;
+
+    if (slot % 4 == 3) {
+        PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET,
+                    NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1);
+    }
+}
+
+/* Immediate-mode vertex position (x,y,z); w is forced to 1.0. The third
+ * component completes the vertex and emits it to the inline buffer. */
+DEF_METHOD_INC(NV097, SET_VERTEX3F)
+{
+    int slot = (method - NV097_SET_VERTEX3F) / 4;
+    VertexAttribute *attribute =
+        &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION];
+    pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION);
+    attribute->inline_value[slot] = *(float*)&parameter;
+    attribute->inline_value[3] = 1.0f;
+    if (slot == 2) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+/* Handles NV097_SET_BACK_LIGHT_* */
+/* Back-light properties are 16 words (64 bytes) per light; `part` is the
+ * word offset within one light's block, `slot` the light index. The
+ * switch dispatches on the method address of word 0 of each property. */
+DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR)
+{
+    int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4;
+    unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16;
+    slot /= 16; /* [Light index] */
+    assert(slot < 8);
+    switch(part * 4) {
+    case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ...
+            NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8:
+        part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4;
+        pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter;
+        pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true;
+        break;
+    case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ...
+            NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8:
+        part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4;
+        pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter;
+        pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true;
+        break;
+    case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ...
+            NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8:
+        part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4;
+        pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter;
+        pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true;
+        break;
+    default:
+        assert(false);
+        break;
+    }
+}
+
+/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */
+/* Front-light properties are 32 words (128 bytes) per light; same
+ * dispatch scheme as SET_BACK_LIGHT_AMBIENT_COLOR. Colors and spot
+ * parameters go to the lighting-context arrays; positions, directions,
+ * half vectors and attenuation are kept as floats in side arrays. */
+DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR)
+{
+    int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4;
+    unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32;
+    slot /= 32; /* [Light index] */
+    assert(slot < 8);
+    switch(part * 4) {
+    case NV097_SET_LIGHT_AMBIENT_COLOR ...
+            NV097_SET_LIGHT_AMBIENT_COLOR + 8:
+        part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4;
+        pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter;
+        pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true;
+        break;
+    case NV097_SET_LIGHT_DIFFUSE_COLOR ...
+            NV097_SET_LIGHT_DIFFUSE_COLOR + 8:
+        part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4;
+        pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter;
+        pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true;
+        break;
+    case NV097_SET_LIGHT_SPECULAR_COLOR ...
+            NV097_SET_LIGHT_SPECULAR_COLOR + 8:
+        part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4;
+        pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter;
+        pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true;
+        break;
+    case NV097_SET_LIGHT_LOCAL_RANGE:
+        pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter;
+        pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true;
+        break;
+    case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ...
+            NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8:
+        part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4;
+        pg->light_infinite_half_vector[slot][part] = *(float*)&parameter;
+        break;
+    case NV097_SET_LIGHT_INFINITE_DIRECTION ...
+            NV097_SET_LIGHT_INFINITE_DIRECTION + 8:
+        part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4;
+        pg->light_infinite_direction[slot][part] = *(float*)&parameter;
+        break;
+    case NV097_SET_LIGHT_SPOT_FALLOFF ...
+            NV097_SET_LIGHT_SPOT_FALLOFF + 8:
+        part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4;
+        pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter;
+        pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true;
+        break;
+    case NV097_SET_LIGHT_SPOT_DIRECTION ...
+            NV097_SET_LIGHT_SPOT_DIRECTION + 12:
+        part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4;
+        pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter;
+        pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true;
+        break;
+    case NV097_SET_LIGHT_LOCAL_POSITION ...
+            NV097_SET_LIGHT_LOCAL_POSITION + 8:
+        part -= NV097_SET_LIGHT_LOCAL_POSITION / 4;
+        pg->light_local_position[slot][part] = *(float*)&parameter;
+        break;
+    case NV097_SET_LIGHT_LOCAL_ATTENUATION ...
+            NV097_SET_LIGHT_LOCAL_ATTENUATION + 8:
+        part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4;
+        pg->light_local_attenuation[slot][part] = *(float*)&parameter;
+        break;
+    default:
+        assert(false);
+        break;
+    }
+}
+
+/* Immediate-mode vertex position (x,y,z,w); the fourth component
+ * completes the vertex and emits it to the inline buffer. */
+DEF_METHOD_INC(NV097, SET_VERTEX4F)
+{
+    int slot = (method - NV097_SET_VERTEX4F) / 4;
+    VertexAttribute *attribute =
+        &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION];
+    pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION);
+    attribute->inline_value[slot] = *(float*)&parameter;
+    if (slot == 3) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+/* Immediate-mode normal from packed signed 16-bit shorts, two components
+ * per word, normalized to [-1, 1]. The MAX clamps -32768/32767 to -1. */
+DEF_METHOD_INC(NV097, SET_NORMAL3S)
+{
+    int slot = (method - NV097_SET_NORMAL3S) / 4;
+    unsigned int part = slot % 2;
+    VertexAttribute *attribute =
+        &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL];
+    pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL);
+    int16_t val = parameter & 0xFFFF;
+    attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f);
+    val = parameter >> 16;
+    attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f);
+}
+
+#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \
+ do { \
+ int slot = (method - (command)) / 4; \
+ unsigned int part = slot % 2; \
+ VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
+ pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
+ attribute->inline_value[part * 2 + 0] = \
+ (float)(int16_t)(parameter & 0xFFFF); \
+ attribute->inline_value[part * 2 + 1] = \
+ (float)(int16_t)(parameter >> 16); \
+ } while (0)
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S)
+{
+ SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S)
+{
+ SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S)
+{
+ SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S)
+{
+ SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3);
+}
+
+#undef SET_VERTEX_ATTRIBUTE_4S
+
+#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \
+ do { \
+ VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
+ pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
+ attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \
+ attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \
+ attribute->inline_value[2] = 0.0f; \
+ attribute->inline_value[3] = 1.0f; \
+ } while (0)
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S)
+{
+ SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S)
+{
+ SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S)
+{
+ SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S)
+{
+ SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3);
+}
+
+#undef SET_VERTEX_ATRIBUTE_TEX_2S
+
+/* Store one float component of a 3-component color. The method offset
+ * selects which of r/g/b (slot 0..2) this word carries; alpha is forced to
+ * 1.0 on every write. The parameter is reinterpreted as an IEEE float
+ * (restored garbled "&parameter" text). */
+#define SET_VERTEX_COLOR_3F(command, attr_index) \
+    do { \
+        int slot = (method - (command)) / 4; \
+        VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
+        pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
+        attribute->inline_value[slot] = *(float *)&parameter; \
+        attribute->inline_value[3] = 1.0f; \
+    } while (0)
+
+DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F)
+{
+    SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE);
+}
+
+DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F)
+{
+    SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR);
+}
+
+#undef SET_VERTEX_COLOR_3F
+
+/* Store one float component of a multi-word float attribute. The method
+ * offset selects the component index; no other components are touched.
+ * The 32-bit parameter is reinterpreted as an IEEE float (restored garbled
+ * "&parameter" text). */
+#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \
+    do { \
+        int slot = (method - (command)) / 4; \
+        VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
+        pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
+        attribute->inline_value[slot] = *(float *)&parameter; \
+    } while (0)
+
+DEF_METHOD_INC(NV097, SET_NORMAL3F)
+{
+    SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL);
+}
+
+DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F)
+{
+    SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE);
+}
+
+DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F)
+{
+    SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F,
+                           NV2A_VERTEX_ATTR_SPECULAR);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F)
+{
+    SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F)
+{
+    SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F)
+{
+    SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F)
+{
+    SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3);
+}
+
+#undef SET_VERTEX_ATTRIBUTE_F
+
+/* Store one float texture coordinate (s at slot 0, t at slot 1, selected by
+ * the method offset); r and q default to 0.0 and 1.0 on every write. The
+ * parameter is reinterpreted as an IEEE float (restored garbled "&parameter"
+ * text). Note: macro name spelling fixed (ATRIBUTE -> ATTRIBUTE). */
+#define SET_VERTEX_ATTRIBUTE_TEX_2F(command, attr_index) \
+    do { \
+        int slot = (method - (command)) / 4; \
+        VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
+        pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
+        attribute->inline_value[slot] = *(float *)&parameter; \
+        attribute->inline_value[2] = 0.0f; \
+        attribute->inline_value[3] = 1.0f; \
+    } while (0)
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F)
+{
+    SET_VERTEX_ATTRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F,
+                                NV2A_VERTEX_ATTR_TEXTURE0);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F)
+{
+    SET_VERTEX_ATTRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F,
+                                NV2A_VERTEX_ATTR_TEXTURE1);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F)
+{
+    SET_VERTEX_ATTRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F,
+                                NV2A_VERTEX_ATTR_TEXTURE2);
+}
+
+DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F)
+{
+    SET_VERTEX_ATTRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F,
+                                NV2A_VERTEX_ATTR_TEXTURE3);
+}
+
+#undef SET_VERTEX_ATTRIBUTE_TEX_2F
+
+/* Unpack a packed 4x-unsigned-byte color (byte lanes 0..3 = component
+ * 0..3, LSB first) into normalized [0, 1] floats. All four components
+ * arrive in a single 32-bit parameter, so no method-offset decode is
+ * required; the previously unused `command` macro argument was dropped. */
+#define SET_VERTEX_ATTRIBUTE_4UB(attr_index) \
+    do { \
+        VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \
+        pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \
+        attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \
+        attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \
+        attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \
+        attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \
+    } while (0)
+
+DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB)
+{
+    SET_VERTEX_ATTRIBUTE_4UB(NV2A_VERTEX_ATTR_DIFFUSE);
+}
+
+DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB)
+{
+    SET_VERTEX_ATTRIBUTE_4UB(NV2A_VERTEX_ATTR_SPECULAR);
+}
+
+#undef SET_VERTEX_ATTRIBUTE_4UB
+
+/* Configure the data type, component count, and stride of one vertex data
+ * array. There is one method per attribute slot, 4 bytes apart. */
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4;
+    VertexAttribute *attr = &pg->vertex_attributes[slot];
+    attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE);
+    attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE);
+    attr->stride = GET_MASK(parameter,
+                            NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE);
+
+    NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n",
+                 attr->format, attr->count, attr->stride);
+
+    /* attr->size is the per-component size in bytes for the chosen type. */
+    switch (attr->format) {
+    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
+        attr->size = 1;
+        assert(attr->count == 4);
+        break;
+    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
+        attr->size = 1;
+        break;
+    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
+        attr->size = 2;
+        break;
+    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
+        attr->size = 4;
+        break;
+    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
+        attr->size = 2;
+        break;
+    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
+        /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
+        attr->size = 4;
+        assert(attr->count == 1);
+        break;
+    default:
+        fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
+        assert(false);
+        break;
+    }
+
+    /* Track which slots hold CMP-packed data; they need decompression before
+     * being handed to the host GPU. */
+    if (attr->format == NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP) {
+        pg->compressed_attrs |= (1 << slot);
+    } else {
+        pg->compressed_attrs &= ~(1 << slot);
+    }
+}
+
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4;
+
+    /* Bit 31 selects the DMA context (vertex A vs B); the low 31 bits are
+     * the byte offset within that context. */
+    pg->vertex_attributes[slot].dma_select = parameter & 0x80000000;
+    pg->vertex_attributes[slot].offset = parameter & 0x7fffffff;
+}
+
+DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE)
+{
+    PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP_ENABLE,
+                parameter);
+}
+
+DEF_METHOD(NV097, SET_LOGIC_OP)
+{
+    /* Logic op is a 4-bit code (GL-style 0x1500..0x150F, low nibble kept). */
+    PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP,
+                parameter & 0xF);
+}
+
+DEF_METHOD(NV097, CLEAR_REPORT_VALUE)
+{
+    d->pgraph.renderer->ops.clear_report_value(d);
+}
+
+DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE)
+{
+    pg->zpass_pixel_count_enable = parameter;
+}
+
+DEF_METHOD(NV097, GET_REPORT)
+{
+    /* Only occlusion-query style reports are implemented. */
+    uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE);
+    assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
+
+    d->pgraph.renderer->ops.get_report(d, parameter);
+}
+
+DEF_METHOD_INC(NV097, SET_EYE_DIRECTION)
+{
+    int slot = (method - NV097_SET_EYE_DIRECTION) / 4;
+    pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] = parameter;
+    pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true;
+}
+
+/* Begin/end bracket for a primitive batch. END flushes the accumulated
+ * inline/array data to the renderer; anything else selects the primitive
+ * type for the batch that follows. */
+DEF_METHOD(NV097, SET_BEGIN_END)
+{
+    if (parameter == NV097_SET_BEGIN_END_OP_END) {
+        if (pg->primitive_mode == PRIM_TYPE_INVALID) {
+            NV2A_DPRINTF("End without Begin!\n");
+        }
+        nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS);
+        d->pgraph.renderer->ops.draw_end(d);
+        pgraph_reset_inline_buffers(pg);
+        pg->primitive_mode = PRIM_TYPE_INVALID;
+    } else {
+        if (pg->primitive_mode != PRIM_TYPE_INVALID) {
+            NV2A_DPRINTF("Begin without End!\n");
+        }
+        assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON);
+        pg->primitive_mode = parameter;
+        pgraph_reset_inline_buffers(pg);
+        d->pgraph.renderer->ops.draw_begin(d);
+    }
+}
+
+/* Texture state methods: each texture unit occupies a 64-byte method block,
+ * hence the "/ 64" slot decode. Writes latch into PGRAPH registers and mark
+ * the unit dirty so the renderer rebinds it on next use. */
+DEF_METHOD(NV097, SET_TEXTURE_OFFSET)
+{
+    int slot = (method - NV097_SET_TEXTURE_OFFSET) / 64;
+    pgraph_reg_w(pg, NV_PGRAPH_TEXOFFSET0 + slot * 4, parameter);
+    pg->texture_dirty[slot] = true;
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_FORMAT)
+{
+    int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64;
+
+    /* Unpack the format word and re-pack it into the TEXFMT register. */
+    bool dma_select =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2;
+    bool cubemap =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE);
+    unsigned int border_source =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE);
+    unsigned int dimensionality =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY);
+    unsigned int color_format =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR);
+    unsigned int levels =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS);
+    unsigned int log_width =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U);
+    unsigned int log_height =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V);
+    unsigned int log_depth =
+        GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P);
+
+    unsigned int reg = NV_PGRAPH_TEXFMT0 + slot * 4;
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_COLOR, color_format);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth);
+
+    pg->texture_dirty[slot] = true;
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_CONTROL0)
+{
+    int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64;
+    pgraph_reg_w(pg, NV_PGRAPH_TEXCTL0_0 + slot*4, parameter);
+    pg->texture_dirty[slot] = true;
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_CONTROL1)
+{
+    int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64;
+    pgraph_reg_w(pg, NV_PGRAPH_TEXCTL1_0 + slot*4, parameter);
+    pg->texture_dirty[slot] = true;
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_FILTER)
+{
+    int slot = (method - NV097_SET_TEXTURE_FILTER) / 64;
+    pgraph_reg_w(pg, NV_PGRAPH_TEXFILTER0 + slot * 4, parameter);
+    pg->texture_dirty[slot] = true;
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT)
+{
+    int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64;
+    pgraph_reg_w(pg, NV_PGRAPH_TEXIMAGERECT0 + slot * 4, parameter);
+    pg->texture_dirty[slot] = true;
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_PALETTE)
+{
+    int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64;
+
+    bool dma_select =
+        GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1;
+    unsigned int length =
+        GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH);
+    unsigned int offset =
+        GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET);
+
+    unsigned int reg = NV_PGRAPH_TEXPALETTE0 + slot * 4;
+    PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length);
+    PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset);
+
+    pg->texture_dirty[slot] = true;
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR)
+{
+    int slot = (method - NV097_SET_TEXTURE_BORDER_COLOR) / 64;
+    pgraph_reg_w(pg, NV_PGRAPH_BORDERCOLOR0 + slot * 4, parameter);
+}
+
+/* Bump-environment matrix. Stage 0 has no bump mapping, so its 16 method
+ * slots are discarded. After the -16 rebase, each remaining stage spans 16
+ * slot units (methods are 64 bytes apart / 4), so slot/4 yields 0, 4, or 8
+ * -- a register-aligned per-stage byte offset added to the swizzled base. */
+DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT)
+{
+    int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4;
+    if (slot < 16) {
+        /* discard */
+        return;
+    }
+
+    slot -= 16;
+    /* The hardware stores the matrix entries in 00,01,11,10 order. */
+    const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01,
+                             NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 };
+    pgraph_reg_w(pg, swizzle[slot % 4] + slot / 4, parameter);
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE)
+{
+    int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64;
+    if (slot == 0) {
+        /* discard */
+        return;
+    }
+
+    slot--;
+    pgraph_reg_w(pg, NV_PGRAPH_BUMPSCALE1 + slot * 4, parameter);
+}
+
+DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET)
+{
+    int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64;
+    if (slot == 0) {
+        /* discard */
+        return;
+    }
+
+    slot--;
+    pgraph_reg_w(pg, NV_PGRAPH_BUMPOFFSET1 + slot * 4, parameter);
+}
+
+/* Convert the most recent batched DRAW_ARRAYS range into explicit inline
+ * elements so that subsequent ARRAY_ELEMENT* methods can append to the same
+ * index stream. */
+static void pgraph_expand_draw_arrays(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    uint32_t start = pg->draw_arrays_start[pg->draw_arrays_length - 1];
+    uint32_t count = pg->draw_arrays_count[pg->draw_arrays_length - 1];
+
+    /* Render any previously squashed DRAW_ARRAYS calls. This case would be
+     * triggered if a set of BEGIN+DA+END triplets is followed by the
+     * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. */
+    if (pg->draw_arrays_length > 1) {
+        d->pgraph.renderer->ops.flush_draw(d);
+        pgraph_reset_inline_buffers(pg);
+    }
+    assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH);
+    for (unsigned int i = 0; i < count; i++) {
+        pg->inline_elements[pg->inline_elements_length++] = start + i;
+    }
+
+    pgraph_reset_draw_arrays(pg);
+}
+
+/* Debug aid: warn (but do not stop) when vertex data arrives outside a
+ * BEGIN/END bracket. */
+void pgraph_check_within_begin_end_block(PGRAPHState *pg)
+{
+    if (pg->primitive_mode == PRIM_TYPE_INVALID) {
+        NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n");
+    }
+}
+
+/* Append two 16-bit vertex indices (packed low/high in one parameter). */
+DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16)
+{
+    pgraph_check_within_begin_end_block(pg);
+
+    /* Index methods cannot be mixed with batched DRAW_ARRAYS ranges, so
+     * expand any pending ranges into the element stream first. */
+    if (pg->draw_arrays_length) {
+        pgraph_expand_draw_arrays(d);
+    }
+
+    assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
+    pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF;
+    pg->inline_elements[pg->inline_elements_length++] = parameter >> 16;
+}
+
+/* Append one 32-bit vertex index. */
+DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32)
+{
+    pgraph_check_within_begin_end_block(pg);
+
+    if (pg->draw_arrays_length) {
+        pgraph_expand_draw_arrays(d);
+    }
+
+    assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
+    pg->inline_elements[pg->inline_elements_length++] = parameter;
+}
+
+/* Queue a contiguous vertex range for drawing. Ranges are batched and, when
+ * possible, merged with the previous range to reduce draw calls. */
+DEF_METHOD(NV097, DRAW_ARRAYS)
+{
+    pgraph_check_within_begin_end_block(pg);
+
+    int32_t start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX);
+    int32_t count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT) + 1;
+
+    /* If explicit indices are already queued, fold this range into them. */
+    if (pg->inline_elements_length) {
+        /* FIXME: Determine HW behavior for overflow case. */
+        assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH);
+        assert(!pg->draw_arrays_prevent_connect);
+
+        for (unsigned int i = 0; i < count; i++) {
+            pg->inline_elements[pg->inline_elements_length++] = start + i;
+        }
+        return;
+    }
+
+    pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start);
+    pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count);
+
+    assert(pg->draw_arrays_length < ARRAY_SIZE(pg->draw_arrays_start));
+
+    /* Attempt to connect contiguous primitives */
+    if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) {
+        unsigned int last_start =
+            pg->draw_arrays_start[pg->draw_arrays_length - 1];
+        int32_t *last_count =
+            &pg->draw_arrays_count[pg->draw_arrays_length - 1];
+        if (start == (last_start + *last_count)) {
+            *last_count += count;
+            return;
+        }
+    }
+
+    pg->draw_arrays_start[pg->draw_arrays_length] = start;
+    pg->draw_arrays_count[pg->draw_arrays_length] = count;
+    pg->draw_arrays_length++;
+    pg->draw_arrays_prevent_connect = false;
+}
+
+/* Append one raw 32-bit word of interleaved vertex data. */
+DEF_METHOD_NON_INC(NV097, INLINE_ARRAY)
+{
+    pgraph_check_within_begin_end_block(pg);
+    assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH);
+    pg->inline_array[pg->inline_array_length++] = parameter;
+}
+
+DEF_METHOD_INC(NV097, SET_EYE_VECTOR)
+{
+    int slot = (method - NV097_SET_EYE_VECTOR) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_EYEVEC0 + slot * 4, parameter);
+}
+
+/* Write one float component of a 2F attribute; two methods per attribute
+ * (x then y). A write to component y of attribute 0 (position) completes a
+ * vertex. The parameter is reinterpreted as an IEEE float (restored garbled
+ * "&parameter" text). */
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4;
+    unsigned int part = slot % 2;
+    slot /= 2;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+    attribute->inline_value[part] = *(float *)&parameter;
+    /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? */
+    attribute->inline_value[2] = 0.0;
+    attribute->inline_value[3] = 1.0;
+    if ((slot == 0) && (part == 1)) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+/* Write one float component of a 4F attribute; four methods per attribute.
+ * A write to component w of attribute 0 (position) completes a vertex. The
+ * parameter is reinterpreted as an IEEE float (restored garbled "&parameter"
+ * text). */
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4;
+    unsigned int part = slot % 4;
+    slot /= 4;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+    attribute->inline_value[part] = *(float *)&parameter;
+    if ((slot == 0) && (part == 3)) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+/* Write a packed pair of signed 16-bit values to an attribute; completes a
+ * vertex when attribute 0 (position) is written. */
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA2S) / 4;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+    attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF);
+    attribute->inline_value[1] = (float)(int16_t)(parameter >> 16);
+    attribute->inline_value[2] = 0.0;
+    attribute->inline_value[3] = 1.0;
+    if (slot == 0) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+/* Write a packed 4x-unsigned-byte value, normalized to [0, 1]. */
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+    attribute->inline_value[0] = (parameter & 0xFF) / 255.0;
+    attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0;
+    attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0;
+    attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0;
+    if (slot == 0) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+/* Write half of a 4x-signed-16-bit attribute (two components per method);
+ * completes a vertex when the high half of attribute 0 is written. */
+DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M)
+{
+    int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4;
+    unsigned int part = slot % 2;
+    slot /= 2;
+    VertexAttribute *attribute = &pg->vertex_attributes[slot];
+    pgraph_allocate_inline_buffer_vertices(pg, slot);
+
+    attribute->inline_value[part * 2 + 0] = (float)(int16_t)(parameter & 0xFFFF);
+    attribute->inline_value[part * 2 + 1] = (float)(int16_t)(parameter >> 16);
+    if ((slot == 0) && (part == 1)) {
+        pgraph_finish_inline_buffer_vertex(pg);
+    }
+}
+
+DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_SEMAPHOREOFFSET, parameter);
+}
+
+/* Flush the backend, then write the release value into guest memory at the
+ * previously latched semaphore offset within the semaphore DMA object. */
+DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE)
+{
+    d->pgraph.renderer->ops.surface_update(d, false, true, true);
+
+    //qemu_mutex_unlock(&d->pgraph.lock);
+    //qemu_mutex_lock_iothread();
+
+    uint32_t semaphore_offset = pgraph_reg_r(pg, NV_PGRAPH_SEMAPHOREOFFSET);
+
+    hwaddr semaphore_dma_len;
+    uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore,
+                                                   &semaphore_dma_len);
+    assert(semaphore_offset < semaphore_dma_len);
+    semaphore_data += semaphore_offset;
+
+    /* Little-endian 32-bit store, as seen by the guest CPU. */
+    stl_le_p((uint32_t*)semaphore_data, parameter);
+
+    //qemu_mutex_lock(&d->pgraph.lock);
+    //qemu_mutex_unlock_iothread();
+}
+
+/* Select whether out-of-range depth values are culled or clamped. */
+DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL)
+{
+    switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) {
+    case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL:
+        PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE,
+                    NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN,
+                    NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL);
+        break;
+    case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP:
+        PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE,
+                    NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN,
+                    NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP);
+        break;
+    default:
+        /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */
+        assert(!"Invalid zclamp value");
+        break;
+    }
+}
+
+DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL)
+{
+    PG_SET_MASK(NV_PGRAPH_ANTIALIASING, NV_PGRAPH_ANTIALIASING_ENABLE,
+                GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE));
+    // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001)
+}
+
+/* Simple state latches: each method stores its parameter (or a field of it)
+ * into the corresponding PGRAPH register. */
+DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_ZSTENCILCLEARVALUE, parameter);
+}
+
+DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_COLORCLEARVALUE, parameter);
+}
+
+DEF_METHOD(NV097, CLEAR_SURFACE)
+{
+    d->pgraph.renderer->ops.clear_surface(d, parameter);
+}
+
+DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTX, parameter);
+}
+
+DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTY, parameter);
+}
+
+DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR)
+{
+    int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_SPECFOGFACTOR0 + slot*4, parameter);
+}
+
+DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_SHADERCLIPMODE, parameter);
+}
+
+DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW)
+{
+    int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4;
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORO0 + slot*4, parameter);
+}
+
+DEF_METHOD(NV097, SET_COMBINER_CONTROL)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_COMBINECTL, parameter);
+}
+
+DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_SHADOWZSLOPETHRESHOLD, parameter);
+    /* Only the default threshold (+inf bit pattern) is supported. */
+    assert(parameter == 0x7F800000); /* FIXME: Unimplemented */
+}
+
+DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC)
+{
+    PG_SET_MASK(NV_PGRAPH_SHADOWCTL, NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC,
+                parameter);
+}
+
+DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM)
+{
+    pgraph_reg_w(pg, NV_PGRAPH_SHADERPROG, parameter);
+}
+
+DEF_METHOD(NV097, SET_DOT_RGBMAPPING)
+{
+    PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFF,
+                GET_MASK(parameter, 0xFFF));
+}
+
+DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT)
+{
+    PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFFF000,
+                GET_MASK(parameter, 0xFFFF000));
+}
+
+/* Latch one word of the v0 input register used by state shaders. */
+DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA)
+{
+    int slot = (method - NV097_SET_TRANSFORM_DATA) / 4;
+    pg->vertex_state_shader_v0[slot] = parameter;
+}
+
+/* Run a vertex state shader on the CPU (via nv2a_vsh_cpu): parse the
+ * program starting at the given instruction index, execute it against the
+ * current constants with v0 as input, and mark any constants it wrote as
+ * dirty. */
+DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM)
+{
+    unsigned int program_start = parameter;
+    assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
+    Nv2aVshProgram program;
+    Nv2aVshParseResult result = nv2a_vsh_parse_program(
+        &program,
+        pg->program_data[program_start],
+        NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - program_start);
+    assert(result == NV2AVPR_SUCCESS);
+
+    Nv2aVshCPUXVSSExecutionState state_linkage;
+    Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state(
+        &state_linkage, (float*)pg->vsh_constants);
+    memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0));
+
+    nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty);
+
+    nv2a_vsh_program_destroy(&program);
+}
+
+DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE)
+{
+    PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_MODE,
+                GET_MASK(parameter,
+                         NV097_SET_TRANSFORM_EXECUTION_MODE_MODE));
+    PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_RANGE_MODE,
+                GET_MASK(parameter,
+                         NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE));
+}
+
+DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN)
+{
+    pg->enable_vertex_program_write = parameter;
+}
+
+/* Set the instruction index at which subsequent program uploads land. */
+DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD)
+{
+    assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
+    PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET,
+                NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter);
+}
+
+DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START)
+{
+    assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
+    PG_SET_MASK(NV_PGRAPH_CSV0_C,
+                NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter);
+}
+
+/* Set the constant index at which subsequent constant uploads land. */
+DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD)
+{
+    assert(parameter < NV2A_VERTEXSHADER_CONSTANTS);
+    PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET,
+                NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter);
+}
+
+/* Decode the latched COLORCLEARVALUE register into normalized RGBA floats
+ * according to the current surface color format. Unsupported formats fall
+ * back to magenta and abort. (Fix: added missing '\n' to the stderr
+ * message, matching every other diagnostic in this file.) */
+void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4])
+{
+    uint32_t clear_color = pgraph_reg_r(pg, NV_PGRAPH_COLORCLEARVALUE);
+
+    float *r = &rgba[0], *g = &rgba[1], *b = &rgba[2], *a = &rgba[3];
+
+    /* Handle RGB */
+    switch(pg->surface_shape.color_format) {
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5:
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5:
+        *r = ((clear_color >> 10) & 0x1F) / 31.0f;
+        *g = ((clear_color >> 5) & 0x1F) / 31.0f;
+        *b = (clear_color & 0x1F) / 31.0f;
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5:
+        *r = ((clear_color >> 11) & 0x1F) / 31.0f;
+        *g = ((clear_color >> 5) & 0x3F) / 63.0f;
+        *b = (clear_color & 0x1F) / 31.0f;
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8:
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8:
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8:
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8:
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
+        *r = ((clear_color >> 16) & 0xFF) / 255.0f;
+        *g = ((clear_color >> 8) & 0xFF) / 255.0f;
+        *b = (clear_color & 0xFF) / 255.0f;
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8:
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8:
+        /* Xbox D3D doesn't support clearing those */
+    default:
+        *r = 1.0f;
+        *g = 0.0f;
+        *b = 1.0f;
+        fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported\n",
+                pg->surface_shape.color_format);
+        assert(false);
+        break;
+    }
+
+    /* Handle alpha */
+    switch(pg->surface_shape.color_format) {
+    /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we
+     *        also have to clear non-alpha bits with alpha value?
+     *        As GL doesn't own those pixels we'd have to do this on
+     *        our own in xbox memory.
+     */
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8:
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8:
+        *a = ((clear_color >> 24) & 0x7F) / 127.0f;
+        assert(false); /* Untested */
+        break;
+    case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
+        *a = ((clear_color >> 24) & 0xFF) / 255.0f;
+        break;
+    default:
+        *a = 1.0f;
+        break;
+    }
+}
+
+/* Decode the latched ZSTENCILCLEARVALUE register into a normalized depth
+ * value and an integer stencil value, according to the current zeta surface
+ * format. z_format selects float (f16/f24) vs fixed-point interpretation. */
+void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth,
+                                          int *stencil)
+{
+    uint32_t clear_zstencil =
+        pgraph_reg_r(pg, NV_PGRAPH_ZSTENCILCLEARVALUE);
+    *stencil = 0;
+    *depth = 1.0;
+
+    switch (pg->surface_shape.zeta_format) {
+    case NV097_SET_SURFACE_FORMAT_ZETA_Z16: {
+        uint16_t z = clear_zstencil & 0xFFFF;
+        /* FIXME: Remove bit for stencil clear? */
+        if (pg->surface_shape.z_format) {
+            *depth = convert_f16_to_float(z) / f16_max;
+        } else {
+            *depth = z / (float)0xFFFF;
+        }
+        break;
+    }
+    case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: {
+        /* Low byte is stencil, upper 24 bits are depth. */
+        *stencil = clear_zstencil & 0xFF;
+        uint32_t z = clear_zstencil >> 8;
+        if (pg->surface_shape.z_format) {
+            *depth = convert_f24_to_float(z) / f24_max;
+        } else {
+            *depth = z / (float)0xFFFFFF;
+        }
+        break;
+    }
+    default:
+        fprintf(stderr, "Unknown zeta surface format: 0x%x\n",
+                pg->surface_shape.zeta_format);
+        assert(false);
+        break;
+    }
+}
+
+/* Write a GET_REPORT result record (timestamp, value, done flag) into guest
+ * memory at the offset encoded in the method parameter, within the report
+ * DMA object. (Fix: added missing trailing '\n' to the debug printf,
+ * matching the other NV2A_DPRINTF call sites in this file.) */
+void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter,
+                                         uint32_t result)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! */
+    uint32_t done = 0; // FIXME: Check
+
+    hwaddr report_dma_len;
+    uint8_t *report_data =
+        (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len);
+
+    hwaddr offset = GET_MASK(parameter, NV097_GET_REPORT_OFFSET);
+    assert(offset < report_dma_len);
+    report_data += offset;
+
+    /* Record layout: u64 timestamp, u32 result, u32 done (little-endian). */
+    stq_le_p((uint64_t *)&report_data[0], timestamp);
+    stl_le_p((uint32_t *)&report_data[8], result);
+    stl_le_p((uint32_t *)&report_data[12], done);
+
+    NV2A_DPRINTF("Report result %d @%" HWADDR_PRIx "\n", result, offset);
+}
diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h
new file mode 100644
index 0000000000..799e879c06
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/pgraph.h
@@ -0,0 +1,383 @@
+/*
+ * QEMU Geforce NV2A PGRAPH internal definitions
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_H
+#define HW_XBOX_NV2A_PGRAPH_H
+
+#include "xemu-config.h"
+#include "qemu/osdep.h"
+#include "qemu/bitmap.h"
+#include "qemu/units.h"
+#include "qemu/thread.h"
+#include "cpu.h"
+
+#include "shaders.h"
+#include "surface.h"
+#include "util.h"
+
+typedef struct NV2AState NV2AState;
+typedef struct PGRAPHNullState PGRAPHNullState;
+typedef struct PGRAPHGLState PGRAPHGLState;
+typedef struct PGRAPHVkState PGRAPHVkState;
+
+/* Per-slot vertex attribute state: DMA source location, array layout, and
+ * the "inline value" used when data is pushed through immediate methods. */
+typedef struct VertexAttribute {
+    bool dma_select;       /* false = vertex DMA context A, true = B */
+    hwaddr offset;         /* byte offset within the selected DMA object */
+
+    /* inline arrays are packed in order?
+     * Need to pass the offset to converted attributes */
+    unsigned int inline_array_offset;
+
+    float inline_value[4]; /* current immediate-mode value (x,y,z,w) */
+
+    unsigned int format;
+    unsigned int size; /* size of the data type */
+    unsigned int count; /* number of components */
+    uint32_t stride;
+
+    bool needs_conversion;
+
+    float *inline_buffer;  /* per-vertex values accumulated between begin/end */
+    bool inline_buffer_populated;
+} VertexAttribute;
+
+/* Render target (color or zeta) binding state. */
+typedef struct Surface {
+    bool draw_dirty;          /* surface has GPU-side changes not yet flushed */
+    bool buffer_dirty;        /* guest memory changed; surface needs re-upload */
+    bool write_enabled_cache;
+    unsigned int pitch;
+
+    hwaddr offset;
+} Surface;
+
+/* 3D engine (Kelvin/NV097) subchannel state. */
+typedef struct KelvinState {
+    hwaddr object_instance;
+} KelvinState;
+
+/* 2D context-surfaces object state used by image blits. */
+typedef struct ContextSurfaces2DState {
+    hwaddr object_instance;
+    hwaddr dma_image_source;
+    hwaddr dma_image_dest;
+    unsigned int color_format;
+    unsigned int source_pitch, dest_pitch;
+    hwaddr source_offset, dest_offset;
+} ContextSurfaces2DState;
+
+/* Pending image-blit operation parameters. */
+typedef struct ImageBlitState {
+    hwaddr object_instance;
+    hwaddr context_surfaces;
+    unsigned int operation;
+    unsigned int in_x, in_y;
+    unsigned int out_x, out_y;
+    unsigned int width, height;
+} ImageBlitState;
+
+/* Beta (blend factor) object state. */
+typedef struct BetaState {
+    hwaddr object_instance;
+    uint32_t beta;
+} BetaState;
+
+/* Backend interface implemented by each renderer (null, OpenGL, Vulkan).
+ * Registered via pgraph_renderer_register() and selected by config type.
+ * Callbacks a backend does not need may be left NULL by convention of the
+ * caller checking them -- verify at call sites. */
+typedef struct PGRAPHRenderer {
+    CONFIG_DISPLAY_RENDERER type;
+    const char *name;
+    struct {
+        void (*early_context_init)(void);
+        void (*init)(NV2AState *d);
+        void (*init_thread)(NV2AState *d);
+        void (*finalize)(NV2AState *d);
+        void (*clear_report_value)(NV2AState *d);
+        void (*clear_surface)(NV2AState *d, uint32_t parameter);
+        void (*draw_begin)(NV2AState *d);
+        void (*draw_end)(NV2AState *d);
+        void (*flip_stall)(NV2AState *d);
+        void (*flush_draw)(NV2AState *d);
+        void (*get_report)(NV2AState *d, uint32_t parameter);
+        void (*image_blit)(NV2AState *d);
+        void (*pre_savevm_trigger)(NV2AState *d);
+        void (*pre_savevm_wait)(NV2AState *d);
+        void (*pre_shutdown_trigger)(NV2AState *d);
+        void (*pre_shutdown_wait)(NV2AState *d);
+        void (*process_pending)(NV2AState *d);
+        void (*process_pending_reports)(NV2AState *d);
+        void (*surface_flush)(NV2AState *d);
+        void (*surface_update)(NV2AState *d, bool upload, bool color_write, bool zeta_write);
+        void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale);
+        unsigned int (*get_surface_scale_factor)(NV2AState *d);
+        int (*get_framebuffer_surface)(NV2AState *d);
+    } ops;
+} PGRAPHRenderer;
+
+/* Complete PGRAPH engine state: latched registers, per-attribute vertex
+ * state, batching buffers, and the active renderer backend. Guarded by
+ * `lock` when accessed across threads. */
+typedef struct PGRAPHState {
+    QemuMutex lock;
+
+    uint32_t pending_interrupts;
+    uint32_t enabled_interrupts;
+
+    int frame_time;
+    int draw_time;
+
+    /* subchannels state we're not sure the location of... */
+    ContextSurfaces2DState context_surfaces_2d;
+    ImageBlitState image_blit;
+    KelvinState kelvin;
+    BetaState beta;
+
+    hwaddr dma_color, dma_zeta;
+    Surface surface_color, surface_zeta;
+    unsigned int surface_type;
+    SurfaceShape surface_shape;
+    SurfaceShape last_surface_shape;
+
+    struct {
+        int clip_x;
+        int clip_width;
+        int clip_y;
+        int clip_height;
+        int width;
+        int height;
+    } surface_binding_dim; // FIXME: Refactor
+
+    hwaddr dma_a, dma_b;
+    bool texture_dirty[NV2A_MAX_TEXTURES];
+
+    bool texture_matrix_enable[NV2A_MAX_TEXTURES];
+
+    hwaddr dma_state;
+    hwaddr dma_notifies;
+    hwaddr dma_semaphore;
+
+    hwaddr dma_report;
+    hwaddr report_offset;
+    bool zpass_pixel_count_enable;
+
+    hwaddr dma_vertex_a, dma_vertex_b;
+
+    uint32_t primitive_mode;
+
+    bool enable_vertex_program_write; // FIXME: Not used anywhere???
+
+    /* v0 input register for CPU-executed vertex state shaders */
+    uint32_t vertex_state_shader_v0[4];
+    uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
+    bool program_data_dirty;
+
+    uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
+    bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
+
+    /* lighting constant arrays */
+    uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
+    bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
+    uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
+    bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
+    uint32_t ltc1[NV2A_LTC1_COUNT][4];
+    bool ltc1_dirty[NV2A_LTC1_COUNT];
+
+    float material_alpha;
+
+    // should figure out where these are in lighting context
+    float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
+    float light_infinite_direction[NV2A_MAX_LIGHTS][3];
+    float light_local_position[NV2A_MAX_LIGHTS][3];
+    float light_local_attenuation[NV2A_MAX_LIGHTS][3];
+
+    float point_params[8];
+
+    VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
+    /* per-slot flags; bit i corresponds to vertex_attributes[i] */
+    uint16_t compressed_attrs;
+    uint16_t uniform_attrs;
+    uint16_t swizzle_attrs;
+
+    unsigned int inline_array_length;
+    uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
+
+    unsigned int inline_elements_length;
+    uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
+
+    unsigned int inline_buffer_length;
+
+    unsigned int draw_arrays_length;
+    unsigned int draw_arrays_min_start;
+    unsigned int draw_arrays_max_count;
+    /* FIXME: Unknown size, possibly endless, 1250 will do for now */
+    /* Keep in sync with size used in nv2a.c */
+    int32_t draw_arrays_start[1250];
+    int32_t draw_arrays_count[1250];
+    bool draw_arrays_prevent_connect;
+
+    /* Register file, accessed via pgraph_reg_r/pgraph_reg_w by byte offset;
+     * regs_dirty tracks one bit per 32-bit register. */
+    uint32_t regs_[0x2000];
+    DECLARE_BITMAP(regs_dirty, 0x2000 / sizeof(uint32_t));
+
+    bool clearing;
+    bool waiting_for_nop;
+    bool waiting_for_flip;
+    bool waiting_for_context_switch;
+
+    bool flush_pending;
+    QemuEvent flush_complete;
+
+    bool sync_pending;
+    QemuEvent sync_complete;
+
+    unsigned int surface_scale_factor;
+    uint8_t *scale_buf;
+
+    /* Active backend and its private state (discriminated by renderer->type). */
+    const PGRAPHRenderer *renderer;
+    union {
+        PGRAPHNullState *null_renderer_state;
+        PGRAPHGLState *gl_renderer_state;
+        PGRAPHVkState *vk_renderer_state;
+    };
+} PGRAPHState;
+
+void pgraph_init(NV2AState *d);
+void pgraph_init_thread(NV2AState *d);
+void pgraph_destroy(PGRAPHState *pg);
+void pgraph_context_switch(NV2AState *d, unsigned int channel_id);
+int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method,
+ uint32_t parameter, uint32_t *parameters,
+ size_t num_words_available, size_t max_lookahead_words,
+ bool inc);
+void pgraph_check_within_begin_end_block(PGRAPHState *pg);
+
+void *pfifo_thread(void *arg);
+void pfifo_kick(NV2AState *d);
+
+void pgraph_renderer_register(const PGRAPHRenderer *renderer);
+
+// FIXME: Move from here
+
+extern NV2AState *g_nv2a;
+
+// FIXME: Add new function pgraph_is_texture_sampler_active()
+
+static inline uint32_t pgraph_reg_r(PGRAPHState *pg, unsigned int r)
+{
+ assert(r % 4 == 0);
+ return pg->regs_[r];
+}
+
+static inline void pgraph_reg_w(PGRAPHState *pg, unsigned int r, uint32_t v)
+{
+ assert(r % 4 == 0);
+ if (pg->regs_[r] != v) {
+ bitmap_set(pg->regs_dirty, r / sizeof(uint32_t), 1);
+ }
+ pg->regs_[r] = v;
+}
+
+void pgraph_clear_dirty_reg_map(PGRAPHState *pg);
+
+static inline bool pgraph_is_reg_dirty(PGRAPHState *pg, unsigned int reg)
+{
+ return test_bit(reg / sizeof(uint32_t), pg->regs_dirty);
+}
+
+static inline bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage)
+{
+ assert(stage < NV2A_MAX_TEXTURES);
+ uint32_t mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (stage * 5)) & 0x1F;
+ return mode != 0 && mode != 4;// && mode != 0x11 && mode != 0x0a && mode != 0x09 && mode != 5;
+}
+
+static inline bool pgraph_is_texture_enabled(PGRAPHState *pg, int texture_idx)
+{
+ uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + texture_idx*4);
+ return // pgraph_is_texture_stage_active(pg, texture_idx) &&
+ GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE);
+}
+
+static inline bool pgraph_is_texture_format_compressed(PGRAPHState *pg, int color_format)
+{
+ return color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ||
+ color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8 ||
+ color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8;
+}
+
+static inline bool pgraph_color_write_enabled(PGRAPHState *pg)
+{
+ return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & (
+ NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE
+ | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE
+ | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE
+ | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE);
+}
+
+static inline bool pgraph_zeta_write_enabled(PGRAPHState *pg)
+{
+ return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & (
+ NV_PGRAPH_CONTROL_0_ZWRITEENABLE
+ | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE);
+}
+
+static inline void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg,
+ unsigned int *width,
+ unsigned int *height)
+{
+ switch (pg->surface_shape.anti_aliasing) {
+ case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1:
+ break;
+ case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2:
+ if (width) { *width *= 2; }
+ break;
+ case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4:
+ if (width) { *width *= 2; }
+ if (height) { *height *= 2; }
+ break;
+ default:
+ assert(false);
+ break;
+ }
+}
+
+static inline void pgraph_apply_scaling_factor(PGRAPHState *pg,
+ unsigned int *width,
+ unsigned int *height)
+{
+ *width *= pg->surface_scale_factor;
+ *height *= pg->surface_scale_factor;
+}
+
+void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]);
+void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, int *stencil);
+
+/* Vertex */
+void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr);
+void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg);
+void pgraph_reset_inline_buffers(PGRAPHState *pg);
+void pgraph_reset_draw_arrays(PGRAPHState *pg);
+void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data);
+
+/* RDI */
+uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select,
+ unsigned int address);
+void pgraph_rdi_write(PGRAPHState *pg, unsigned int select,
+ unsigned int address, uint32_t val);
+
+static inline void pgraph_argb_pack32_to_rgba_float(uint32_t argb, float *rgba)
+{
+ rgba[0] = ((argb >> 16) & 0xFF) / 255.0f; /* red */
+ rgba[1] = ((argb >> 8) & 0xFF) / 255.0f; /* green */
+ rgba[2] = (argb & 0xFF) / 255.0f; /* blue */
+ rgba[3] = ((argb >> 24) & 0xFF) / 255.0f; /* alpha */
+}
+
+void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, uint32_t result);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/profile.c b/hw/xbox/nv2a/pgraph/profile.c
new file mode 100644
index 0000000000..69a1b5bfbd
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/profile.c
@@ -0,0 +1,74 @@
+/*
+ * QEMU Geforce NV2A profiling helpers
+ *
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../nv2a_int.h"
+
+NV2AStats g_nv2a_stats;
+
+void nv2a_profile_increment(void)
+{
+ int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ const int64_t fps_update_interval = 250000;
+ g_nv2a_stats.last_flip_time = now;
+
+ static int64_t frame_count = 0;
+ frame_count++;
+
+ static int64_t ts = 0;
+ int64_t delta = now - ts;
+ if (delta >= fps_update_interval) {
+ g_nv2a_stats.increment_fps = frame_count * 1000000 / delta;
+ ts = now;
+ frame_count = 0;
+ }
+}
+
+void nv2a_profile_flip_stall(void)
+{
+ int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
+ int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000;
+
+ g_nv2a_stats.frame_working.mspf = render_time;
+ g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] =
+ g_nv2a_stats.frame_working;
+ g_nv2a_stats.frame_ptr =
+ (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES;
+ g_nv2a_stats.frame_count++;
+ memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working));
+}
+
+const char *nv2a_profile_get_counter_name(unsigned int cnt)
+{
+ const char *default_names[NV2A_PROF__COUNT] = {
+ #define _X(x) stringify(x),
+ NV2A_PROF_COUNTERS_XMAC
+ #undef _X
+ };
+
+ assert(cnt < NV2A_PROF__COUNT);
+ return default_names[cnt] + 10; /* 'NV2A_PROF_' */
+}
+
+int nv2a_profile_get_counter_value(unsigned int cnt)
+{
+ assert(cnt < NV2A_PROF__COUNT);
+ unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) %
+ NV2A_PROF_NUM_FRAMES;
+ return g_nv2a_stats.frame_history[idx].counters[cnt];
+}
diff --git a/hw/xbox/nv2a/psh.h b/hw/xbox/nv2a/pgraph/psh.h
similarity index 96%
rename from hw/xbox/nv2a/psh.h
rename to hw/xbox/nv2a/pgraph/psh.h
index 65ef4e43a2..6232a2834a 100644
--- a/hw/xbox/nv2a/psh.h
+++ b/hw/xbox/nv2a/pgraph/psh.h
@@ -20,7 +20,8 @@
#ifndef HW_NV2A_PSH_H
#define HW_NV2A_PSH_H
-#include "shaders_common.h"
+#include <stdbool.h>
+#include <stdint.h>
enum PshAlphaFunc {
ALPHA_FUNC_NEVER,
@@ -51,6 +52,8 @@ enum ConvolutionFilter {
};
typedef struct PshState {
+ bool vulkan;
+
/* fragment shader - register combiner stuff */
uint32_t combiner_control;
uint32_t shader_stage_program;
@@ -67,6 +70,7 @@ typedef struct PshState {
bool compare_mode[4][4];
bool alphakill[4];
enum ConvolutionFilter conv_tex[4];
+ bool tex_x8y24[4];
float border_logical_size[4][3];
float border_inv_real_size[4][3];
@@ -82,6 +86,4 @@ typedef struct PshState {
bool smooth_shading;
} PshState;
-MString *psh_translate(const PshState state);
-
#endif
diff --git a/hw/xbox/nv2a/pgraph/rdi.c b/hw/xbox/nv2a/pgraph/rdi.c
new file mode 100644
index 0000000000..297c7a67c0
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/rdi.c
@@ -0,0 +1,60 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "../nv2a_int.h"
+
+uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select,
+ unsigned int address)
+{
+ uint32_t r = 0;
+ switch(select) {
+ case RDI_INDEX_VTX_CONSTANTS0:
+ case RDI_INDEX_VTX_CONSTANTS1:
+ assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
+ r = pg->vsh_constants[address / 4][3 - address % 4];
+ break;
+ default:
+ fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n",
+ select, address);
+ assert(false);
+ break;
+ }
+ return r;
+}
+
+void pgraph_rdi_write(PGRAPHState *pg, unsigned int select,
+ unsigned int address, uint32_t val)
+{
+ switch(select) {
+ case RDI_INDEX_VTX_CONSTANTS0:
+ case RDI_INDEX_VTX_CONSTANTS1:
+ assert(false); /* Untested */
+ assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
+ pg->vsh_constants_dirty[address / 4] |=
+ (val != pg->vsh_constants[address / 4][3 - address % 4]);
+ pg->vsh_constants[address / 4][3 - address % 4] = val;
+ break;
+ default:
+ NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n",
+ select, address, val);
+ break;
+ }
+}
diff --git a/hw/xbox/nv2a/s3tc.c b/hw/xbox/nv2a/pgraph/s3tc.c
similarity index 71%
rename from hw/xbox/nv2a/s3tc.c
rename to hw/xbox/nv2a/pgraph/s3tc.c
index 454cc43aee..affd058e66 100644
--- a/hw/xbox/nv2a/s3tc.c
+++ b/hw/xbox/nv2a/pgraph/s3tc.c
@@ -1,5 +1,5 @@
/*
- * QEMU texture decompression routines
+ * S3TC Texture Decompression
*
* Copyright (c) 2020 Wilhelm Kovatch
*
@@ -25,13 +25,9 @@
#include "qemu/osdep.h"
#include "s3tc.h"
-static inline void decode_bc1_colors(uint16_t c0,
- uint16_t c1,
- uint8_t r[4],
- uint8_t g[4],
- uint8_t b[4],
- uint8_t a[16],
- bool transparent)
+static void decode_bc1_colors(uint16_t c0, uint16_t c1, uint8_t r[4],
+ uint8_t g[4], uint8_t b[4], uint8_t a[16],
+ bool transparent)
{
r[0] = ((c0 & 0xF800) >> 8) * 0xFF / 0xF8,
g[0] = ((c0 & 0x07E0) >> 3) * 0xFF / 0xFC,
@@ -66,15 +62,10 @@ static inline void decode_bc1_colors(uint16_t c0,
}
}
-static inline void write_block_to_texture(uint8_t *converted_data,
- uint32_t indices,
- int i, int j, int width,
- int z_pos_factor,
- uint8_t r[4],
- uint8_t g[4],
- uint8_t b[4],
- uint8_t a[16],
- bool separate_alpha)
+static void write_block_to_texture(uint8_t *converted_data, uint32_t indices,
+ int i, int j, int width, int z_pos_factor,
+ uint8_t r[4], uint8_t g[4], uint8_t b[4],
+ uint8_t a[16], bool separate_alpha)
{
int x0 = i * 4,
y0 = j * 4;
@@ -89,16 +80,18 @@ static inline void write_block_to_texture(uint8_t *converted_data,
int xy_index = y_index + x - x0;
uint8_t index = (indices >> 2 * xy_index) & 0x03;
uint8_t alpha_index = separate_alpha ? xy_index : index;
- uint32_t color = (r[index] << 24) | (g[index] << 16) | (b[index] << 8) | a[alpha_index];
- *(uint32_t*)(converted_data + (z_plus_y_pos_factor + x) * 4) = color;
+ uint8_t *p = converted_data + (z_plus_y_pos_factor + x) * 4;
+ *p++ = r[index];
+ *p++ = g[index];
+ *p++ = b[index];
+ *p++ = a[alpha_index];
}
}
}
-static inline void decompress_dxt1_block(const uint8_t block_data[8],
- uint8_t *converted_data,
- int i, int j, int width,
- int z_pos_factor)
+static void decompress_dxt1_block(const uint8_t block_data[8],
+ uint8_t *converted_data, int i, int j,
+ int width, int z_pos_factor)
{
uint16_t c0 = ((uint16_t*)block_data)[0],
c1 = ((uint16_t*)block_data)[1];
@@ -111,10 +104,9 @@ static inline void decompress_dxt1_block(const uint8_t block_data[8],
r, g, b, a, false);
}
-static inline void decompress_dxt3_block(const uint8_t block_data[16],
- uint8_t *converted_data,
- int i, int j, int width,
- int z_pos_factor)
+static void decompress_dxt3_block(const uint8_t block_data[16],
+ uint8_t *converted_data, int i, int j,
+ int width, int z_pos_factor)
{
uint16_t c0 = ((uint16_t*)block_data)[4],
c1 = ((uint16_t*)block_data)[5];
@@ -132,10 +124,9 @@ static inline void decompress_dxt3_block(const uint8_t block_data[16],
r, g, b, a, true);
}
-static inline void decompress_dxt5_block(const uint8_t block_data[16],
- uint8_t *converted_data,
- int i, int j, int width,
- int z_pos_factor)
+static void decompress_dxt5_block(const uint8_t block_data[16],
+ uint8_t *converted_data, int i, int j,
+ int width, int z_pos_factor)
{
uint16_t c0 = ((uint16_t*)block_data)[4],
c1 = ((uint16_t*)block_data)[5];
@@ -173,11 +164,9 @@ static inline void decompress_dxt5_block(const uint8_t block_data[16],
r, g, b, a, true);
}
-uint8_t *decompress_3d_texture_data(GLint color_format,
- const uint8_t *data,
- unsigned int width,
- unsigned int height,
- unsigned int depth)
+uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format,
+ const uint8_t *data, unsigned int width,
+ unsigned int height, unsigned int depth)
{
assert((width > 0) && (width % 4 == 0));
assert((height > 0) && (height % 4 == 0));
@@ -196,13 +185,13 @@ uint8_t *decompress_3d_texture_data(GLint color_format,
int sub_block_index = block_index * block_depth + slice;
int z_pos_factor = (k * block_depth + slice) * width * height;
- if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
+ if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) {
decompress_dxt1_block(data + 8 * sub_block_index, converted_data,
i, j, width, z_pos_factor);
- } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) {
+ } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) {
decompress_dxt3_block(data + 16 * sub_block_index, converted_data,
i, j, width, z_pos_factor);
- } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) {
+ } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) {
decompress_dxt5_block(data + 16 * sub_block_index, converted_data,
i, j, width, z_pos_factor);
} else {
@@ -216,8 +205,9 @@ uint8_t *decompress_3d_texture_data(GLint color_format,
return converted_data;
}
-uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
- unsigned int width, unsigned int height)
+uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format,
+ const uint8_t *data, unsigned int width,
+ unsigned int height)
{
assert((width > 0) && (width % 4 == 0));
assert((height > 0) && (height % 4 == 0));
@@ -226,13 +216,13 @@ uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
for (int j = 0; j < num_blocks_y; j++) {
for (int i = 0; i < num_blocks_x; i++) {
int block_index = j * num_blocks_x + i;
- if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
+ if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) {
decompress_dxt1_block(data + 8 * block_index,
converted_data, i, j, width, 0);
- } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) {
+ } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) {
decompress_dxt3_block(data + 16 * block_index,
converted_data, i, j, width, 0);
- } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) {
+ } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) {
decompress_dxt5_block(data + 16 * block_index,
converted_data, i, j, width, 0);
} else {
diff --git a/hw/xbox/nv2a/s3tc.h b/hw/xbox/nv2a/pgraph/s3tc.h
similarity index 63%
rename from hw/xbox/nv2a/s3tc.h
rename to hw/xbox/nv2a/pgraph/s3tc.h
index 87dad0d3c4..6a10074e74 100644
--- a/hw/xbox/nv2a/s3tc.h
+++ b/hw/xbox/nv2a/pgraph/s3tc.h
@@ -1,5 +1,5 @@
/*
- * QEMU texture decompression routines
+ * S3TC Texture Decompression
*
* Copyright (c) 2020 Wilhelm Kovatch
*
@@ -22,18 +22,23 @@
* THE SOFTWARE.
*/
-#ifndef S3TC_H
-#define S3TC_H
+#ifndef HW_XBOX_NV2A_PGRAPH_S3TC_H
+#define HW_XBOX_NV2A_PGRAPH_S3TC_H
-#include "gl/gloffscreen.h"
+#include <stdint.h>
-uint8_t *decompress_3d_texture_data(GLint color_format,
- const uint8_t *data,
- unsigned int width,
- unsigned int height,
- unsigned int depth);
+enum S3TC_DECOMPRESS_FORMAT {
+ S3TC_DECOMPRESS_FORMAT_DXT1,
+ S3TC_DECOMPRESS_FORMAT_DXT3,
+ S3TC_DECOMPRESS_FORMAT_DXT5,
+};
-uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
- unsigned int width, unsigned int height);
+uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format,
+ const uint8_t *data, unsigned int width,
+ unsigned int height, unsigned int depth);
+
+uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format,
+ const uint8_t *data, unsigned int width,
+ unsigned int height);
#endif
diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c
new file mode 100644
index 0000000000..82737b44f4
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/shaders.c
@@ -0,0 +1,295 @@
+/*
+ * Geforce NV2A PGRAPH OpenGL Renderer
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2020-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/debug.h"
+#include "texture.h"
+#include "pgraph.h"
+#include "shaders.h"
+
+ShaderState pgraph_get_shader_state(PGRAPHState *pg)
+{
+ bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
+ NV_PGRAPH_CSV0_D_MODE) == 2;
+
+ bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
+ NV_PGRAPH_CSV0_D_MODE) == 0;
+
+ int program_start = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
+ NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START);
+
+ pg->program_data_dirty = false;
+
+ ShaderState state;
+
+    // We will hash it, so make sure any padding is zeroed
+ memset(&state, 0, sizeof(ShaderState));
+
+ state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN;
+ state.surface_scale_factor = pg->surface_scale_factor;
+
+ state.compressed_attrs = pg->compressed_attrs;
+ state.uniform_attrs = pg->uniform_attrs;
+ state.swizzle_attrs = pg->swizzle_attrs;
+
+ /* register combiner stuff */
+ state.psh.vulkan = state.vulkan;
+ state.psh.window_clip_exclusive =
+ pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE;
+ state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL);
+ state.psh.shader_stage_program = pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG);
+ state.psh.other_stage_input = pgraph_reg_r(pg, NV_PGRAPH_SHADERCTL);
+ state.psh.final_inputs_0 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG0);
+ state.psh.final_inputs_1 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG1);
+
+ state.psh.alpha_test =
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
+ state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ALPHAFUNC);
+
+ state.psh.point_sprite = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE;
+
+ state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_SHADOWCTL), NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC);
+
+ state.fixed_function = fixed_function;
+
+ /* fixed function stuff */
+ if (fixed_function) {
+ state.skinning = (enum VshSkinning)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
+ NV_PGRAPH_CSV0_D_SKIN);
+ state.lighting =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING);
+ state.normalization =
+ pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE;
+
+ /* color material */
+ state.emission_src = (enum MaterialColorSource)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION);
+ state.ambient_src = (enum MaterialColorSource)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT);
+ state.diffuse_src = (enum MaterialColorSource)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE);
+ state.specular_src = (enum MaterialColorSource)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR);
+ }
+
+ /* vertex program stuff */
+ state.vertex_program = vertex_program,
+ state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
+ NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;
+
+ state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
+ NV_PGRAPH_CSV0_D_POINTPARAMSENABLE);
+ state.point_size =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE), NV097_SET_POINT_SIZE_V) / 8.0f;
+ if (state.point_params_enable) {
+ for (int i = 0; i < 8; i++) {
+ state.point_params[i] = pg->point_params[i];
+ }
+ }
+
+ /* geometry shader stuff */
+ state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode;
+ state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_FRONTFACEMODE);
+ state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_BACKFACEMODE);
+
+ state.smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
+ NV_PGRAPH_CONTROL_3_SHADEMODE) ==
+ NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;
+ state.psh.smooth_shading = state.smooth_shading;
+
+ state.program_length = 0;
+
+ if (vertex_program) {
+ // copy in vertex program tokens
+ for (int i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH;
+ i++) {
+ uint32_t *cur_token = (uint32_t *)&pg->program_data[i];
+ memcpy(&state.program_data[state.program_length], cur_token,
+ VSH_TOKEN_SIZE * sizeof(uint32_t));
+ state.program_length++;
+
+ if (vsh_get_field(cur_token, FLD_FINAL)) {
+ break;
+ }
+ }
+ }
+
+ /* Texgen */
+ for (int i = 0; i < 4; i++) {
+ unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B;
+ for (int j = 0; j < 4; j++) {
+ unsigned int masks[] = {
+ (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S,
+ (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T,
+ (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R,
+ (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q
+ };
+ state.texgen[i][j] =
+ (enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]);
+ }
+ }
+
+ /* Fog */
+ state.fog_enable =
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE;
+ if (state.fog_enable) {
+ /*FIXME: Use CSV0_D? */
+ state.fog_mode = (enum VshFogMode)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_FOG_MODE);
+ state.foggen = (enum VshFoggen)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
+ NV_PGRAPH_CSV0_D_FOGGENMODE);
+ } else {
+ /* FIXME: Do we still pass the fogmode? */
+ state.fog_mode = (enum VshFogMode)0;
+ state.foggen = (enum VshFoggen)0;
+ }
+
+ /* Texture matrices */
+ for (int i = 0; i < 4; i++) {
+ state.texture_matrix_enable[i] = pg->texture_matrix_enable[i];
+ }
+
+ /* Lighting */
+ if (state.lighting) {
+ for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
+ state.light[i] = (enum VshLight)GET_MASK(
+ pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2));
+ }
+ }
+
+ /* Copy content of enabled combiner stages */
+ int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
+ for (int i = 0; i < num_stages; i++) {
+ state.psh.rgb_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4);
+ state.psh.rgb_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4);
+ state.psh.alpha_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4);
+ state.psh.alpha_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4);
+ // constant_0[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
+ // constant_1[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
+ }
+
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ state.psh.compare_mode[i][j] =
+ (pgraph_reg_r(pg, NV_PGRAPH_SHADERCLIPMODE) >> (4 * i + j)) & 1;
+ }
+
+ uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i * 4);
+ bool enabled = pgraph_is_texture_stage_active(pg, i) &&
+ (ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE);
+ if (!enabled) {
+ continue;
+ }
+
+ state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN;
+
+ uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4);
+ unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR);
+ BasicColorFormatInfo f = kelvin_color_format_info_map[color_format];
+ state.psh.rect_tex[i] = f.linear;
+ state.psh.tex_x8y24[i] = color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED ||
+ color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT;
+
+ uint32_t border_source =
+ GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
+ bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
+ state.psh.border_logical_size[i][0] = 0.0f;
+ state.psh.border_logical_size[i][1] = 0.0f;
+ state.psh.border_logical_size[i][2] = 0.0f;
+ if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) {
+ if (!f.linear && !cubemap) {
+ // The actual texture will be (at least) double the reported
+ // size and shifted by a 4 texel border but texture coordinates
+ // will still be relative to the reported size.
+ unsigned int reported_width =
+ 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
+ unsigned int reported_height =
+ 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
+ unsigned int reported_depth =
+ 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
+
+ state.psh.border_logical_size[i][0] = reported_width;
+ state.psh.border_logical_size[i][1] = reported_height;
+ state.psh.border_logical_size[i][2] = reported_depth;
+
+ if (reported_width < 8) {
+ state.psh.border_inv_real_size[i][0] = 0.0625f;
+ } else {
+ state.psh.border_inv_real_size[i][0] =
+ 1.0f / (reported_width * 2.0f);
+ }
+ if (reported_height < 8) {
+ state.psh.border_inv_real_size[i][1] = 0.0625f;
+ } else {
+ state.psh.border_inv_real_size[i][1] =
+ 1.0f / (reported_height * 2.0f);
+ }
+ if (reported_depth < 8) {
+ state.psh.border_inv_real_size[i][2] = 0.0625f;
+ } else {
+ state.psh.border_inv_real_size[i][2] =
+ 1.0f / (reported_depth * 2.0f);
+ }
+ } else {
+ NV2A_UNIMPLEMENTED(
+ "Border source texture with linear %d cubemap %d", f.linear,
+ cubemap);
+ }
+ }
+
+ /* Keep track of whether texture data has been loaded as signed
+ * normalized integers or not. This dictates whether or not we will need
+ * to re-map in fragment shader for certain texture modes (e.g.
+ * bumpenvmap).
+ *
+ * FIXME: When signed texture data is loaded as unsigned and remapped in
+ * fragment shader, there may be interpolation artifacts. Fix this to
+ * support signed textures more appropriately.
+ */
+#if 0 // FIXME
+ state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM)
+ || (f.gl_internal_format == GL_RG8_SNORM);
+#endif
+ state.psh.shadow_map[i] = f.depth;
+
+ uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i * 4);
+ unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
+ enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED;
+ /* FIXME: We do not distinguish between min and mag when
+ * performing convolution. Just use it if specified for min (common AA
+ * case).
+ */
+ if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) {
+ int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL);
+ assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX ||
+ k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3);
+ kernel = (enum ConvolutionFilter)k;
+ }
+
+ state.psh.conv_tex[i] = kernel;
+ }
+
+ return state;
+}
diff --git a/hw/xbox/nv2a/shaders.h b/hw/xbox/nv2a/pgraph/shaders.h
similarity index 56%
rename from hw/xbox/nv2a/shaders.h
rename to hw/xbox/nv2a/pgraph/shaders.h
index 0362da1099..842658f808 100644
--- a/hw/xbox/nv2a/shaders.h
+++ b/hw/xbox/nv2a/pgraph/shaders.h
@@ -18,17 +18,14 @@
* License along with this library; if not, see .
*/
-#ifndef HW_NV2A_SHADERS_H
-#define HW_NV2A_SHADERS_H
+#ifndef HW_XBOX_NV2A_PGRAPH_SHADERS_H
+#define HW_XBOX_NV2A_PGRAPH_SHADERS_H
-#include "qemu/thread.h"
-#include "qapi/qmp/qstring.h"
-#include "gl/gloffscreen.h"
+#include <stdbool.h>
+#include "hw/xbox/nv2a/nv2a_regs.h"
-#include "nv2a_regs.h"
#include "vsh.h"
#include "psh.h"
-#include "lru.h"
enum ShaderPrimitiveMode {
PRIM_TYPE_INVALID,
@@ -57,10 +54,13 @@ enum MaterialColorSource {
};
typedef struct ShaderState {
+ bool vulkan;
unsigned int surface_scale_factor;
PshState psh;
uint16_t compressed_attrs;
+ uint16_t uniform_attrs;
+ uint16_t swizzle_attrs;
bool texture_matrix_enable[4];
enum VshTexgen texgen[4][4];
@@ -101,61 +101,8 @@ typedef struct ShaderState {
bool smooth_shading;
} ShaderState;
-typedef struct ShaderBinding {
- GLuint gl_program;
- GLenum gl_primitive_mode;
-
- GLint psh_constant_loc[9][2];
- GLint alpha_ref_loc;
-
- GLint bump_mat_loc[NV2A_MAX_TEXTURES];
- GLint bump_scale_loc[NV2A_MAX_TEXTURES];
- GLint bump_offset_loc[NV2A_MAX_TEXTURES];
- GLint tex_scale_loc[NV2A_MAX_TEXTURES];
-
- GLint surface_size_loc;
- GLint clip_range_loc;
-
- GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
- uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
-
- GLint inv_viewport_loc;
- GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
- GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
- GLint ltc1_loc[NV2A_LTC1_COUNT];
-
- GLint fog_color_loc;
- GLint fog_param_loc[2];
- GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
- GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS];
- GLint light_local_position_loc[NV2A_MAX_LIGHTS];
- GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
-
- GLint clip_region_loc[8];
-
- GLint material_alpha_loc;
-} ShaderBinding;
-
-typedef struct ShaderLruNode {
- LruNode node;
- bool cached;
- void *program;
- size_t program_size;
- GLenum program_format;
- ShaderState state;
- ShaderBinding *binding;
- QemuThread *save_thread;
-} ShaderLruNode;
-
typedef struct PGRAPHState PGRAPHState;
-GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode);
-void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state);
-ShaderBinding *generate_shaders(const ShaderState *state);
-
-void shader_cache_init(PGRAPHState *pg);
-void shader_write_cache_reload_list(PGRAPHState *pg);
-bool shader_load_from_memory(ShaderLruNode *snode);
-void shader_cache_to_disk(ShaderLruNode *snode);
+ShaderState pgraph_get_shader_state(PGRAPHState *pg);
#endif
diff --git a/hw/xbox/nv2a/pgraph/surface.h b/hw/xbox/nv2a/pgraph/surface.h
new file mode 100644
index 0000000000..d51bc04ea4
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/surface.h
@@ -0,0 +1,35 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_SURFACE_H
+#define HW_XBOX_NV2A_PGRAPH_SURFACE_H
+
+typedef struct SurfaceShape {
+ unsigned int z_format;
+ unsigned int color_format;
+ unsigned int zeta_format;
+ unsigned int log_width, log_height;
+ unsigned int clip_x, clip_y;
+ unsigned int clip_width, clip_height;
+ unsigned int anti_aliasing;
+} SurfaceShape;
+
+#endif
diff --git a/hw/xbox/nv2a/swizzle.c b/hw/xbox/nv2a/pgraph/swizzle.c
similarity index 100%
rename from hw/xbox/nv2a/swizzle.c
rename to hw/xbox/nv2a/pgraph/swizzle.c
diff --git a/hw/xbox/nv2a/swizzle.h b/hw/xbox/nv2a/pgraph/swizzle.h
similarity index 94%
rename from hw/xbox/nv2a/swizzle.h
rename to hw/xbox/nv2a/pgraph/swizzle.h
index 21889b39cf..78ff0740a4 100644
--- a/hw/xbox/nv2a/swizzle.h
+++ b/hw/xbox/nv2a/pgraph/swizzle.h
@@ -18,8 +18,10 @@
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef HW_XBOX_SWIZZLE_H
-#define HW_XBOX_SWIZZLE_H
+#ifndef HW_XBOX_NV2A_PGRAPH_SWIZZLE_H
+#define HW_XBOX_NV2A_PGRAPH_SWIZZLE_H
+
+#include <stdint.h>
void swizzle_box(
const uint8_t *src_buf,
diff --git a/hw/xbox/nv2a/pgraph/texture.c b/hw/xbox/nv2a/pgraph/texture.c
new file mode 100644
index 0000000000..e5350ea8d4
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/texture.c
@@ -0,0 +1,405 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "texture.h"
+#include "util.h"
+
+const BasicColorFormatInfo kelvin_color_format_info_map[66] = {
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { 1, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { 1, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { 2, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { 2, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { 2, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { 2, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { 4, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { 4, false },
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { 1, false },
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { 4, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { 4, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { 4, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { 2, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { 2, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { 4, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { 1, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { 2, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { 1, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { 2, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { 1, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { 2, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { 2, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { 4, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { 1, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { 2, true },
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { 2, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { 2, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { 2, false },
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { 2, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { 2, true },
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { 2, false, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { 4, true,
+ true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { 4, true,
+ true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { 2, true,
+ true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { 2, true,
+ true },
+
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { 2, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { 4, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { 4, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { 4, false },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { 4, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { 4, true },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { 4, true },
+};
+
+hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx)
+{
+ NV2AState *d = container_of(pg, NV2AState, pgraph);
+ int i = texture_idx;
+
+ uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4);
+ unsigned int dma_select =
+ GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA);
+
+ hwaddr offset = pgraph_reg_r(pg, NV_PGRAPH_TEXOFFSET0 + i*4);
+
+ hwaddr dma_len;
+ uint8_t *texture_data;
+ if (dma_select) {
+ texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len);
+ } else {
+ texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len);
+ }
+ assert(offset < dma_len);
+ texture_data += offset;
+
+ return texture_data - d->vram_ptr;
+}
+
+hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length)
+{
+ NV2AState *d = container_of(pg, NV2AState, pgraph);
+ int i = texture_idx;
+
+ uint32_t palette = pgraph_reg_r(pg, NV_PGRAPH_TEXPALETTE0 + i*4);
+ bool palette_dma_select =
+ GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA);
+ unsigned int palette_length_index =
+ GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH);
+ unsigned int palette_offset =
+ palette & NV_PGRAPH_TEXPALETTE0_OFFSET;
+
+ unsigned int palette_length = 0;
+ switch (palette_length_index) {
+ case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break;
+ case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break;
+ case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break;
+ case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break;
+ default: assert(false); break;
+ }
+ if (length) {
+ *length = palette_length;
+ }
+
+ hwaddr palette_dma_len;
+ uint8_t *palette_data;
+ if (palette_dma_select) {
+ palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len);
+ } else {
+ palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len);
+ }
+ assert(palette_offset < palette_dma_len);
+ palette_data += palette_offset;
+
+ return palette_data - d->vram_ptr;
+}
+
+size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape)
+{
+ BasicColorFormatInfo f = kelvin_color_format_info_map[shape->color_format];
+ size_t length = 0;
+
+ if (f.linear) {
+ assert(shape->cubemap == false);
+ assert(shape->dimensionality == 2);
+ length = shape->height * shape->pitch;
+ } else {
+ if (shape->dimensionality >= 2) {
+ unsigned int w = shape->width, h = shape->height;
+ int level;
+ if (!pgraph_is_texture_format_compressed(pg, shape->color_format)) {
+ for (level = 0; level < shape->levels; level++) {
+ w = MAX(w, 1);
+ h = MAX(h, 1);
+ length += w * h * f.bytes_per_pixel;
+ w /= 2;
+ h /= 2;
+ }
+ } else {
+ /* Compressed textures are a bit different */
+ unsigned int block_size =
+ shape->color_format ==
+ NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ?
+ 8 : 16;
+ for (level = 0; level < shape->levels; level++) {
+ w = MAX(w, 1);
+ h = MAX(h, 1);
+ unsigned int phys_w = (w + 3) & ~3,
+ phys_h = (h + 3) & ~3;
+ length += phys_w/4 * phys_h/4 * block_size;
+ w /= 2;
+ h /= 2;
+ }
+ }
+ if (shape->cubemap) {
+ assert(shape->dimensionality == 2);
+ length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
+ length *= 6;
+ }
+ if (shape->dimensionality >= 3) {
+ length *= shape->depth;
+ }
+ }
+ }
+
+ return length;
+}
+
+TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx)
+{
+ int i = texture_idx;
+
+ uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i*4);
+ uint32_t ctl_1 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL1_0 + i*4);
+ uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4);
+
+#if DEBUG_NV2A
+ uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4);
+ uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4);
+#endif
+
+ unsigned int min_mipmap_level =
+ GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP);
+ unsigned int max_mipmap_level =
+ GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP);
+
+ unsigned int pitch =
+ GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH);
+
+ bool cubemap =
+ GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
+ unsigned int dimensionality =
+ GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY);
+
+ int tex_mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (texture_idx * 5)) & 0x1F;
+ if (tex_mode == 0x02) {
+ assert(pgraph_is_texture_enabled(pg, texture_idx));
+ // assert(state.dimensionality == 3);
+
+ // OVERRIDE
+ // dimensionality = 3;
+ }
+
+ unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR);
+ unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS);
+ unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
+ unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
+ unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
+
+ unsigned int rect_width =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4),
+ NV_PGRAPH_TEXIMAGERECT0_WIDTH);
+ unsigned int rect_height =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4),
+ NV_PGRAPH_TEXIMAGERECT0_HEIGHT);
+#ifdef DEBUG_NV2A
+ unsigned int lod_bias =
+ GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS);
+#endif
+ unsigned int border_source = GET_MASK(fmt,
+ NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
+
+ NV2A_DPRINTF(" texture %d is format 0x%x, "
+ "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s),"
+ " filter %x %x, levels %d-%d %d bias %d\n",
+ i, color_format, address,
+ rect_width, rect_height,
+ 1 << log_width, 1 << log_height, 1 << log_depth,
+ pitch,
+ cubemap ? "; cubemap" : "",
+ GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN),
+ GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG),
+ min_mipmap_level, max_mipmap_level, levels,
+ lod_bias);
+
+ assert(color_format < ARRAY_SIZE(kelvin_color_format_info_map));
+ BasicColorFormatInfo f = kelvin_color_format_info_map[color_format];
+ if (f.bytes_per_pixel == 0) {
+ fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n",
+ color_format);
+ abort();
+ }
+
+ unsigned int width, height, depth;
+ if (f.linear) {
+ assert(dimensionality == 2);
+ width = rect_width;
+ height = rect_height;
+ depth = 1;
+ } else {
+ width = 1 << log_width;
+ height = 1 << log_height;
+ depth = 1 << log_depth;
+ pitch = 0;
+
+ levels = MIN(levels, max_mipmap_level + 1);
+
+ /* Discard mipmap levels that would be smaller than 1x1.
+ * FIXME: Is this actually needed?
+ *
+ * >> Level 0: 32 x 4
+ * Level 1: 16 x 2
+ * Level 2: 8 x 1
+ * Level 3: 4 x 1
+ * Level 4: 2 x 1
+ * Level 5: 1 x 1
+ */
+ levels = MIN(levels, MAX(log_width, log_height) + 1);
+ assert(levels > 0);
+
+ if (dimensionality == 3) {
+ /* FIXME: What about 3D mipmaps? */
+ if (log_width < 2 || log_height < 2) {
+ /* Base level is smaller than 4x4... */
+ levels = 1;
+ } else {
+ levels = MIN(levels, MIN(log_width, log_height) - 1);
+ }
+ }
+ min_mipmap_level = MIN(levels-1, min_mipmap_level);
+ max_mipmap_level = MIN(levels-1, max_mipmap_level);
+ }
+
+ TextureShape shape;
+
+ // We will hash it, so make sure any padding is zero
+ memset(&shape, 0, sizeof(shape));
+
+ shape.cubemap = cubemap;
+ shape.dimensionality = dimensionality;
+ shape.color_format = color_format;
+ shape.levels = levels;
+ shape.width = width;
+ shape.height = height;
+ shape.depth = depth;
+ shape.min_mipmap_level = min_mipmap_level;
+ shape.max_mipmap_level = max_mipmap_level;
+ shape.pitch = pitch;
+ shape.border = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR;
+ return shape;
+}
+
+uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data,
+ const uint8_t *palette_data,
+ unsigned int width, unsigned int height,
+ unsigned int depth, unsigned int row_pitch,
+ unsigned int slice_pitch,
+ size_t *converted_size)
+{
+ size_t size = 0;
+ uint8_t *converted_data;
+
+ if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) {
+ size = width * height * depth * 4;
+ converted_data = g_malloc(size);
+ const uint8_t *src = data;
+ uint32_t *dst = (uint32_t *)converted_data;
+ for (int z = 0; z < depth; z++) {
+ for (int y = 0; y < height; y++) {
+ for (int x = 0; x < width; x++) {
+ uint8_t index = src[y * row_pitch + x];
+ uint32_t color = *(uint32_t *)(palette_data + index * 4);
+ *dst++ = color;
+ }
+ }
+ src += slice_pitch;
+ }
+ } else if (s.color_format ==
+ NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 ||
+ s.color_format ==
+ NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) {
+ // TODO: Investigate whether a non-1 depth is possible.
+ // Generally the hardware asserts when attempting to use volumetric
+ // textures in linear formats.
+ assert(depth == 1); /* FIXME */
+ // FIXME: only valid if control0 register allows for colorspace
+ // conversion
+ size = width * height * 4;
+ converted_data = g_malloc(size);
+ uint8_t *pixel = converted_data;
+ for (int y = 0; y < height; y++) {
+ const uint8_t *line = &data[y * row_pitch * depth];
+ for (int x = 0; x < width; x++, pixel += 4) {
+ if (s.color_format ==
+ NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) {
+ convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1],
+ &pixel[2]);
+ } else {
+ convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1],
+ &pixel[2]);
+ }
+ pixel[3] = 255;
+ }
+ }
+ } else if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) {
+ assert(depth == 1); /* FIXME */
+ size = width * height * 3;
+ converted_data = g_malloc(size);
+ for (int y = 0; y < height; y++) {
+ for (int x = 0; x < width; x++) {
+ uint16_t rgb655 = *(uint16_t *)(data + y * row_pitch + x * 2);
+ int8_t *pixel = (int8_t *)&converted_data[(y * width + x) * 3];
+ /* Maps 5 bit G and B signed value range to 8 bit
+ * signed values. R is probably unsigned.
+ */
+ rgb655 ^= (1 << 9) | (1 << 4);
+ pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F;
+ pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80;
+ pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80;
+ }
+ }
+ } else {
+ return NULL;
+ }
+
+ if (converted_size) {
+ *converted_size = size;
+ }
+ return converted_data;
+}
diff --git a/hw/xbox/nv2a/pgraph/texture.h b/hw/xbox/nv2a/pgraph/texture.h
new file mode 100644
index 0000000000..4c9818ca3c
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/texture.h
@@ -0,0 +1,67 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_TEXTURE_H
+#define HW_XBOX_NV2A_PGRAPH_TEXTURE_H
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "hw/xbox/nv2a/nv2a_regs.h"
+
+typedef struct PGRAPHState PGRAPHState;
+
+typedef struct TextureShape {
+ bool cubemap;
+ unsigned int dimensionality;
+ unsigned int color_format;
+ unsigned int levels;
+ unsigned int width, height, depth;
+ bool border;
+
+ unsigned int min_mipmap_level, max_mipmap_level;
+ unsigned int pitch;
+} TextureShape;
+
+typedef struct BasicColorFormatInfo {
+ unsigned int bytes_per_pixel;
+ bool linear;
+ bool depth;
+} BasicColorFormatInfo;
+
+extern const BasicColorFormatInfo kelvin_color_format_info_map[66];
+
+uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data,
+ const uint8_t *palette_data,
+ unsigned int width, unsigned int height,
+ unsigned int depth, unsigned int row_pitch,
+ unsigned int slice_pitch,
+ size_t *converted_size);
+
+hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx);
+hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length);
+TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx);
+size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape);
+
+#endif
diff --git a/hw/xbox/nv2a/gl/gloffscreen_common.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c
similarity index 100%
rename from hw/xbox/nv2a/gl/gloffscreen_common.c
rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c
diff --git a/hw/xbox/nv2a/gl/gloffscreen.h b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h
similarity index 100%
rename from hw/xbox/nv2a/gl/gloffscreen.h
rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h
diff --git a/hw/xbox/nv2a/gl/gloffscreen_sdl.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c
similarity index 98%
rename from hw/xbox/nv2a/gl/gloffscreen_sdl.c
rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c
index 2221067ddd..277694cc50 100644
--- a/hw/xbox/nv2a/gl/gloffscreen_sdl.c
+++ b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c
@@ -1,7 +1,7 @@
/*
* Offscreen OpenGL abstraction layer -- SDL based
*
- * Copyright (c) 2018-2021 Matt Borgerson
+ * Copyright (c) 2018-2024 Matt Borgerson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
diff --git a/hw/xbox/nv2a/thirdparty/meson.build b/hw/xbox/nv2a/pgraph/thirdparty/meson.build
similarity index 62%
rename from hw/xbox/nv2a/thirdparty/meson.build
rename to hw/xbox/nv2a/pgraph/thirdparty/meson.build
index ec4068a77c..d0139f1763 100644
--- a/hw/xbox/nv2a/thirdparty/meson.build
+++ b/hw/xbox/nv2a/pgraph/thirdparty/meson.build
@@ -10,3 +10,9 @@ libnv2a_vsh_cpu = static_library('nv2a_vsh_cpu',
include_directories: ['.', 'nv2a_vsh_cpu/src'])
nv2a_vsh_cpu = declare_dependency(link_with: libnv2a_vsh_cpu,
include_directories: ['nv2a_vsh_cpu/src'])
+
+libgloffscreen = static_library('libgloffscreen',
+ sources: files('gloffscreen/common.c', 'gloffscreen/sdl.c'),
+ dependencies: sdl)
+gloffscreen = declare_dependency(link_with: libgloffscreen,
+ include_directories: ['gloffscreen'])
diff --git a/hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu b/hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu
similarity index 100%
rename from hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu
rename to hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu
diff --git a/hw/xbox/nv2a/pgraph/util.h b/hw/xbox/nv2a/pgraph/util.h
new file mode 100644
index 0000000000..c8a28d3c0d
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/util.h
@@ -0,0 +1,86 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_UTIL_H
+#define HW_XBOX_NV2A_PGRAPH_UTIL_H
+
+static const float f16_max = 511.9375f;
+static const float f24_max = 1.0E30;
+
+/* 16 bit to [0.0, F16_MAX = 511.9375] */
+static inline
+float convert_f16_to_float(uint16_t f16) {
+ if (f16 == 0x0000) { return 0.0; }
+ uint32_t i = (f16 << 11) + 0x3C000000;
+ return *(float*)&i;
+}
+
+/* 24 bit to [0.0, F24_MAX] */
+static inline
+float convert_f24_to_float(uint32_t f24) {
+ assert(!(f24 >> 24));
+ f24 &= 0xFFFFFF;
+ if (f24 == 0x000000) { return 0.0; }
+ uint32_t i = f24 << 7;
+ return *(float*)&i;
+}
+
+static inline
+uint8_t cliptobyte(int x)
+{
+ return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 255 : x));
+}
+
+static inline
+void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix,
+ uint8_t *r, uint8_t *g, uint8_t* b) {
+ int c, d, e;
+ c = (int)line[ix * 2] - 16;
+ if (ix % 2) {
+ d = (int)line[ix * 2 - 1] - 128;
+ e = (int)line[ix * 2 + 1] - 128;
+ } else {
+ d = (int)line[ix * 2 + 1] - 128;
+ e = (int)line[ix * 2 + 3] - 128;
+ }
+ *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
+ *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
+ *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
+}
+
+static inline
+void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix,
+ uint8_t *r, uint8_t *g, uint8_t* b) {
+ int c, d, e;
+ c = (int)line[ix * 2 + 1] - 16;
+ if (ix % 2) {
+ d = (int)line[ix * 2 - 2] - 128;
+ e = (int)line[ix * 2 + 0] - 128;
+ } else {
+ d = (int)line[ix * 2 + 0] - 128;
+ e = (int)line[ix * 2 + 2] - 128;
+ }
+ *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
+ *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
+ *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
+}
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/vertex.c b/hw/xbox/nv2a/pgraph/vertex.c
new file mode 100644
index 0000000000..47f7cb5688
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vertex.c
@@ -0,0 +1,131 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+
+void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data)
+{
+ assert(attr->count <= 4);
+ attr->inline_value[0] = 0.0f;
+ attr->inline_value[1] = 0.0f;
+ attr->inline_value[2] = 0.0f;
+ attr->inline_value[3] = 1.0f;
+
+ switch (attr->format) {
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
+ for (uint32_t i = 0; i < attr->count; ++i) {
+ attr->inline_value[i] = (float)data[i] / 255.0f;
+ }
+ break;
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: {
+ const int16_t *val = (const int16_t *) data;
+ for (uint32_t i = 0; i < attr->count; ++i, ++val) {
+ attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f);
+ }
+ break;
+ }
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
+ memcpy(attr->inline_value, data, attr->size * attr->count);
+ break;
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: {
+ const int16_t *val = (const int16_t *) data;
+ for (uint32_t i = 0; i < attr->count; ++i, ++val) {
+ attr->inline_value[i] = (float)*val;
+ }
+ break;
+ }
+ case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: {
+ /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
+ const int32_t val = *(const int32_t *)data;
+ int32_t x = val & 0x7FF;
+ if (x & 0x400) {
+ x |= 0xFFFFF800;
+ }
+ int32_t y = (val >> 11) & 0x7FF;
+ if (y & 0x400) {
+ y |= 0xFFFFF800;
+ }
+ int32_t z = (val >> 22) & 0x7FF;
+ if (z & 0x200) {
+ z |= 0xFFFFFC00;
+ }
+
+ attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f);
+ attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f);
+ attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f);
+ break;
+ }
+ default:
+ fprintf(stderr, "Unknown vertex attribute type: for format 0x%x\n",
+ attr->format);
+ assert(!"Unsupported attribute type");
+ break;
+ }
+}
+
+void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr)
+{
+ VertexAttribute *attribute = &pg->vertex_attributes[attr];
+
+ if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) {
+ return;
+ }
+
+ /* Now upload the previous attribute value */
+ attribute->inline_buffer_populated = true;
+ for (int i = 0; i < pg->inline_buffer_length; i++) {
+ memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value,
+ sizeof(float) * 4);
+ }
+}
+
+void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg)
+{
+ pgraph_check_within_begin_end_block(pg);
+ assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH);
+
+ for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+ VertexAttribute *attribute = &pg->vertex_attributes[i];
+ if (attribute->inline_buffer_populated) {
+ memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4],
+ attribute->inline_value, sizeof(float) * 4);
+ }
+ }
+
+ pg->inline_buffer_length++;
+}
+
+void pgraph_reset_inline_buffers(PGRAPHState *pg)
+{
+ pg->inline_elements_length = 0;
+ pg->inline_array_length = 0;
+ pg->inline_buffer_length = 0;
+ pgraph_reset_draw_arrays(pg);
+}
+
+void pgraph_reset_draw_arrays(PGRAPHState *pg)
+{
+ pg->draw_arrays_length = 0;
+ pg->draw_arrays_min_start = -1;
+ pg->draw_arrays_max_count = 0;
+ pg->draw_arrays_prevent_connect = false;
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/blit.c b/hw/xbox/nv2a/pgraph/vk/blit.c
new file mode 100644
index 0000000000..e4529a3c58
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/blit.c
@@ -0,0 +1,177 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * Based on GL implementation:
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "renderer.h"
+
+void pgraph_vk_image_blit(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d;
+ ImageBlitState *image_blit = &pg->image_blit;
+ BetaState *beta = &pg->beta;
+
+ pgraph_vk_surface_update(d, false, true, true);
+
+ assert(context_surfaces->object_instance == image_blit->context_surfaces);
+
+ unsigned int bytes_per_pixel;
+ switch (context_surfaces->color_format) {
+ case NV062_SET_COLOR_FORMAT_LE_Y8:
+ bytes_per_pixel = 1;
+ break;
+ case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
+ bytes_per_pixel = 2;
+ break;
+ case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
+ case NV062_SET_COLOR_FORMAT_LE_Y32:
+ bytes_per_pixel = 4;
+ break;
+ default:
+ fprintf(stderr, "Unknown blit surface format: 0x%x\n",
+ context_surfaces->color_format);
+ assert(false);
+ break;
+ }
+
+ hwaddr source_dma_len, dest_dma_len;
+
+ uint8_t *source = (uint8_t *)nv_dma_map(
+ d, context_surfaces->dma_image_source, &source_dma_len);
+ assert(context_surfaces->source_offset < source_dma_len);
+ source += context_surfaces->source_offset;
+
+ uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest,
+ &dest_dma_len);
+ assert(context_surfaces->dest_offset < dest_dma_len);
+ dest += context_surfaces->dest_offset;
+
+ hwaddr source_addr = source - d->vram_ptr;
+ hwaddr dest_addr = dest - d->vram_ptr;
+
+ SurfaceBinding *surf_src = pgraph_vk_surface_get(d, source_addr);
+ if (surf_src) {
+ pgraph_vk_surface_download_if_dirty(d, surf_src);
+ }
+
+ SurfaceBinding *surf_dest = pgraph_vk_surface_get(d, dest_addr);
+ if (surf_dest) {
+ if (image_blit->height < surf_dest->height ||
+ image_blit->width < surf_dest->width) {
+ pgraph_vk_surface_download_if_dirty(d, surf_dest);
+ } else {
+ // The blit will completely replace the surface so any pending
+ // download should be discarded.
+ surf_dest->download_pending = false;
+ surf_dest->draw_dirty = false;
+ }
+ surf_dest->upload_pending = true;
+ pg->draw_time++;
+ }
+
+ hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch +
+ image_blit->in_x * bytes_per_pixel;
+ hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch +
+ image_blit->out_x * bytes_per_pixel;
+
+ hwaddr source_size =
+ (image_blit->height - 1) * context_surfaces->source_pitch +
+ image_blit->width * bytes_per_pixel;
+ hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch +
+ image_blit->width * bytes_per_pixel;
+
+ /* FIXME: What does hardware do in this case? */
+ assert(source_addr + source_offset + source_size <=
+ memory_region_size(d->vram));
+ assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram));
+
+ uint8_t *source_row = source + source_offset;
+ uint8_t *dest_row = dest + dest_offset;
+
+ if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
+ // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY");
+ for (unsigned int y = 0; y < image_blit->height; y++) {
+ memmove(dest_row, source_row, image_blit->width * bytes_per_pixel);
+ source_row += context_surfaces->source_pitch;
+ dest_row += context_surfaces->dest_pitch;
+ }
+ } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) {
+ // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND");
+ uint32_t max_beta_mult = 0x7f80;
+ uint32_t beta_mult = beta->beta >> 16;
+ uint32_t inv_beta_mult = max_beta_mult - beta_mult;
+ for (unsigned int y = 0; y < image_blit->height; y++) {
+ for (unsigned int x = 0; x < image_blit->width; x++) {
+ for (unsigned int ch = 0; ch < 3; ch++) {
+ uint32_t a = source_row[x * 4 + ch] * beta_mult;
+ uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult;
+ dest_row[x * 4 + ch] = (a + b) / max_beta_mult;
+ }
+ }
+ source_row += context_surfaces->source_pitch;
+ dest_row += context_surfaces->dest_pitch;
+ }
+ } else {
+ fprintf(stderr, "Unknown blit operation: 0x%x\n",
+ image_blit->operation);
+ assert(false && "Unknown blit operation");
+ }
+
+ NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr);
+
+ bool needs_alpha_patching;
+ uint8_t alpha_override;
+ switch (context_surfaces->color_format) {
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
+ needs_alpha_patching = true;
+ alpha_override = 0xff;
+ break;
+ case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
+ needs_alpha_patching = true;
+ alpha_override = 0;
+ break;
+ default:
+ needs_alpha_patching = false;
+ alpha_override = 0;
+ }
+
+ if (needs_alpha_patching) {
+ dest_row = dest + dest_offset;
+ for (unsigned int y = 0; y < image_blit->height; y++) {
+ for (unsigned int x = 0; x < image_blit->width; x++) {
+ dest_row[x * 4 + 3] = alpha_override;
+ }
+ dest_row += context_surfaces->dest_pitch;
+ }
+ }
+
+ dest_addr += dest_offset;
+ memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
+ DIRTY_MEMORY_VGA);
+ memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
+ DIRTY_MEMORY_NV2A_TEX);
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/buffer.c b/hw/xbox/nv2a/pgraph/vk/buffer.c
new file mode 100644
index 0000000000..440f8ae56e
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/buffer.c
@@ -0,0 +1,206 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "renderer.h"
+#include <vulkan/vulkan.h>
+
+static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ VkBufferCreateInfo buffer_create_info = {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .size = buffer->buffer_size,
+ .usage = buffer->usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ };
+ VK_CHECK(vmaCreateBuffer(r->allocator, &buffer_create_info,
+ &buffer->alloc_info, &buffer->buffer,
+ &buffer->allocation, NULL));
+}
+
+static void destroy_buffer(PGRAPHState *pg, StorageBuffer *buffer)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ vmaDestroyBuffer(r->allocator, buffer->buffer, buffer->allocation);
+ buffer->buffer = VK_NULL_HANDLE;
+ buffer->allocation = VK_NULL_HANDLE;
+}
+
+void pgraph_vk_init_buffers(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ // FIXME: Profile buffer sizes
+
+ VmaAllocationCreateInfo host_alloc_create_info = {
+ .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
+ .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+ };
+ VmaAllocationCreateInfo device_alloc_create_info = {
+ .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
+ };
+
+ r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){
+ .alloc_info = host_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ .buffer_size = 4096 * 4096 * 4,
+ };
+
+ r->storage_buffers[BUFFER_STAGING_SRC] = (StorageBuffer){
+ .alloc_info = host_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ .buffer_size = r->storage_buffers[BUFFER_STAGING_DST].buffer_size,
+ };
+
+ r->storage_buffers[BUFFER_COMPUTE_DST] = (StorageBuffer){
+ .alloc_info = device_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ .buffer_size = (1024 * 10) * (1024 * 10) * 8,
+ };
+
+ r->storage_buffers[BUFFER_COMPUTE_SRC] = (StorageBuffer){
+ .alloc_info = device_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+ .buffer_size = r->storage_buffers[BUFFER_COMPUTE_DST].buffer_size,
+ };
+
+ r->storage_buffers[BUFFER_INDEX] = (StorageBuffer){
+ .alloc_info = device_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+ .buffer_size = sizeof(pg->inline_elements) * 100,
+ };
+
+ r->storage_buffers[BUFFER_INDEX_STAGING] = (StorageBuffer){
+ .alloc_info = host_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ .buffer_size = r->storage_buffers[BUFFER_INDEX].buffer_size,
+ };
+
+ // FIXME: Don't assume that we can render with host mapped buffer
+ r->storage_buffers[BUFFER_VERTEX_RAM] = (StorageBuffer){
+ .alloc_info = host_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ .buffer_size = memory_region_size(d->vram),
+ };
+
+ r->bitmap_size = memory_region_size(d->vram) / 4096;
+ r->uploaded_bitmap = bitmap_new(r->bitmap_size);
+ bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);
+
+ r->storage_buffers[BUFFER_VERTEX_INLINE] = (StorageBuffer){
+ .alloc_info = device_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+ .buffer_size = NV2A_VERTEXSHADER_ATTRIBUTES * NV2A_MAX_BATCH_LENGTH *
+ 4 * sizeof(float) * 10,
+ };
+
+ r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING] = (StorageBuffer){
+ .alloc_info = host_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ .buffer_size = r->storage_buffers[BUFFER_VERTEX_INLINE].buffer_size,
+ };
+
+ r->storage_buffers[BUFFER_UNIFORM] = (StorageBuffer){
+ .alloc_info = device_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+ .buffer_size = 8 * 1024 * 1024,
+ };
+
+ r->storage_buffers[BUFFER_UNIFORM_STAGING] = (StorageBuffer){
+ .alloc_info = host_alloc_create_info,
+ .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+ .buffer_size = r->storage_buffers[BUFFER_UNIFORM].buffer_size,
+ };
+
+ for (int i = 0; i < BUFFER_COUNT; i++) {
+ create_buffer(pg, &r->storage_buffers[i]);
+ }
+
+ // FIXME: Add fallback path for device using host mapped memory
+
+ int buffers_to_map[] = { BUFFER_VERTEX_RAM,
+ BUFFER_INDEX_STAGING,
+ BUFFER_VERTEX_INLINE_STAGING,
+ BUFFER_UNIFORM_STAGING };
+
+ for (int i = 0; i < ARRAY_SIZE(buffers_to_map); i++) {
+ VK_CHECK(vmaMapMemory(
+ r->allocator, r->storage_buffers[buffers_to_map[i]].allocation,
+ (void **)&r->storage_buffers[buffers_to_map[i]].mapped));
+ }
+}
+
+void pgraph_vk_finalize_buffers(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ for (int i = 0; i < BUFFER_COUNT; i++) {
+ if (r->storage_buffers[i].mapped) {
+ vmaUnmapMemory(r->allocator, r->storage_buffers[i].allocation);
+ }
+ destroy_buffer(pg, &r->storage_buffers[i]);
+ }
+
+ g_free(r->uploaded_bitmap);
+ r->uploaded_bitmap = NULL;
+}
+
+bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index,
+ VkDeviceSize size,
+ VkDeviceAddress alignment)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+ StorageBuffer *b = &r->storage_buffers[index];
+ return (ROUND_UP(b->buffer_offset, alignment) + size) <= b->buffer_size;
+}
+
+VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data,
+ VkDeviceSize *sizes, size_t count,
+ VkDeviceAddress alignment)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ VkDeviceSize total_size = 0;
+ for (int i = 0; i < count; i++) {
+ total_size += sizes[i];
+ }
+ assert(pgraph_vk_buffer_has_space_for(pg, index, total_size, alignment));
+
+ StorageBuffer *b = &r->storage_buffers[index];
+ VkDeviceSize starting_offset = ROUND_UP(b->buffer_offset, alignment);
+
+ assert(b->mapped);
+
+ for (int i = 0; i < count; i++) {
+ b->buffer_offset = ROUND_UP(b->buffer_offset, alignment);
+ memcpy(b->mapped + b->buffer_offset, data[i], sizes[i]);
+ b->buffer_offset += sizes[i];
+ }
+
+ return starting_offset;
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/command.c b/hw/xbox/nv2a/pgraph/vk/command.c
new file mode 100644
index 0000000000..0e9fc9a2ee
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/command.c
@@ -0,0 +1,119 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "renderer.h"
+
+static void create_command_pool(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ QueueFamilyIndices indices =
+ pgraph_vk_find_queue_families(r->physical_device);
+
+ VkCommandPoolCreateInfo create_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = indices.queue_family,
+ };
+ VK_CHECK(
+ vkCreateCommandPool(r->device, &create_info, NULL, &r->command_pool));
+}
+
+static void destroy_command_pool(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ vkDestroyCommandPool(r->device, r->command_pool, NULL);
+}
+
+static void create_command_buffers(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ VkCommandBufferAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ .commandPool = r->command_pool,
+ .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ .commandBufferCount = ARRAY_SIZE(r->command_buffers),
+ };
+ VK_CHECK(
+ vkAllocateCommandBuffers(r->device, &alloc_info, r->command_buffers));
+
+ r->command_buffer = r->command_buffers[0];
+ r->aux_command_buffer = r->command_buffers[1];
+}
+
+static void destroy_command_buffers(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ vkFreeCommandBuffers(r->device, r->command_pool,
+ ARRAY_SIZE(r->command_buffers), r->command_buffers);
+
+ r->command_buffer = VK_NULL_HANDLE;
+ r->aux_command_buffer = VK_NULL_HANDLE;
+}
+
+VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(!r->in_aux_command_buffer);
+ r->in_aux_command_buffer = true;
+
+ VkCommandBufferBeginInfo begin_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+ VK_CHECK(vkBeginCommandBuffer(r->aux_command_buffer, &begin_info));
+
+ return r->aux_command_buffer;
+}
+
+void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(r->in_aux_command_buffer);
+
+ VK_CHECK(vkEndCommandBuffer(cmd));
+
+ VkSubmitInfo submit_info = {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &cmd,
+ };
+ VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE));
+ nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_AUX);
+ VK_CHECK(vkQueueWaitIdle(r->queue));
+
+ r->in_aux_command_buffer = false;
+}
+
+void pgraph_vk_init_command_buffers(PGRAPHState *pg)
+{
+ create_command_pool(pg);
+ create_command_buffers(pg);
+}
+
+void pgraph_vk_finalize_command_buffers(PGRAPHState *pg)
+{
+ destroy_command_buffers(pg);
+ destroy_command_pool(pg);
+}
\ No newline at end of file
diff --git a/hw/xbox/nv2a/pgraph/vk/constants.h b/hw/xbox/nv2a/pgraph/vk/constants.h
new file mode 100644
index 0000000000..9ae8ba6dd4
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/constants.h
@@ -0,0 +1,418 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H
+#define HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H
+
+#include "hw/xbox/nv2a/nv2a_regs.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+#include <vulkan/vulkan.h>
+
+static const VkFilter pgraph_texture_min_filter_vk_map[] = {
+ 0,
+ VK_FILTER_NEAREST,
+ VK_FILTER_LINEAR,
+ VK_FILTER_NEAREST,
+ VK_FILTER_LINEAR,
+ VK_FILTER_NEAREST,
+ VK_FILTER_LINEAR,
+ VK_FILTER_LINEAR,
+};
+
+static const VkFilter pgraph_texture_mag_filter_vk_map[] = {
+ 0,
+ VK_FILTER_NEAREST,
+ VK_FILTER_LINEAR,
+ 0,
+ VK_FILTER_LINEAR /* TODO: Convolution filter... */
+};
+
+static const VkSamplerAddressMode pgraph_texture_addr_vk_map[] = {
+ 0,
+ VK_SAMPLER_ADDRESS_MODE_REPEAT,
+ VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
+ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
+ VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */
+};
+
+static const VkBlendFactor pgraph_blend_factor_vk_map[] = {
+ VK_BLEND_FACTOR_ZERO,
+ VK_BLEND_FACTOR_ONE,
+ VK_BLEND_FACTOR_SRC_COLOR,
+ VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
+ VK_BLEND_FACTOR_SRC_ALPHA,
+ VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+ VK_BLEND_FACTOR_DST_ALPHA,
+ VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
+ VK_BLEND_FACTOR_DST_COLOR,
+ VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
+ VK_BLEND_FACTOR_SRC_ALPHA_SATURATE,
+ 0,
+ VK_BLEND_FACTOR_CONSTANT_COLOR,
+ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
+ VK_BLEND_FACTOR_CONSTANT_ALPHA,
+ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
+};
+
+static const VkBlendOp pgraph_blend_equation_vk_map[] = {
+ VK_BLEND_OP_SUBTRACT,
+ VK_BLEND_OP_REVERSE_SUBTRACT,
+ VK_BLEND_OP_ADD,
+ VK_BLEND_OP_MIN,
+ VK_BLEND_OP_MAX,
+ VK_BLEND_OP_REVERSE_SUBTRACT,
+ VK_BLEND_OP_ADD,
+};
+
+/* FIXME
+static const GLenum pgraph_blend_logicop_map[] = {
+ GL_CLEAR,
+ GL_AND,
+ GL_AND_REVERSE,
+ GL_COPY,
+ GL_AND_INVERTED,
+ GL_NOOP,
+ GL_XOR,
+ GL_OR,
+ GL_NOR,
+ GL_EQUIV,
+ GL_INVERT,
+ GL_OR_REVERSE,
+ GL_COPY_INVERTED,
+ GL_OR_INVERTED,
+ GL_NAND,
+ GL_SET,
+};
+*/
+
+static const VkCullModeFlags pgraph_cull_face_vk_map[] = {
+ 0,
+ VK_CULL_MODE_FRONT_BIT,
+ VK_CULL_MODE_BACK_BIT,
+ VK_CULL_MODE_FRONT_AND_BACK,
+};
+
+static const VkCompareOp pgraph_depth_func_vk_map[] = {
+ VK_COMPARE_OP_NEVER,
+ VK_COMPARE_OP_LESS,
+ VK_COMPARE_OP_EQUAL,
+ VK_COMPARE_OP_LESS_OR_EQUAL,
+ VK_COMPARE_OP_GREATER,
+ VK_COMPARE_OP_NOT_EQUAL,
+ VK_COMPARE_OP_GREATER_OR_EQUAL,
+ VK_COMPARE_OP_ALWAYS,
+};
+
+static const VkCompareOp pgraph_stencil_func_vk_map[] = {
+ VK_COMPARE_OP_NEVER,
+ VK_COMPARE_OP_LESS,
+ VK_COMPARE_OP_EQUAL,
+ VK_COMPARE_OP_LESS_OR_EQUAL,
+ VK_COMPARE_OP_GREATER,
+ VK_COMPARE_OP_NOT_EQUAL,
+ VK_COMPARE_OP_GREATER_OR_EQUAL,
+ VK_COMPARE_OP_ALWAYS,
+};
+
+static const VkStencilOp pgraph_stencil_op_vk_map[] = {
+ 0,
+ VK_STENCIL_OP_KEEP,
+ VK_STENCIL_OP_ZERO,
+ VK_STENCIL_OP_REPLACE,
+ VK_STENCIL_OP_INCREMENT_AND_CLAMP,
+ VK_STENCIL_OP_DECREMENT_AND_CLAMP,
+ VK_STENCIL_OP_INVERT,
+ VK_STENCIL_OP_INCREMENT_AND_WRAP,
+ VK_STENCIL_OP_DECREMENT_AND_WRAP,
+};
+
+static const VkPolygonMode pgraph_polygon_mode_vk_map[] = {
+ [POLY_MODE_FILL] = VK_POLYGON_MODE_FILL,
+ [POLY_MODE_POINT] = VK_POLYGON_MODE_POINT,
+ [POLY_MODE_LINE] = VK_POLYGON_MODE_LINE,
+};
+
+typedef struct VkColorFormatInfo {
+ VkFormat vk_format;
+ VkComponentMapping component_map;
+} VkColorFormatInfo;
+
+static const VkColorFormatInfo kelvin_color_format_vk_map[66] = {
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = {
+ VK_FORMAT_R8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = {
+ VK_FORMAT_R8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = {
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = {
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = {
+ VK_FORMAT_A4R4G4B4_UNORM_PACK16,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = {
+ VK_FORMAT_R5G6B5_UNORM_PACK16,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = {
+ VK_FORMAT_B8G8R8A8_UNORM,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = {
+ VK_FORMAT_B8G8R8A8_UNORM,
+ { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = {
+ VK_FORMAT_B8G8R8A8_UNORM, // Converted
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = {
+ VK_FORMAT_R8G8B8A8_UNORM, // Converted
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = {
+ VK_FORMAT_R8G8B8A8_UNORM, // Converted
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = {
+ VK_FORMAT_R8G8B8A8_UNORM, // Converted
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = {
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = {
+ VK_FORMAT_R5G6B5_UNORM_PACK16,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = {
+ VK_FORMAT_B8G8R8A8_UNORM,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = {
+ VK_FORMAT_R8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = {
+ VK_FORMAT_R8G8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = {
+ VK_FORMAT_R8_UNORM,
+ { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = {
+ VK_FORMAT_R8G8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = {
+ VK_FORMAT_R8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = {
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = {
+ VK_FORMAT_A4R4G4B4_UNORM_PACK16,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = {
+ VK_FORMAT_B8G8R8A8_UNORM,
+ { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = {
+ VK_FORMAT_R8_UNORM,
+ { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = {
+ VK_FORMAT_R8G8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = {
+ VK_FORMAT_R8G8B8_SNORM, // Converted
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = {
+ VK_FORMAT_R8G8_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = {
+ VK_FORMAT_R8G8_UNORM,
+ { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = {
+ VK_FORMAT_R8G8B8A8_UNORM, // Converted
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = {
+ VK_FORMAT_R8G8B8A8_UNORM, // Converted
+ },
+
+ /* Additional information is passed to the pixel shader via the swizzle:
+ * RED: The depth value.
+ * GREEN: 0 for 16-bit, 1 for 24 bit
+ * BLUE: 0 for fixed, 1 for float
+ */
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = {
+ VK_FORMAT_R16_UNORM, // FIXME
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = {
+ // FIXME
+ // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}},
+ VK_FORMAT_R32_UINT,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = {
+ // FIXME
+ // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}},
+ VK_FORMAT_R32_UINT,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = {
+ VK_FORMAT_R16_UNORM, // FIXME
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = {
+ VK_FORMAT_R16_SFLOAT,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO },
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = {
+ VK_FORMAT_R16_UNORM,
+ { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = {
+ VK_FORMAT_R8G8B8A8_UNORM,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = {
+ VK_FORMAT_R8G8B8A8_UNORM,
+ { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = {
+ VK_FORMAT_R8G8B8A8_UNORM,
+ { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = {
+ VK_FORMAT_R8G8B8A8_UNORM,
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = {
+ VK_FORMAT_R8G8B8A8_UNORM,
+ { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R }
+ },
+ [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = {
+ VK_FORMAT_R8G8B8A8_UNORM,
+ { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }
+ },
+};
+
+typedef struct BasicSurfaceFormatInfo {
+ unsigned int bytes_per_pixel;
+} BasicSurfaceFormatInfo;
+
+typedef struct SurfaceFormatInfo {
+ unsigned int host_bytes_per_pixel;
+ VkFormat vk_format;
+ VkImageUsageFlags usage;
+ VkImageAspectFlags aspect;
+} SurfaceFormatInfo;
+
+static const BasicSurfaceFormatInfo kelvin_surface_color_format_map[] = {
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = { 2 },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = { 2 },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = { 4 },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = { 4 },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = { 1 },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = { 2 },
+};
+
+static const SurfaceFormatInfo kelvin_surface_color_format_vk_map[] = {
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
+ {
+ // FIXME: Force alpha to zero
+ 2,
+ VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
+ {
+ 2,
+ VK_FORMAT_R5G6B5_UNORM_PACK16,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
+ {
+ // FIXME: Force alpha to zero
+ 4,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
+ {
+ 4,
+ VK_FORMAT_B8G8R8A8_UNORM,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
+ {
+ // FIXME: Map channel color
+ 1,
+ VK_FORMAT_R8_UNORM,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ },
+ [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
+ {
+ // FIXME: Map channel color
+ 2,
+ VK_FORMAT_R8G8_UNORM,
+ VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_COLOR_BIT,
+ },
+};
+
+static const BasicSurfaceFormatInfo kelvin_surface_zeta_format_map[] = {
+ [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = { 2 },
+ [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = { 4 },
+};
+
+// FIXME: Actually support stored float format
+
+static const SurfaceFormatInfo zeta_d16 = {
+ 2,
+ VK_FORMAT_D16_UNORM,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_DEPTH_BIT,
+};
+
+static const SurfaceFormatInfo zeta_d32_sfloat_s8_uint = {
+ 8,
+ VK_FORMAT_D32_SFLOAT_S8_UINT,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
+};
+
+static const SurfaceFormatInfo zeta_d24_unorm_s8_uint = {
+ 4,
+ VK_FORMAT_D24_UNORM_S8_UINT,
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+ VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
+};
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c
new file mode 100644
index 0000000000..a8cb08c4a2
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/debug.c
@@ -0,0 +1,59 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "renderer.h"
+#include "debug.h"
+
+#ifndef _WIN32
+#include <dlfcn.h>
+#endif
+
+#ifdef CONFIG_RENDERDOC
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include "thirdparty/renderdoc_app.h"
+#endif
+
+int nv2a_vk_dgroup_indent = 0;
+
+void pgraph_vk_debug_init(void)
+{
+#ifdef CONFIG_RENDERDOC
+ nv2a_dbg_renderdoc_init();
+#endif
+}
+
+void pgraph_vk_debug_frame_terminator(void)
+{
+#ifdef CONFIG_RENDERDOC
+ if (nv2a_dbg_renderdoc_available()) {
+ RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api();
+
+ PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state;
+ if (rdoc_api->IsTargetControlConnected()) {
+ if (rdoc_api->IsFrameCapturing()) {
+ rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
+ }
+ if (renderdoc_capture_frames > 0) {
+ rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
+ --renderdoc_capture_frames;
+ }
+ }
+ }
+#endif
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/debug.h b/hw/xbox/nv2a/pgraph/vk/debug.h
new file mode 100644
index 0000000000..62cd63e592
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/debug.h
@@ -0,0 +1,61 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H
+#define HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H
+
+#define DEBUG_VK 0
+
+extern int nv2a_vk_dgroup_indent;
+
+#define NV2A_VK_XDPRINTF(x, fmt, ...) \
+ do { \
+ if (x) { \
+ for (int i = 0; i < nv2a_vk_dgroup_indent; i++) \
+ fprintf(stderr, " "); \
+ fprintf(stderr, fmt "\n", ##__VA_ARGS__); \
+ } \
+ } while (0)
+
+#define NV2A_VK_DPRINTF(fmt, ...) NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__)
+
+#define NV2A_VK_DGROUP_BEGIN(fmt, ...) \
+ do { \
+ NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__); \
+ nv2a_vk_dgroup_indent++; \
+ } while (0)
+
+#define NV2A_VK_DGROUP_END(...) \
+ do { \
+ nv2a_vk_dgroup_indent--; \
+ assert(nv2a_vk_dgroup_indent >= 0); \
+ } while (0)
+
+#define VK_CHECK(x) \
+ do { \
+ VkResult vk_result = (x); \
+ if (vk_result != VK_SUCCESS) { \
+ fprintf(stderr, "vk_result = %d\n", vk_result); \
+ } \
+ assert(vk_result == VK_SUCCESS && "vk check failed"); \
+ } while (0)
+
+void pgraph_vk_debug_frame_terminator(void);
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c
new file mode 100644
index 0000000000..595f119ca2
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/display.c
@@ -0,0 +1,896 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "renderer.h"
+
+// Fragment shader for blitting the rendered surface to the display image.
+// The push-constant block layout here must stay in sync with the uniform
+// names looked up in update_uniforms() ("display_size", "line_offset").
+// The PVIDEO overlay sampling path is present but commented out (see the
+// FIXME in update_uniforms()).
+static const char *display_frag_glsl =
+    "#version 450\n"
+    "layout(binding = 0) uniform sampler2D tex;\n"
+    "layout(binding = 1) uniform sampler2D pvideo_tex;\n"
+    "layout(push_constant, std430) uniform PushConstants {\n"
+    "    bool pvideo_enable;\n"
+    "    vec2 pvideo_in_pos;\n"
+    "    vec4 pvideo_pos;\n"
+    "    vec3 pvideo_scale;\n"
+    "    bool pvideo_color_key_enable;\n"
+    "    vec2 display_size;\n"
+    "    float line_offset;\n"
+    "    vec4 pvideo_color_key;\n"
+    "};\n"
+    "layout(location = 0) out vec4 out_Color;\n"
+    "void main()\n"
+    "{\n"
+    "    vec2 texCoord = gl_FragCoord.xy/display_size;\n"
+    "    texCoord.y = 1 - texCoord.y;\n" // GL compat
+    "    float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
+    "    texCoord.y = 1 + rel*(texCoord.y - 1);"
+    "    out_Color.rgba = texture(tex, texCoord);\n"
+    // "    if (pvideo_enable) {\n"
+    // "        vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
+    // "        vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
+    // "        bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
+    // "                           greaterThan(screenCoord, output_region.zw));\n"
+    // "        if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
+    // "            vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
+    // "            vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
+    // "            in_st.y *= -1.0;\n"
+    // "            out_Color.rgba = texture(pvideo_tex, in_st);\n"
+    // "        }\n"
+    // "    }\n"
+    "}\n";
+
+// Create the descriptor pool backing the single display descriptor set
+// (two combined image samplers: surface + PVIDEO).
+static void create_descriptor_pool(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkDescriptorPoolSize pool_size = {
+        .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+        .descriptorCount = 2,
+    };
+    VkDescriptorPoolCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+        .maxSets = 1,
+        .poolSizeCount = 1,
+        .pPoolSizes = &pool_size,
+    };
+    VK_CHECK(vkCreateDescriptorPool(r->device, &create_info, NULL,
+                                    &r->display.descriptor_pool));
+}
+
+// Destroy the display descriptor pool (frees its sets implicitly).
+static void destroy_descriptor_pool(PGRAPHState *pg)
+{
+    PGRAPHVkDisplayState *disp = &pg->vk_renderer_state->display;
+
+    vkDestroyDescriptorPool(pg->vk_renderer_state->device,
+                            disp->descriptor_pool, NULL);
+    disp->descriptor_pool = VK_NULL_HANDLE;
+}
+
+// Create the set layout: two fragment-stage combined image samplers at
+// bindings 0 (display surface) and 1 (PVIDEO).
+static void create_descriptor_set_layout(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkDescriptorSetLayoutBinding bindings[2];
+    for (int binding = 0; binding < ARRAY_SIZE(bindings); binding++) {
+        bindings[binding] = (VkDescriptorSetLayoutBinding){
+            .binding = binding,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = 1,
+            .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+        };
+    }
+
+    VkDescriptorSetLayoutCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .bindingCount = ARRAY_SIZE(bindings),
+        .pBindings = bindings,
+    };
+    VK_CHECK(vkCreateDescriptorSetLayout(r->device, &create_info, NULL,
+                                         &r->display.descriptor_set_layout));
+}
+
+// Destroy the display descriptor set layout.
+static void destroy_descriptor_set_layout(PGRAPHState *pg)
+{
+    PGRAPHVkDisplayState *disp = &pg->vk_renderer_state->display;
+
+    vkDestroyDescriptorSetLayout(pg->vk_renderer_state->device,
+                                 disp->descriptor_set_layout, NULL);
+    disp->descriptor_set_layout = VK_NULL_HANDLE;
+}
+
+// Allocate the single display descriptor set from the display pool.
+static void create_descriptor_sets(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkDescriptorSetAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+        .descriptorPool = r->display.descriptor_pool,
+        .descriptorSetCount = 1,
+        .pSetLayouts = &r->display.descriptor_set_layout,
+    };
+    VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
+                                      &r->display.descriptor_set));
+}
+
+// Create the single-subpass render pass targeting the RGBA8 display
+// image. Contents are loaded (not cleared) so prior contents persist.
+static void create_render_pass(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkAttachmentDescription attachment = {
+        .format = VK_FORMAT_R8G8B8A8_UNORM,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+        .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+        .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+    };
+    VkAttachmentReference color_reference = {
+        .attachment = 0,
+        .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+    };
+
+    // External dependency ordering prior color writes before this pass.
+    VkSubpassDependency dependency = {
+        .srcSubpass = VK_SUBPASS_EXTERNAL,
+        .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+    };
+
+    VkSubpassDescription subpass = {
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .colorAttachmentCount = 1,
+        .pColorAttachments = &color_reference,
+    };
+
+    VkRenderPassCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+        .attachmentCount = 1,
+        .pAttachments = &attachment,
+        .subpassCount = 1,
+        .pSubpasses = &subpass,
+        .dependencyCount = 1,
+        .pDependencies = &dependency,
+    };
+    VK_CHECK(vkCreateRenderPass(r->device, &create_info, NULL,
+                                &r->display.render_pass));
+}
+
+// Destroy the display render pass.
+static void destroy_render_pass(PGRAPHState *pg)
+{
+    PGRAPHVkDisplayState *disp = &pg->vk_renderer_state->display;
+
+    vkDestroyRenderPass(pg->vk_renderer_state->device, disp->render_pass,
+                        NULL);
+    disp->render_pass = VK_NULL_HANDLE;
+}
+
+// Build the graphics pipeline (and its layout) used to draw the display
+// image: shared quad vertex shader + display_frag_glsl fragment shader,
+// no vertex input bindings, dynamic viewport/scissor, blending disabled.
+// Requires r->display.render_pass and r->display.descriptor_set_layout
+// to already exist.
+static void create_display_pipeline(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    // Compile the fragment shader; its push-constant layout is reused
+    // below to size the pipeline's push-constant range.
+    r->display.display_frag =
+        pgraph_vk_create_shader_module_from_glsl(
+            r, VK_SHADER_STAGE_FRAGMENT_BIT, display_frag_glsl);
+
+    VkPipelineShaderStageCreateInfo shader_stages[] = {
+        (VkPipelineShaderStageCreateInfo){
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .stage = VK_SHADER_STAGE_VERTEX_BIT,
+            .module = r->quad_vert_module->module,
+            .pName = "main",
+        },
+        (VkPipelineShaderStageCreateInfo){
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .module = r->display.display_frag->module,
+            .pName = "main",
+        },
+    };
+
+    // No vertex buffers are bound for this draw.
+    VkPipelineVertexInputStateCreateInfo vertex_input = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+    };
+
+    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+        .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+        .primitiveRestartEnable = VK_FALSE,
+    };
+
+    // Viewport/scissor are dynamic state, set per-draw in render_display().
+    VkPipelineViewportStateCreateInfo viewport_state = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+        .viewportCount = 1,
+        .scissorCount = 1,
+    };
+
+    VkPipelineRasterizationStateCreateInfo rasterizer = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+        .depthClampEnable = VK_FALSE,
+        .rasterizerDiscardEnable = VK_FALSE,
+        .polygonMode = VK_POLYGON_MODE_FILL,
+        .lineWidth = 1.0f,
+        .cullMode = VK_CULL_MODE_BACK_BIT,
+        .frontFace = VK_FRONT_FACE_CLOCKWISE,
+        .depthBiasEnable = VK_FALSE,
+    };
+
+    VkPipelineMultisampleStateCreateInfo multisampling = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+        .sampleShadingEnable = VK_FALSE,
+        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+    };
+
+    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+        .depthTestEnable = VK_FALSE,
+        .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+        .depthBoundsTestEnable = VK_FALSE,
+    };
+
+    VkPipelineColorBlendAttachmentState color_blend_attachment = {
+        .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+                          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+        .blendEnable = VK_FALSE,
+    };
+
+    VkPipelineColorBlendStateCreateInfo color_blending = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+        .logicOpEnable = VK_FALSE,
+        .logicOp = VK_LOGIC_OP_COPY,
+        .attachmentCount = 1,
+        .pAttachments = &color_blend_attachment,
+    };
+
+    VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
+                                        VK_DYNAMIC_STATE_SCISSOR };
+    VkPipelineDynamicStateCreateInfo dynamic_state = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+        .dynamicStateCount = 2,
+        .pDynamicStates = dynamic_states,
+    };
+
+    VkPushConstantRange push_constant_range = {
+        .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+        .offset = 0,
+        .size = r->display.display_frag->push_constants.total_size,
+    };
+
+    VkPipelineLayoutCreateInfo pipeline_layout_info = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .setLayoutCount = 1,
+        .pSetLayouts = &r->display.descriptor_set_layout,
+        .pushConstantRangeCount = 1,
+        .pPushConstantRanges = &push_constant_range,
+    };
+    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
+                                    &r->display.pipeline_layout));
+
+    VkGraphicsPipelineCreateInfo pipeline_info = {
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .stageCount = ARRAY_SIZE(shader_stages),
+        .pStages = shader_stages,
+        .pVertexInputState = &vertex_input,
+        .pInputAssemblyState = &input_assembly,
+        .pViewportState = &viewport_state,
+        .pRasterizationState = &rasterizer,
+        .pMultisampleState = &multisampling,
+        // NOTE(review): depth/stencil state is attached only when a zeta
+        // surface is bound, yet the display render pass declares no depth
+        // attachment -- confirm this condition is intended.
+        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
+        .pColorBlendState = &color_blending,
+        .pDynamicState = &dynamic_state,
+        .layout = r->display.pipeline_layout,
+        .renderPass = r->display.render_pass,
+        .subpass = 0,
+        .basePipelineHandle = VK_NULL_HANDLE,
+    };
+    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
+                                       &pipeline_info, NULL,
+                                       &r->display.pipeline));
+}
+
+/* Destroy the display pipeline and its layout (both created by
+ * create_display_pipeline()). */
+static void destroy_display_pipeline(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    vkDestroyPipeline(r->device, r->display.pipeline, NULL);
+    r->display.pipeline = VK_NULL_HANDLE;
+
+    /* Fix: the pipeline layout was previously never destroyed, leaking a
+     * VkPipelineLayout per renderer lifetime. */
+    vkDestroyPipelineLayout(r->device, r->display.pipeline_layout, NULL);
+    r->display.pipeline_layout = VK_NULL_HANDLE;
+}
+
+// Wrap the current display image view in a framebuffer compatible with
+// the display render pass. Dimensions follow the (scaled) display image.
+static void create_frame_buffer(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *disp = &r->display;
+
+    VkFramebufferCreateInfo framebuffer_info = {
+        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+        .renderPass = disp->render_pass,
+        .attachmentCount = 1,
+        .pAttachments = &disp->image_view,
+        .width = disp->width,
+        .height = disp->height,
+        .layers = 1,
+    };
+    VK_CHECK(vkCreateFramebuffer(r->device, &framebuffer_info, NULL,
+                                 &disp->framebuffer));
+}
+
+/* Destroy the display framebuffer. */
+static void destroy_frame_buffer(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    vkDestroyFramebuffer(r->device, r->display.framebuffer, NULL);
+    /* Fix: reset with VK_NULL_HANDLE (was NULL), matching every other
+     * Vulkan handle reset in this file. */
+    r->display.framebuffer = VK_NULL_HANDLE;
+}
+
+// Tear down the current display image and everything derived from it:
+// framebuffer, the GL-side texture and imported memory object (plus the
+// Win32 handle on Windows), then the Vulkan image view, image, and
+// backing memory. No-op when no image exists.
+static void destroy_current_display_image(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *d = &r->display;
+
+    if (d->image == VK_NULL_HANDLE) {
+        return;
+    }
+
+    destroy_frame_buffer(pg);
+
+#if HAVE_EXTERNAL_MEMORY
+    // Release the GL objects before freeing the Vulkan memory they import.
+    glDeleteTextures(1, &d->gl_texture_id);
+    d->gl_texture_id = 0;
+
+    glDeleteMemoryObjectsEXT(1, &d->gl_memory_obj);
+    d->gl_memory_obj = 0;
+
+#ifdef WIN32
+    CloseHandle(d->handle);
+    d->handle = 0;
+#endif
+#endif
+
+    vkDestroyImageView(r->device, d->image_view, NULL);
+    d->image_view = VK_NULL_HANDLE;
+
+    vkDestroyImage(r->device, d->image, NULL);
+    d->image = VK_NULL_HANDLE;
+
+    vkFreeMemory(r->device, d->memory, NULL);
+    d->memory = VK_NULL_HANDLE;
+
+    // Force the next render_display() to redraw (it skips surfaces whose
+    // draw_time is not newer than this).
+    d->draw_time = 0;
+}
+
+// FIXME: We may need to use two images. One for actually rendering display,
+// and another for GL in the correct tiling mode
+
+/*
+ * (Re)create the display image sized to the given surface (after scaling),
+ * allocate exportable device memory for it, and -- when external-memory
+ * interop is available -- import that memory into GL as a texture.
+ * Finishes by rebuilding the framebuffer around the new image view.
+ */
+static void create_display_image_from_surface(PGRAPHState *pg,
+                                              SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *d = &r->display;
+
+    if (r->display.image != VK_NULL_HANDLE) {
+        destroy_current_display_image(pg);
+    }
+
+    const GLint gl_internal_format = GL_RGBA8;
+    bool use_optimal_tiling = true;
+
+#if HAVE_EXTERNAL_MEMORY
+    GLint num_tiling_types;
+    glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
+                          GL_NUM_TILING_TYPES_EXT, 1, &num_tiling_types);
+    // XXX: Apparently on AMD GL_OPTIMAL_TILING_EXT is reported to be
+    // supported, but doesn't work? On nVidia, GL_LINEAR_TILING_EXT may not
+    // be supported so we must use optimal. Default to optimal unless
+    // linear is explicitly specified...
+    GLint tiling_types[num_tiling_types];
+    glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
+                          GL_TILING_TYPES_EXT, num_tiling_types, tiling_types);
+    for (int i = 0; i < num_tiling_types; i++) {
+        if (tiling_types[i] == GL_LINEAR_TILING_EXT) {
+            use_optimal_tiling = false;
+            break;
+        }
+    }
+#endif
+
+    // Create image
+    VkImageCreateInfo image_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .extent.width = surface->width,
+        .extent.height = surface->height,
+        .extent.depth = 1,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .format = VK_FORMAT_R8G8B8A8_UNORM,
+        .tiling = use_optimal_tiling ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+    };
+    pgraph_apply_scaling_factor(pg, &image_create_info.extent.width,
+                                &image_create_info.extent.height);
+
+    /* Fix: the external-memory handle type must match the one used for
+     * VkExportMemoryAllocateInfo below; previously the FD handle type was
+     * used unconditionally, which is wrong on Windows. */
+    VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
+        .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+        .handleTypes =
+#ifdef WIN32
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+#else
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+#endif
+    };
+    image_create_info.pNext = &external_memory_image_create_info;
+
+    VK_CHECK(vkCreateImage(r->device, &image_create_info, NULL, &d->image));
+
+    // Allocate and bind image memory
+    VkMemoryRequirements memory_requirements;
+    vkGetImageMemoryRequirements(r->device, d->image, &memory_requirements);
+
+    VkMemoryAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .allocationSize = memory_requirements.size,
+        .memoryTypeIndex =
+            pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits,
+                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
+    };
+
+    VkExportMemoryAllocateInfo export_memory_alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
+        .handleTypes =
+#ifdef WIN32
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+#else
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT
+#endif
+            ,
+    };
+    alloc_info.pNext = &export_memory_alloc_info;
+
+    VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &d->memory));
+
+    /* Fix: check the result; previously the return value was ignored. */
+    VK_CHECK(vkBindImageMemory(r->device, d->image, d->memory, 0));
+
+    // Create Image View
+    VkImageViewCreateInfo image_view_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .image = d->image,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = image_create_info.format,
+        .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .subresourceRange.levelCount = 1,
+        .subresourceRange.layerCount = 1,
+    };
+    VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
+                               &d->image_view));
+
+#if HAVE_EXTERNAL_MEMORY
+
+#ifdef WIN32
+
+    VkMemoryGetWin32HandleInfoKHR handle_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
+        .memory = d->memory,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+    };
+    VK_CHECK(vkGetMemoryWin32HandleKHR(r->device, &handle_info, &d->handle));
+
+    glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
+    glImportMemoryWin32HandleEXT(d->gl_memory_obj, memory_requirements.size, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, d->handle);
+    assert(glGetError() == GL_NO_ERROR);
+
+#else
+
+    VkMemoryGetFdInfoKHR fd_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
+        .memory = d->memory,
+        .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+    };
+    VK_CHECK(vkGetMemoryFdKHR(r->device, &fd_info, &d->fd));
+
+    glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
+    glImportMemoryFdEXT(d->gl_memory_obj, memory_requirements.size,
+                        GL_HANDLE_TYPE_OPAQUE_FD_EXT, d->fd);
+    assert(glIsMemoryObjectEXT(d->gl_memory_obj));
+    assert(glGetError() == GL_NO_ERROR);
+
+#endif // WIN32
+
+    glGenTextures(1, &d->gl_texture_id);
+    glBindTexture(GL_TEXTURE_2D, d->gl_texture_id);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT,
+                    use_optimal_tiling ? GL_OPTIMAL_TILING_EXT :
+                                         GL_LINEAR_TILING_EXT);
+    glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, gl_internal_format,
+                         image_create_info.extent.width,
+                         image_create_info.extent.height, d->gl_memory_obj, 0);
+    assert(glGetError() == GL_NO_ERROR);
+
+#endif // HAVE_EXTERNAL_MEMORY
+
+    d->width = image_create_info.extent.width;
+    d->height = image_create_info.extent.height;
+
+    create_frame_buffer(pg);
+}
+
+// Point the display descriptor set at the surface to present (binding 0)
+// and the PVIDEO slot (binding 1, currently the dummy texture -- FIXME).
+static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *disp = &r->display;
+
+    VkDescriptorImageInfo image_infos[] = {
+        {
+            .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+            .imageView = surface->image_view,
+            .sampler = disp->sampler,
+        },
+        {
+            .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+            .imageView = r->dummy_texture.image_view,
+            .sampler = r->dummy_texture.sampler,
+        },
+    };
+
+    VkWriteDescriptorSet descriptor_writes[ARRAY_SIZE(image_infos)];
+    for (int i = 0; i < ARRAY_SIZE(descriptor_writes); i++) {
+        descriptor_writes[i] = (VkWriteDescriptorSet){
+            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+            .dstSet = disp->descriptor_set,
+            .dstBinding = i,
+            .dstArrayElement = 0,
+            .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+            .descriptorCount = 1,
+            .pImageInfo = &image_infos[i],
+        };
+    }
+
+    vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
+                           descriptor_writes, 0, NULL);
+}
+
+// Fill the display fragment shader's push constants ("display_size",
+// "line_offset") from the current VGA state. The PVIDEO overlay uniform
+// setup is retained under #if 0 from the GL renderer and still needs to
+// be ported (see FIXME below).
+static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface)
+{
+    NV2AState *d = container_of(pg, NV2AState, pgraph);
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    unsigned int width, height;
+    uint32_t pline_offset, pstart_addr, pline_compare;
+    // NOTE(review): width/height are unsigned but get_resolution() takes
+    // int* -- relies on compatible representations; confirm.
+    d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
+    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+    // NOTE(review): confirm pline_offset cannot be zero here (division).
+    int line_offset = surface->pitch / pline_offset;
+
+    /* Adjust viewport height for interlaced mode, used only in 1080i */
+    if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
+        height *= 2;
+    }
+
+    pgraph_apply_scaling_factor(pg, &width, &height);
+
+    ShaderUniformLayout *l = &r->display.display_frag->push_constants;
+    int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache
+    int line_offset_loc = uniform_index(l, "line_offset");
+    uniform2f(l, display_size_loc, width, height);
+    uniform1f(l, line_offset_loc, line_offset);
+
+#if 0 // FIXME: PVIDEO overlay
+    // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
+    // Many games seem to pass this value when initializing or tearing down
+    // PVIDEO. On its own, this generally does not result in the overlay being
+    // hidden, however there are certain games (e.g., Ultimate Beach Soccer)
+    // that use an unknown mechanism to hide the overlay without explicitly
+    // stopping it.
+    // Since the value seems to be set to 0xFFFFFFFF only in cases where the
+    // content is not valid, it is probably good enough to treat it as an
+    // implicit stop.
+    bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
+        && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
+    glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_enable_loc, enabled);
+    if (!enabled) {
+        return;
+    }
+
+    hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
+    hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
+    hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
+
+    int in_width =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
+    int in_height =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
+
+    int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_S);
+    int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_T);
+
+    int in_pitch =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
+    int in_color =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
+
+    unsigned int out_width =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
+    unsigned int out_height =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
+
+    float scale_x = 1.0f;
+    float scale_y = 1.0f;
+    unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
+    unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
+    if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
+        scale_x = pvideo_calculate_scale(ds_dx, out_width);
+    }
+    if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
+        scale_y = pvideo_calculate_scale(dt_dy, out_height);
+    }
+
+    // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
+    // in them being capped to the output size, content is not scaled. This is
+    // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
+    // during initialization or teardown.
+    if (in_width > out_width) {
+        in_width = floorf((float)out_width * scale_x + 0.5f);
+    }
+    if (in_height > out_height) {
+        in_height = floorf((float)out_height * scale_y + 0.5f);
+    }
+
+    /* TODO: support other color formats */
+    assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
+
+    unsigned int out_x =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
+    unsigned int out_y =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
+
+    unsigned int color_key_enabled =
+        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
+    glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_enable_loc,
+                 color_key_enabled);
+
+    // TODO: Verify that masking off the top byte is correct.
+    // SeaBlade sets a color key of 0x80000000 but the texture passed into the
+    // shader is cleared to 0 alpha.
+    unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
+    glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_loc,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
+                GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
+
+    assert(offset + in_pitch * in_height <= limit);
+    hwaddr end = base + offset + in_pitch * in_height;
+    assert(end <= memory_region_size(d->vram));
+
+    pgraph_apply_scaling_factor(pg, &out_x, &out_y);
+    pgraph_apply_scaling_factor(pg, &out_width, &out_height);
+
+    // Translate for the GL viewport origin.
+    out_y = MAX(pg->renderer_state->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
+
+    glActiveTexture(GL_TEXTURE0 + 1);
+    glBindTexture(GL_TEXTURE_2D, d->pgraph.renderer_state->disp_rndr.pvideo_tex);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
+        d->vram_ptr + base + offset, in_width, in_height, in_pitch);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
+                 GL_UNSIGNED_BYTE, tex_rgba);
+    g_free(tex_rgba);
+    glUniform1i(d->pgraph.renderer_state->disp_rndr.pvideo_tex_loc, 1);
+    glUniform2f(d->pgraph.renderer_state->disp_rndr.pvideo_in_pos_loc, in_s, in_t);
+    glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_pos_loc,
+                out_x, out_y, out_width, out_height);
+    glUniform3f(d->pgraph.renderer_state->disp_rndr.pvideo_scale_loc,
+                scale_x, scale_y, 1.0f / pg->surface_scale_factor);
+#endif
+}
+
+/*
+ * Draw the given surface into the display image, if the surface is newer
+ * than what was last rendered (tracked via draw_time). If the surface's
+ * draw is still in the open command buffer, flush it first so the image
+ * can be sampled.
+ */
+static void render_display(PGRAPHState *pg, SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *disp = &r->display;
+
+    if (disp->draw_time >= surface->draw_time) {
+        return;
+    }
+
+    if (r->in_command_buffer &&
+        surface->draw_time >= r->command_buffer_start_time) {
+        pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING);
+    }
+
+    update_uniforms(pg, surface);
+    update_descriptor_set(pg, surface);
+
+    VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
+
+    pgraph_vk_transition_image_layout(pg, cmd, surface->image,
+                                      surface->host_fmt.vk_format,
+                                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+    pgraph_vk_transition_image_layout(
+        pg, cmd, disp->image, VK_FORMAT_R8G8B8A8_UNORM,
+        VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+
+    VkRenderPassBeginInfo render_pass_begin_info = {
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+        .renderPass = disp->render_pass,
+        .framebuffer = disp->framebuffer,
+        .renderArea.extent.width = disp->width,
+        .renderArea.extent.height = disp->height,
+    };
+    vkCmdBeginRenderPass(cmd, &render_pass_begin_info,
+                         VK_SUBPASS_CONTENTS_INLINE);
+    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                      disp->pipeline);
+
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
+                            disp->pipeline_layout, 0, 1, &disp->descriptor_set,
+                            0, NULL);
+
+    VkViewport viewport = {
+        .width = disp->width,
+        .height = disp->height,
+        .minDepth = 0.0,
+        .maxDepth = 1.0,
+    };
+    vkCmdSetViewport(cmd, 0, 1, &viewport);
+
+    VkRect2D scissor = {
+        .extent.width = disp->width,
+        .extent.height = disp->height,
+    };
+    vkCmdSetScissor(cmd, 0, 1, &scissor);
+
+    vkCmdPushConstants(cmd, disp->pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
+                       0, disp->display_frag->push_constants.total_size,
+                       disp->display_frag->push_constants.allocation);
+
+    /* Fullscreen pass: single triangle covering the viewport. */
+    vkCmdDraw(cmd, 3, 1, 0, 0);
+
+    vkCmdEndRenderPass(cmd);
+
+#if 0
+    VkImageCopy region = {
+        .srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .srcSubresource.layerCount = 1,
+        .dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+        .dstSubresource.layerCount = 1,
+        .extent.width = surface->width,
+        .extent.height = surface->height,
+        .extent.depth = 1,
+    };
+    pgraph_apply_scaling_factor(pg, &region.extent.width,
+                                &region.extent.height);
+
+    vkCmdCopyImage(cmd, surface->image,
+                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, disp->image,
+                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
+#endif
+
+    pgraph_vk_transition_image_layout(pg, cmd, surface->image,
+                                      surface->host_fmt.vk_format,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
+
+    /* Fix: the display image is created as VK_FORMAT_R8G8B8A8_UNORM (see
+     * create_display_image_from_surface); R8G8B8 was a typo. */
+    pgraph_vk_transition_image_layout(pg, cmd, disp->image,
+                                      VK_FORMAT_R8G8B8A8_UNORM,
+                                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+
+    pgraph_vk_end_single_time_commands(pg, cmd);
+    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_5);
+
+    disp->draw_time = surface->draw_time;
+}
+
+// Create the nearest-neighbor sampler used to read the display surface.
+static void create_surface_sampler(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkSamplerCreateInfo sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .magFilter = VK_FILTER_NEAREST,
+        .minFilter = VK_FILTER_NEAREST,
+        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+        .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+        .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+        .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+        .anisotropyEnable = VK_FALSE,
+        .compareEnable = VK_FALSE,
+        .compareOp = VK_COMPARE_OP_ALWAYS,
+        .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE,
+        .unnormalizedCoordinates = VK_FALSE,
+    };
+    VK_CHECK(vkCreateSampler(r->device, &sampler_info, NULL,
+                             &r->display.sampler));
+}
+
+// Destroy the display surface sampler.
+static void destroy_surface_sampler(PGRAPHState *pg)
+{
+    PGRAPHVkDisplayState *disp = &pg->vk_renderer_state->display;
+
+    vkDestroySampler(pg->vk_renderer_state->device, disp->sampler, NULL);
+    disp->sampler = VK_NULL_HANDLE;
+}
+
+// One-time creation of display presentation resources. Order matters:
+// create_display_pipeline() references the render pass and descriptor
+// set layout created before it. The display image and framebuffer are
+// created lazily in pgraph_vk_render_display().
+void pgraph_vk_init_display(PGRAPHState *pg)
+{
+    create_descriptor_pool(pg);
+    create_descriptor_set_layout(pg);
+    create_descriptor_sets(pg);
+    create_render_pass(pg);
+    create_display_pipeline(pg);
+    create_surface_sampler(pg);
+}
+
+// Tear down display resources created by pgraph_vk_init_display(), plus
+// any live display image. The descriptor set is released implicitly when
+// its pool is destroyed.
+// NOTE(review): verify the pipeline layout and display_frag shader module
+// are released as part of this teardown (e.g. in destroy_display_pipeline)
+// -- they are not freed directly here.
+void pgraph_vk_finalize_display(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    if (r->display.image != VK_NULL_HANDLE) {
+        destroy_current_display_image(pg);
+    }
+
+    destroy_surface_sampler(pg);
+    destroy_display_pipeline(pg);
+    destroy_render_pass(pg);
+    destroy_descriptor_set_layout(pg);
+    destroy_descriptor_pool(pg);
+}
+
+// Locate the color surface currently scanned out by the CRTC and render
+// it into the display image, (re)creating the image when its scaled size
+// no longer matches the surface.
+void pgraph_vk_render_display(PGRAPHState *pg)
+{
+    NV2AState *d = container_of(pg, NV2AState, pgraph);
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    PGRAPHVkDisplayState *disp = &r->display;
+
+    uint32_t pline_offset, pstart_addr, pline_compare;
+    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+
+    SurfaceBinding *surface =
+        pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
+    if (!surface || !surface->color) {
+        return;
+    }
+
+    unsigned int scaled_width = surface->width;
+    unsigned int scaled_height = surface->height;
+    pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);
+
+    bool image_matches = disp->image && disp->width == scaled_width &&
+                         disp->height == scaled_height;
+    if (!image_matches) {
+        create_display_image_from_surface(pg, surface);
+    }
+
+    render_display(pg, surface);
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/draw.c b/hw/xbox/nv2a/pgraph/vk/draw.c
new file mode 100644
index 0000000000..c4f2cd85e0
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/draw.c
@@ -0,0 +1,1916 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/fast-hash.h"
+#include "renderer.h"
+
+/* Begin a NV097_SET_BEGIN_END draw sequence: decide whether the draw can
+ * have any observable effect and update the bound surfaces accordingly.
+ *
+ * Opens a debug group that is closed here on the nop-draw early-out;
+ * on the non-nop path it is presumably closed when the draw completes —
+ * NOTE(review): confirm pgraph_vk_draw_end emits the matching
+ * NV2A_VK_DGROUP_END. The previous NV2A_VK_DPRINTF left the
+ * NV2A_VK_DGROUP_END() below unmatched.
+ */
+void pgraph_vk_draw_begin(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    NV2A_VK_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", d->pgraph.primitive_mode);
+
+    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+    bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+    bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+    bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+    bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+    bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
+    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+    bool stencil_test =
+        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+
+    /* A draw that writes no color channel and tests neither depth nor
+     * stencil has no observable effect. */
+    bool is_nop_draw = !(color_write || depth_test || stencil_test);
+
+    pgraph_vk_surface_update(d, true, true, depth_test || stencil_test);
+
+    if (is_nop_draw) {
+        NV2A_VK_DPRINTF("nop!");
+        NV2A_VK_DGROUP_END();
+        return;
+    }
+}
+
+/* Map the NV2A primitive mode (plus front polygon mode) of the bound
+ * shader state to the Vulkan topology used for pipeline creation.
+ *
+ * QUADS and QUAD_STRIP have no Vulkan equivalent; they are mapped to
+ * adjacency list topologies — presumably so the geometry shader can expand
+ * them into triangles (NOTE(review): confirm against geometry shader
+ * generation). */
+static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    int polygon_mode = r->shader_binding->state.polygon_front_mode;
+    int primitive_mode = r->shader_binding->state.primitive_mode;
+
+    /* Point polygon mode overrides the primitive type entirely. */
+    if (polygon_mode == POLY_MODE_POINT) {
+        return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+    }
+
+    // FIXME: Replace with LUT
+    switch (primitive_mode) {
+    case PRIM_TYPE_POINTS:
+        return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
+    case PRIM_TYPE_LINES:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
+    case PRIM_TYPE_LINE_LOOP:
+        // FIXME: line strips, except that the first and last vertices are also used as a line
+        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
+    case PRIM_TYPE_LINE_STRIP:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
+    case PRIM_TYPE_TRIANGLES:
+        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
+    case PRIM_TYPE_TRIANGLE_STRIP:
+        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
+    case PRIM_TYPE_TRIANGLE_FAN:
+        return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+    case PRIM_TYPE_QUADS:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
+    case PRIM_TYPE_QUAD_STRIP:
+        return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY;
+    case PRIM_TYPE_POLYGON:
+        if (polygon_mode == POLY_MODE_LINE) {
+            return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME
+        } else if (polygon_mode == POLY_MODE_FILL) {
+            return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
+        }
+        assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode");
+        return 0;
+    default:
+        assert(!"Invalid primitive_mode");
+        return 0;
+    }
+}
+
+/* LRU callback: reset a freshly-allocated cache entry to an empty state. */
+static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state)
+{
+    PipelineBinding *binding = container_of(node, PipelineBinding, node);
+
+    binding->pipeline = VK_NULL_HANDLE;
+    binding->layout = VK_NULL_HANDLE;
+    binding->draw_time = 0;
+}
+
+/* LRU callback: destroy the Vulkan objects of an evicted cache entry.
+ * A pipeline that might still be referenced by the in-flight command
+ * buffer must never be evicted, hence the assertion. */
+static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node)
+{
+    PGRAPHVkState *r = container_of(lru, PGRAPHVkState, pipeline_cache);
+    PipelineBinding *snode = container_of(node, PipelineBinding, node);
+
+    assert((!r->in_command_buffer ||
+            snode->draw_time < r->command_buffer_start_time) &&
+           "Pipeline evicted while in use!");
+
+    vkDestroyPipeline(r->device, snode->pipeline, NULL);
+    snode->pipeline = VK_NULL_HANDLE;
+
+    vkDestroyPipelineLayout(r->device, snode->layout, NULL);
+    snode->layout = VK_NULL_HANDLE;
+
+    /* Previously an unconditional fprintf(stderr, ...) debug leftover;
+     * route through the renderer's debug logging instead. */
+    NV2A_VK_DPRINTF("Destroyed evicted pipeline");
+}
+
+/* LRU callback: return true when the node's key differs from *key
+ * (i.e. this is NOT the entry being looked up). */
+static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key)
+{
+    PipelineBinding *binding = container_of(node, PipelineBinding, node);
+
+    return memcmp(&binding->key, key, sizeof(PipelineKey)) != 0;
+}
+
+/* Set up the two pipeline caches: the Vulkan host pipeline cache (backing
+ * vkCreateGraphicsPipelines) and the renderer's LRU of baked
+ * PipelineBinding entries. */
+static void init_pipeline_cache(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkPipelineCacheCreateInfo cache_info = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
+        .flags = 0,
+        .initialDataSize = 0,
+        .pInitialData = NULL,
+        .pNext = NULL,
+    };
+    VK_CHECK(vkCreatePipelineCache(r->device, &cache_info, NULL,
+                                   &r->vk_pipeline_cache));
+
+    const size_t pipeline_cache_size = 2048;
+    lru_init(&r->pipeline_cache);
+
+    /* Install the callbacks before any nodes are added so the cache is
+     * never observable in a half-initialized state. */
+    r->pipeline_cache.init_node = pipeline_cache_entry_init;
+    r->pipeline_cache.compare_nodes = pipeline_cache_entry_compare;
+    r->pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict;
+
+    /* g_malloc_n aborts on OOM, so the assert is belt-and-braces. */
+    r->pipeline_cache_entries =
+        g_malloc_n(pipeline_cache_size, sizeof(PipelineBinding));
+    assert(r->pipeline_cache_entries != NULL);
+    for (size_t i = 0; i < pipeline_cache_size; i++) {
+        lru_add_free(&r->pipeline_cache, &r->pipeline_cache_entries[i].node);
+    }
+}
+
+/* Tear down both pipeline caches created by init_pipeline_cache(). */
+static void finalize_pipeline_cache(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    /* Evicts every entry, destroying its pipeline and layout through the
+     * post_node_evict callback. */
+    lru_flush(&r->pipeline_cache);
+
+    g_free(r->pipeline_cache_entries);
+    r->pipeline_cache_entries = NULL;
+
+    vkDestroyPipelineCache(r->device, r->vk_pipeline_cache, NULL);
+}
+
+/* Fullscreen-triangle vertex shader: expands gl_VertexIndex 0..2 into one
+ * triangle that covers the whole viewport, so no vertex buffer is needed. */
+static char const *const quad_glsl =
+    "#version 450\n"
+    "void main()\n"
+    "{\n"
+    "    float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n"
+    "    float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n"
+    "    gl_Position = vec4(x, y, 0, 1);\n"
+    "}\n";
+
+/* Fragment shader emitting constant white; the actual clear color is
+ * applied through constant-color blend factors (see create_clear_pipeline). */
+static char const *const solid_frag_glsl =
+    "#version 450\n"
+    "layout(location = 0) out vec4 fragColor;\n"
+    "void main()\n"
+    "{\n"
+    "    fragColor = vec4(1.0);"
+    "}\n";
+
+/* Compile the helper shader modules used by the clear pipeline
+ * (see create_clear_pipeline). */
+static void init_clear_shaders(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    r->quad_vert_module = pgraph_vk_create_shader_module_from_glsl(
+        r, VK_SHADER_STAGE_VERTEX_BIT, quad_glsl);
+    r->solid_frag_module = pgraph_vk_create_shader_module_from_glsl(
+        r, VK_SHADER_STAGE_FRAGMENT_BIT, solid_frag_glsl);
+}
+
+/* One-time pipeline subsystem setup: pipeline caches, clear shaders, and
+ * the semaphore/fence used to track command buffer submission. The fence
+ * is created unsignaled (no VK_FENCE_CREATE_SIGNALED_BIT). */
+void pgraph_vk_init_pipelines(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    init_pipeline_cache(pg);
+    init_clear_shaders(pg);
+
+    VkSemaphoreCreateInfo semaphore_info = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO
+    };
+    VK_CHECK(vkCreateSemaphore(r->device, &semaphore_info, NULL,
+                               &r->command_buffer_semaphore));
+
+    VkFenceCreateInfo fence_info = {
+        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+    };
+    VK_CHECK(
+        vkCreateFence(r->device, &fence_info, NULL, &r->command_buffer_fence));
+}
+
+/* Tear down everything created by pgraph_vk_init_pipelines(). */
+void pgraph_vk_finalize_pipelines(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    finalize_pipeline_cache(pg);
+
+    /* Release the command-buffer synchronization primitives. */
+    vkDestroyFence(r->device, r->command_buffer_fence, NULL);
+    vkDestroySemaphore(r->device, r->command_buffer_semaphore, NULL);
+}
+
+/* Record the attachment formats of the currently bound surfaces;
+ * VK_FORMAT_UNDEFINED marks an absent attachment. */
+static void init_render_pass_state(PGRAPHState *pg, RenderPassState *state)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    if (r->color_binding) {
+        state->color_format = r->color_binding->host_fmt.vk_format;
+    } else {
+        state->color_format = VK_FORMAT_UNDEFINED;
+    }
+
+    if (r->zeta_binding) {
+        state->zeta_format = r->zeta_binding->host_fmt.vk_format;
+    } else {
+        state->zeta_format = VK_FORMAT_UNDEFINED;
+    }
+}
+
+/* Build a single-subpass VkRenderPass matching the attachment formats in
+ * *state. Color and depth/stencil attachments are both optional; all use
+ * LOAD/STORE ops so existing surface contents are preserved across passes.
+ *
+ * NOTE(review): the external subpass dependency sets only dstAccessMask;
+ * srcAccessMask is left zero — confirm prior writes are made available by
+ * other barriers. */
+static VkRenderPass create_render_pass(PGRAPHState *pg, RenderPassState *state)
+{
+    NV2A_VK_DPRINTF("Creating render pass");
+
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkAttachmentDescription attachments[2];
+    int num_attachments = 0;
+
+    bool color = state->color_format != VK_FORMAT_UNDEFINED;
+    bool zeta = state->zeta_format != VK_FORMAT_UNDEFINED;
+
+    /* Optional color attachment, kept in COLOR_ATTACHMENT_OPTIMAL layout. */
+    VkAttachmentReference color_reference;
+    if (color) {
+        attachments[num_attachments] = (VkAttachmentDescription){
+            .format = state->color_format,
+            .samples = VK_SAMPLE_COUNT_1_BIT,
+            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+            .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+            .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+        };
+        color_reference = (VkAttachmentReference){
+            num_attachments, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
+        };
+        num_attachments++;
+    }
+
+    /* Optional depth/stencil attachment; stencil contents are preserved. */
+    VkAttachmentReference depth_reference;
+    if (zeta) {
+        attachments[num_attachments] = (VkAttachmentDescription){
+            .format = state->zeta_format,
+            .samples = VK_SAMPLE_COUNT_1_BIT,
+            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+            .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+            .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+            .initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+            .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+        };
+        depth_reference = (VkAttachmentReference){
+            num_attachments, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+        };
+        num_attachments++;
+    }
+
+    /* Serialize against work from prior render passes on each attachment
+     * type that is present. */
+    VkSubpassDependency dependency = {
+        .srcSubpass = VK_SUBPASS_EXTERNAL,
+    };
+
+    if (color) {
+        dependency.srcStageMask |=
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+        dependency.dstStageMask |=
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+        dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+    }
+
+    if (zeta) {
+        dependency.srcStageMask |=
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
+        dependency.dstStageMask |=
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
+        dependency.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+    }
+
+    VkSubpassDescription subpass = {
+        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+        .colorAttachmentCount = color ? 1 : 0,
+        .pColorAttachments = color ? &color_reference : NULL,
+        .pDepthStencilAttachment = zeta ? &depth_reference : NULL,
+    };
+
+    VkRenderPassCreateInfo renderpass_create_info = {
+        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+        .attachmentCount = num_attachments,
+        .pAttachments = attachments,
+        .subpassCount = 1,
+        .pSubpasses = &subpass,
+        .dependencyCount = 1,
+        .pDependencies = &dependency,
+    };
+    VkRenderPass render_pass;
+    VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL,
+                                &render_pass));
+    return render_pass;
+}
+
+/* Append a freshly-created render pass for *state to the cache, growing
+ * the backing array geometrically (starting at 256 entries) as needed. */
+static VkRenderPass add_new_render_pass(PGRAPHState *pg, RenderPassState *state)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    if (r->render_passes_index == r->render_passes_capacity) {
+        int old_capacity = r->render_passes_capacity;
+        r->render_passes_capacity = old_capacity ? old_capacity * 2 : 256;
+        r->render_passes = g_realloc_n(r->render_passes,
+                                       r->render_passes_capacity,
+                                       sizeof(*r->render_passes));
+    }
+
+    RenderPass *entry = &r->render_passes[r->render_passes_index++];
+    entry->state = *state;
+    entry->render_pass = create_render_pass(pg, state);
+    return entry->render_pass;
+}
+
+/* Look up the cached render pass matching *state, creating it on miss. */
+static VkRenderPass get_render_pass(PGRAPHState *pg, RenderPassState *state)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    /* Linear scan; the set of distinct attachment-format pairs is small. */
+    for (int i = 0; i < r->render_passes_index; i++) {
+        RenderPass *entry = &r->render_passes[i];
+        if (memcmp(&entry->state, state, sizeof(*state)) == 0) {
+            return entry->render_pass;
+        }
+    }
+
+    return add_new_render_pass(pg, state);
+}
+
+/* Create a framebuffer for the currently bound color and/or zeta surfaces
+ * and append it to the per-command-buffer framebuffer pool. When the pool
+ * is exhausted, the renderer is flushed first to retire old entries. */
+static void create_frame_buffer(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    NV2A_VK_DPRINTF("Creating framebuffer");
+
+    assert(r->color_binding || r->zeta_binding);
+
+    if (r->framebuffer_index >= ARRAY_SIZE(r->framebuffers)) {
+        pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
+    }
+
+    VkImageView attachments[2];
+    int attachment_count = 0;
+
+    if (r->color_binding) {
+        attachments[attachment_count++] = r->color_binding->image_view;
+    }
+    if (r->zeta_binding) {
+        attachments[attachment_count++] = r->zeta_binding->image_view;
+    }
+
+    /* GNU ?: — use the color binding's dimensions, falling back to zeta.
+     * assumes color and zeta surfaces share dimensions when both are
+     * bound — TODO confirm. */
+    SurfaceBinding *binding = r->color_binding ? : r->zeta_binding;
+
+    VkFramebufferCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+        .renderPass = r->render_pass,
+        .attachmentCount = attachment_count,
+        .pAttachments = attachments,
+        .width = binding->width,
+        .height = binding->height,
+        .layers = 1,
+    };
+    /* Framebuffer dimensions are in scaled (host) pixels. */
+    pgraph_apply_scaling_factor(pg, &create_info.width, &create_info.height);
+    VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL,
+                                 &r->framebuffers[r->framebuffer_index++]));
+}
+
+/* Destroy every framebuffer created since the last reset and clear the
+ * stale handles. */
+static void destroy_framebuffers(PGRAPHState *pg)
+{
+    NV2A_VK_DPRINTF("Destroying framebuffer");
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    for (int i = 0; i < r->framebuffer_index; ++i) {
+        VkFramebuffer fb = r->framebuffers[i];
+        r->framebuffers[i] = VK_NULL_HANDLE;
+        vkDestroyFramebuffer(r->device, fb, NULL);
+    }
+
+    r->framebuffer_index = 0;
+}
+
+/* Build (or fetch from the LRU cache) the pipeline that implements
+ * NV097_CLEAR_SURFACE by rasterizing a fullscreen quad over the bound
+ * surfaces. Color is written through constant-color blending, depth via
+ * unconditional depth writes, stencil via REPLACE ops.
+ *
+ * NOTE(review): the blend constants that carry the clear color are only
+ * dynamic for partial-channel clears; stencil reference is fixed at 0xff —
+ * confirm how the clear values are supplied at draw time. */
+static void create_clear_pipeline(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    NV2A_VK_DGROUP_BEGIN("Creating clear pipeline");
+
+    /* Cache key: clear flag + attachment formats + raw clear parameter
+     * (reusing regs[0] to make the parameter part of the hash). */
+    PipelineKey key;
+    memset(&key, 0, sizeof(key));
+    key.clear = true;
+    init_render_pass_state(pg, &key.render_pass_state);
+
+    key.regs[0] = r->clear_parameter;
+
+    uint64_t hash = fast_hash((void *)&key, sizeof(key));
+    LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
+    PipelineBinding *snode = container_of(node, PipelineBinding, node);
+
+    if (snode->pipeline != VK_NULL_HANDLE) {
+        NV2A_VK_DPRINTF("Cache hit");
+        r->pipeline_binding_changed = r->pipeline_binding != snode;
+        r->pipeline_binding = snode;
+        NV2A_VK_DGROUP_END();
+        return;
+    }
+
+    NV2A_VK_DPRINTF("Cache miss");
+    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);
+    memcpy(&snode->key, &key, sizeof(key));
+
+    /* A clear touching some but not all color channels needs dynamic blend
+     * constants (see dynamic_state below). */
+    bool clear_any_color_channels =
+        r->clear_parameter & NV097_CLEAR_SURFACE_COLOR;
+    bool clear_all_color_channels =
+        (r->clear_parameter & NV097_CLEAR_SURFACE_COLOR) ==
+        (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | NV097_CLEAR_SURFACE_B |
+         NV097_CLEAR_SURFACE_A);
+    bool partial_color_clear =
+        clear_any_color_channels && !clear_all_color_channels;
+
+    /* Fullscreen quad VS + constant-white FS (see quad_glsl/solid_frag_glsl). */
+    VkPipelineShaderStageCreateInfo shader_stages[] = {
+        (VkPipelineShaderStageCreateInfo){
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .stage = VK_SHADER_STAGE_VERTEX_BIT,
+            .module = r->quad_vert_module->module,
+            .pName = "main",
+        },
+        (VkPipelineShaderStageCreateInfo){
+            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+            .module = r->solid_frag_module->module,
+            .pName = "main",
+        },
+    };
+
+    /* No vertex buffers: positions are derived from gl_VertexIndex. */
+    VkPipelineVertexInputStateCreateInfo vertex_input = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+    };
+
+    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+        .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
+        .primitiveRestartEnable = VK_FALSE,
+    };
+
+    VkPipelineViewportStateCreateInfo viewport_state = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+        .viewportCount = 1,
+        .scissorCount = 1,
+    };
+
+    VkPipelineRasterizationStateCreateInfo rasterizer = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+        .depthClampEnable = VK_FALSE,
+        .rasterizerDiscardEnable = VK_FALSE,
+        .polygonMode = VK_POLYGON_MODE_FILL,
+        .lineWidth = 1.0f,
+        .cullMode = VK_CULL_MODE_BACK_BIT,
+        .frontFace = VK_FRONT_FACE_CLOCKWISE,
+        .depthBiasEnable = VK_FALSE,
+    };
+
+    VkPipelineMultisampleStateCreateInfo multisampling = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+        .sampleShadingEnable = VK_FALSE,
+        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+    };
+
+    /* Depth always passes; writes happen only when the Z clear bit is set. */
+    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+        .depthTestEnable = VK_TRUE,
+        .depthWriteEnable =
+            (r->clear_parameter & NV097_CLEAR_SURFACE_Z) ? VK_TRUE : VK_FALSE,
+        .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+        .depthBoundsTestEnable = VK_FALSE,
+    };
+
+    /* Stencil clear: unconditionally REPLACE with the reference value. */
+    if (r->clear_parameter & NV097_CLEAR_SURFACE_STENCIL) {
+        depth_stencil.stencilTestEnable = VK_TRUE;
+        depth_stencil.front.failOp = VK_STENCIL_OP_REPLACE;
+        depth_stencil.front.passOp = VK_STENCIL_OP_REPLACE;
+        depth_stencil.front.depthFailOp = VK_STENCIL_OP_REPLACE;
+        depth_stencil.front.compareOp = VK_COMPARE_OP_ALWAYS;
+        depth_stencil.front.compareMask = 0xff;
+        depth_stencil.front.writeMask = 0xff;
+        depth_stencil.front.reference = 0xff;
+        depth_stencil.back = depth_stencil.front;
+    }
+
+    /* Write only the color channels selected by the clear parameter. */
+    VkColorComponentFlags write_mask = 0;
+    if (r->clear_parameter & NV097_CLEAR_SURFACE_R)
+        write_mask |= VK_COLOR_COMPONENT_R_BIT;
+    if (r->clear_parameter & NV097_CLEAR_SURFACE_G)
+        write_mask |= VK_COLOR_COMPONENT_G_BIT;
+    if (r->clear_parameter & NV097_CLEAR_SURFACE_B)
+        write_mask |= VK_COLOR_COMPONENT_B_BIT;
+    if (r->clear_parameter & NV097_CLEAR_SURFACE_A)
+        write_mask |= VK_COLOR_COMPONENT_A_BIT;
+
+    /* out = 1.0 * blend-constant + 0 * dst, i.e. the constant IS the
+     * written color. */
+    VkPipelineColorBlendAttachmentState color_blend_attachment = {
+        .colorWriteMask = write_mask,
+        .blendEnable = VK_TRUE,
+        .colorBlendOp = VK_BLEND_OP_ADD,
+        .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
+        .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR,
+        .alphaBlendOp = VK_BLEND_OP_ADD,
+        .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
+        .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA,
+    };
+
+    VkPipelineColorBlendStateCreateInfo color_blending = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+        .logicOpEnable = VK_FALSE,
+        .logicOp = VK_LOGIC_OP_COPY,
+        .attachmentCount = r->color_binding ? 1 : 0,
+        .pAttachments = r->color_binding ? &color_blend_attachment : NULL,
+    };
+
+    /* BLEND_CONSTANTS becomes dynamic only for partial-channel clears. */
+    VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
+                                        VK_DYNAMIC_STATE_SCISSOR,
+                                        VK_DYNAMIC_STATE_BLEND_CONSTANTS };
+    VkPipelineDynamicStateCreateInfo dynamic_state = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+        .dynamicStateCount = partial_color_clear ? 3 : 2,
+        .pDynamicStates = dynamic_states,
+    };
+
+    /* The clear shaders use no descriptors or push constants. */
+    VkPipelineLayoutCreateInfo pipeline_layout_info = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+    };
+
+    VkPipelineLayout layout;
+    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
+                                    &layout));
+
+    VkGraphicsPipelineCreateInfo pipeline_info = {
+        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+        .stageCount = ARRAY_SIZE(shader_stages),
+        .pStages = shader_stages,
+        .pVertexInputState = &vertex_input,
+        .pInputAssemblyState = &input_assembly,
+        .pViewportState = &viewport_state,
+        .pRasterizationState = &rasterizer,
+        .pMultisampleState = &multisampling,
+        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
+        .pColorBlendState = &color_blending,
+        .pDynamicState = &dynamic_state,
+        .layout = layout,
+        .renderPass = get_render_pass(pg, &key.render_pass_state),
+        .subpass = 0,
+        .basePipelineHandle = VK_NULL_HANDLE,
+    };
+
+    VkPipeline pipeline;
+    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
+                                       &pipeline_info, NULL, &pipeline));
+
+    snode->pipeline = pipeline;
+    snode->layout = layout;
+    snode->render_pass = pipeline_info.renderPass;
+    snode->draw_time = pg->draw_time;
+
+    r->pipeline_binding = snode;
+    r->pipeline_binding_changed = true;
+
+    NV2A_VK_DGROUP_END();
+}
+
+/* Return true when the current surface formats differ from those baked
+ * into the bound pipeline's key. */
+static bool check_render_pass_dirty(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    assert(r->pipeline_binding);
+
+    RenderPassState current;
+    init_render_pass_state(pg, &current);
+
+    const RenderPassState *bound = &r->pipeline_binding->key.render_pass_state;
+    return memcmp(&current, bound, sizeof(current)) != 0;
+}
+
+// Quickly check for any state changes that would require more analysis
+/* Returns true when the bound pipeline may no longer match current PGRAPH
+ * state: shader/texture bindings changed, attachment formats changed, any
+ * pipeline-relevant register is dirty, or the active vertex input
+ * descriptions differ from those baked into the pipeline key.
+ * This register list must stay in sync with init_pipeline_key(). */
+static bool check_pipeline_dirty(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    assert(r->pipeline_binding);
+
+    if (r->shader_bindings_changed || r->texture_bindings_changed ||
+        check_render_pass_dirty(pg)) {
+        return true;
+    }
+
+    const unsigned int regs[] = {
+        NV_PGRAPH_BLEND,        NV_PGRAPH_BLENDCOLOR,
+        NV_PGRAPH_CONTROL_0,    NV_PGRAPH_CONTROL_1,
+        NV_PGRAPH_CONTROL_2,    NV_PGRAPH_CONTROL_3,
+        NV_PGRAPH_SETUPRASTER,  NV_PGRAPH_ZCOMPRESSOCCLUDE,
+        NV_PGRAPH_ZOFFSETBIAS,  NV_PGRAPH_ZOFFSETFACTOR,
+    };
+
+    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
+        if (pgraph_is_reg_dirty(pg, regs[i])) {
+            return true;
+        }
+    }
+
+    // FIXME: Use dirty bits instead
+    /* Only the active prefix of each description array is compared. */
+    if (memcmp(r->vertex_attribute_descriptions,
+               r->pipeline_binding->key.attribute_descriptions,
+               r->num_active_vertex_attribute_descriptions *
+                   sizeof(r->vertex_attribute_descriptions[0])) ||
+        memcmp(r->vertex_binding_descriptions,
+               r->pipeline_binding->key.binding_descriptions,
+               r->num_active_vertex_binding_descriptions *
+                   sizeof(r->vertex_binding_descriptions[0]))) {
+        return true;
+    }
+
+    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_NOTDIRTY);
+
+    return false;
+}
+
+/* Populate *key with everything that selects a pipeline: attachment
+ * formats, shader state, active vertex input descriptions and the
+ * pipeline-relevant register values. The leading memset also zeroes
+ * struct padding so the key can be hashed/memcmp'd reliably.
+ * This register list must stay in sync with check_pipeline_dirty(). */
+static void init_pipeline_key(PGRAPHState *pg, PipelineKey *key)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    memset(key, 0, sizeof(*key));
+    init_render_pass_state(pg, &key->render_pass_state);
+    memcpy(&key->shader_state, &r->shader_binding->state, sizeof(ShaderState));
+    memcpy(key->binding_descriptions, r->vertex_binding_descriptions,
+           sizeof(key->binding_descriptions[0]) *
+               r->num_active_vertex_binding_descriptions);
+    memcpy(key->attribute_descriptions, r->vertex_attribute_descriptions,
+           sizeof(key->attribute_descriptions[0]) *
+               r->num_active_vertex_attribute_descriptions);
+
+    // FIXME: Register masking
+    // FIXME: Use more dynamic state updates
+    const int regs[] = {
+        NV_PGRAPH_BLEND,        NV_PGRAPH_BLENDCOLOR,
+        NV_PGRAPH_CONTROL_0,    NV_PGRAPH_CONTROL_1,
+        NV_PGRAPH_CONTROL_2,    NV_PGRAPH_CONTROL_3,
+        NV_PGRAPH_SETUPRASTER,  NV_PGRAPH_ZCOMPRESSOCCLUDE,
+        NV_PGRAPH_ZOFFSETBIAS,  NV_PGRAPH_ZOFFSETFACTOR,
+    };
+    assert(ARRAY_SIZE(regs) == ARRAY_SIZE(key->regs));
+    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
+        key->regs[i] = pgraph_reg_r(pg, regs[i]);
+    }
+}
+
+static void create_pipeline(PGRAPHState *pg)
+{
+ NV2A_VK_DGROUP_BEGIN("Creating pipeline");
+
+ NV2AState *d = container_of(pg, NV2AState, pgraph);
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ pgraph_vk_bind_textures(d);
+ pgraph_vk_bind_shaders(pg);
+
+ // FIXME: If nothing was dirty, don't even try creating the key or hashing.
+ // Just use the same pipeline.
+ if (r->pipeline_binding && !check_pipeline_dirty(pg)) {
+ return;
+ }
+
+ PipelineKey key;
+ init_pipeline_key(pg, &key);
+ uint64_t hash = fast_hash((void *)&key, sizeof(key));
+
+ static uint64_t last_hash;
+ if (hash == last_hash) {
+ nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_MERGE);
+ }
+ last_hash = hash;
+
+ LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
+ PipelineBinding *snode = container_of(node, PipelineBinding, node);
+ if (snode->pipeline != VK_NULL_HANDLE) {
+ NV2A_VK_DPRINTF("Cache hit");
+ r->pipeline_binding_changed = r->pipeline_binding != snode;
+ r->pipeline_binding = snode;
+ NV2A_VK_DGROUP_END();
+ return;
+ }
+
+ NV2A_VK_DPRINTF("Cache miss");
+ nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);
+
+ memcpy(&snode->key, &key, sizeof(key));
+
+ uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+ bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+ bool depth_write = !!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE);
+ bool stencil_test =
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+
+ int num_active_shader_stages = 0;
+ VkPipelineShaderStageCreateInfo shader_stages[3];
+
+ if (r->shader_binding->geometry) {
+ shader_stages[num_active_shader_stages++] =
+ (VkPipelineShaderStageCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
+ .module = r->shader_binding->geometry->module,
+ .pName = "main",
+ };
+ }
+ shader_stages[num_active_shader_stages++] =
+ (VkPipelineShaderStageCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = r->shader_binding->vertex->module,
+ .pName = "main",
+ };
+ shader_stages[num_active_shader_stages++] =
+ (VkPipelineShaderStageCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = r->shader_binding->fragment->module,
+ .pName = "main",
+ };
+
+ VkPipelineVertexInputStateCreateInfo vertex_input = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount =
+ r->num_active_vertex_binding_descriptions,
+ .pVertexBindingDescriptions = r->vertex_binding_descriptions,
+ .vertexAttributeDescriptionCount =
+ r->num_active_vertex_attribute_descriptions,
+ .pVertexAttributeDescriptions = r->vertex_attribute_descriptions,
+ };
+
+ VkPipelineInputAssemblyStateCreateInfo input_assembly = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = get_primitive_topology(pg),
+ .primitiveRestartEnable = VK_FALSE,
+ };
+
+ VkPipelineViewportStateCreateInfo viewport_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ };
+
+
+ void *rasterizer_next_struct = NULL;
+
+ VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state;
+
+ if (r->provoking_vertex_extension_enabled) {
+ VkProvokingVertexModeEXT provoking_mode =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
+ NV_PGRAPH_CONTROL_3_SHADEMODE) ==
+ NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ?
+ VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT :
+ VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
+
+ provoking_state =
+ (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){
+ .sType =
+ VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
+ .provokingVertexMode = provoking_mode,
+ };
+ rasterizer_next_struct = &provoking_state;
+ } else {
+ // FIXME: Handle in shader?
+ }
+
+ VkPipelineRasterizationStateCreateInfo rasterizer = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = VK_FALSE,
+ .rasterizerDiscardEnable = VK_FALSE,
+ .polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state
+ .polygon_front_mode],
+ .lineWidth = 1.0f,
+ .frontFace = (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ NV_PGRAPH_SETUPRASTER_FRONTFACE) ?
+ VK_FRONT_FACE_COUNTER_CLOCKWISE :
+ VK_FRONT_FACE_CLOCKWISE,
+ .depthBiasEnable = VK_FALSE,
+ .pNext = rasterizer_next_struct,
+ };
+
+ if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_CULLENABLE) {
+ uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
+ NV_PGRAPH_SETUPRASTER_CULLCTRL);
+ assert(cull_face < ARRAY_SIZE(pgraph_cull_face_vk_map));
+ rasterizer.cullMode = pgraph_cull_face_vk_map[cull_face];
+ } else {
+ rasterizer.cullMode = VK_CULL_MODE_NONE;
+ }
+
+ VkPipelineMultisampleStateCreateInfo multisampling = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .sampleShadingEnable = VK_FALSE,
+ .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
+ };
+
+ VkPipelineDepthStencilStateCreateInfo depth_stencil = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthWriteEnable = depth_write ? VK_TRUE : VK_FALSE,
+ };
+
+ if (depth_test) {
+ depth_stencil.depthTestEnable = VK_TRUE;
+ uint32_t depth_func =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ZFUNC);
+ assert(depth_func < ARRAY_SIZE(pgraph_depth_func_vk_map));
+ depth_stencil.depthCompareOp = pgraph_depth_func_vk_map[depth_func];
+ }
+
+ if (stencil_test) {
+ depth_stencil.stencilTestEnable = VK_TRUE;
+ uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
+ uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_REF);
+ uint32_t mask_read = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
+ uint32_t mask_write = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
+ NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE);
+ uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+ NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
+ uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+ NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
+ uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
+ NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
+
+ assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_vk_map));
+ assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
+ assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
+ assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_vk_map));
+
+ depth_stencil.front.failOp = pgraph_stencil_op_vk_map[op_fail];
+ depth_stencil.front.passOp = pgraph_stencil_op_vk_map[op_zpass];
+ depth_stencil.front.depthFailOp = pgraph_stencil_op_vk_map[op_zfail];
+ depth_stencil.front.compareOp =
+ pgraph_stencil_func_vk_map[stencil_func];
+ depth_stencil.front.compareMask = mask_read;
+ depth_stencil.front.writeMask = mask_write;
+ depth_stencil.front.reference = stencil_ref;
+ depth_stencil.back = depth_stencil.front;
+ }
+
+ VkColorComponentFlags write_mask = 0;
+ if (control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE)
+ write_mask |= VK_COLOR_COMPONENT_R_BIT;
+ if (control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE)
+ write_mask |= VK_COLOR_COMPONENT_G_BIT;
+ if (control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE)
+ write_mask |= VK_COLOR_COMPONENT_B_BIT;
+ if (control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE)
+ write_mask |= VK_COLOR_COMPONENT_A_BIT;
+
+ VkPipelineColorBlendAttachmentState color_blend_attachment = {
+ .colorWriteMask = write_mask,
+ };
+
+ float blend_constant[4] = { 0, 0, 0, 0 };
+
+ if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
+ color_blend_attachment.blendEnable = VK_TRUE;
+
+ uint32_t sfactor =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_SFACTOR);
+ uint32_t dfactor =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_DFACTOR);
+ assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
+ assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
+ color_blend_attachment.srcColorBlendFactor =
+ pgraph_blend_factor_vk_map[sfactor];
+ color_blend_attachment.dstColorBlendFactor =
+ pgraph_blend_factor_vk_map[dfactor];
+ color_blend_attachment.srcAlphaBlendFactor =
+ pgraph_blend_factor_vk_map[sfactor];
+ color_blend_attachment.dstAlphaBlendFactor =
+ pgraph_blend_factor_vk_map[dfactor];
+
+ uint32_t equation =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_EQN);
+ assert(equation < ARRAY_SIZE(pgraph_blend_equation_vk_map));
+
+ color_blend_attachment.colorBlendOp =
+ pgraph_blend_equation_vk_map[equation];
+ color_blend_attachment.alphaBlendOp =
+ pgraph_blend_equation_vk_map[equation];
+
+ uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
+ pgraph_argb_pack32_to_rgba_float(blend_color, blend_constant);
+ }
+
+ VkPipelineColorBlendStateCreateInfo color_blending = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = VK_FALSE,
+ .logicOp = VK_LOGIC_OP_COPY,
+ .attachmentCount = r->color_binding ? 1 : 0,
+ .pAttachments = r->color_binding ? &color_blend_attachment : NULL,
+ .blendConstants[0] = blend_constant[0],
+ .blendConstants[1] = blend_constant[1],
+ .blendConstants[2] = blend_constant[2],
+ .blendConstants[3] = blend_constant[3],
+ };
+
+ VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR };
+
+ VkPipelineDynamicStateCreateInfo dynamic_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = ARRAY_SIZE(dynamic_states),
+ .pDynamicStates = dynamic_states,
+ };
+
+ // /* Clipping */
+ // glEnable(GL_CLIP_DISTANCE0);
+ // glEnable(GL_CLIP_DISTANCE1);
+
+ // /* Polygon offset */
+ // /* FIXME: GL implementation-specific, maybe do this in VS? */
+ // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ // NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE)
+ // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ // NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE)
+ // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ // NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)
+ if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
+ NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
+ NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
+ uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
+ float zfactor = *(float *)&zfactor_u32;
+ uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
+ float zbias = *(float *)&zbias_u32;
+ rasterizer.depthBiasEnable = VK_TRUE;
+ rasterizer.depthBiasSlopeFactor = zfactor;
+ rasterizer.depthBiasConstantFactor = zbias;
+ }
+
+ if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
+ NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
+ NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
+ rasterizer.depthClampEnable = VK_TRUE;
+ }
+
+ // FIXME: Dither
+ // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
+ // NV_PGRAPH_CONTROL_0_DITHERENABLE))
+ // FIXME: point size
+ // FIXME: Edge Antialiasing
+ // bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING),
+ // NV_PGRAPH_ANTIALIASING_ENABLE);
+ // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ // NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
+ // FIXME: VK_EXT_line_rasterization
+ // }
+
+ // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
+ // NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
+ // FIXME: No direct analog. Just do it with MSAA.
+ // }
+
+ VkPushConstantRange push_constant_range = {
+ .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+ .offset = 0,
+ // FIXME: Minimize push constants
+ .size = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float),
+ };
+ VkPipelineLayoutCreateInfo pipeline_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &r->descriptor_set_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &push_constant_range,
+ };
+ VkPipelineLayout layout;
+ VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
+ &layout));
+
+ VkGraphicsPipelineCreateInfo pipeline_create_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = num_active_shader_stages,
+ .pStages = shader_stages,
+ .pVertexInputState = &vertex_input,
+ .pInputAssemblyState = &input_assembly,
+ .pViewportState = &viewport_state,
+ .pRasterizationState = &rasterizer,
+ .pMultisampleState = &multisampling,
+ .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
+ .pColorBlendState = &color_blending,
+ .pDynamicState = &dynamic_state,
+ .layout = layout,
+ .renderPass = get_render_pass(pg, &key.render_pass_state),
+ .subpass = 0,
+ .basePipelineHandle = VK_NULL_HANDLE,
+ };
+ VkPipeline pipeline;
+ VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
+ &pipeline_create_info, NULL, &pipeline));
+
+ snode->pipeline = pipeline;
+ snode->layout = layout;
+ snode->render_pass = pipeline_create_info.renderPass;
+ snode->draw_time = pg->draw_time;
+
+ r->pipeline_binding = snode;
+ r->pipeline_binding_changed = true;
+
+ NV2A_VK_DGROUP_END();
+}
+
+// Upload the per-attribute inline (constant) vertex values to the vertex
+// shader via push constants. The whole attribute table is pushed on every
+// call, using the currently bound pipeline's layout.
+static void push_vertex_attrib_values(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    // FIXME: Do partial updates
+
+    float values[NV2A_VERTEXSHADER_ATTRIBUTES * 4];
+    float *dst = values;
+
+    for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        for (int j = 0; j < 4; j++) {
+            *dst++ = pg->vertex_attributes[i].inline_value[j];
+        }
+    }
+
+    vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout,
+                       VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(values),
+                       values);
+}
+
+// Bind the most recently written descriptor set (descriptor_set_index - 1)
+// to the graphics bind point of the current command buffer, using the
+// current pipeline binding's layout. Requires at least one descriptor set
+// to have been populated for this command buffer.
+static void bind_descriptor_sets(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+ assert(r->descriptor_set_index >= 1);
+
+ vkCmdBindDescriptorSets(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ r->pipeline_binding->layout, 0, 1,
+ &r->descriptor_sets[r->descriptor_set_index - 1], 0,
+ NULL);
+}
+
+// Start a precise occlusion query in the next free query-pool slot.
+// Must be called outside a render pass, with no other query in flight.
+// The slot is reset before being begun; results are collected after the
+// command buffer is submitted and completes.
+static void begin_query(PGRAPHVkState *r)
+{
+ assert(r->in_command_buffer);
+ assert(!r->in_render_pass);
+ assert(!r->query_in_flight);
+
+ // FIXME: We should handle this. Make the query buffer bigger, but at least
+ // flush current queries.
+ assert(r->num_queries_in_flight < r->max_queries_in_flight);
+
+ nv2a_profile_inc_counter(NV2A_PROF_QUERY);
+ vkCmdResetQueryPool(r->command_buffer, r->query_pool,
+ r->num_queries_in_flight, 1);
+ vkCmdBeginQuery(r->command_buffer, r->query_pool, r->num_queries_in_flight,
+ VK_QUERY_CONTROL_PRECISE_BIT);
+
+ r->query_in_flight = true;
+ r->new_query_needed = false;
+ r->num_queries_in_flight++;
+}
+
+// End the occlusion query currently in flight (slot num_queries_in_flight-1).
+// Must be called outside a render pass. The slot remains counted in
+// num_queries_in_flight until the results are harvested.
+static void end_query(PGRAPHVkState *r)
+{
+ assert(r->in_command_buffer);
+ assert(!r->in_render_pass);
+ assert(r->query_in_flight);
+
+ vkCmdEndQuery(r->command_buffer, r->query_pool,
+ r->num_queries_in_flight - 1);
+ r->query_in_flight = false;
+}
+
+// Record a copy of all pending bytes from the staging buffer at index_src
+// into the device-local buffer at index_dst, then reset the staging write
+// offset. No-op when the staging buffer holds no data. The copy must be
+// recorded outside of a render pass.
+//
+// Fix: the address-of operator in the vkCmdCopyBuffer call had been
+// corrupted into the '©' character (HTML-entity mangling of "&copy");
+// restored to &copy_region.
+static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd,
+                                int index_src, int index_dst)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    StorageBuffer *b_src = &r->storage_buffers[index_src];
+    StorageBuffer *b_dst = &r->storage_buffers[index_dst];
+
+    if (!b_src->buffer_offset) {
+        return;
+    }
+
+    VkBufferCopy copy_region = { .size = b_src->buffer_offset };
+    vkCmdCopyBuffer(cmd, b_src->buffer, b_dst->buffer, 1, &copy_region);
+
+    b_src->buffer_offset = 0;
+}
+
+// Begin a render pass covering the full (scaled) surface, targeting the
+// most recently created framebuffer. Attachments are loaded rather than
+// cleared (clearValueCount == 0); surface clears are done separately via
+// vkCmdClearAttachments.
+static void begin_render_pass(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(r->in_command_buffer);
+ assert(!r->in_render_pass);
+
+ nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_RENDERPASSES);
+
+ // Render area spans the whole surface at the configured scaling factor.
+ unsigned int vp_width = pg->surface_binding_dim.width,
+ vp_height = pg->surface_binding_dim.height;
+ pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
+
+ assert(r->framebuffer_index > 0);
+
+ VkRenderPassBeginInfo render_pass_begin_info = {
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = r->render_pass,
+ .framebuffer = r->framebuffers[r->framebuffer_index - 1],
+ .renderArea.extent.width = vp_width,
+ .renderArea.extent.height = vp_height,
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ };
+ vkCmdBeginRenderPass(r->command_buffer, &render_pass_begin_info,
+ VK_SUBPASS_CONTENTS_INLINE);
+ r->in_render_pass = true;
+
+}
+
+// Close the currently open render pass, if any. Safe to call when no
+// render pass is active.
+static void end_render_pass(PGRAPHVkState *r)
+{
+    if (!r->in_render_pass) {
+        return;
+    }
+
+    vkCmdEndRenderPass(r->command_buffer);
+    r->in_render_pass = false;
+}
+
+// Maps each FinishReason to the profiler counter that is incremented when
+// that reason forces a command buffer submission in pgraph_vk_finish().
+// NOTE(review): declared without 'static', so this table has external
+// linkage — confirm whether any other translation unit references it;
+// otherwise it should be file-local.
+const enum NV2A_PROF_COUNTERS_ENUM finish_reason_to_counter_enum[] = {
+ [VK_FINISH_REASON_VERTEX_BUFFER_DIRTY] = NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY,
+ [VK_FINISH_REASON_SURFACE_CREATE] = NV2A_PROF_FINISH_SURFACE_CREATE,
+ [VK_FINISH_REASON_SURFACE_DOWN] = NV2A_PROF_FINISH_SURFACE_DOWN,
+ [VK_FINISH_REASON_NEED_BUFFER_SPACE] = NV2A_PROF_FINISH_NEED_BUFFER_SPACE,
+ [VK_FINISH_REASON_FRAMEBUFFER_DIRTY] = NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY,
+ [VK_FINISH_REASON_PRESENTING] = NV2A_PROF_FINISH_PRESENTING,
+ [VK_FINISH_REASON_FLIP_STALL] = NV2A_PROF_FINISH_FLIP_STALL,
+ [VK_FINISH_REASON_FLUSH] = NV2A_PROF_FINISH_FLUSH,
+};
+
+// Submit all recorded work and wait for the GPU to complete it.
+//
+// If a command buffer is open: close any render pass / query, end the main
+// command buffer, record staging-to-device copies into the aux command
+// buffer, then submit aux first and main second (chained by a semaphore so
+// buffer uploads land before the draws that consume them). Blocks on a
+// fence until both complete, then resets per-command-buffer state. Finally,
+// pending occlusion query reports are processed (even if there was nothing
+// to submit).
+void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(!r->in_draw);
+
+ if (r->in_command_buffer) {
+
+ nv2a_profile_inc_counter(finish_reason_to_counter_enum[finish_reason]);
+
+ if (r->in_render_pass) {
+ end_render_pass(r);
+ }
+ if (r->query_in_flight) {
+ end_query(r);
+ }
+ VK_CHECK(vkEndCommandBuffer(r->command_buffer));
+
+ // Flush staged buffer data to the device-local copies before the main
+ // command buffer (which reads them) runs.
+ VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); // FIXME: Cleanup
+ sync_staging_buffer(pg, cmd, BUFFER_INDEX_STAGING, BUFFER_INDEX);
+ sync_staging_buffer(pg, cmd, BUFFER_VERTEX_INLINE_STAGING,
+ BUFFER_VERTEX_INLINE);
+ sync_staging_buffer(pg, cmd, BUFFER_UNIFORM_STAGING, BUFFER_UNIFORM);
+ bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);
+ VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer));
+ r->in_aux_command_buffer = false;
+
+ // Submit aux (uploads) then main (draws); the semaphore orders them.
+ VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ VkSubmitInfo submit_infos[] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &r->aux_command_buffer,
+ .signalSemaphoreCount = 1,
+ .pSignalSemaphores = &r->command_buffer_semaphore,
+ },
+ {
+
+ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ .commandBufferCount = 1,
+ .pCommandBuffers = &r->command_buffer,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &r->command_buffer_semaphore,
+ .pWaitDstStageMask = &wait_stage,
+ }
+ };
+ nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT);
+ vkResetFences(r->device, 1, &r->command_buffer_fence);
+ VK_CHECK(vkQueueSubmit(r->queue, ARRAY_SIZE(submit_infos), submit_infos,
+ r->command_buffer_fence));
+ r->submit_count += 1;
+
+ // Periodically check memory budget
+ const int max_num_submits_before_budget_update = 5;
+ if (finish_reason == VK_FINISH_REASON_FLIP_STALL ||
+ (r->submit_count - r->allocator_last_submit_index) >
+ max_num_submits_before_budget_update) {
+
+ // VMA queries budget via vmaSetCurrentFrameIndex
+ vmaSetCurrentFrameIndex(r->allocator, r->submit_count);
+ r->allocator_last_submit_index = r->submit_count;
+
+ pgraph_vk_check_memory_budget(pg);
+ }
+
+ // Full CPU/GPU sync: wait for the submission to finish.
+ VK_CHECK(vkWaitForFences(r->device, 1, &r->command_buffer_fence,
+ VK_TRUE, UINT64_MAX));
+
+ r->descriptor_set_index = 0;
+ r->in_command_buffer = false;
+ destroy_framebuffers(pg);
+ }
+
+ NV2AState *d = container_of(pg, NV2AState, pgraph);
+ pgraph_vk_process_pending_reports_internal(d);
+}
+
+// Begin recording the main command buffer (one-time-submit). Records the
+// current draw time as the command buffer's start time. Must not already
+// be recording.
+void pgraph_vk_begin_command_buffer(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+ assert(!r->in_command_buffer);
+
+ VkCommandBufferBeginInfo command_buffer_begin_info = {
+ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+ };
+ VK_CHECK(vkBeginCommandBuffer(r->command_buffer,
+ &command_buffer_begin_info));
+ r->command_buffer_start_time = pg->draw_time;
+ r->in_command_buffer = true;
+}
+
+// FIXME: Refactor below
+
+// Lazily begin command buffer recording: a no-op if the main command
+// buffer is already open.
+void pgraph_vk_ensure_command_buffer(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    if (r->in_command_buffer) {
+        return;
+    }
+
+    pgraph_vk_begin_command_buffer(pg);
+}
+
+// Leave any open render pass and end any in-flight occlusion query, so the
+// command buffer is in a state where non-draw commands (copies, barriers,
+// query begin/reset) may be recorded.
+void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ end_render_pass(r);
+ if (r->query_in_flight) {
+ end_query(r);
+ }
+}
+
+// Prepare the main command buffer for non-draw work (copies, barriers):
+// ensure recording has begun and that no render pass is open, then hand
+// the buffer to the caller.
+VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg)
+{
+    pgraph_vk_ensure_command_buffer(pg);
+    pgraph_vk_ensure_not_in_render_pass(pg);
+
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    return r->command_buffer;
+}
+
+// Counterpart to pgraph_vk_begin_nondraw_commands(). Non-draw work is
+// recorded directly into the main command buffer, so nothing needs to be
+// flushed here; just sanity-check the caller returned the same buffer.
+void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd)
+{
+    PGRAPHVkState *state = pg->vk_renderer_state;
+    assert(cmd == state->command_buffer);
+}
+
+// FIXME: Add more metrics for determining command buffer 'fullness' and
+// conservatively flush. Unfortunately there doesn't appear to be a good
+// way to determine what the actual maximum capacity of a command buffer
+// is, but we are obviously not supposed to endlessly append to one command
+// buffer. For other reasons though (like descriptor set amount, surface
+// changes, etc) we do flush often.
+
+// Pre-draw bookkeeping that may record non-draw commands: select or create
+// the pipeline for the coming draw/clear, refresh the render pass and
+// framebuffer if they changed, and update descriptor sets. Ordering here
+// matters: the render pass must be left before the framebuffer/render-pass
+// state is swapped.
+static void begin_pre_draw(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(r->color_binding || r->zeta_binding);
+ assert(!r->color_binding || r->color_binding->initialized);
+ assert(!r->zeta_binding || r->zeta_binding->initialized);
+
+ // Clears use a dedicated pipeline (for masked color clears).
+ if (pg->clearing) {
+ create_clear_pipeline(pg);
+ } else {
+ create_pipeline(pg);
+ }
+
+ bool render_pass_dirty = r->pipeline_binding->render_pass != r->render_pass;
+
+ if (r->framebuffer_dirty || render_pass_dirty) {
+ pgraph_vk_ensure_not_in_render_pass(pg);
+ }
+ if (render_pass_dirty) {
+ r->render_pass = r->pipeline_binding->render_pass;
+ }
+ if (r->framebuffer_dirty) {
+ create_frame_buffer(pg);
+ r->framebuffer_dirty = false;
+ }
+ if (!pg->clearing) {
+ pgraph_vk_update_descriptor_sets(pg);
+ }
+ // No framebuffer yet for this command buffer — create one now.
+ if (r->framebuffer_index == 0) {
+ create_frame_buffer(pg);
+ }
+
+ pgraph_vk_ensure_command_buffer(pg);
+}
+
+// Transition into a drawing state: manage the occlusion query lifecycle,
+// (re)enter the render pass, and — when the pipeline binding changed or a
+// new render pass began — bind the pipeline and set the dynamic viewport
+// and scissor. For normal draws (not clears) also binds descriptor sets
+// and pushes inline vertex attribute values.
+static void begin_draw(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(r->in_command_buffer);
+
+ // Visibility testing
+ // Queries can only begin/end outside a render pass, so the pass is
+ // ended here whenever the query state must change.
+ if (pg->zpass_pixel_count_enable) {
+ if (r->new_query_needed && r->query_in_flight) {
+ end_render_pass(r);
+ end_query(r);
+ }
+ if (!r->query_in_flight) {
+ end_render_pass(r);
+ begin_query(r);
+ }
+ } else if (r->query_in_flight) {
+ end_render_pass(r);
+ end_query(r);
+ }
+
+ bool must_bind_pipeline = r->pipeline_binding_changed;
+
+ if (!r->in_render_pass) {
+ begin_render_pass(pg);
+ must_bind_pipeline = true;
+ }
+
+ if (must_bind_pipeline) {
+ nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_BIND);
+ vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ r->pipeline_binding->pipeline);
+ r->pipeline_binding->draw_time = pg->draw_time;
+
+ // Dynamic viewport: full surface at the configured scaling factor.
+ unsigned int vp_width = pg->surface_binding_dim.width,
+ vp_height = pg->surface_binding_dim.height;
+ pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
+
+ VkViewport viewport = {
+ .width = vp_width,
+ .height = vp_height,
+ .minDepth = 0.0,
+ .maxDepth = 1.0,
+ };
+ vkCmdSetViewport(r->command_buffer, 0, 1, &viewport);
+
+ /* Surface clip */
+ /* FIXME: Consider moving to PSH w/ window clip */
+ unsigned int xmin = pg->surface_shape.clip_x -
+ pg->surface_binding_dim.clip_x,
+ ymin = pg->surface_shape.clip_y -
+ pg->surface_binding_dim.clip_y;
+
+ unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
+ ymax = ymin + pg->surface_shape.clip_height - 1;
+
+ unsigned int scissor_width = xmax - xmin + 1,
+ scissor_height = ymax - ymin + 1;
+
+ // Scissor rectangle is in scaled, anti-aliased surface coordinates.
+ pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
+ pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
+
+ pgraph_apply_scaling_factor(pg, &xmin, &ymin);
+ pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
+
+ VkRect2D scissor = {
+ .offset.x = xmin,
+ .offset.y = ymin,
+ .extent.width = scissor_width,
+ .extent.height = scissor_height,
+ };
+ vkCmdSetScissor(r->command_buffer, 0, 1, &scissor);
+ }
+
+ // Clears do not sample resources and use push-constant-free pipelines.
+ if (!pg->clearing) {
+ bind_descriptor_sets(pg);
+ push_vertex_attrib_values(pg);
+ }
+
+ r->in_draw = true;
+}
+
+// Leave the drawing state entered by begin_draw() and clear the dirty
+// register map (the render pass itself stays open for subsequent draws).
+static void end_draw(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(r->in_command_buffer);
+ assert(r->in_render_pass);
+
+ r->in_draw = false;
+
+ // FIXME: We could clear less
+ pgraph_clear_dirty_reg_map(pg);
+}
+
+// Handle NV097_SET_BEGIN_END(end): skip draws that can have no visible
+// effect (all color channels masked and depth/stencil testing disabled),
+// otherwise flush the accumulated draw data and update surface/draw-time
+// bookkeeping.
+void pgraph_vk_draw_end(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
+ bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+ bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+ bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+ bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+ bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
+ bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
+ bool stencil_test =
+ pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+ bool is_nop_draw = !(color_write || depth_test || stencil_test);
+
+ if (is_nop_draw) {
+ // FIXME: Check PGRAPH register 0x880.
+ // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
+ // check that will raise an exception in the case that a draw should
+ // modify the color and/or zeta buffer but the target(s) are masked
+ // off. This check only seems to trigger during the fragment
+ // processing, it is legal to attempt a draw that is entirely
+ // clipped regardless of 0x880. See xemu#635 for context.
+ NV2A_VK_DPRINTF("nop draw!\n");
+ return;
+ }
+
+ pgraph_vk_flush_draw(d);
+
+ // Stamp the bound surfaces with the new draw time so surface eviction
+ // and download logic can tell which surfaces were touched.
+ pg->draw_time++;
+ if (r->color_binding && pgraph_color_write_enabled(pg)) {
+ r->color_binding->draw_time = pg->draw_time;
+ }
+ if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
+ r->zeta_binding->draw_time = pg->draw_time;
+ }
+
+ pgraph_vk_set_surface_dirty(pg, color_write, depth_test || stencil_test);
+}
+
+// qsort comparator: orders MemorySyncRequirement records by ascending
+// start address.
+static int compare_memory_sync_requirement_by_addr(const void *p1,
+                                                   const void *p2)
+{
+    const MemorySyncRequirement *a = p1;
+    const MemorySyncRequirement *b = p2;
+
+    // Standard three-way compare without subtraction (addresses are
+    // unsigned and may exceed int range).
+    return (a->addr > b->addr) - (a->addr < b->addr);
+}
+
+// Ensure the device vertex RAM buffer reflects guest memory for all regions
+// the pending draw will read. Each requested region is page-aligned, the
+// requests are sorted and merged (overlapping/adjacent ranges coalesce),
+// and only regions whose pages are marked dirty are re-uploaded.
+// NOTE(review): 'merged' holds at most 16 entries; assumes
+// num_vertex_ram_buffer_syncs never exceeds that — confirm against the
+// producer of vertex_ram_buffer_syncs.
+static void sync_vertex_ram_buffer(PGRAPHState *pg)
+{
+ NV2AState *d = container_of(pg, NV2AState, pgraph);
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ if (r->num_vertex_ram_buffer_syncs == 0) {
+ return;
+ }
+
+ // Align sync requirements to page boundaries
+ NV2A_VK_DGROUP_BEGIN("Sync vertex RAM buffer");
+
+ for (int i = 0; i < r->num_vertex_ram_buffer_syncs; i++) {
+ NV2A_VK_DPRINTF("Need to sync vertex memory @%" HWADDR_PRIx
+ ", %" HWADDR_PRIx " bytes",
+ r->vertex_ram_buffer_syncs[i].addr,
+ r->vertex_ram_buffer_syncs[i].size);
+
+ hwaddr start_addr =
+ r->vertex_ram_buffer_syncs[i].addr & TARGET_PAGE_MASK;
+ hwaddr end_addr = r->vertex_ram_buffer_syncs[i].addr +
+ r->vertex_ram_buffer_syncs[i].size;
+ end_addr = ROUND_UP(end_addr, TARGET_PAGE_SIZE);
+
+ NV2A_VK_DPRINTF("- %d: %08" HWADDR_PRIx " %zd bytes"
+ " -> %08" HWADDR_PRIx " %zd bytes", i,
+ r->vertex_ram_buffer_syncs[i].addr,
+ r->vertex_ram_buffer_syncs[i].size, start_addr,
+ end_addr - start_addr);
+
+ r->vertex_ram_buffer_syncs[i].addr = start_addr;
+ r->vertex_ram_buffer_syncs[i].size = end_addr - start_addr;
+ }
+
+ // Sort the requirements in increasing order of addresses
+ qsort(r->vertex_ram_buffer_syncs, r->num_vertex_ram_buffer_syncs,
+ sizeof(MemorySyncRequirement),
+ compare_memory_sync_requirement_by_addr);
+
+ // Merge overlapping/adjacent requests to minimize number of tests
+ MemorySyncRequirement merged[16];
+ int num_syncs = 1;
+
+ merged[0] = r->vertex_ram_buffer_syncs[0];
+
+ for (int i = 1; i < r->num_vertex_ram_buffer_syncs; i++) {
+ MemorySyncRequirement *p = &merged[num_syncs - 1];
+ MemorySyncRequirement *t = &r->vertex_ram_buffer_syncs[i];
+
+ if (t->addr <= (p->addr + p->size)) {
+ // Merge with previous
+ hwaddr p_end_addr = p->addr + p->size;
+ hwaddr t_end_addr = t->addr + t->size;
+ hwaddr new_end_addr = MAX(p_end_addr, t_end_addr);
+ p->size = new_end_addr - p->addr;
+ } else {
+ merged[num_syncs++] = *t;
+ }
+ }
+
+ if (num_syncs < r->num_vertex_ram_buffer_syncs) {
+ NV2A_VK_DPRINTF("Reduced to %d sync checks", num_syncs);
+ }
+
+ // Upload only regions whose guest pages were written since last upload.
+ for (int i = 0; i < num_syncs; i++) {
+ hwaddr addr = merged[i].addr;
+ VkDeviceSize size = merged[i].size;
+
+ NV2A_VK_DPRINTF("- %d: %08"HWADDR_PRIx" %zd bytes", i, addr, size);
+
+ if (memory_region_test_and_clear_dirty(d->vram, addr, size,
+ DIRTY_MEMORY_NV2A)) {
+ NV2A_VK_DPRINTF("Memory dirty. Synchronizing...");
+ pgraph_vk_update_vertex_ram_buffer(pg, addr, d->vram_ptr + addr,
+ size);
+ }
+ }
+
+ r->num_vertex_ram_buffer_syncs = 0;
+
+ NV2A_VK_DGROUP_END();
+}
+
+// Handle NV097_CLEAR_SURFACE: clear the requested portion of the bound
+// color and/or zeta surfaces. Full-channel color clears and depth/stencil
+// clears use vkCmdClearAttachments; partial color-channel clears instead
+// draw a full-rect triangle through the dedicated clear pipeline with the
+// clear color supplied via blend constants.
+void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ nv2a_profile_inc_counter(NV2A_PROF_CLEAR);
+
+ bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
+ bool write_zeta =
+ (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
+
+ // FIXME: If doing a full surface clear, mark the surface for full clear
+ // and we can just do the clear as part of the surface load.
+ pgraph_vk_surface_update(d, true, write_color, write_zeta);
+
+ if (!(r->color_binding || r->zeta_binding)) {
+ /* Nothing bound to clear */
+ return;
+ }
+
+ pg->clearing = true;
+ r->clear_parameter = parameter;
+
+ // Clear rectangle in unscaled surface coordinates (inclusive bounds).
+ unsigned int xmin =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN);
+ unsigned int xmax =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX);
+ unsigned int ymin =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN);
+ unsigned int ymax =
+ GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX);
+
+ NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax,
+ ymax, write_color ? " color" : "",
+ write_zeta ? " zeta" : "");
+
+ begin_pre_draw(pg);
+ begin_draw(pg);
+
+ unsigned int scissor_width = xmax - xmin + 1,
+ scissor_height = ymax - ymin + 1;
+
+ pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
+ pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
+
+ pgraph_apply_scaling_factor(pg, &xmin, &ymin);
+ pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
+
+ VkClearRect clear_rect = {
+ .rect = {
+ .offset = { .x = xmin, .y = ymin },
+ .extent = { .width = scissor_width, .height = scissor_height },
+ },
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ };
+
+ int num_attachments = 0;
+ VkClearAttachment attachments[2];
+
+ if (write_color && r->color_binding) {
+ const bool clear_all_color_channels =
+ (parameter & NV097_CLEAR_SURFACE_COLOR) ==
+ (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G |
+ NV097_CLEAR_SURFACE_B | NV097_CLEAR_SURFACE_A);
+
+ if (clear_all_color_channels) {
+ attachments[num_attachments] = (VkClearAttachment){
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = 0,
+ };
+ pgraph_get_clear_color(
+ pg, attachments[num_attachments].clearValue.color.float32);
+ num_attachments++;
+ } else {
+ // Partial channel clear: draw through the clear pipeline with
+ // the clear color passed in via blend constants.
+ float blend_constants[4];
+ pgraph_get_clear_color(pg, blend_constants);
+ vkCmdSetScissor(r->command_buffer, 0, 1, &clear_rect.rect);
+ vkCmdSetBlendConstants(r->command_buffer, blend_constants);
+ vkCmdDraw(r->command_buffer, 3, 1, 0, 0);
+ }
+ }
+
+ if (write_zeta && r->zeta_binding) {
+ int stencil_value = 0;
+ float depth_value = 1.0;
+ pgraph_get_clear_depth_stencil_value(pg, &depth_value, &stencil_value);
+
+ VkImageAspectFlags aspect = 0;
+ if (parameter & NV097_CLEAR_SURFACE_Z)
+ aspect |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ if (parameter & NV097_CLEAR_SURFACE_STENCIL)
+ aspect |= VK_IMAGE_ASPECT_STENCIL_BIT;
+
+ attachments[num_attachments++] = (VkClearAttachment){
+ .aspectMask = aspect,
+ .clearValue.depthStencil.depth = depth_value,
+ .clearValue.depthStencil.stencil = stencil_value,
+ };
+ }
+
+ if (num_attachments) {
+ vkCmdClearAttachments(r->command_buffer, num_attachments, attachments,
+ 1, &clear_rect);
+ }
+ end_draw(pg);
+
+ pg->clearing = false;
+
+ pgraph_vk_set_surface_dirty(pg, write_color, write_zeta);
+
+ NV2A_VK_DGROUP_END();
+}
+
+// Disabled debugging helper: dumps per-vertex attribute data for the
+// current draw-arrays ranges. NOTE(review): references pg, r and i which
+// are not in scope in this function — it will not compile if re-enabled
+// without rework.
+#if 0
+static void pgraph_vk_debug_attrs(NV2AState *d)
+{
+    for (int vertex_idx = 0; vertex_idx < pg->draw_arrays_count[i]; vertex_idx++) {
+        NV2A_VK_DGROUP_BEGIN("Vertex %d+%d", pg->draw_arrays_start[i], vertex_idx);
+        for (int attr_idx = 0; attr_idx < NV2A_VERTEXSHADER_ATTRIBUTES; attr_idx++) {
+            VertexAttribute *attr = &pg->vertex_attributes[attr_idx];
+            if (attr->count) {
+                char *p = (char *)d->vram_ptr + r->attribute_offsets[attr_idx] + (pg->draw_arrays_start[i] + vertex_idx) * attr->stride;
+                NV2A_VK_DGROUP_BEGIN("Attribute %d data at %tx", attr_idx, (ptrdiff_t)(p - (char*)d->vram_ptr));
+                for (int count_idx = 0; count_idx < attr->count; count_idx++) {
+                    switch (attr->format) {
+                    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
+                        NV2A_VK_DPRINTF("[%d] %f", count_idx, *(float*)p);
+                        p += sizeof(float);
+                        break;
+                    default:
+                        assert(0);
+                        break;
+                    }
+                }
+                NV2A_VK_DGROUP_END();
+            }
+        }
+        NV2A_VK_DGROUP_END();
+    }
+}
+#endif
+
+// Bind one backing storage buffer (vertex RAM or inline vertex buffer) for
+// every active vertex binding, at 'offset' plus each attribute's recorded
+// offset. All bindings share the same underlying VkBuffer.
+static void bind_vertex_buffer(PGRAPHState *pg, int buffer_idx,
+ VkDeviceSize offset)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ assert(buffer_idx == BUFFER_VERTEX_RAM ||
+ buffer_idx == BUFFER_VERTEX_INLINE);
+
+ VkBuffer buffers[NV2A_VERTEXSHADER_ATTRIBUTES];
+ VkDeviceSize offsets[NV2A_VERTEXSHADER_ATTRIBUTES];
+
+ for (int i = 0; i < r->num_active_vertex_binding_descriptions; i++) {
+ // The attribute's location selects its per-attribute buffer offset.
+ int attr_idx = r->vertex_attribute_descriptions[i].location;
+ buffers[i] = r->storage_buffers[buffer_idx].buffer;
+ offsets[i] = offset + r->vertex_attribute_offsets[attr_idx];
+ }
+
+ vkCmdBindVertexBuffers(r->command_buffer, 0,
+ r->num_active_vertex_binding_descriptions, buffers,
+ offsets);
+}
+
+// Mark the bound color/zeta surfaces as written by the current draw.
+// The requested dirty flags are masked by the corresponding write-enable
+// state; bound surfaces also get their frame time refreshed and their
+// 'cleared' fast-path flag dropped.
+void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta)
+{
+ NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", color, zeta,
+ pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg));
+
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ /* FIXME: Does this apply to CLEARs too? */
+ color = color && pgraph_color_write_enabled(pg);
+ zeta = zeta && pgraph_zeta_write_enabled(pg);
+ pg->surface_color.draw_dirty |= color;
+ pg->surface_zeta.draw_dirty |= zeta;
+
+ if (r->color_binding) {
+ r->color_binding->draw_dirty |= color;
+ r->color_binding->frame_time = pg->frame_time;
+ r->color_binding->cleared = false;
+ }
+
+ if (r->zeta_binding) {
+ r->zeta_binding->draw_dirty |= zeta;
+ r->zeta_binding->frame_time = pg->frame_time;
+ r->zeta_binding->cleared = false;
+ }
+}
+
+// Make sure the storage buffer at 'index' can accept 'size' more bytes.
+// If it cannot, all pending work is flushed (which resets buffer offsets).
+// Returns true when a flush was required, false when space was available.
+static bool ensure_buffer_space(PGRAPHState *pg, int index, VkDeviceSize size)
+{
+    if (pgraph_vk_buffer_has_space_for(pg, index, size, 1)) {
+        return false;
+    }
+
+    pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
+    return true;
+}
+
+// Flush the accumulated geometry for the current BEGIN/END block. Exactly
+// one of four mutually exclusive submission paths is taken, mirroring the
+// NV2A submission modes: draw arrays (vertex RAM), inline elements
+// (indexed, vertex RAM), inline buffer (per-attribute inline data), or
+// inline array (interleaved inline data). Does nothing if no surface is
+// bound.
+void pgraph_vk_flush_draw(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ if (!(r->color_binding || r->zeta_binding)) {
+ NV2A_VK_DPRINTF("No binding present!!!\n");
+ return;
+ }
+
+ r->num_vertex_ram_buffer_syncs = 0;
+
+ if (pg->draw_arrays_length) {
+ // Draw arrays: vertices sourced directly from guest vertex RAM.
+ NV2A_VK_DGROUP_BEGIN("Draw Arrays");
+ nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);
+
+ assert(pg->inline_elements_length == 0);
+ assert(pg->inline_buffer_length == 0);
+ assert(pg->inline_array_length == 0);
+
+ pgraph_vk_bind_vertex_attributes(d, pg->draw_arrays_min_start,
+ pg->draw_arrays_max_count - 1, false,
+ 0, pg->draw_arrays_max_count - 1);
+ sync_vertex_ram_buffer(pg);
+
+ begin_pre_draw(pg);
+ begin_draw(pg);
+ bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0);
+ for (int i = 0; i < pg->draw_arrays_length; i++) {
+ uint32_t start = pg->draw_arrays_start[i],
+ count = pg->draw_arrays_count[i];
+ NV2A_VK_DPRINTF("- [%d] Start:%d Count:%d", i, start, count);
+ vkCmdDraw(r->command_buffer, count, 1, start, 0);
+ }
+ end_draw(pg);
+
+ NV2A_VK_DGROUP_END();
+ } else if (pg->inline_elements_length) {
+ // Inline elements: indexed draw; indices uploaded to the index
+ // buffer, vertex data still sourced from guest vertex RAM.
+ NV2A_VK_DGROUP_BEGIN("Inline Elements");
+ assert(pg->inline_buffer_length == 0);
+ assert(pg->inline_array_length == 0);
+
+ nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);
+
+ size_t index_data_size =
+ pg->inline_elements_length * sizeof(pg->inline_elements[0]);
+
+ ensure_buffer_space(pg, BUFFER_INDEX_STAGING, index_data_size);
+
+ // Scan the index list for the referenced vertex range.
+ uint32_t min_element = (uint32_t)-1;
+ uint32_t max_element = 0;
+ for (int i = 0; i < pg->inline_elements_length; i++) {
+ max_element = MAX(pg->inline_elements[i], max_element);
+ min_element = MIN(pg->inline_elements[i], min_element);
+ }
+ pgraph_vk_bind_vertex_attributes(
+ d, min_element, max_element, false, 0,
+ pg->inline_elements[pg->inline_elements_length - 1]);
+ sync_vertex_ram_buffer(pg);
+
+ begin_pre_draw(pg);
+ VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer(
+ pg, pg->inline_elements, index_data_size);
+ begin_draw(pg);
+ bind_vertex_buffer(pg, BUFFER_VERTEX_RAM, 0);
+ vkCmdBindIndexBuffer(r->command_buffer,
+ r->storage_buffers[BUFFER_INDEX].buffer,
+ buffer_offset, VK_INDEX_TYPE_UINT32);
+ vkCmdDrawIndexed(r->command_buffer, pg->inline_elements_length, 1, 0, 0,
+ 0);
+ end_draw(pg);
+
+ NV2A_VK_DGROUP_END();
+ } else if (pg->inline_buffer_length) {
+ // Inline buffer: per-attribute inline data, uploaded one planar
+ // region per active attribute.
+ NV2A_VK_DGROUP_BEGIN("Inline Buffer");
+ nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
+ assert(pg->inline_array_length == 0);
+
+ size_t vertex_data_size = pg->inline_buffer_length * sizeof(float) * 4;
+ void *data[NV2A_VERTEXSHADER_ATTRIBUTES];
+ size_t sizes[NV2A_VERTEXSHADER_ATTRIBUTES];
+ size_t offset = 0;
+
+ pgraph_vk_bind_vertex_attributes_inline(d);
+ for (int i = 0; i < r->num_active_vertex_attribute_descriptions; i++) {
+ int attr_index = r->vertex_attribute_descriptions[i].location;
+
+ VertexAttribute *attr = &pg->vertex_attributes[attr_index];
+ r->vertex_attribute_offsets[attr_index] = offset;
+
+ data[i] = attr->inline_buffer;
+ sizes[i] = vertex_data_size;
+
+ attr->inline_buffer_populated = false;
+ offset += vertex_data_size;
+ }
+ ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, offset);
+
+ begin_pre_draw(pg);
+ VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
+ pg, data, sizes, r->num_active_vertex_attribute_descriptions);
+ begin_draw(pg);
+ bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset);
+ vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0);
+ end_draw(pg);
+
+ NV2A_VK_DGROUP_END();
+ } else if (pg->inline_array_length) {
+ // Inline array: interleaved vertex data pushed through the FIFO;
+ // compute per-attribute offsets within the interleaved stride.
+ NV2A_VK_DGROUP_BEGIN("Inline Array");
+ nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);
+
+ VkDeviceSize inline_array_data_size = pg->inline_array_length * 4;
+ ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING,
+ inline_array_data_size);
+
+ unsigned int offset = 0;
+ for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+ VertexAttribute *attr = &pg->vertex_attributes[i];
+ if (attr->count == 0) {
+ continue;
+ }
+
+ /* FIXME: Double check */
+ offset = ROUND_UP(offset, attr->size);
+ attr->inline_array_offset = offset;
+ NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", i,
+ attr->size, attr->count);
+ offset += attr->size * attr->count;
+ offset = ROUND_UP(offset, attr->size);
+ }
+
+ unsigned int vertex_size = offset;
+ unsigned int index_count = pg->inline_array_length * 4 / vertex_size;
+
+ NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
+ pgraph_vk_bind_vertex_attributes(d, 0, index_count - 1, true,
+ vertex_size, index_count - 1);
+
+ begin_pre_draw(pg);
+ void *inline_array_data = pg->inline_array;
+ VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer(
+ pg, &inline_array_data, &inline_array_data_size, 1);
+ begin_draw(pg);
+ bind_vertex_buffer(pg, BUFFER_VERTEX_INLINE, buffer_offset);
+ vkCmdDraw(r->command_buffer, index_count, 1, 0, 0);
+ end_draw(pg);
+ NV2A_VK_DGROUP_END();
+ } else {
+ NV2A_VK_DPRINTF("EMPTY NV097_SET_BEGIN_END");
+ NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
+ }
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.c b/hw/xbox/nv2a/pgraph/vk/glsl.c
new file mode 100644
index 0000000000..fb3aed34f5
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/glsl.c
@@ -0,0 +1,380 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "renderer.h"
+
+#include <glslang/Include/glslang_c_interface.h>
+#include <stdio.h>
+#include <assert.h>
+
+static const glslang_resource_t
+ resource_limits = { .max_lights = 32,
+ .max_clip_planes = 6,
+ .max_texture_units = 32,
+ .max_texture_coords = 32,
+ .max_vertex_attribs = 64,
+ .max_vertex_uniform_components = 4096,
+ .max_varying_floats = 64,
+ .max_vertex_texture_image_units = 32,
+ .max_combined_texture_image_units = 80,
+ .max_texture_image_units = 32,
+ .max_fragment_uniform_components = 4096,
+ .max_draw_buffers = 32,
+ .max_vertex_uniform_vectors = 128,
+ .max_varying_vectors = 8,
+ .max_fragment_uniform_vectors = 16,
+ .max_vertex_output_vectors = 16,
+ .max_fragment_input_vectors = 15,
+ .min_program_texel_offset = -8,
+ .max_program_texel_offset = 7,
+ .max_clip_distances = 8,
+ .max_compute_work_group_count_x = 65535,
+ .max_compute_work_group_count_y = 65535,
+ .max_compute_work_group_count_z = 65535,
+ .max_compute_work_group_size_x = 1024,
+ .max_compute_work_group_size_y = 1024,
+ .max_compute_work_group_size_z = 64,
+ .max_compute_uniform_components = 1024,
+ .max_compute_texture_image_units = 16,
+ .max_compute_image_uniforms = 8,
+ .max_compute_atomic_counters = 8,
+ .max_compute_atomic_counter_buffers = 1,
+ .max_varying_components = 60,
+ .max_vertex_output_components = 64,
+ .max_geometry_input_components = 64,
+ .max_geometry_output_components = 128,
+ .max_fragment_input_components = 128,
+ .max_image_units = 8,
+ .max_combined_image_units_and_fragment_outputs = 8,
+ .max_combined_shader_output_resources = 8,
+ .max_image_samples = 0,
+ .max_vertex_image_uniforms = 0,
+ .max_tess_control_image_uniforms = 0,
+ .max_tess_evaluation_image_uniforms = 0,
+ .max_geometry_image_uniforms = 0,
+ .max_fragment_image_uniforms = 8,
+ .max_combined_image_uniforms = 8,
+ .max_geometry_texture_image_units = 16,
+ .max_geometry_output_vertices = 256,
+ .max_geometry_total_output_components = 1024,
+ .max_geometry_uniform_components = 1024,
+ .max_geometry_varying_components = 64,
+ .max_tess_control_input_components = 128,
+ .max_tess_control_output_components = 128,
+ .max_tess_control_texture_image_units = 16,
+ .max_tess_control_uniform_components = 1024,
+ .max_tess_control_total_output_components = 4096,
+ .max_tess_evaluation_input_components = 128,
+ .max_tess_evaluation_output_components = 128,
+ .max_tess_evaluation_texture_image_units = 16,
+ .max_tess_evaluation_uniform_components = 1024,
+ .max_tess_patch_components = 120,
+ .max_patch_vertices = 32,
+ .max_tess_gen_level = 64,
+ .max_viewports = 16,
+ .max_vertex_atomic_counters = 0,
+ .max_tess_control_atomic_counters = 0,
+ .max_tess_evaluation_atomic_counters = 0,
+ .max_geometry_atomic_counters = 0,
+ .max_fragment_atomic_counters = 8,
+ .max_combined_atomic_counters = 8,
+ .max_atomic_counter_bindings = 1,
+ .max_vertex_atomic_counter_buffers = 0,
+ .max_tess_control_atomic_counter_buffers = 0,
+ .max_tess_evaluation_atomic_counter_buffers = 0,
+ .max_geometry_atomic_counter_buffers = 0,
+ .max_fragment_atomic_counter_buffers = 1,
+ .max_combined_atomic_counter_buffers = 1,
+ .max_atomic_counter_buffer_size = 16384,
+ .max_transform_feedback_buffers = 4,
+ .max_transform_feedback_interleaved_components = 64,
+ .max_cull_distances = 8,
+ .max_combined_clip_and_cull_distances = 8,
+ .max_samples = 4,
+ .max_mesh_output_vertices_nv = 256,
+ .max_mesh_output_primitives_nv = 512,
+ .max_mesh_work_group_size_x_nv = 32,
+ .max_mesh_work_group_size_y_nv = 1,
+ .max_mesh_work_group_size_z_nv = 1,
+ .max_task_work_group_size_x_nv = 32,
+ .max_task_work_group_size_y_nv = 1,
+ .max_task_work_group_size_z_nv = 1,
+ .max_mesh_view_count_nv = 4,
+ .maxDualSourceDrawBuffersEXT = 1,
+ .limits = {
+ .non_inductive_for_loops = 1,
+ .while_loops = 1,
+ .do_while_loops = 1,
+ .general_uniform_indexing = 1,
+ .general_attribute_matrix_vector_indexing = 1,
+ .general_varying_indexing = 1,
+ .general_sampler_indexing = 1,
+ .general_variable_indexing = 1,
+ .general_constant_matrix_vector_indexing = 1,
+ } };
+
+void pgraph_vk_init_glsl_compiler(void)
+{
+ glslang_initialize_process();
+}
+
+void pgraph_vk_finalize_glsl_compiler(void)
+{
+ glslang_finalize_process();
+}
+
+GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage,
+ const char *glsl_source)
+{
+ const glslang_input_t input = {
+ .language = GLSLANG_SOURCE_GLSL,
+ .stage = stage,
+ .client = GLSLANG_CLIENT_VULKAN,
+ .client_version = GLSLANG_TARGET_VULKAN_1_3,
+ .target_language = GLSLANG_TARGET_SPV,
+ .target_language_version = GLSLANG_TARGET_SPV_1_5,
+ .code = glsl_source,
+ .default_version = 460,
+ .default_profile = GLSLANG_NO_PROFILE,
+ .force_default_version_and_profile = false,
+ .forward_compatible = false,
+ .messages = GLSLANG_MSG_DEFAULT_BIT,
+ .resource = &resource_limits,
+ };
+
+ glslang_shader_t *shader = glslang_shader_create(&input);
+
+ if (!glslang_shader_preprocess(shader, &input)) {
+ fprintf(stderr,
+ "GLSL preprocessing failed\n"
+ "[INFO]: %s\n"
+ "[DEBUG]: %s\n"
+ "%s\n",
+ glslang_shader_get_info_log(shader),
+ glslang_shader_get_info_debug_log(shader), input.code);
+ assert(!"glslang preprocess failed");
+ glslang_shader_delete(shader);
+ return NULL;
+ }
+
+ if (!glslang_shader_parse(shader, &input)) {
+ fprintf(stderr,
+ "GLSL parsing failed\n"
+ "[INFO]: %s\n"
+ "[DEBUG]: %s\n"
+ "%s\n",
+ glslang_shader_get_info_log(shader),
+ glslang_shader_get_info_debug_log(shader),
+ glslang_shader_get_preprocessed_code(shader));
+ assert(!"glslang parse failed");
+ glslang_shader_delete(shader);
+ return NULL;
+ }
+
+ glslang_program_t *program = glslang_program_create();
+ glslang_program_add_shader(program, shader);
+
+ if (!glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT |
+ GLSLANG_MSG_VULKAN_RULES_BIT)) {
+ fprintf(stderr,
+ "GLSL linking failed\n"
+ "[INFO]: %s\n"
+ "[DEBUG]: %s\n",
+ glslang_program_get_info_log(program),
+ glslang_program_get_info_debug_log(program));
+ assert(!"glslang link failed");
+ glslang_program_delete(program);
+ glslang_shader_delete(shader);
+ return NULL;
+ }
+
+ glslang_spv_options_t spv_options = {
+ .validate = true,
+
+#if defined(CONFIG_RENDERDOC)
+ .disable_optimizer = true,
+ .generate_debug_info = true,
+ .emit_nonsemantic_shader_debug_info = true,
+ .emit_nonsemantic_shader_debug_source = true,
+#endif
+ };
+ glslang_program_SPIRV_generate_with_options(program, stage, &spv_options);
+
+ const char *spirv_messages = glslang_program_SPIRV_get_messages(program);
+ if (spirv_messages) {
+ printf("%s\b", spirv_messages);
+ }
+
+ size_t num_program_bytes =
+ glslang_program_SPIRV_get_size(program) * sizeof(uint32_t);
+
+ guint8 *data = g_malloc(num_program_bytes);
+ glslang_program_SPIRV_get(program, (unsigned int *)data);
+
+ glslang_program_delete(program);
+ glslang_shader_delete(shader);
+
+ return g_byte_array_new_take(data, num_program_bytes);
+}
+
+VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, GByteArray *spv)
+{
+ VkShaderModuleCreateInfo create_info = {
+ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
+ .codeSize = spv->len,
+ .pCode = (uint32_t *)spv->data,
+ };
+ VkShaderModule module;
+ VK_CHECK(
+ vkCreateShaderModule(r->device, &create_info, NULL, &module));
+ return module;
+}
+
+static void block_to_uniforms(const SpvReflectBlockVariable *block, ShaderUniformLayout *layout)
+{
+ assert(!layout->uniforms);
+
+ layout->num_uniforms = block->member_count;
+ layout->uniforms = g_malloc0_n(block->member_count, sizeof(ShaderUniform));
+ layout->total_size = block->size;
+ layout->allocation = g_malloc0(block->size);
+
+ for (uint32_t k = 0; k < block->member_count; ++k) {
+ const SpvReflectBlockVariable *member = &block->members[k];
+
+ assert(member->array.dims_count < 2);
+
+ layout->uniforms[k] = (ShaderUniform){
+ .name = strdup(member->name),
+ .offset = member->offset,
+ .dim_v = MAX(1, member->numeric.vector.component_count),
+ .dim_a = MAX(member->array.dims_count ? member->array.dims[0] : 1, member->numeric.matrix.column_count),
+ .stride = MAX(member->array.stride, member->numeric.matrix.stride),
+ };
+
+ // fprintf(stderr, "<%s offset=%zd dim_v=%zd dim_a=%zd stride=%zd>\n",
+ // layout->uniforms[k].name,
+ // layout->uniforms[k].offset,
+ // layout->uniforms[k].dim_v,
+ // layout->uniforms[k].dim_a,
+ // layout->uniforms[k].stride
+ // );
+ }
+ // fprintf(stderr, "--\n");
+}
+
+static void init_layout_from_spv(ShaderModuleInfo *info)
+{
+ SpvReflectResult result = spvReflectCreateShaderModule(
+ info->spirv->len, info->spirv->data, &info->reflect_module);
+ assert(result == SPV_REFLECT_RESULT_SUCCESS &&
+ "Failed to create SPIR-V shader module");
+
+ uint32_t descriptor_set_count = 0;
+ result = spvReflectEnumerateDescriptorSets(&info->reflect_module,
+ &descriptor_set_count, NULL);
+ assert(result == SPV_REFLECT_RESULT_SUCCESS &&
+ "Failed to enumerate descriptor sets");
+
+ info->descriptor_sets =
+ g_malloc_n(descriptor_set_count, sizeof(SpvReflectDescriptorSet *));
+ result = spvReflectEnumerateDescriptorSets(
+ &info->reflect_module, &descriptor_set_count, info->descriptor_sets);
+ assert(result == SPV_REFLECT_RESULT_SUCCESS &&
+ "Failed to enumerate descriptor sets");
+
+ info->uniforms.num_uniforms = 0;
+ info->uniforms.uniforms = NULL;
+
+ for (uint32_t i = 0; i < descriptor_set_count; ++i) {
+ const SpvReflectDescriptorSet *descriptor_set =
+ info->descriptor_sets[i];
+ for (uint32_t j = 0; j < descriptor_set->binding_count; ++j) {
+ const SpvReflectDescriptorBinding *binding =
+ descriptor_set->bindings[j];
+ if (binding->descriptor_type !=
+ SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+ continue;
+ }
+
+ const SpvReflectBlockVariable *block = &binding->block;
+ block_to_uniforms(block, &info->uniforms);
+ }
+ }
+
+ info->push_constants.num_uniforms = 0;
+ info->push_constants.uniforms = NULL;
+ assert(info->reflect_module.push_constant_block_count < 2);
+ if (info->reflect_module.push_constant_block_count) {
+ block_to_uniforms(&info->reflect_module.push_constant_blocks[0],
+ &info->push_constants);
+ }
+}
+
+static glslang_stage_t vk_shader_stage_to_glslang_stage(VkShaderStageFlagBits stage)
+{
+ switch (stage) {
+ case VK_SHADER_STAGE_GEOMETRY_BIT:
+ return GLSLANG_STAGE_GEOMETRY;
+ case VK_SHADER_STAGE_VERTEX_BIT:
+ return GLSLANG_STAGE_VERTEX;
+ case VK_SHADER_STAGE_FRAGMENT_BIT:
+ return GLSLANG_STAGE_FRAGMENT;
+ case VK_SHADER_STAGE_COMPUTE_BIT:
+ return GLSLANG_STAGE_COMPUTE;
+ default:
+ assert(0);
+ }
+}
+
+ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
+ PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl)
+{
+ ShaderModuleInfo *info = g_malloc0(sizeof(*info));
+ info->glsl = strdup(glsl);
+ info->spirv = pgraph_vk_compile_glsl_to_spv(
+ vk_shader_stage_to_glslang_stage(stage), glsl);
+ info->module = pgraph_vk_create_shader_module_from_spv(r, info->spirv);
+ init_layout_from_spv(info);
+ return info;
+}
+
+static void finalize_uniform_layout(ShaderUniformLayout *layout)
+{
+ for (int i = 0; i < layout->num_uniforms; i++) {
+ free((void*)layout->uniforms[i].name);
+ }
+ if (layout->uniforms) {
+ g_free(layout->uniforms);
+ }
+}
+
+void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info)
+{
+ if (info->glsl) {
+ free(info->glsl);
+ }
+ finalize_uniform_layout(&info->uniforms);
+ finalize_uniform_layout(&info->push_constants);
+ free(info->descriptor_sets);
+ spvReflectDestroyShaderModule(&info->reflect_module);
+ vkDestroyShaderModule(r->device, info->module, NULL);
+ g_byte_array_unref(info->spirv);
+ g_free(info);
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.h b/hw/xbox/nv2a/pgraph/vk/glsl.h
new file mode 100644
index 0000000000..3f6ccd9b3a
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/glsl.h
@@ -0,0 +1,205 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_VK_GLSL_H
+#define HW_XBOX_NV2A_PGRAPH_VK_GLSL_H
+
+#include "qemu/osdep.h"
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+typedef struct ShaderUniform {
+ const char *name;
+ size_t dim_v;
+ size_t dim_a;
+ size_t align;
+ size_t stride;
+ size_t offset;
+} ShaderUniform;
+
+typedef struct ShaderUniformLayout {
+ ShaderUniform *uniforms;
+ size_t num_uniforms;
+ size_t total_size;
+ void *allocation;
+} ShaderUniformLayout;
+
+static inline void uniform_std140(ShaderUniformLayout *layout)
+{
+ size_t offset = 0;
+
+ for (int i = 0; i < layout->num_uniforms; i++) {
+ ShaderUniform *u = &layout->uniforms[i];
+ size_t size = sizeof(float); // float or int
+ size_t align = size;
+ size_t stride = 0;
+
+ size *= u->dim_v;
+ align *= u->dim_v == 3 ? 4 : u->dim_v;
+
+ // If an array, each element is padded to vec4.
+ if (u->dim_a > 1) {
+ align = 4 * sizeof(float);
+ stride = align;
+ size = u->dim_a * align;
+ } else {
+ align = size;
+ stride = 0;
+ }
+
+ offset = ROUND_UP(offset, align);
+
+ u->align = align;
+ u->offset = offset;
+ u->stride = stride;
+
+ offset += size;
+ }
+
+ layout->total_size = offset;
+ assert(layout->total_size);
+}
+
+static inline void uniform_std430(ShaderUniformLayout *layout)
+{
+ size_t offset = 0;
+
+ for (int i = 0; i < layout->num_uniforms; i++) {
+ ShaderUniform *u = &layout->uniforms[i];
+ size_t size = sizeof(float); // float or int
+ size *= u->dim_v;
+ size_t align = size;
+ size *= u->dim_a;
+
+ offset = ROUND_UP(offset, align);
+
+ u->align = align;
+ u->offset = offset;
+ u->stride = u->dim_a > 1 ? (size * u->dim_v) : 0;
+
+ offset += size;
+ }
+
+ layout->total_size = offset;
+ assert(layout->total_size);
+}
+
+static inline int uniform_index(ShaderUniformLayout *layout, const char *name)
+{
+ for (int i = 0; i < layout->num_uniforms; i++) {
+ if (!strcmp(layout->uniforms[i].name, name)) {
+ return i + 1;
+ }
+ }
+
+ return -1;
+}
+
+static inline
+void *uniform_ptr(ShaderUniformLayout *layout, int idx)
+{
+ assert(idx > 0 && "invalid uniform index");
+
+ return (char *)layout->allocation + layout->uniforms[idx - 1].offset;
+}
+
+static inline
+void uniform_copy(ShaderUniformLayout *layout, int idx, void *values, size_t value_size, size_t count)
+{
+ assert(idx > 0 && "invalid uniform index");
+
+ ShaderUniform *u = &layout->uniforms[idx - 1];
+ const size_t element_size = value_size * u->dim_v;
+
+ size_t bytes_remaining = value_size * count;
+ char *p_out = uniform_ptr(layout, idx);
+ char *p_max = p_out + layout->total_size;
+ char *p_in = (char *)values;
+
+ int index = 0;
+ while (bytes_remaining) {
+ assert(p_out < p_max);
+ assert(index < u->dim_a);
+ memcpy(p_out, p_in, element_size);
+ bytes_remaining -= element_size;
+ p_out += u->stride;
+ p_in += element_size;
+ index += 1;
+ }
+}
+
+static inline
+void uniform1fv(ShaderUniformLayout *layout, int idx, size_t count, float *values)
+{
+ uniform_copy(layout, idx, values, sizeof(float), count);
+}
+
+static inline
+void uniform1f(ShaderUniformLayout *layout, int idx, float value)
+{
+ uniform1fv(layout, idx, 1, &value);
+}
+
+static inline
+void uniform2f(ShaderUniformLayout *layout, int idx, float v0, float v1)
+{
+ float values[] = { v0, v1 };
+ uniform1fv(layout, idx, 2, values);
+}
+
+static inline
+void uniform4f(ShaderUniformLayout *layout, int idx, float v0, float v1, float v2, float v3)
+{
+ float values[] = { v0, v1, v2, v3 };
+ uniform1fv(layout, idx, 4, values);
+}
+
+static inline
+void uniformMatrix2fv(ShaderUniformLayout *layout, int idx, float *values)
+{
+ uniform1fv(layout, idx, 4, values);
+}
+
+static inline
+void uniformMatrix4fv(ShaderUniformLayout *layout, int idx, float *values)
+{
+ uniform1fv(layout, idx, 4 * 4, values);
+}
+
+static inline
+void uniform1iv(ShaderUniformLayout *layout, int idx, size_t count, int32_t *values)
+{
+ uniform_copy(layout, idx, values, sizeof(int32_t), count);
+}
+
+static inline
+void uniform1i(ShaderUniformLayout *layout, int idx, int32_t value)
+{
+ uniform1iv(layout, idx, 1, &value);
+}
+
+static inline
+void uniform4i(ShaderUniformLayout *layout, int idx, int v0, int v1, int v2, int v3)
+{
+ int values[] = { v0, v1, v2, v3 };
+ uniform1iv(layout, idx, 4, values);
+}
+
+#endif
diff --git a/hw/xbox/nv2a/pgraph/vk/image.c b/hw/xbox/nv2a/pgraph/vk/image.c
new file mode 100644
index 0000000000..1161d81f54
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/image.c
@@ -0,0 +1,209 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "renderer.h"
+
+static bool check_format_has_depth_component(VkFormat format)
+{
+ return format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
+ format == VK_FORMAT_D24_UNORM_S8_UINT ||
+ format == VK_FORMAT_D16_UNORM;
+}
+
+static bool check_format_has_stencil_component(VkFormat format)
+{
+ return format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
+ format == VK_FORMAT_D24_UNORM_S8_UINT;
+}
+
+void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd,
+ VkImage image, VkFormat format,
+ VkImageLayout oldLayout,
+ VkImageLayout newLayout)
+{
+ VkImageMemoryBarrier barrier = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+ .oldLayout = oldLayout,
+ .newLayout = newLayout,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .image = image,
+ .subresourceRange.baseMipLevel = 0,
+ .subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
+ .subresourceRange.baseArrayLayer = 0,
+ .subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS,
+ };
+
+ if (check_format_has_depth_component(format)) {
+ barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+
+ if (check_format_has_stencil_component(format)) {
+ barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ } else {
+ barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ }
+
+ VkPipelineStageFlags sourceStage;
+ VkPipelineStageFlags destinationStage;
+
+ // Undefined -> Dst
+ if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ // Undefined -> Color
+ } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
+ newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+
+ // Undefined -> Depth
+ } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
+ newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ destinationStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+
+ // Dst -> Shader Read
+ } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+
+ // Dst -> Color
+ } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+
+ // Dst -> Depth
+ } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
+
+ // Dst -> Src
+ } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ // Shader Read -> Dst
+ } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ // Shader Read -> Color
+ } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+
+ // Color -> Src
+ } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ // Color -> Dst
+ } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ // Color -> Shader Read
+ } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
+ sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+
+ // Depth -> Src
+ } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+
+ sourceStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ // Depth -> Dst
+ } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ // Src -> Color
+ } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+
+ // Src -> Depth
+ } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
+ barrier.srcAccessMask = 0;
+ barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
+
+ // Src -> Dst
+ } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
+ newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+ destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
+
+ } else {
+ assert(!"unsupported layout transition!");
+ }
+
+ vkCmdPipelineBarrier(cmd, sourceStage, destinationStage, 0, 0,
+ NULL, 0, NULL, 1, &barrier);
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c
new file mode 100644
index 0000000000..4023fd5858
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/instance.c
@@ -0,0 +1,662 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "ui/xemu-settings.h"
+#include "renderer.h"
+#include "xemu-version.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <SDL2/SDL_vulkan.h>
+
+// GArray aliases for readability; element types are VkExtensionProperties
+// and char * respectively.
+typedef GArray VkExtensionPropertiesArray;
+typedef GArray StringArray;
+
+// Set from g_config during create_instance(); re-read by
+// create_logical_device() to enable the same layers on the device.
+static bool enable_validation = false;
+
+static char const *const validation_layers[] = {
+    "VK_LAYER_KHRONOS_validation",
+};
+
+// Instance extensions required in addition to those SDL reports.
+static char const *const required_instance_extensions[] = {
+    VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
+};
+
+// External memory/semaphore device extensions; the handle-type variant
+// (Win32 vs fd) is selected at compile time. Presumably used for GL interop
+// (see HAVE_EXTERNAL_MEMORY in renderer.h) -- TODO confirm.
+static char const *const required_device_extensions[] = {
+    VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
+#ifdef WIN32
+    VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,
+#else
+    VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
+    VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
+#endif
+};
+
+// VK_EXT_debug_utils messenger callback. Logs every message, then aborts
+// the process on validation warnings/errors so problems surface immediately.
+static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
+    VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
+    VkDebugUtilsMessageTypeFlagsEXT messageType,
+    const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData)
+{
+    NV2A_VK_DPRINTF("[vk] %s", pCallbackData->pMessage);
+    fprintf(stderr, "[vk] %s\n", pCallbackData->pMessage);
+
+    // Validation warnings and errors are treated as fatal.
+    if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) &&
+        (messageSeverity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+                            VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) {
+        exit(1);
+    }
+    return VK_FALSE; // Spec: callback must return VK_FALSE.
+}
+
+// Returns true iff every layer in validation_layers[] is reported by the
+// loader; logs the first missing layer to stderr.
+static bool check_validation_layer_support(void)
+{
+    uint32_t num_available_layers;
+    vkEnumerateInstanceLayerProperties(&num_available_layers, NULL);
+
+    g_autofree VkLayerProperties *available_layers =
+        g_malloc_n(num_available_layers, sizeof(VkLayerProperties));
+    vkEnumerateInstanceLayerProperties(&num_available_layers, available_layers);
+
+    // Use unsigned counters: ARRAY_SIZE() is size_t and the layer count is
+    // uint32_t, so `int` indices trip -Wsign-compare.
+    for (size_t i = 0; i < ARRAY_SIZE(validation_layers); i++) {
+        bool found = false;
+        for (uint32_t j = 0; j < num_available_layers; j++) {
+            if (!strcmp(validation_layers[i], available_layers[j].layerName)) {
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            fprintf(stderr, "desired validation layer not found: %s\n",
+                    validation_layers[i]);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Create a hidden SDL window with Vulkan support. SDL needs a window to
+// report the instance extensions it requires; the window is never shown.
+// Exits the process on failure.
+static SDL_Window *create_window(void)
+{
+    Uint32 flags = SDL_WINDOW_VULKAN | SDL_WINDOW_HIDDEN;
+    SDL_Window *window =
+        SDL_CreateWindow("SDL Offscreen Window", SDL_WINDOWPOS_CENTERED,
+                         SDL_WINDOWPOS_CENTERED, 640, 480, flags);
+    if (!window) {
+        fprintf(stderr, "%s: Failed to create window\n", __func__);
+        SDL_Quit();
+        exit(1);
+    }
+
+    return window;
+}
+
+// Enumerate every instance extension offered by the loader/driver.
+// Caller owns the returned array. `pg` is currently unused.
+static VkExtensionPropertiesArray *
+get_available_instance_extensions(PGRAPHState *pg)
+{
+    uint32_t num_extensions = 0;
+
+    VK_CHECK(
+        vkEnumerateInstanceExtensionProperties(NULL, &num_extensions, NULL));
+
+    VkExtensionPropertiesArray *extensions = g_array_sized_new(
+        FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions);
+
+    // Second call fills the array that the first call sized.
+    g_array_set_size(extensions, num_extensions);
+    VK_CHECK(vkEnumerateInstanceExtensionProperties(
+        NULL, &num_extensions, (VkExtensionProperties *)extensions->data));
+
+    return extensions;
+}
+
+// Linear search of `available_extensions` for `extension_name`.
+static bool
+is_extension_available(VkExtensionPropertiesArray *available_extensions,
+                       const char *extension_name)
+{
+    // GArray::len is guint; use a matching unsigned index.
+    for (guint i = 0; i < available_extensions->len; i++) {
+        VkExtensionProperties *e =
+            &g_array_index(available_extensions, VkExtensionProperties, i);
+        if (!strcmp(e->extensionName, extension_name)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Build the list of required instance extension names: whatever SDL reports
+// for the window, plus required_instance_extensions[]. Caller owns the
+// returned array (the strings themselves are not owned).
+static StringArray *get_required_instance_extension_names(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    // Add instance extensions SDL lists as required. SDL_Vulkan_GetInstanceExtensions
+    // returns SDL_FALSE on failure; previously the result was ignored, which
+    // would leave sdl_count unspecified on error.
+    unsigned int sdl_count = 0;
+    if (!SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count,
+                                          NULL)) {
+        fprintf(stderr, "Warning: SDL_Vulkan_GetInstanceExtensions failed: %s\n",
+                SDL_GetError());
+        sdl_count = 0;
+    }
+
+    StringArray *extensions =
+        g_array_sized_new(FALSE, FALSE, sizeof(char *),
+                          sdl_count + ARRAY_SIZE(required_instance_extensions));
+
+    if (sdl_count) {
+        g_array_set_size(extensions, sdl_count);
+        if (!SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window,
+                                              &sdl_count,
+                                              (const char **)extensions->data)) {
+            fprintf(stderr,
+                    "Warning: SDL_Vulkan_GetInstanceExtensions failed: %s\n",
+                    SDL_GetError());
+            g_array_set_size(extensions, 0);
+        }
+    }
+
+    // Add additional required extensions
+    g_array_append_vals(extensions, required_instance_extensions,
+                        ARRAY_SIZE(required_instance_extensions));
+
+    return extensions;
+}
+
+// Append `desired_extension_name` to `enabled_extension_names` if it is
+// present in `available_extensions`. Returns whether it was added; logs a
+// warning when the extension is missing.
+static bool
+add_extension_if_available(VkExtensionPropertiesArray *available_extensions,
+                           StringArray *enabled_extension_names,
+                           const char *desired_extension_name)
+{
+    if (is_extension_available(available_extensions, desired_extension_name)) {
+        g_array_append_val(enabled_extension_names, desired_extension_name);
+        return true;
+    }
+
+    fprintf(stderr, "Warning: extension not available: %s\n",
+            desired_extension_name);
+    return false;
+}
+
+// Enable optional instance extensions. Debug utils is only requested when
+// the user enabled validation layers in the config; the flag records whether
+// it was actually available.
+static void
+add_optional_instance_extension_names(PGRAPHState *pg,
+                                      VkExtensionPropertiesArray *available_extensions,
+                                      StringArray *enabled_extension_names)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    r->debug_utils_extension_enabled =
+        g_config.display.vulkan.validation_layers &&
+        add_extension_if_available(available_extensions, enabled_extension_names,
+                                   VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+}
+
+// Create the VkInstance: initialize volk, gather required + optional
+// instance extensions (asserting all required ones exist), optionally hook
+// up validation layers and the debug messenger, then load instance-level
+// entry points. Also creates the hidden SDL window used for extension
+// discovery.
+static void create_instance(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    r->window = create_window();
+
+    VK_CHECK(volkInitialize());
+
+    VkApplicationInfo app_info = {
+        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+        .pApplicationName = "xemu",
+        .applicationVersion = VK_MAKE_VERSION(
+            xemu_version_major, xemu_version_minor, xemu_version_patch),
+        .pEngineName = "No Engine",
+        .engineVersion = VK_MAKE_VERSION(1, 0, 0),
+        .apiVersion = VK_API_VERSION_1_3,
+    };
+
+    g_autofree VkExtensionPropertiesArray *available_extensions =
+        get_available_instance_extensions(pg);
+
+    g_autofree StringArray *enabled_extension_names =
+        get_required_instance_extension_names(pg);
+
+    // Verify every required extension exists before appending optional ones,
+    // so the check below only covers the required set.
+    bool all_required_extensions_available = true;
+    for (int i = 0; i < enabled_extension_names->len; i++) {
+        const char *required_extension =
+            g_array_index(enabled_extension_names, const char *, i);
+        if (!is_extension_available(available_extensions, required_extension)) {
+            fprintf(stderr,
+                    "Error: Required instance extension not available: %s\n",
+                    required_extension);
+            all_required_extensions_available = false;
+        }
+    }
+    assert(all_required_extensions_available);
+
+    add_optional_instance_extension_names(pg, available_extensions,
+                                          enabled_extension_names);
+
+    fprintf(stderr, "Enabled instance extensions:\n");
+    for (int i = 0; i < enabled_extension_names->len; i++) {
+        fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i));
+    }
+
+    VkInstanceCreateInfo create_info = {
+        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+        .pApplicationInfo = &app_info,
+        .enabledExtensionCount = enabled_extension_names->len,
+        .ppEnabledExtensionNames =
+            &g_array_index(enabled_extension_names, const char *, 0),
+    };
+
+    // Declared outside the `if` so it outlives vkCreateInstance when chained
+    // into pNext below.
+    VkDebugUtilsMessengerCreateInfoEXT dbg_create_info;
+    if (r->debug_utils_extension_enabled) {
+        dbg_create_info = (VkDebugUtilsMessengerCreateInfoEXT){
+            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
+            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
+                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
+                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
+            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
+                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
+            .pfnUserCallback = debugCallback,
+        };
+    }
+
+    enable_validation = g_config.display.vulkan.validation_layers;
+
+    if (enable_validation) {
+        if (check_validation_layer_support()) {
+            fprintf(stderr, "Warning: Validation layers enabled. Expect performance impact.\n");
+            create_info.enabledLayerCount = ARRAY_SIZE(validation_layers);
+            create_info.ppEnabledLayerNames = validation_layers;
+            if (r->debug_utils_extension_enabled) {
+                // Chaining the messenger info into pNext captures messages
+                // emitted during instance creation/destruction itself.
+                create_info.pNext =
+                    (VkDebugUtilsMessengerCreateInfoEXT *)&dbg_create_info;
+            }
+        } else {
+            fprintf(stderr, "Warning: validation layers not available\n");
+            enable_validation = false;
+        }
+    }
+
+    VK_CHECK(vkCreateInstance(&create_info, NULL, &r->instance));
+
+    volkLoadInstance(r->instance);
+}
+
+// True once a suitable queue family has been found (-1 is the "not found"
+// sentinel set by pgraph_vk_find_queue_families).
+static bool is_queue_family_indicies_complete(QueueFamilyIndices indices)
+{
+    return indices.queue_family >= 0;
+}
+
+// Find the first queue family supporting both graphics and compute.
+// Returns queue_family == -1 when no such family exists.
+QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device)
+{
+    QueueFamilyIndices indices = {
+        .queue_family = -1,
+    };
+
+    uint32_t num_queue_families = 0;
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, NULL);
+
+    g_autofree VkQueueFamilyProperties *queue_families =
+        g_malloc_n(num_queue_families, sizeof(VkQueueFamilyProperties));
+    vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families,
+                                             queue_families);
+
+    // FIXME: Support independent graphics, compute queues
+    const VkQueueFlags required_flags =
+        VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
+
+    for (uint32_t i = 0; i < num_queue_families; i++) {
+        if ((queue_families[i].queueFlags & required_flags) == required_flags) {
+            indices.queue_family = i;
+            break;
+        }
+    }
+
+    return indices;
+}
+
+// Enumerate every extension exposed by `device`. Caller owns the returned
+// array.
+static VkExtensionPropertiesArray *
+get_available_device_extensions(VkPhysicalDevice device)
+{
+    uint32_t num_extensions = 0;
+
+    VK_CHECK(vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions,
+                                                  NULL));
+
+    VkExtensionPropertiesArray *extensions = g_array_sized_new(
+        FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions);
+
+    // Second call fills the array that the first call sized.
+    g_array_set_size(extensions, num_extensions);
+    VK_CHECK(vkEnumerateDeviceExtensionProperties(
+        device, NULL, &num_extensions,
+        (VkExtensionProperties *)extensions->data));
+
+    return extensions;
+}
+
+// Build a new StringArray pre-populated with the unconditionally required
+// device extension names. Caller owns the returned array.
+static StringArray *get_required_device_extension_names(void)
+{
+    const guint count = ARRAY_SIZE(required_device_extensions);
+
+    StringArray *extensions =
+        g_array_sized_new(FALSE, FALSE, sizeof(char *), count);
+    g_array_append_vals(extensions, required_device_extensions, count);
+
+    return extensions;
+}
+
+// Enable optional device extensions, recording in the renderer state which
+// ones are actually available so later code can branch on them.
+static void add_optional_device_extension_names(
+    PGRAPHState *pg, VkExtensionPropertiesArray *available_extensions,
+    StringArray *enabled_extension_names)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    r->custom_border_color_extension_enabled =
+        add_extension_if_available(available_extensions, enabled_extension_names,
+                                   VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+
+    r->provoking_vertex_extension_enabled =
+        add_extension_if_available(available_extensions, enabled_extension_names,
+                                   VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
+
+    r->memory_budget_extension_enabled = add_extension_if_available(
+        available_extensions, enabled_extension_names,
+        VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
+}
+
+// Returns true iff `device` exposes every entry of
+// required_device_extensions[]; logs the first missing one.
+static bool check_device_support_required_extensions(VkPhysicalDevice device)
+{
+    g_autofree VkExtensionPropertiesArray *available_extensions =
+        get_available_device_extensions(device);
+
+    for (int i = 0; i < ARRAY_SIZE(required_device_extensions); i++) {
+        if (!is_extension_available(available_extensions,
+                                    required_device_extensions[i])) {
+            fprintf(stderr, "required device extension not found: %s\n",
+                    required_device_extensions[i]);
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// A device is usable when it has a graphics+compute queue family and all
+// required extensions.
+static bool is_device_compatible(VkPhysicalDevice device)
+{
+    QueueFamilyIndices indices = pgraph_vk_find_queue_families(device);
+
+    return is_queue_family_indicies_complete(indices) &&
+           check_device_support_required_extensions(device);
+    // FIXME: Check formats
+    // FIXME: Check vram
+}
+
+// Pick the first enumerated physical device that passes
+// is_device_compatible(), storing it and its properties in the renderer
+// state. Aborts if no usable GPU is found.
+static void select_physical_device(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    uint32_t num_physical_devices = 0;
+
+    // Check the enumeration results (file convention is VK_CHECK);
+    // previously these return values were silently ignored.
+    VK_CHECK(
+        vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL));
+    if (num_physical_devices == 0) {
+        assert(!"failed to find GPUs with Vulkan support");
+    }
+
+    g_autofree VkPhysicalDevice *devices =
+        g_malloc_n(num_physical_devices, sizeof(VkPhysicalDevice));
+    VK_CHECK(
+        vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, devices));
+
+    fprintf(stderr, "Available physical devices:\n");
+    for (uint32_t i = 0; i < num_physical_devices; i++) {
+        vkGetPhysicalDeviceProperties(devices[i], &r->device_props);
+        fprintf(stderr, "- %s\n", r->device_props.deviceName);
+    }
+
+    // FIXME: Store preferred device
+
+    r->physical_device = VK_NULL_HANDLE;
+    for (uint32_t i = 0; i < num_physical_devices; i++) {
+        if (is_device_compatible(devices[i])) {
+            r->physical_device = devices[i];
+            break;
+        }
+    }
+    if (r->physical_device == VK_NULL_HANDLE) {
+        assert(!"failed to find a suitable GPU");
+    }
+
+    vkGetPhysicalDeviceProperties(r->physical_device, &r->device_props);
+    fprintf(stderr,
+            "Selected physical device: %s\n"
+            "- Vendor: %x, Device: %x\n"
+            "- Driver Version: %d.%d.%d\n",
+            r->device_props.deviceName,
+            r->device_props.vendorID,
+            r->device_props.deviceID,
+            VK_VERSION_MAJOR(r->device_props.driverVersion),
+            VK_VERSION_MINOR(r->device_props.driverVersion),
+            VK_VERSION_PATCH(r->device_props.driverVersion));
+
+    // Vertex shader attribute values must fit in push constants on this
+    // device (checked against maxPushConstantsSize).
+    size_t vsh_attr_values_size =
+        NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
+    assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size);
+}
+
+// Create the logical device and fetch its single graphics+compute queue.
+// Verifies required features, chains optional feature structs into pNext,
+// and mirrors the instance's validation layers onto the device (required by
+// pre-1.1-style implementations).
+static void create_logical_device(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    QueueFamilyIndices indices =
+        pgraph_vk_find_queue_families(r->physical_device);
+
+    g_autofree VkExtensionPropertiesArray *available_extensions =
+        get_available_device_extensions(r->physical_device);
+
+    g_autofree StringArray *enabled_extension_names =
+        get_required_device_extension_names();
+
+    add_optional_device_extension_names(pg, available_extensions,
+                                        enabled_extension_names);
+
+    fprintf(stderr, "Enabled device extensions:\n");
+    for (int i = 0; i < enabled_extension_names->len; i++) {
+        fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i));
+    }
+
+    float queuePriority = 1.0f;
+
+    VkDeviceQueueCreateInfo queue_create_info = {
+        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+        .queueFamilyIndex = indices.queue_family,
+        .queueCount = 1,
+        .pQueuePriorities = &queuePriority,
+    };
+
+    // Ensure device supports required features
+    VkPhysicalDeviceFeatures available_features, enabled_features;
+    vkGetPhysicalDeviceFeatures(r->physical_device, &available_features);
+    memset(&enabled_features, 0, sizeof(enabled_features));
+
+    // Table pairing each required feature's availability bit with the
+    // corresponding field to set in enabled_features.
+    struct {
+        const char *name;
+        VkBool32 available, *enabled;
+    } required_features[] = {
+        #define F(n) { #n, available_features.n, &enabled_features.n }
+        F(shaderClipDistance),
+        F(geometryShader),
+        F(shaderTessellationAndGeometryPointSize),
+        F(depthClamp),
+        F(occlusionQueryPrecise),
+        #undef F
+    };
+
+    // Report every missing feature before aborting, not just the first.
+    bool all_features_available = true;
+    for (int i = 0; i < ARRAY_SIZE(required_features); i++) {
+        if (required_features[i].available != VK_TRUE) {
+            fprintf(stderr, "Error: Device does not support required feature %s\n", required_features[i].name);
+            all_features_available = false;
+        }
+        *required_features[i].enabled = VK_TRUE;
+    }
+    assert(all_features_available);
+
+    // Build the pNext chain; each optional struct must stay in scope until
+    // vkCreateDevice returns, hence the function-scope declarations.
+    void *next_struct = NULL;
+
+    VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features;
+    if (r->provoking_vertex_extension_enabled) {
+        provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){
+            .sType =
+                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
+            .provokingVertexLast = VK_TRUE,
+            .pNext = next_struct,
+        };
+        next_struct = &provoking_vertex_features;
+    }
+
+    VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features;
+    if (r->custom_border_color_extension_enabled) {
+        custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){
+            .sType =
+                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
+            .customBorderColors = VK_TRUE,
+            .pNext = next_struct,
+        };
+        next_struct = &custom_border_features;
+    }
+
+    VkDeviceCreateInfo device_create_info = {
+        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
+        .queueCreateInfoCount = 1,
+        .pQueueCreateInfos = &queue_create_info,
+        .pEnabledFeatures = &enabled_features,
+        .enabledExtensionCount = enabled_extension_names->len,
+        .ppEnabledExtensionNames =
+            &g_array_index(enabled_extension_names, const char *, 0),
+        .pNext = next_struct,
+    };
+
+    if (enable_validation) {
+        device_create_info.enabledLayerCount = ARRAY_SIZE(validation_layers);
+        device_create_info.ppEnabledLayerNames = validation_layers;
+    }
+
+    VK_CHECK(vkCreateDevice(r->physical_device, &device_create_info, NULL,
+                            &r->device));
+
+    vkGetDeviceQueue(r->device, indices.queue_family, 0, &r->queue);
+}
+
+// Find a memory type index satisfying both the type filter `type_bits`
+// (from VkMemoryRequirements) and the requested property flags.
+// Returns 0xFFFFFFFF if no suitable type exists.
+uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits,
+                                   VkMemoryPropertyFlags properties)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkPhysicalDeviceMemoryProperties prop;
+    vkGetPhysicalDeviceMemoryProperties(r->physical_device, &prop);
+    for (uint32_t i = 0; i < prop.memoryTypeCount; i++) {
+        if ((prop.memoryTypes[i].propertyFlags & properties) == properties &&
+            type_bits & (1 << i)) {
+            return i;
+        }
+    }
+    return 0xFFFFFFFF; // Unable to find memoryType
+}
+
+// Create the VMA allocator. Because volk is used (VK_NO_PROTOTYPES with
+// VMA_STATIC_VULKAN_FUNCTIONS), every Vulkan entry point VMA needs is
+// passed explicitly via VmaVulkanFunctions.
+static void init_allocator(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VmaVulkanFunctions vulkanFunctions = {
+        /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
+        .vkGetInstanceProcAddr = vkGetInstanceProcAddr,
+        /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
+        .vkGetDeviceProcAddr = vkGetDeviceProcAddr,
+        .vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties,
+        .vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties,
+        .vkAllocateMemory = vkAllocateMemory,
+        .vkFreeMemory = vkFreeMemory,
+        .vkMapMemory = vkMapMemory,
+        .vkUnmapMemory = vkUnmapMemory,
+        .vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges,
+        .vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges,
+        .vkBindBufferMemory = vkBindBufferMemory,
+        .vkBindImageMemory = vkBindImageMemory,
+        .vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements,
+        .vkGetImageMemoryRequirements = vkGetImageMemoryRequirements,
+        .vkCreateBuffer = vkCreateBuffer,
+        .vkDestroyBuffer = vkDestroyBuffer,
+        .vkCreateImage = vkCreateImage,
+        .vkDestroyImage = vkDestroyImage,
+        .vkCmdCopyBuffer = vkCmdCopyBuffer,
+    #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
+        /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
+        .vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2,
+        /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
+        .vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2,
+    #endif
+    #if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
+        /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension.
+        .vkBindBufferMemory2KHR = vkBindBufferMemory2,
+        /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension.
+        .vkBindImageMemory2KHR = vkBindImageMemory2,
+    #endif
+    #if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
+        /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2.
+        .vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2KHR,
+    #endif
+    #if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
+        /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+        .vkGetDeviceBufferMemoryRequirements = vkGetDeviceBufferMemoryRequirements,
+        /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
+        .vkGetDeviceImageMemoryRequirements = vkGetDeviceImageMemoryRequirements,
+    #endif
+    };
+
+    VmaAllocatorCreateInfo create_info = {
+        // Budget queries require the VK_EXT_memory_budget extension.
+        .flags = (r->memory_budget_extension_enabled ?
+                      VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT :
+                      0),
+        .vulkanApiVersion = VK_API_VERSION_1_3,
+        .instance = r->instance,
+        .physicalDevice = r->physical_device,
+        .device = r->device,
+        .pVulkanFunctions = &vulkanFunctions,
+    };
+
+    VK_CHECK(vmaCreateAllocator(&create_info, &r->allocator));
+}
+
+// Tear down the VMA allocator created by init_allocator().
+static void finalize_allocator(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    vmaDestroyAllocator(r->allocator);
+}
+
+// Full Vulkan bring-up: instance, physical device, logical device, and
+// memory allocator, in dependency order.
+void pgraph_vk_init_instance(PGRAPHState *pg)
+{
+    create_instance(pg);
+    select_physical_device(pg);
+    create_logical_device(pg);
+    init_allocator(pg);
+}
+
+// Reverse of pgraph_vk_init_instance(): destroy allocator, device, then
+// instance, nulling the handles so stale use is detectable.
+void pgraph_vk_finalize_instance(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    finalize_allocator(pg);
+    vkDestroyDevice(r->device, NULL);
+    r->device = VK_NULL_HANDLE;
+
+    vkDestroyInstance(r->instance, NULL);
+    r->instance = VK_NULL_HANDLE;
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build
new file mode 100644
index 0000000000..24c2474cb9
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/meson.build
@@ -0,0 +1,24 @@
+if vulkan.found()
+
+specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen,
+ files(
+ 'blit.c',
+ 'buffer.c',
+ 'command.c',
+ 'debug.c',
+ 'display.c',
+ 'draw.c',
+ 'glsl.c',
+ 'image.c',
+ 'instance.c',
+ 'renderer.c',
+ 'reports.c',
+ 'shaders.c',
+ 'surface-compute.c',
+ 'surface.c',
+ 'texture.c',
+ 'vertex.c',
+ )
+ ])
+
+endif
diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c
new file mode 100644
index 0000000000..f947aa39e5
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/renderer.c
@@ -0,0 +1,266 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "renderer.h"
+
+#include "gloffscreen.h"
+
+#if HAVE_EXTERNAL_MEMORY
+// Offscreen GL context used when sharing rendered surfaces with the GL
+// display path (external-memory interop).
+static GloContext *g_gl_context;
+
+// Renderer early_context_init hook: create the GL context before the
+// pgraph thread starts.
+static void gl_context_init(void)
+{
+    g_gl_context = glo_context_create();
+}
+#endif
+
+// Per-thread renderer initialization, run on the pgraph thread. Binds the
+// shared GL context (when interop is enabled), then initializes every
+// Vulkan subsystem in dependency order.
+static void pgraph_vk_init_thread(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+#if HAVE_EXTERNAL_MEMORY
+    glo_set_current(g_gl_context);
+#endif
+
+    pgraph_vk_init_instance(pg);
+    pgraph_vk_init_command_buffers(pg);
+    pgraph_vk_init_buffers(d);
+    pgraph_vk_init_surfaces(pg);
+    pgraph_vk_init_shaders(pg);
+    pgraph_vk_init_pipelines(pg);
+    pgraph_vk_init_textures(pg);
+    pgraph_vk_init_reports(pg);
+    pgraph_vk_init_compute(pg);
+    pgraph_vk_init_display(pg);
+}
+
+// Tear down all Vulkan subsystems in exact reverse order of
+// pgraph_vk_init_thread().
+static void pgraph_vk_finalize(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    pgraph_vk_finalize_display(pg);
+    pgraph_vk_finalize_compute(pg);
+    pgraph_vk_finalize_reports(pg);
+    pgraph_vk_finalize_textures(pg);
+    pgraph_vk_finalize_pipelines(pg);
+    pgraph_vk_finalize_shaders(pg);
+    pgraph_vk_finalize_surfaces(pg);
+    pgraph_vk_finalize_buffers(d);
+    pgraph_vk_finalize_command_buffers(pg);
+    pgraph_vk_finalize_instance(pg);
+}
+
+// Handle a pending flush request: finish outstanding GPU work, write back
+// surfaces, and invalidate texture/vertex caches against VRAM contents.
+// Signals flush_complete for the requesting thread.
+static void pgraph_vk_flush(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    pgraph_vk_finish(pg, VK_FINISH_REASON_FLUSH);
+    pgraph_vk_surface_flush(d);
+    pgraph_vk_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
+    pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr,
+                                       memory_region_size(d->vram));
+    for (int i = 0; i < 4; i++) {
+        pg->texture_dirty[i] = true;
+    }
+
+    /* FIXME: Flush more? */
+
+    qatomic_set(&d->pgraph.flush_pending, false);
+    qemu_event_set(&d->pgraph.flush_complete);
+}
+
+// Handle a pending sync request: render the display output, then signal
+// sync_complete for the thread waiting in get_framebuffer_surface().
+static void pgraph_vk_sync(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    pgraph_vk_render_display(pg);
+
+    qatomic_set(&d->pgraph.sync_pending, false);
+    qemu_event_set(&d->pgraph.sync_complete);
+}
+
+// Called from the pfifo loop (with pfifo.lock held) to service deferred
+// work. Note the lock ordering: pfifo.lock is dropped before pgraph.lock is
+// taken, and re-acquired afterwards, to avoid lock-order inversion with
+// threads that take pgraph.lock first.
+static void pgraph_vk_process_pending(NV2AState *d)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    // Cheap atomic pre-check so the common no-work case avoids the lock dance.
+    if (qatomic_read(&r->downloads_pending) ||
+        qatomic_read(&r->download_dirty_surfaces_pending) ||
+        qatomic_read(&d->pgraph.sync_pending) ||
+        qatomic_read(&d->pgraph.flush_pending)
+        ) {
+        qemu_mutex_unlock(&d->pfifo.lock);
+        qemu_mutex_lock(&d->pgraph.lock);
+        if (qatomic_read(&r->downloads_pending)) {
+            pgraph_vk_process_pending_downloads(d);
+        }
+        if (qatomic_read(&r->download_dirty_surfaces_pending)) {
+            pgraph_vk_download_dirty_surfaces(d);
+        }
+        if (qatomic_read(&d->pgraph.sync_pending)) {
+            pgraph_vk_sync(d);
+        }
+        if (qatomic_read(&d->pgraph.flush_pending)) {
+            pgraph_vk_flush(d);
+        }
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock(&d->pfifo.lock);
+    }
+}
+
+// Frame-flip stall: wait for all submitted work, then emit a debug frame
+// boundary marker.
+static void pgraph_vk_flip_stall(NV2AState *d)
+{
+    pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_FLIP_STALL);
+    pgraph_vk_debug_frame_terminator();
+}
+
+// Before savevm: request that all dirty surfaces be written back to guest
+// RAM and arm the completion event waited on by pre_savevm_wait().
+static void pgraph_vk_pre_savevm_trigger(NV2AState *d)
+{
+    qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true);
+    qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
+}
+
+// Block until the download requested by pre_savevm_trigger() has finished.
+static void pgraph_vk_pre_savevm_wait(NV2AState *d)
+{
+    qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
+}
+
+// Shutdown hook placeholder: shader cache writeback is not implemented yet
+// for the Vulkan backend (kept disabled below).
+static void pgraph_vk_pre_shutdown_trigger(NV2AState *d)
+{
+    // qatomic_set(&d->pgraph.vk_renderer_state->shader_cache_writeback_pending, true);
+    // qemu_event_reset(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete);
+}
+
+// Counterpart of pre_shutdown_trigger(); intentionally a no-op for now.
+static void pgraph_vk_pre_shutdown_wait(NV2AState *d)
+{
+    // qemu_event_wait(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete);
+}
+
+// Return a GL texture id for the current scanout surface (0 when there is
+// no color surface, or when GL interop is unavailable). With interop, this
+// kicks the pgraph thread to render the display and waits for completion;
+// without it, it waits for the surface download instead.
+static int pgraph_vk_get_framebuffer_surface(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    qemu_mutex_lock(&d->pfifo.lock);
+    // FIXME: Possible race condition with pgraph, consider lock
+    uint32_t pline_offset, pstart_addr, pline_compare;
+    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
+    SurfaceBinding *surface = pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
+    if (surface == NULL || !surface->color) {
+        qemu_mutex_unlock(&d->pfifo.lock);
+        return 0;
+    }
+
+    assert(surface->color);
+
+    // Keep the surface alive for cache-eviction purposes.
+    surface->frame_time = pg->frame_time;
+
+#if HAVE_EXTERNAL_MEMORY
+    // Request a sync (display render) from the pgraph thread and wait for it.
+    qemu_event_reset(&d->pgraph.sync_complete);
+    qatomic_set(&pg->sync_pending, true);
+    pfifo_kick(d);
+    qemu_mutex_unlock(&d->pfifo.lock);
+    qemu_event_wait(&d->pgraph.sync_complete);
+    return r->display.gl_texture_id;
+#else
+    qemu_mutex_unlock(&d->pfifo.lock);
+    pgraph_vk_wait_for_surface_download(surface);
+    return 0;
+#endif
+}
+
+// Renderer init hook (main thread): allocate zeroed renderer state; the
+// heavy Vulkan setup happens later in pgraph_vk_init_thread().
+static void pgraph_vk_init(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+
+    pg->vk_renderer_state = (PGRAPHVkState *)g_malloc0(sizeof(PGRAPHVkState));
+
+    pgraph_vk_debug_init();
+}
+
+// PGRAPH renderer vtable for the Vulkan backend, registered at load time
+// by register_renderer() below.
+static PGRAPHRenderer pgraph_vk_renderer = {
+    .type = CONFIG_DISPLAY_RENDERER_VULKAN,
+    .name = "Vulkan",
+    .ops = {
+        .init = pgraph_vk_init,
+#if HAVE_EXTERNAL_MEMORY
+        .early_context_init = gl_context_init,
+#endif
+        .init_thread = pgraph_vk_init_thread,
+        .finalize = pgraph_vk_finalize,
+        .clear_report_value = pgraph_vk_clear_report_value,
+        .clear_surface = pgraph_vk_clear_surface,
+        .draw_begin = pgraph_vk_draw_begin,
+        .draw_end = pgraph_vk_draw_end,
+        .flip_stall = pgraph_vk_flip_stall,
+        .flush_draw = pgraph_vk_flush_draw,
+        .get_report = pgraph_vk_get_report,
+        .image_blit = pgraph_vk_image_blit,
+        .pre_savevm_trigger = pgraph_vk_pre_savevm_trigger,
+        .pre_savevm_wait = pgraph_vk_pre_savevm_wait,
+        .pre_shutdown_trigger = pgraph_vk_pre_shutdown_trigger,
+        .pre_shutdown_wait = pgraph_vk_pre_shutdown_wait,
+        .process_pending = pgraph_vk_process_pending,
+        .process_pending_reports = pgraph_vk_process_pending_reports,
+        .surface_update = pgraph_vk_surface_update,
+        .set_surface_scale_factor = pgraph_vk_set_surface_scale_factor,
+        .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor,
+        .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface,
+    }
+};
+
+// Self-registration at program load time (GCC/Clang constructor attribute).
+static void __attribute__((constructor)) register_renderer(void)
+{
+    pgraph_renderer_register(&pgraph_vk_renderer);
+}
+
+// Query VMA heap budgets and, if any heap is above ~80% utilization, trim
+// the texture cache to free resources.
+void pgraph_vk_check_memory_budget(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkPhysicalDeviceMemoryProperties const *props;
+    vmaGetMemoryProperties(r->allocator, &props);
+
+    g_autofree VmaBudget *budgets =
+        g_malloc_n(props->memoryHeapCount, sizeof(VmaBudget));
+    vmaGetHeapBudgets(r->allocator, budgets);
+
+    const float budget_threshold = 0.8;
+    bool near_budget = false;
+
+    // memoryHeapCount is uint32_t; match the index type. The byte counts are
+    // VkDeviceSize (64-bit), so use PRIu64 instead of %lu, which is only
+    // 32 bits on LLP64 targets (Win64).
+    for (uint32_t i = 0; i < props->memoryHeapCount; i++) {
+        VmaBudget *b = &budgets[i];
+        float use_to_budget_ratio =
+            (double)b->statistics.allocationBytes / (double)b->budget;
+        NV2A_VK_DPRINTF("Heap %u: used %" PRIu64 "/%" PRIu64 " MiB (%.2f%%)", i,
+                        b->statistics.allocationBytes / (1024 * 1024),
+                        b->budget / (1024 * 1024), use_to_budget_ratio * 100);
+        near_budget |= use_to_budget_ratio > budget_threshold;
+    }
+
+    // If any heaps are near budget, free up some resources
+    if (near_budget) {
+        pgraph_vk_trim_texture_cache(pg);
+    }
+
+#if 0
+    char *s;
+    vmaBuildStatsString(r->allocator, &s, VK_TRUE);
+    puts(s);
+    vmaFreeStatsString(r->allocator, s);
+#endif
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h
new file mode 100644
index 0000000000..a509de8d71
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/renderer.h
@@ -0,0 +1,526 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H
+#define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H
+
+#define VK_NO_PROTOTYPES 1
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/queue.h"
+#include "qemu/lru.h"
+#include "hw/hw.h"
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/nv2a_regs.h"
+#include "hw/xbox/nv2a/pgraph/surface.h"
+#include "hw/xbox/nv2a/pgraph/texture.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+
/* NOTE(review): include targets were lost in extraction; reconstructed from
 * usage (GLuint, glslang_stage_t, SpvReflect*, Vk* with VK_NO_PROTOTYPES). */
#include <epoxy/gl.h>
#include <glslang/Include/glslang_c_interface.h>
#include <spirv_reflect.h>
#include <volk.h>
+
+#define VMA_STATIC_VULKAN_FUNCTIONS 1
+#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#include <vk_mem_alloc.h>
+
+#include "debug.h"
+#include "constants.h"
+#include "glsl.h"
+
+#define HAVE_EXTERNAL_MEMORY 1
+
/* Queue family selection result; see pgraph_vk_find_queue_families(). */
typedef struct QueueFamilyIndices {
    int queue_family;
} QueueFamilyIndices;

/* A guest RAM range that must be synchronized into the vertex RAM buffer. */
typedef struct MemorySyncRequirement {
    hwaddr addr, size;
} MemorySyncRequirement;

/* Attachment formats identifying a cached VkRenderPass. */
typedef struct RenderPassState {
    VkFormat color_format;
    VkFormat zeta_format;
} RenderPassState;

/* Cache entry pairing a RenderPassState key with its VkRenderPass. */
typedef struct RenderPass {
    RenderPassState state;
    VkRenderPass render_pass;
} RenderPass;

/* Hash key for the pipeline LRU cache: everything that affects pipeline
 * creation state. Hashed whole with fast_hash, so it must stay densely
 * initialized (no uncleared padding between lookups). */
typedef struct PipelineKey {
    bool clear;
    RenderPassState render_pass_state;
    ShaderState shader_state;
    uint32_t regs[10]; /* raw PGRAPH register values folded into the key
                        * -- exact registers set by the pipeline builder,
                        * see draw.c (not visible here) */
    VkVertexInputBindingDescription binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
    VkVertexInputAttributeDescription attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
} PipelineKey;

/* LRU cache entry for a complete compiled pipeline. */
typedef struct PipelineBinding {
    LruNode node;
    PipelineKey key;
    VkPipelineLayout layout;
    VkPipeline pipeline;
    VkRenderPass render_pass;
    unsigned int draw_time; /* last draw counter this pipeline was used at */
} PipelineBinding;

/* Indices into PGRAPHVkState.storage_buffers. *_STAGING buffers are host
 * visible; their non-staging counterparts are device local. */
enum Buffer {
    BUFFER_STAGING_DST,
    BUFFER_STAGING_SRC,
    BUFFER_COMPUTE_DST,
    BUFFER_COMPUTE_SRC,
    BUFFER_INDEX,
    BUFFER_INDEX_STAGING,
    BUFFER_VERTEX_RAM,
    BUFFER_VERTEX_INLINE,
    BUFFER_VERTEX_INLINE_STAGING,
    BUFFER_UNIFORM,
    BUFFER_UNIFORM_STAGING,
    BUFFER_COUNT
};

/* A VMA-backed buffer plus bump-allocator bookkeeping (buffer_offset is the
 * current append position; see pgraph_vk_append_to_buffer). */
typedef struct StorageBuffer {
    VkBuffer buffer;
    VkBufferUsageFlags usage;
    VmaAllocationCreateInfo alloc_info;
    VmaAllocation allocation;
    VkMemoryPropertyFlags properties;
    size_t buffer_offset;
    size_t buffer_size;
    uint8_t *mapped; /* non-NULL when persistently mapped (host visible) */
} StorageBuffer;
+
/* A guest render surface bound to a host VkImage, tracked on the
 * PGRAPHVkState.surfaces list. download/upload_pending flag deferred
 * transfers between guest VRAM and the image. */
typedef struct SurfaceBinding {
    QTAILQ_ENTRY(SurfaceBinding) entry;
    MemAccessCallback *access_cb; /* guest-write notification for this range */

    hwaddr vram_addr;

    SurfaceShape shape;
    uintptr_t dma_addr;
    uintptr_t dma_len;
    bool color;   /* color target (true) vs zeta/depth target (false) */
    bool swizzle; /* guest data is swizzled in VRAM */

    unsigned int width;
    unsigned int height;
    unsigned int pitch;
    size_t size;

    bool cleared;
    int frame_time;
    int draw_time;
    bool draw_dirty;        /* GPU contents newer than guest VRAM */
    bool download_pending;  /* needs GPU -> guest VRAM copy */
    bool upload_pending;    /* needs guest VRAM -> GPU copy */

    BasicSurfaceFormatInfo fmt;
    SurfaceFormatInfo host_fmt;

    VkImage image;
    VkImageView image_view;
    VmaAllocation allocation;

    // Used for scaling
    VkImage image_scratch;
    VkImageLayout image_scratch_current_layout;
    VmaAllocation allocation_scratch;

    bool initialized;
} SurfaceBinding;

/* A compiled shader stage: generated GLSL, its SPIR-V translation, the
 * Vulkan module, SPIRV-Reflect metadata, and uniform/push-constant layouts. */
typedef struct ShaderModuleInfo {
    char *glsl;
    GByteArray *spirv;
    VkShaderModule module;
    SpvReflectShaderModule reflect_module;
    SpvReflectDescriptorSet **descriptor_sets;
    ShaderUniformLayout uniforms;
    ShaderUniformLayout push_constants;
} ShaderModuleInfo;

/* LRU cache entry for a full shader program (geometry stage optional).
 * The *_loc fields are uniform indices resolved via uniform_index(); -1
 * presumably means "not present in this program" -- see shaders.c. */
typedef struct ShaderBinding {
    LruNode node;
    ShaderState state;
    ShaderModuleInfo *geometry; /* NULL when no geometry stage is needed */
    ShaderModuleInfo *vertex;
    ShaderModuleInfo *fragment;

    int psh_constant_loc[9][2];
    int alpha_ref_loc;

    int bump_mat_loc[NV2A_MAX_TEXTURES];
    int bump_scale_loc[NV2A_MAX_TEXTURES];
    int bump_offset_loc[NV2A_MAX_TEXTURES];
    int tex_scale_loc[NV2A_MAX_TEXTURES];

    int surface_size_loc;
    int clip_range_loc;

    int vsh_constant_loc;
    uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];

    int inv_viewport_loc;
    int ltctxa_loc;
    int ltctxb_loc;
    int ltc1_loc;

    int fog_color_loc;
    int fog_param_loc;
    int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
    int light_infinite_direction_loc[NV2A_MAX_LIGHTS];
    int light_local_position_loc[NV2A_MAX_LIGHTS];
    int light_local_attenuation_loc[NV2A_MAX_LIGHTS];

    int clip_region_loc;

    int material_alpha_loc;
} ShaderBinding;

/* Hash key for the texture LRU cache. */
typedef struct TextureKey {
    TextureShape state;
    hwaddr texture_vram_offset;
    hwaddr texture_length;
    hwaddr palette_vram_offset;
    hwaddr palette_length;
    float scale;
} TextureKey;

/* LRU cache entry for a guest texture uploaded to a VkImage. */
typedef struct TextureBinding {
    LruNode node;
    TextureKey key;
    VkImage image;
    VkImageLayout current_layout;
    VkImageView image_view;
    VmaAllocation allocation;
    VkSampler sampler;
    bool possibly_dirty; /* guest may have written the backing VRAM */
    uint64_t hash;       /* content hash for dirty detection */
    unsigned int draw_time;
    uint32_t submit_time;
} TextureBinding;

/* A queued NV097 report request; clear entries reset the accumulated
 * zpass pixel count instead of reporting it. */
typedef struct QueryReport {
    QSIMPLEQ_ENTRY(QueryReport) entry;
    bool clear;
    uint32_t parameter;
    unsigned int query_count; /* occlusion queries issued before this report */
} QueryReport;
+
/* State for presenting the final frame: a dedicated pipeline renders the
 * framebuffer surface into an image that is shared with the UI's OpenGL
 * context through external-memory interop. */
typedef struct PGRAPHVkDisplayState {
    ShaderModuleInfo *display_frag;

    VkDescriptorPool descriptor_pool;
    VkDescriptorSetLayout descriptor_set_layout;
    VkDescriptorSet descriptor_set;

    VkPipelineLayout pipeline_layout;
    VkPipeline pipeline;

    VkRenderPass render_pass;
    VkFramebuffer framebuffer;

    VkImage image;
    VkDeviceMemory memory; /* exportable allocation backing `image` */
    VkImageView image_view;
    VkSampler sampler;

    int width, height;
    int draw_time;

    // OpenGL Interop
#ifdef WIN32
    HANDLE handle; /* exported memory handle for GL import */
#else
    int fd;        /* exported memory fd for GL import */
#endif
    GLuint gl_memory_obj;
    GLuint gl_texture_id;
} PGRAPHVkDisplayState;

/* Compute pipelines used to pack/unpack depth-stencil surface data
 * between guest and host formats (see surface-compute.c). */
typedef struct PGRAPHVkComputeState {
    VkDescriptorPool descriptor_pool;
    VkDescriptorSetLayout descriptor_set_layout;
    VkDescriptorSet descriptor_sets[1];
    VkPipelineLayout pipeline_layout;
    VkPipeline pipeline_pack_d24s8;
    VkPipeline pipeline_unpack_d24s8;
    VkPipeline pipeline_pack_f32s8;
    VkPipeline pipeline_unpack_f32s8;
} PGRAPHVkComputeState;
+
/* Top-level Vulkan renderer state, attached to PGRAPHState as
 * pg->vk_renderer_state. Groups device/instance objects, the command
 * buffer machinery, caches (pipelines, textures, shaders), storage
 * buffers, surface tracking, and query/report handling. */
typedef struct PGRAPHVkState {
    void *window;
    VkInstance instance;

    /* Optional-extension availability, probed at instance/device creation */
    bool debug_utils_extension_enabled;
    bool custom_border_color_extension_enabled;
    bool provoking_vertex_extension_enabled;
    bool memory_budget_extension_enabled;

    VkPhysicalDevice physical_device;
    VkPhysicalDeviceProperties device_props;
    VkDevice device;
    VmaAllocator allocator;
    uint32_t allocator_last_submit_index;

    VkQueue queue;
    VkCommandPool command_pool;
    VkCommandBuffer command_buffers[2]; /* [0] main draw, [1] aux -- see below */

    /* Main command buffer (aliases command_buffers[0]) */
    VkCommandBuffer command_buffer;
    VkSemaphore command_buffer_semaphore;
    VkFence command_buffer_fence;
    unsigned int command_buffer_start_time;
    bool in_command_buffer;
    uint32_t submit_count;

    /* Secondary buffer for out-of-band work (aliases command_buffers[1]) */
    VkCommandBuffer aux_command_buffer;
    bool in_aux_command_buffer;

    VkFramebuffer framebuffers[50];
    int framebuffer_index;
    bool framebuffer_dirty;

    VkRenderPass render_pass;     /* currently active render pass */
    RenderPass *render_passes;    /* growable cache keyed by RenderPassState */
    int render_passes_index;
    int render_passes_capacity;
    bool in_render_pass;
    bool in_draw;

    Lru pipeline_cache;
    VkPipelineCache vk_pipeline_cache;
    PipelineBinding *pipeline_cache_entries;
    PipelineBinding *pipeline_binding; /* currently bound pipeline */
    bool pipeline_binding_changed;

    VkDescriptorPool descriptor_pool;
    VkDescriptorSetLayout descriptor_set_layout;
    VkDescriptorSet descriptor_sets[1024]; /* one per draw, reset at finish */
    int descriptor_set_index;

    StorageBuffer storage_buffers[BUFFER_COUNT];

    MemorySyncRequirement vertex_ram_buffer_syncs[NV2A_VERTEXSHADER_ATTRIBUTES];
    size_t num_vertex_ram_buffer_syncs;
    unsigned long *uploaded_bitmap; /* tracks VRAM pages already uploaded */
    size_t bitmap_size;

    VkVertexInputAttributeDescription vertex_attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
    int vertex_attribute_to_description_location[NV2A_VERTEXSHADER_ATTRIBUTES];
    int num_active_vertex_attribute_descriptions;

    VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
    int num_active_vertex_binding_descriptions;
    hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES];

    QTAILQ_HEAD(, SurfaceBinding) surfaces;
    QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces;
    SurfaceBinding *color_binding, *zeta_binding; /* current render targets */
    bool downloads_pending;
    QemuEvent downloads_complete;
    bool download_dirty_surfaces_pending;
    QemuEvent dirty_surfaces_download_complete; // common

    Lru texture_cache;
    TextureBinding *texture_cache_entries;
    TextureBinding *texture_bindings[NV2A_MAX_TEXTURES];
    TextureBinding dummy_texture; /* bound for inactive texture stages */
    bool texture_bindings_changed;

    Lru shader_cache;
    ShaderBinding *shader_cache_entries;
    ShaderBinding *shader_binding; /* currently bound program */
    ShaderModuleInfo *quad_vert_module, *solid_frag_module;
    bool shader_bindings_changed;

    // FIXME: Merge these into a structure
    uint64_t uniform_buffer_hashes[2];  /* [0] vertex, [1] fragment */
    size_t uniform_buffer_offsets[2];
    bool uniforms_changed;

    VkQueryPool query_pool; /* occlusion queries for zpass pixel counts */
    int max_queries_in_flight; // FIXME: Move out to constant
    int num_queries_in_flight;
    bool new_query_needed;
    bool query_in_flight;
    uint32_t zpass_pixel_count_result;
    QSIMPLEQ_HEAD(, QueryReport) report_queue; // FIXME: Statically allocate

    SurfaceFormatInfo kelvin_surface_zeta_vk_map[3];

    uint32_t clear_parameter;

    PGRAPHVkDisplayState display;
    PGRAPHVkComputeState compute;
} PGRAPHVkState;
+
/* Cross-module entry points, grouped by implementing translation unit. */

// renderer.c
void pgraph_vk_check_memory_budget(PGRAPHState *pg);

// debug.c
void pgraph_vk_debug_init(void);

// instance.c
void pgraph_vk_init_instance(PGRAPHState *pg);
void pgraph_vk_finalize_instance(PGRAPHState *pg);
QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device);
uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits,
                                   VkMemoryPropertyFlags properties);

// glsl.c
void pgraph_vk_init_glsl_compiler(void);
void pgraph_vk_finalize_glsl_compiler(void);
GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage,
                                          const char *glsl_source);
VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r,
                                                       GByteArray *spv);
ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
    PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl);
void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info);

// buffer.c
void pgraph_vk_init_buffers(NV2AState *d);
void pgraph_vk_finalize_buffers(NV2AState *d);
bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index,
                                    VkDeviceSize size,
                                    VkDeviceAddress alignment);
VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data,
                                        VkDeviceSize *sizes, size_t count,
                                        VkDeviceAddress alignment);

// command.c
void pgraph_vk_init_command_buffers(PGRAPHState *pg);
void pgraph_vk_finalize_command_buffers(PGRAPHState *pg);
VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg);
void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd);

// image.c
void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd,
                                       VkImage image, VkFormat format,
                                       VkImageLayout oldLayout,
                                       VkImageLayout newLayout);

// vertex.c
void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
                                      unsigned int max_element,
                                      bool inline_data,
                                      unsigned int inline_stride,
                                      unsigned int provoking_element);
void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d);
void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, void *data,
                                        VkDeviceSize size);
VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data,
                                           VkDeviceSize size);
VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data,
                                                   VkDeviceSize *sizes,
                                                   size_t count);

// surface.c
void pgraph_vk_init_surfaces(PGRAPHState *pg);
void pgraph_vk_finalize_surfaces(PGRAPHState *pg);
void pgraph_vk_surface_flush(NV2AState *d);
void pgraph_vk_process_pending_downloads(NV2AState *d);
void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface);
SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr);
void pgraph_vk_wait_for_surface_download(SurfaceBinding *e);
void pgraph_vk_download_dirty_surfaces(NV2AState *d);
void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
                                   bool force);
void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write,
                              bool zeta_write);
SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr);
void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta);
void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale);
unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d);
void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg);

// surface-compute.c
void pgraph_vk_init_compute(PGRAPHState *pg);
void pgraph_vk_finalize_compute(PGRAPHState *pg);
void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
                                  VkCommandBuffer cmd, VkBuffer src,
                                  VkBuffer dst, bool downscale);
void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
                                    VkCommandBuffer cmd, VkBuffer src,
                                    VkBuffer dst);

// display.c
void pgraph_vk_init_display(PGRAPHState *pg);
void pgraph_vk_finalize_display(PGRAPHState *pg);
void pgraph_vk_render_display(PGRAPHState *pg);

// texture.c
void pgraph_vk_init_textures(PGRAPHState *pg);
void pgraph_vk_finalize_textures(PGRAPHState *pg);
void pgraph_vk_bind_textures(NV2AState *d);
void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr,
                                            hwaddr size);
void pgraph_vk_trim_texture_cache(PGRAPHState *pg);

// shaders.c
void pgraph_vk_init_shaders(PGRAPHState *pg);
void pgraph_vk_finalize_shaders(PGRAPHState *pg);
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg);
void pgraph_vk_bind_shaders(PGRAPHState *pg);
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg);

// reports.c
void pgraph_vk_init_reports(PGRAPHState *pg);
void pgraph_vk_finalize_reports(PGRAPHState *pg);
void pgraph_vk_clear_report_value(NV2AState *d);
void pgraph_vk_get_report(NV2AState *d, uint32_t parameter);
void pgraph_vk_process_pending_reports(NV2AState *d);
void pgraph_vk_process_pending_reports_internal(NV2AState *d);

/* Why pgraph_vk_finish() was invoked (profiling/diagnostic context). */
typedef enum FinishReason {
    VK_FINISH_REASON_VERTEX_BUFFER_DIRTY,
    VK_FINISH_REASON_SURFACE_CREATE,
    VK_FINISH_REASON_SURFACE_DOWN,
    VK_FINISH_REASON_NEED_BUFFER_SPACE,
    VK_FINISH_REASON_FRAMEBUFFER_DIRTY,
    VK_FINISH_REASON_PRESENTING,
    VK_FINISH_REASON_FLIP_STALL,
    VK_FINISH_REASON_FLUSH,
} FinishReason;

// draw.c
void pgraph_vk_init_pipelines(PGRAPHState *pg);
void pgraph_vk_finalize_pipelines(PGRAPHState *pg);
void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter);
void pgraph_vk_draw_begin(NV2AState *d);
void pgraph_vk_draw_end(NV2AState *d);
void pgraph_vk_finish(PGRAPHState *pg, FinishReason why);
void pgraph_vk_flush_draw(NV2AState *d);
void pgraph_vk_begin_command_buffer(PGRAPHState *pg);
void pgraph_vk_ensure_command_buffer(PGRAPHState *pg);
void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg);

VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg);
void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd);

// blit.c
void pgraph_vk_image_blit(NV2AState *d);
+#endif
diff --git a/hw/xbox/nv2a/pgraph/vk/reports.c b/hw/xbox/nv2a/pgraph/vk/reports.c
new file mode 100644
index 0000000000..2e6bdf96f3
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/reports.c
@@ -0,0 +1,134 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "renderer.h"
+
+void pgraph_vk_init_reports(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ QSIMPLEQ_INIT(&r->report_queue);
+ r->num_queries_in_flight = 0;
+ r->max_queries_in_flight = 1024;
+ r->new_query_needed = true;
+ r->query_in_flight = false;
+ r->zpass_pixel_count_result = 0;
+
+ VkQueryPoolCreateInfo pool_create_info = (VkQueryPoolCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+ .queryType = VK_QUERY_TYPE_OCCLUSION,
+ .queryCount = r->max_queries_in_flight,
+ };
+ VK_CHECK(
+ vkCreateQueryPool(r->device, &pool_create_info, NULL, &r->query_pool));
+}
+
/* Destroy the occlusion query pool created by pgraph_vk_init_reports(). */
void pgraph_vk_finalize_reports(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyQueryPool(r->device, r->query_pool, NULL);
}
+
+void pgraph_vk_clear_report_value(NV2AState *d)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
+ q->clear = true;
+ QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);
+}
+
+void pgraph_vk_get_report(NV2AState *d, uint32_t parameter)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE);
+ assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
+
+ QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
+ q->clear = false;
+ q->parameter = parameter;
+ q->query_count = r->num_queries_in_flight;
+ QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);
+
+ r->new_query_needed = true;
+}
+
/*
 * Drain the report queue: fetch all completed occlusion query results,
 * fold them into the running zpass pixel count in submission order, and
 * write each requested report back to the guest. Must run outside a
 * command buffer (the queries must have been submitted and retired).
 */
void pgraph_vk_process_pending_reports_internal(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    NV2A_VK_DGROUP_BEGIN("Processing queries");

    assert(!r->in_command_buffer);

    // Fetch all query results
    g_autofree uint64_t *query_results = NULL;

    if (r->num_queries_in_flight > 0) {
        size_t size_of_results = r->num_queries_in_flight * sizeof(uint64_t);
        query_results = g_malloc_n(r->num_queries_in_flight,
                                   sizeof(uint64_t)); // FIXME: Pre-allocate
        VkResult result;
        /* WAIT_BIT should block until ready; loop defensively on NOT_READY */
        do {
            result = vkGetQueryPoolResults(
                r->device, r->query_pool, 0, r->num_queries_in_flight,
                size_of_results, query_results, sizeof(uint64_t),
                VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
        } while (result == VK_NOT_READY);
    }

    // Write out queries
    QueryReport *q, *next;
    int num_results_counted = 0;

    /* Host renders at scale^2 the guest pixel count; divide back down */
    int result_divisor = pg->surface_scale_factor * pg->surface_scale_factor;

    QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) {
        if (q->clear) {
            NV2A_VK_DPRINTF("Cleared");
            r->zpass_pixel_count_result = 0;
        } else {
            assert(q->query_count >= num_results_counted);
            assert(q->query_count <= r->num_queries_in_flight);

            /* Accumulate results from queries issued before this report */
            while (num_results_counted < q->query_count) {
                r->zpass_pixel_count_result +=
                    query_results[num_results_counted++];
            }

            pgraph_write_zpass_pixel_cnt_report(
                d, q->parameter,
                r->zpass_pixel_count_result / result_divisor);
        }
        QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry);
        g_free(q);
    }

    r->num_queries_in_flight = 0;
    NV2A_VK_DGROUP_END();
}
+
/* Renderer-interface hook; intentionally a no-op here. NOTE(review):
 * reports appear to be drained via the *_internal variant at finish time
 * instead -- confirm against the callers of this vtable entry. */
void pgraph_vk_process_pending_reports(NV2AState *d)
{
}
diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c
new file mode 100644
index 0000000000..7d5000d751
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/shaders.c
@@ -0,0 +1,797 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * Based on GL implementation:
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/xbox/nv2a/pgraph/shaders.h"
+#include "hw/xbox/nv2a/pgraph/util.h"
+#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
+#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
+#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
+#include "qemu/fast-hash.h"
+#include "qemu/mstring.h"
+#include "renderer.h"
#include <locale.h>
+
+static void create_descriptor_pool(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ size_t num_sets = ARRAY_SIZE(r->descriptor_sets);
+
+ VkDescriptorPoolSize pool_sizes[] = {
+ {
+ .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = 2 * num_sets,
+ },
+ {
+ .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = NV2A_MAX_TEXTURES * num_sets,
+ }
+ };
+
+ VkDescriptorPoolCreateInfo pool_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+ .poolSizeCount = ARRAY_SIZE(pool_sizes),
+ .pPoolSizes = pool_sizes,
+ .maxSets = ARRAY_SIZE(r->descriptor_sets),
+ .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+ };
+ VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
+ &r->descriptor_pool));
+}
+
/* Destroy the descriptor pool (implicitly frees all sets allocated from it). */
static void destroy_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyDescriptorPool(r->device, r->descriptor_pool, NULL);
    r->descriptor_pool = VK_NULL_HANDLE;
}
+
+static void create_descriptor_set_layout(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES];
+
+ bindings[0] = (VkDescriptorSetLayoutBinding){
+ .binding = VSH_UBO_BINDING,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
+ };
+ bindings[1] = (VkDescriptorSetLayoutBinding){
+ .binding = PSH_UBO_BINDING,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ };
+ for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
+ bindings[2 + i] = (VkDescriptorSetLayoutBinding){
+ .binding = PSH_TEX_BINDING + i,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ };
+ }
+ VkDescriptorSetLayoutCreateInfo layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .bindingCount = ARRAY_SIZE(bindings),
+ .pBindings = bindings,
+ };
+ VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
+ &r->descriptor_set_layout));
+}
+
/* Destroy the shared descriptor set layout. */
static void destroy_descriptor_set_layout(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyDescriptorSetLayout(r->device, r->descriptor_set_layout, NULL);
    r->descriptor_set_layout = VK_NULL_HANDLE;
}
+
+static void create_descriptor_sets(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)];
+ for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
+ layouts[i] = r->descriptor_set_layout;
+ }
+
+ VkDescriptorSetAllocateInfo alloc_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+ .descriptorPool = r->descriptor_pool,
+ .descriptorSetCount = ARRAY_SIZE(r->descriptor_sets),
+ .pSetLayouts = layouts,
+ };
+ VK_CHECK(
+ vkAllocateDescriptorSets(r->device, &alloc_info, r->descriptor_sets));
+}
+
/* Return all descriptor sets to the pool and clear the stale handles. */
static void destroy_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkFreeDescriptorSets(r->device, r->descriptor_pool,
                         ARRAY_SIZE(r->descriptor_sets), r->descriptor_sets);
    for (int i = 0; i < ARRAY_SIZE(r->descriptor_sets); i++) {
        r->descriptor_sets[i] = VK_NULL_HANDLE;
    }
}
+
+void pgraph_vk_update_descriptor_sets(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ bool need_uniform_write =
+ r->uniforms_changed ||
+ !r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset;
+
+ if (!(r->shader_bindings_changed || r->texture_bindings_changed ||
+ (r->descriptor_set_index == 0) || need_uniform_write)) {
+ return; // Nothing changed
+ }
+
+ ShaderBinding *binding = r->shader_binding;
+ ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
+ &binding->fragment->uniforms };
+ VkDeviceSize ubo_buffer_total_size = 0;
+ for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
+ ubo_buffer_total_size += layouts[i]->total_size;
+ }
+ bool need_ubo_staging_buffer_reset =
+ r->uniforms_changed &&
+ !pgraph_vk_buffer_has_space_for(pg, BUFFER_UNIFORM_STAGING,
+ ubo_buffer_total_size,
+ r->device_props.limits.minUniformBufferOffsetAlignment);
+
+ bool need_descriptor_write_reset =
+ (r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets));
+
+ if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) {
+ pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
+ need_uniform_write = true;
+ }
+
+ VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES];
+
+ assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets));
+
+ if (need_uniform_write) {
+ for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
+ void *data = layouts[i]->allocation;
+ VkDeviceSize size = layouts[i]->total_size;
+ r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer(
+ pg, BUFFER_UNIFORM_STAGING, &data, &size, 1,
+ r->device_props.limits.minUniformBufferOffsetAlignment);
+ }
+
+ r->uniforms_changed = false;
+ }
+
+ VkDescriptorBufferInfo ubo_buffer_infos[2];
+ for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
+ ubo_buffer_infos[i] = (VkDescriptorBufferInfo){
+ .buffer = r->storage_buffers[BUFFER_UNIFORM].buffer,
+ .offset = r->uniform_buffer_offsets[i],
+ .range = layouts[i]->total_size,
+ };
+ descriptor_writes[i] = (VkWriteDescriptorSet){
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = r->descriptor_sets[r->descriptor_set_index],
+ .dstBinding = i == 0 ? VSH_UBO_BINDING : PSH_UBO_BINDING,
+ .dstArrayElement = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+ .descriptorCount = 1,
+ .pBufferInfo = &ubo_buffer_infos[i],
+ };
+ }
+
+ VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES];
+ for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
+ image_infos[i] = (VkDescriptorImageInfo){
+ .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .imageView = r->texture_bindings[i]->image_view,
+ .sampler = r->texture_bindings[i]->sampler,
+ };
+ descriptor_writes[2 + i] = (VkWriteDescriptorSet){
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = r->descriptor_sets[r->descriptor_set_index],
+ .dstBinding = PSH_TEX_BINDING + i,
+ .dstArrayElement = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .pImageInfo = &image_infos[i],
+ };
+ }
+
+ vkUpdateDescriptorSets(r->device, 6, descriptor_writes, 0, NULL);
+
+ r->descriptor_set_index++;
+}
+
+static void update_shader_constant_locations(ShaderBinding *binding)
+{
+ int i, j;
+ char tmp[64];
+
+ /* lookup fragment shader uniforms */
+ for (i = 0; i < 9; i++) {
+ for (j = 0; j < 2; j++) {
+ snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
+ binding->psh_constant_loc[i][j] =
+ uniform_index(&binding->fragment->uniforms, tmp);
+ }
+ }
+ binding->alpha_ref_loc =
+ uniform_index(&binding->fragment->uniforms, "alphaRef");
+ binding->fog_color_loc =
+ uniform_index(&binding->fragment->uniforms, "fogColor");
+ for (i = 1; i < NV2A_MAX_TEXTURES; i++) {
+ snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
+ binding->bump_mat_loc[i] =
+ uniform_index(&binding->fragment->uniforms, tmp);
+ snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
+ binding->bump_scale_loc[i] =
+ uniform_index(&binding->fragment->uniforms, tmp);
+ snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
+ binding->bump_offset_loc[i] =
+ uniform_index(&binding->fragment->uniforms, tmp);
+ }
+
+ for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
+ snprintf(tmp, sizeof(tmp), "texScale%d", i);
+ binding->tex_scale_loc[i] =
+ uniform_index(&binding->fragment->uniforms, tmp);
+ }
+
+ /* lookup vertex shader uniforms */
+ binding->vsh_constant_loc = uniform_index(&binding->vertex->uniforms, "c");
+ binding->surface_size_loc =
+ uniform_index(&binding->vertex->uniforms, "surfaceSize");
+ binding->clip_range_loc =
+ uniform_index(&binding->vertex->uniforms, "clipRange");
+ binding->fog_param_loc =
+ uniform_index(&binding->vertex->uniforms, "fogParam");
+
+ binding->inv_viewport_loc =
+ uniform_index(&binding->vertex->uniforms, "invViewport");
+ binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa");
+ binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb");
+ binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1");
+
+ for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+ snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
+ binding->light_infinite_half_vector_loc[i] =
+ uniform_index(&binding->vertex->uniforms, tmp);
+ snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
+ binding->light_infinite_direction_loc[i] =
+ uniform_index(&binding->vertex->uniforms, tmp);
+
+ snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
+ binding->light_local_position_loc[i] =
+ uniform_index(&binding->vertex->uniforms, tmp);
+ snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
+ binding->light_local_attenuation_loc[i] =
+ uniform_index(&binding->vertex->uniforms, tmp);
+ }
+
+ binding->clip_region_loc =
+ uniform_index(&binding->fragment->uniforms, "clipRegion");
+
+ binding->material_alpha_loc =
+ uniform_index(&binding->vertex->uniforms, "material_alpha");
+}
+
/* LRU callback: stamp a freshly claimed cache node with its lookup key. */
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
    ShaderBinding *snode = container_of(node, ShaderBinding, node);
    memcpy(&snode->state, state, sizeof(ShaderState));
}
+
+static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
+{
+ PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache);
+ ShaderBinding *snode = container_of(node, ShaderBinding, node);
+
+ ShaderModuleInfo *modules[] = {
+ snode->geometry,
+ snode->vertex,
+ snode->fragment,
+ };
+ for (int i = 0; i < ARRAY_SIZE(modules); i++) {
+ if (modules[i]) {
+ pgraph_vk_destroy_shader_module(r, modules[i]);
+ }
+ }
+
+ memset(&snode->state, 0, sizeof(ShaderState));
+}
+
/* LRU callback: key comparison. Returns zero (false) on a match, nonzero
 * on a mismatch, per the memcmp convention the Lru helper expects. */
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
    ShaderBinding *snode = container_of(node, ShaderBinding, node);
    return memcmp(&snode->state, key, sizeof(ShaderState));
}
+
+static void shader_cache_init(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ const size_t shader_cache_size = 1024;
+ lru_init(&r->shader_cache);
+ r->shader_cache_entries = g_malloc_n(shader_cache_size, sizeof(ShaderBinding));
+ assert(r->shader_cache_entries != NULL);
+ for (int i = 0; i < shader_cache_size; i++) {
+ lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node);
+ }
+ r->shader_cache.init_node = shader_cache_entry_init;
+ r->shader_cache.compare_nodes = shader_cache_entry_compare;
+ r->shader_cache.post_node_evict = shader_cache_entry_post_evict;
+}
+
/* Evict every cached shader (running post_evict cleanup) and free the
 * backing array. */
static void shader_cache_finalize(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    lru_flush(&r->shader_cache);
    g_free(r->shader_cache_entries);
    r->shader_cache_entries = NULL;
}
+
+/* Look up (or generate) the shader binding for the given state. On a cache
+ * miss, GLSL for the geometry (optional), vertex and fragment stages is
+ * generated and compiled into shader modules, then uniform locations are
+ * resolved. Returns the cached/new binding; ownership stays with the LRU. */
+static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    uint64_t hash = fast_hash((void *)state, sizeof(*state));
+    LruNode *node = lru_lookup(&r->shader_cache, hash, state);
+    ShaderBinding *snode = container_of(node, ShaderBinding, node);
+
+    /* %llx + cast instead of %lx: `long` is 32 bits on LLP64 targets
+     * (e.g. Windows), so %lx would mismatch uint64_t there. */
+    NV2A_VK_DPRINTF("shader state hash: %016llx, %p",
+                    (unsigned long long)hash, snode);
+
+    /* A NULL fragment module marks an entry that still needs generation
+     * (the fragment stage is always created on a fill). */
+    if (!snode->fragment) {
+        NV2A_VK_DPRINTF("cache miss");
+        nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
+
+        char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
+        if (previous_numeric_locale) {
+            previous_numeric_locale = g_strdup(previous_numeric_locale);
+        }
+
+        /* Ensure numeric values are printed with '.' radix, no grouping */
+        setlocale(LC_NUMERIC, "C");
+
+        MString *geometry_shader_code = pgraph_gen_geom_glsl(
+            state->polygon_front_mode, state->polygon_back_mode,
+            state->primitive_mode, state->smooth_shading, true);
+        /* Capture this before the MString is released; the original read
+         * the pointer value after mstring_unref(). */
+        bool has_geometry_shader = geometry_shader_code != NULL;
+        if (geometry_shader_code) {
+            NV2A_VK_DPRINTF("geometry shader: \n%s",
+                            mstring_get_str(geometry_shader_code));
+            snode->geometry = pgraph_vk_create_shader_module_from_glsl(
+                r, VK_SHADER_STAGE_GEOMETRY_BIT,
+                mstring_get_str(geometry_shader_code));
+            mstring_unref(geometry_shader_code);
+        } else {
+            /* Plain assignment; the original memset() of the pointer
+             * object obscured the intent. */
+            snode->geometry = NULL;
+        }
+
+        MString *vertex_shader_code =
+            pgraph_gen_vsh_glsl(state, has_geometry_shader);
+        NV2A_VK_DPRINTF("vertex shader: \n%s",
+                        mstring_get_str(vertex_shader_code));
+        snode->vertex = pgraph_vk_create_shader_module_from_glsl(
+            r, VK_SHADER_STAGE_VERTEX_BIT,
+            mstring_get_str(vertex_shader_code));
+        mstring_unref(vertex_shader_code);
+
+        MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh);
+        NV2A_VK_DPRINTF("fragment shader: \n%s",
+                        mstring_get_str(fragment_shader_code));
+        snode->fragment = pgraph_vk_create_shader_module_from_glsl(
+            r, VK_SHADER_STAGE_FRAGMENT_BIT,
+            mstring_get_str(fragment_shader_code));
+        mstring_unref(fragment_shader_code);
+
+        if (previous_numeric_locale) {
+            setlocale(LC_NUMERIC, previous_numeric_locale);
+            g_free(previous_numeric_locale);
+        }
+
+        update_shader_constant_locations(snode);
+    }
+
+    return snode;
+}
+
+// FIXME: Move to common
+// Upload all per-draw uniform values — combiner constants, texture/bump
+// parameters, fog, (fixed-function) lighting, viewport and clip state —
+// into the bound vertex/fragment modules' uniform staging buffers.
+// Only uniforms with a valid location (!= -1, resolved earlier in
+// update_shader_constant_locations()) are written.
+// NOTE(review): binding_changed and vertex_program are currently unused
+// by this function.
+static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
+                                    bool binding_changed, bool vertex_program,
+                                    bool fixed_function)
+{
+    int i, j;
+
+    /* update combiner constants */
+    for (i = 0; i < 9; i++) {
+        uint32_t constant[2];
+        if (i == 8) {
+            /* final combiner */
+            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
+            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
+        } else {
+            constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
+            constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
+        }
+
+        for (j = 0; j < 2; j++) {
+            // GLint kept for parity with the GL renderer; value is a
+            // uniform index, -1 when absent from this program.
+            GLint loc = binding->psh_constant_loc[i][j];
+            if (loc != -1) {
+                float value[4];
+                pgraph_argb_pack32_to_rgba_float(constant[j], value);
+                uniform1fv(&binding->fragment->uniforms, loc, 4, value);
+            }
+        }
+    }
+    if (binding->alpha_ref_loc != -1) {
+        // Alpha test reference, normalized from 0..255 to 0..1.
+        float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
+                                   NV_PGRAPH_CONTROL_0_ALPHAREF) /
+                          255.0;
+        uniform1f(&binding->fragment->uniforms, binding->alpha_ref_loc,
+                  alpha_ref);
+    }
+
+
+    /* For each texture stage */
+    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        int loc;
+
+        /* Bump luminance only during stages 1 - 3 */
+        if (i > 0) {
+            loc = binding->bump_mat_loc[i];
+            if (loc != -1) {
+                uint32_t m_u32[4];
+                m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
+                m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
+                m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
+                m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
+                // Registers hold raw IEEE-754 bits; the casts type-pun
+                // them to float. NOTE(review): strict-aliasing-unsafe but
+                // matches the codebase convention; memcpy would be the
+                // conforming form.
+                float m[4];
+                m[0] = *(float*)&m_u32[0];
+                m[1] = *(float*)&m_u32[1];
+                m[2] = *(float*)&m_u32[2];
+                m[3] = *(float*)&m_u32[3];
+                uniformMatrix2fv(&binding->fragment->uniforms, loc, m);
+            }
+            loc = binding->bump_scale_loc[i];
+            if (loc != -1) {
+                uint32_t v =
+                    pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
+                uniform1f(&binding->fragment->uniforms, loc,
+                          *(float *)&v);
+            }
+            loc = binding->bump_offset_loc[i];
+            if (loc != -1) {
+                uint32_t v =
+                    pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
+                uniform1f(&binding->fragment->uniforms, loc,
+                          *(float *)&v);
+            }
+        }
+
+        loc = binding->tex_scale_loc[i];
+        if (loc != -1) {
+            assert(pg->vk_renderer_state->texture_bindings[i] != NULL);
+            // Texel-space scale only applies to linear formats; swizzled
+            // formats use normalized coordinates.
+            float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale;
+            BasicColorFormatInfo f_basic = kelvin_color_format_info_map[pg->vk_renderer_state->texture_bindings[i]->key.state.color_format];
+            if (!f_basic.linear) {
+                scale = 1.0;
+            }
+            uniform1f(&binding->fragment->uniforms, loc, scale);
+        }
+    }
+
+    if (binding->fog_color_loc != -1) {
+        uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
+        uniform4f(&binding->fragment->uniforms, binding->fog_color_loc,
+                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
+                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
+                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
+                  GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
+    }
+    if (binding->fog_param_loc != -1) {
+        uint32_t v[2];
+        v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
+        v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
+        uniform2f(&binding->vertex->uniforms,
+                  binding->fog_param_loc, *(float *)&v[0],
+                  *(float *)&v[1]);
+    }
+
+    // Maximum depth value for the current zeta format (used for viewport
+    // and clip-range normalization below).
+    float zmax;
+    switch (pg->surface_shape.zeta_format) {
+    case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
+        zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
+        break;
+    case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
+        zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
+        break;
+    default:
+        assert(0);
+        // NOTE(review): zmax is left uninitialized here in NDEBUG builds —
+        // confirm this branch is unreachable.
+    }
+
+    if (fixed_function) {
+        /* update lighting constants */
+        struct {
+            uint32_t *v;
+            int locs;
+            size_t len;
+        } lighting_arrays[] = {
+            { &pg->ltctxa[0][0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT },
+            { &pg->ltctxb[0][0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT },
+            { &pg->ltc1[0][0], binding->ltc1_loc, NV2A_LTC1_COUNT },
+        };
+
+        // Raw 32-bit register words are uploaded as-is (uniform1iv).
+        for (i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
+            uniform1iv(
+                &binding->vertex->uniforms, lighting_arrays[i].locs,
+                lighting_arrays[i].len * 4, (void *)lighting_arrays[i].v);
+        }
+
+        for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+            int loc = binding->light_infinite_half_vector_loc[i];
+            if (loc != -1) {
+                uniform1fv(&binding->vertex->uniforms, loc, 3,
+                           pg->light_infinite_half_vector[i]);
+            }
+            loc = binding->light_infinite_direction_loc[i];
+            if (loc != -1) {
+                uniform1fv(&binding->vertex->uniforms, loc, 3,
+                           pg->light_infinite_direction[i]);
+            }
+
+            loc = binding->light_local_position_loc[i];
+            if (loc != -1) {
+                uniform1fv(&binding->vertex->uniforms, loc, 3,
+                           pg->light_local_position[i]);
+            }
+            loc = binding->light_local_attenuation_loc[i];
+            if (loc != -1) {
+                uniform1fv(&binding->vertex->uniforms, loc, 3,
+                           pg->light_local_attenuation[i]);
+            }
+        }
+
+        /* estimate the viewport by assuming it matches the surface ... */
+        unsigned int aa_width = 1, aa_height = 1;
+        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
+
+        float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
+        float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
+        float m33 = zmax;
+        float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
+        float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
+
+        // Column-major inverse of the estimated viewport transform.
+        float invViewport[16] = {
+            1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0,
+            0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22,
+            0, 1.0
+        };
+
+        if (binding->inv_viewport_loc != -1) {
+            uniformMatrix4fv(&binding->vertex->uniforms,
+                             binding->inv_viewport_loc, &invViewport[0]);
+        }
+    }
+
+    /* update vertex program constants */
+    uniform1iv(&binding->vertex->uniforms, binding->vsh_constant_loc,
+               NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants);
+
+    if (binding->surface_size_loc != -1) {
+        unsigned int aa_width = 1, aa_height = 1;
+        pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
+        uniform2f(&binding->vertex->uniforms, binding->surface_size_loc,
+                  pg->surface_binding_dim.width / aa_width,
+                  pg->surface_binding_dim.height / aa_height);
+    }
+
+    if (binding->clip_range_loc != -1) {
+        uint32_t v[2];
+        v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
+        v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
+        // Normalize to NDC [-1, 1] using the format's max depth value.
+        float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0;
+        float zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0;
+        uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0,
+                  zmax, zclip_min, zclip_max);
+    }
+
+    /* Clipping regions */
+    unsigned int max_gl_width = pg->surface_binding_dim.width;
+    unsigned int max_gl_height = pg->surface_binding_dim.height;
+    pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
+
+    // Eight window-clip rectangles as {x_min, y_min, x_max, y_max}, in
+    // scaled framebuffer coordinates (max is exclusive: register max + 1).
+    uint32_t clip_regions[8][4];
+
+    for (i = 0; i < 8; i++) {
+        uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
+        unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
+        unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
+        uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
+        unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
+        unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
+        pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
+        pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
+
+        pgraph_apply_scaling_factor(pg, &x_min, &y_min);
+        pgraph_apply_scaling_factor(pg, &x_max, &y_max);
+
+        clip_regions[i][0] = x_min;
+        clip_regions[i][1] = y_min;
+        clip_regions[i][2] = x_max;
+        clip_regions[i][3] = y_max;
+    }
+    uniform1iv(&binding->fragment->uniforms, binding->clip_region_loc,
+               8 * 4, (void *)clip_regions);
+
+    if (binding->material_alpha_loc != -1) {
+        uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc,
+                  pg->material_alpha);
+    }
+}
+
+// Quickly check PGRAPH state to see if any registers have changed that
+// necessitate a full shader state inspection. Returns true when shaders
+// may need to be regenerated; false is a guaranteed "no change" and bumps
+// the NOTDIRTY profile counter.
+static bool check_shaders_dirty(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    if (!r->shader_binding) {
+        return true;
+    }
+    if (pg->program_data_dirty) {
+        return true;
+    }
+
+    // Per-stage combiner registers, for the active stage count only.
+    int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
+    for (int i = 0; i < num_stages; i++) {
+        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4) ||
+            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4) ||
+            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4) ||
+            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4)) {
+            return true;
+        }
+    }
+    unsigned int regs[] = {
+        NV_PGRAPH_COMBINECTL,
+        NV_PGRAPH_COMBINESPECFOG0,
+        NV_PGRAPH_COMBINESPECFOG1,
+        NV_PGRAPH_CSV0_C,
+        NV_PGRAPH_CSV0_D,
+        NV_PGRAPH_CSV1_A,
+        NV_PGRAPH_CSV1_B,
+        NV_PGRAPH_POINTSIZE,
+        NV_PGRAPH_SHADERCLIPMODE,
+        NV_PGRAPH_SHADERCTL,
+        NV_PGRAPH_SHADERPROG,
+        NV_PGRAPH_SHADOWCTL,
+    };
+    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
+        if (pgraph_is_reg_dirty(pg, regs[i])) {
+            return true;
+        }
+    }
+
+    // Non-register inputs to shader state.
+    ShaderState *state = &r->shader_binding->state;
+    if (pg->uniform_attrs != state->uniform_attrs ||
+        pg->swizzle_attrs != state->swizzle_attrs ||
+        pg->compressed_attrs != state->compressed_attrs ||
+        pg->primitive_mode != state->primitive_mode ||
+        pg->surface_scale_factor != state->surface_scale_factor) {
+        return true;
+    }
+
+    // Textures (use the `state` local rather than re-deriving the binding
+    // state through pg->vk_renderer_state each iteration).
+    for (int i = 0; i < 4; i++) {
+        if (pg->texture_matrix_enable[i] != state->texture_matrix_enable[i] ||
+            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + i * 4) ||
+            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + i * 4) ||
+            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + i * 4)) {
+            return true;
+        }
+    }
+
+    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
+
+    return false;
+}
+
+// Select (generating if necessary) the shader binding that matches the
+// current PGRAPH state, then refresh its uniform values. Sets
+// r->shader_bindings_changed when a different binding was chosen.
+void pgraph_vk_bind_shaders(PGRAPHState *pg)
+{
+    NV2A_VK_DGROUP_BEGIN("%s", __func__);
+
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    r->shader_bindings_changed = false;
+
+    if (check_shaders_dirty(pg)) {
+        ShaderState new_state;
+        // NOTE(review): the memset presumably zeroes struct padding so the
+        // memcmp() below (and fast_hash in gen_shaders) is stable, but the
+        // struct assignment may rewrite padding bytes — confirm
+        // pgraph_get_shader_state() yields deterministic padding.
+        memset(&new_state, 0, sizeof(ShaderState));
+        new_state = pgraph_get_shader_state(pg);
+        if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) {
+            r->shader_binding = gen_shaders(pg, &new_state);
+            r->shader_bindings_changed = true;
+        }
+    }
+
+    // FIXME: Use dirty bits
+    pgraph_vk_update_shader_uniforms(pg);
+
+    NV2A_VK_DGROUP_END();
+}
+
+// Recompute the uniform values for the currently bound shaders, then hash
+// the vertex/fragment uniform staging allocations to detect whether the
+// UBO contents actually changed (so unchanged buffers can skip re-upload).
+void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+    NV2A_VK_DGROUP_BEGIN("%s", __func__);
+    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);
+
+    assert(r->shader_binding);
+    ShaderBinding *binding = r->shader_binding;
+    ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
+                                       &binding->fragment->uniforms };
+    // binding_changed is passed as true unconditionally; the parameter is
+    // currently unused inside shader_update_constants (see FIXME there).
+    shader_update_constants(pg, r->shader_binding, true,
+                            r->shader_binding->state.vertex_program,
+                            r->shader_binding->state.fixed_function);
+
+    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
+        uint64_t hash = fast_hash(layouts[i]->allocation, layouts[i]->total_size);
+        // Sticky flag: stays set until consumed elsewhere.
+        r->uniforms_changed |= (hash != r->uniform_buffer_hashes[i]);
+        r->uniform_buffer_hashes[i] = hash;
+    }
+
+    nv2a_profile_inc_counter(r->uniforms_changed ?
+                             NV2A_PROF_SHADER_UBO_DIRTY :
+                             NV2A_PROF_SHADER_UBO_NOTDIRTY);
+
+    NV2A_VK_DGROUP_END();
+}
+
+/* Bring up shader support: the GLSL->SPIR-V compiler, the descriptor
+ * machinery, and the shader binding cache, in dependency order. */
+void pgraph_vk_init_shaders(PGRAPHState *pg)
+{
+    pgraph_vk_init_glsl_compiler();
+
+    create_descriptor_pool(pg);
+    create_descriptor_set_layout(pg);
+    create_descriptor_sets(pg);
+
+    shader_cache_init(pg);
+}
+
+/* Tear down shader support in the reverse of pgraph_vk_init_shaders(). */
+void pgraph_vk_finalize_shaders(PGRAPHState *pg)
+{
+    shader_cache_finalize(pg);
+
+    destroy_descriptor_sets(pg);
+    destroy_descriptor_set_layout(pg);
+    destroy_descriptor_pool(pg);
+
+    pgraph_vk_finalize_glsl_compiler();
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c
new file mode 100644
index 0000000000..045f8231b8
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c
@@ -0,0 +1,473 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/pgraph/pgraph.h"
+#include "renderer.h"
+#include
+
+// TODO: Swizzle/Unswizzle
+// TODO: Float depth format (low priority, but would be better for accuracy)
+
+// FIXME: Below pipeline creation assumes identical 3 buffer setup. For
+// swizzle shader we will need more flexibility.
+
+// Compute shader: merge separate D24 depth and S8 stencil planes into a
+// packed Z24S8 buffer (depth in bits 31..8, stencil in bits 7..0). One
+// invocation per output pixel; supports integer downscale (width_in a
+// multiple of width_out), sampling the top-left source pixel of each
+// scale block. Stencil input is packed four bytes per uint.
+const char *pack_d24_unorm_s8_uint_to_z24s8_glsl =
+    "#version 450\n"
+    "layout(local_size_x = 256) in;\n"
+    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
+    "layout(binding = 0) buffer DepthIn { uint depth_in[]; };\n"
+    "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
+    "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
+    "uint get_input_idx(uint idx_out) {\n"
+    "    uint scale = width_in / width_out;"
+    "    uint y = (idx_out / width_out) * scale;\n"
+    "    uint x = (idx_out % width_out) * scale;\n"
+    "    return y * width_in + x;\n"
+    "}\n"
+    "void main() {\n"
+    "    uint idx_out = gl_GlobalInvocationID.x;\n"
+    "    uint idx_in = get_input_idx(idx_out);\n"
+    "    uint depth_value = depth_in[idx_in];\n"
+    "    uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
+    "    depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
+    "}\n";
+
+// Compute shader: split a packed Z24S8 buffer into separate D24 depth and
+// S8 stencil planes, replicating each source pixel over an integer
+// upscale factor (width_out a multiple of width_in). Stencil output packs
+// four bytes per uint, so only every 4th invocation assembles and writes
+// a stencil word (gathering the next 3 pixels' bytes).
+const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl =
+    "#version 450\n"
+    "layout(local_size_x = 256) in;\n"
+    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
+    "layout(binding = 0) buffer DepthOut { uint depth_out[]; };\n"
+    "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
+    "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
+    "uint get_input_idx(uint idx_out) {\n"
+    "    uint scale = width_out / width_in;"
+    "    uint y = (idx_out / width_out) / scale;\n"
+    "    uint x = (idx_out % width_out) / scale;\n"
+    "    return y * width_in + x;\n"
+    "}\n"
+    "void main() {\n"
+    "    uint idx_out = gl_GlobalInvocationID.x;\n"
+    "    uint idx_in = get_input_idx(idx_out);\n"
+    "    depth_out[idx_out] = depth_stencil_in[idx_in] >> 8;\n"
+    "    if (idx_out % 4 == 0) {\n"
+    "        uint stencil_value = 0;\n"
+    "        for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
+    "            uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
+    "            stencil_value |= v << (i * 8);\n"
+    "        }\n"
+    "        stencil_out[idx_out / 4] = stencil_value;\n"
+    "    }\n"
+    "}\n";
+
+// Compute shader: as pack_d24_unorm_s8_uint_to_z24s8_glsl, but the depth
+// input is a 32-bit float normalized to [0, 1]; it is rescaled to a 24-bit
+// integer before packing. NOTE(review): get_input_idx here scales the
+// whole linear index — algebraically equivalent to the per-axis form used
+// by the D24 variant for integer scale factors.
+const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl =
+    "#version 450\n"
+    "layout(local_size_x = 256) in;\n"
+    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
+    "layout(binding = 0) buffer DepthIn { float depth_in[]; };\n"
+    "layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
+    "layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
+    "uint get_input_idx(uint idx_out) {\n"
+    "    uint y = idx_out / width_out;\n"
+    "    uint x = idx_out % width_out;\n"
+    "    return (y * width_in + x) * (width_in / width_out);\n"
+    "}\n"
+    "void main() {\n"
+    "    uint idx_out = gl_GlobalInvocationID.x;\n"
+    "    uint idx_in = get_input_idx(idx_out);\n"
+    "    uint depth_value = int(depth_in[idx_in] * float(0xffffff));\n"
+    "    uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
+    "    depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
+    "}\n";
+
+// Compute shader: as unpack_z24s8_to_d24_unorm_s8_uint_glsl, but the
+// depth output is a 32-bit float normalized to [0, 1] (24-bit integer
+// depth divided by 0xffffff).
+const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl =
+    "#version 450\n"
+    "layout(local_size_x = 256) in;\n"
+    "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
+    "layout(binding = 0) buffer DepthOut { float depth_out[]; };\n"
+    "layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
+    "layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
+    "uint get_input_idx(uint idx_out) {\n"
+    "    uint scale = width_out / width_in;"
+    "    uint y = (idx_out / width_out) / scale;\n"
+    "    uint x = (idx_out % width_out) / scale;\n"
+    "    return y * width_in + x;\n"
+    "}\n"
+    "void main() {\n"
+    "    uint idx_out = gl_GlobalInvocationID.x;\n"
+    "    uint idx_in = get_input_idx(idx_out);\n"
+    "    depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n"
+    "    if (idx_out % 4 == 0) {\n"
+    "        uint stencil_value = 0;\n"
+    "        for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
+    "            uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
+    "            stencil_value |= v << (i * 8);\n"
+    "        }\n"
+    "        stencil_out[idx_out / 4] = stencil_value;\n"
+    "    }\n"
+    "}\n";
+
+/* Create the descriptor pool backing the compute descriptor sets: one
+ * pool sized for three storage buffers per set, individually freeable. */
+static void create_descriptor_pool(PGRAPHState *pg)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    VkDescriptorPoolSize pool_sizes[] = {
+        {
+            .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 3,
+        },
+    };
+
+    VkDescriptorPoolCreateInfo pool_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
+        .maxSets = ARRAY_SIZE(vk->compute.descriptor_sets),
+        .poolSizeCount = ARRAY_SIZE(pool_sizes),
+        .pPoolSizes = pool_sizes,
+    };
+    VK_CHECK(vkCreateDescriptorPool(vk->device, &pool_info, NULL,
+                                    &vk->compute.descriptor_pool));
+}
+
+/* Destroy the compute descriptor pool (any sets still allocated from it
+ * are reclaimed with it). */
+static void destroy_descriptor_pool(PGRAPHState *pg)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    vkDestroyDescriptorPool(vk->device, vk->compute.descriptor_pool, NULL);
+    vk->compute.descriptor_pool = VK_NULL_HANDLE;
+}
+
+/* Create the compute descriptor set layout: three storage buffers at
+ * bindings 0..2, visible to the compute stage only. */
+static void create_descriptor_set_layout(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    /* Enum constant so bindings[] is a true fixed-size array rather than
+     * a VLA (a `const int` bound is not a constant expression in C). */
+    enum { NUM_BUFFERS = 3 };
+
+    VkDescriptorSetLayoutBinding bindings[NUM_BUFFERS];
+    for (int i = 0; i < NUM_BUFFERS; i++) {
+        bindings[i] = (VkDescriptorSetLayoutBinding){
+            .binding = i,
+            .descriptorCount = 1,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        };
+    }
+    VkDescriptorSetLayoutCreateInfo layout_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+        .bindingCount = ARRAY_SIZE(bindings),
+        .pBindings = bindings,
+    };
+    VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
+                                         &r->compute.descriptor_set_layout));
+}
+
+/* Tear down the layout created by create_descriptor_set_layout(). */
+static void destroy_descriptor_set_layout(PGRAPHState *pg)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    vkDestroyDescriptorSetLayout(vk->device,
+                                 vk->compute.descriptor_set_layout, NULL);
+    vk->compute.descriptor_set_layout = VK_NULL_HANDLE;
+}
+
+/* Allocate the compute descriptor sets from the compute pool, one layout
+ * handle per set. */
+static void create_descriptor_sets(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    /* Size the layout array from the *compute* descriptor set array: the
+     * original sized it from r->descriptor_sets (the graphics sets), which
+     * is only correct when both arrays happen to have the same length. */
+    VkDescriptorSetLayout layouts[ARRAY_SIZE(r->compute.descriptor_sets)];
+    for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
+        layouts[i] = r->compute.descriptor_set_layout;
+    }
+    VkDescriptorSetAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+        .descriptorPool = r->compute.descriptor_pool,
+        .descriptorSetCount = ARRAY_SIZE(r->compute.descriptor_sets),
+        .pSetLayouts = layouts,
+    };
+    VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
+                                      r->compute.descriptor_sets));
+}
+
+/* Return the compute descriptor sets to the pool (created with the
+ * FREE_DESCRIPTOR_SET flag) and drop the stale handles. */
+static void destroy_descriptor_sets(PGRAPHState *pg)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    vkFreeDescriptorSets(vk->device, vk->compute.descriptor_pool,
+                         ARRAY_SIZE(vk->compute.descriptor_sets),
+                         vk->compute.descriptor_sets);
+
+    for (int i = 0; i < ARRAY_SIZE(vk->compute.descriptor_sets); i++) {
+        vk->compute.descriptor_sets[i] = VK_NULL_HANDLE;
+    }
+}
+
+/* Create the shared compute pipeline layout: one descriptor set layout
+ * plus push constants carrying { width_in, width_out }. */
+static void create_compute_pipeline_layout(PGRAPHState *pg)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    VkPushConstantRange push_constant_range = {
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .size = 2 * sizeof(uint32_t),
+    };
+    VkPipelineLayoutCreateInfo pipeline_layout_info = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+        .pSetLayouts = &vk->compute.descriptor_set_layout,
+        .setLayoutCount = 1,
+        .pPushConstantRanges = &push_constant_range,
+        .pushConstantRangeCount = 1,
+    };
+    VK_CHECK(vkCreatePipelineLayout(vk->device, &pipeline_layout_info, NULL,
+                                    &vk->compute.pipeline_layout));
+}
+
+/* Compile GLSL compute source into a pipeline using the shared compute
+ * pipeline layout. The intermediate shader module is transient and is
+ * destroyed once the pipeline exists. */
+static VkPipeline create_compute_pipeline(PGRAPHState *pg, const char *glsl)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    ShaderModuleInfo *shader = pgraph_vk_create_shader_module_from_glsl(
+        vk, VK_SHADER_STAGE_COMPUTE_BIT, glsl);
+
+    VkPipelineShaderStageCreateInfo stage_info = {
+        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+        .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+        .module = shader->module,
+        .pName = "main",
+    };
+    VkComputePipelineCreateInfo pipeline_info = {
+        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .layout = vk->compute.pipeline_layout,
+        .stage = stage_info,
+    };
+
+    VkPipeline pipeline;
+    VK_CHECK(vkCreateComputePipelines(vk->device, vk->vk_pipeline_cache, 1,
+                                      &pipeline_info, NULL, &pipeline));
+
+    pgraph_vk_destroy_shader_module(vk, shader);
+
+    return pipeline;
+}
+
+/* Point the first compute descriptor set's three storage-buffer bindings
+ * at the given buffer ranges (binding index i <- buffers[i]). */
+static void update_descriptor_sets(PGRAPHState *pg,
+                                   VkDescriptorBufferInfo *buffers, int count)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    /* The compute set layout fixes exactly three bindings. */
+    assert(count == 3);
+
+    VkWriteDescriptorSet writes[3];
+    for (int binding = 0; binding < count; binding++) {
+        writes[binding] = (VkWriteDescriptorSet){
+            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+            .dstSet = vk->compute.descriptor_sets[0],
+            .dstBinding = binding,
+            .dstArrayElement = 0,
+            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .descriptorCount = 1,
+            .pBufferInfo = &buffers[binding],
+        };
+    }
+    vkUpdateDescriptorSets(vk->device, count, writes, 0, NULL);
+}
+
+//
+// Pack depth+stencil into NV097_SET_SURFACE_FORMAT_ZETA_Z24S8
+// formatted buffer with depth in bits 31-8 and stencil in bits 7-0.
+//
+// `src` holds the depth plane (4 B/px) immediately followed by the
+// stencil plane (1 B/px), both at scaled resolution. `dst` receives
+// Z24S8 (4 B/px) at native resolution when `downscale` is set, scaled
+// resolution otherwise. Records the dispatch into `cmd`; barriers are
+// the caller's responsibility.
+void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
+                                  VkCommandBuffer cmd, VkBuffer src,
+                                  VkBuffer dst, bool downscale)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    unsigned int input_width = surface->width, input_height = surface->height;
+    pgraph_apply_scaling_factor(pg, &input_width, &input_height);
+
+    unsigned int output_width = surface->width, output_height = surface->height;
+    if (!downscale) {
+        pgraph_apply_scaling_factor(pg, &output_width, &output_height);
+    }
+
+    size_t depth_bytes_per_pixel = 4;
+    size_t depth_size = input_width * input_height * depth_bytes_per_pixel;
+
+    size_t stencil_bytes_per_pixel = 1;
+    size_t stencil_size = input_width * input_height * stencil_bytes_per_pixel;
+
+    size_t output_bytes_per_pixel = 4;
+    size_t output_size = output_width * output_height * output_bytes_per_pixel;
+
+    // Bindings 0/1 read the depth and stencil planes from src; binding 2
+    // writes the packed result to dst (see *_to_z24s8 shaders above).
+    VkDescriptorBufferInfo buffers[] = {
+        {
+            .buffer = src,
+            .offset = 0,
+            .range = depth_size,
+        },
+        {
+            .buffer = src,
+            .offset = depth_size,
+            .range = stencil_size,
+        },
+        {
+            .buffer = dst,
+            .offset = 0,
+            .range = output_size,
+        },
+    };
+    update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers));
+
+    // Select the pack shader variant matching the host depth format.
+    if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
+                          r->compute.pipeline_pack_d24s8);
+    } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
+                          r->compute.pipeline_pack_f32s8);
+    } else {
+        assert(!"Unsupported pack format");
+    }
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
+                            r->compute.pipeline_layout, 0, 1,
+                            &r->compute.descriptor_sets[0], 0, NULL);
+
+    uint32_t push_constants[2] = { input_width, output_width };
+    assert(sizeof(push_constants) == 8);
+    vkCmdPushConstants(cmd, r->compute.pipeline_layout,
+                       VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
+                       push_constants);
+
+    // One invocation per output pixel; local_size_x = 256 in the shaders,
+    // so the pixel count must divide evenly into workgroups.
+    size_t workgroup_size_in_units = 256;
+    size_t output_size_in_units = output_width * output_height;
+    assert(output_size_in_units % workgroup_size_in_units == 0);
+    size_t group_count = output_size_in_units / workgroup_size_in_units;
+
+    // FIXME: Check max group count
+
+    vkCmdDispatch(cmd, group_count, 1, 1);
+}
+
+// Inverse of pgraph_vk_pack_depth_stencil: split a Z24S8 buffer (`src`,
+// native resolution) into separate depth (4 B/px) and stencil (1 B/px)
+// planes in `dst` at scaled resolution, upscaling by the surface scale
+// factor. Records the dispatch into `cmd`; barriers are the caller's
+// responsibility.
+void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
+                                    VkCommandBuffer cmd, VkBuffer src,
+                                    VkBuffer dst)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    unsigned int input_width = surface->width, input_height = surface->height;
+
+    unsigned int output_width = surface->width, output_height = surface->height;
+    pgraph_apply_scaling_factor(pg, &output_width, &output_height);
+
+    size_t depth_bytes_per_pixel = 4;
+    size_t depth_size = output_width * output_height * depth_bytes_per_pixel;
+
+    size_t stencil_bytes_per_pixel = 1;
+    size_t stencil_size = output_width * output_height * stencil_bytes_per_pixel;
+
+    size_t input_bytes_per_pixel = 4;
+    size_t input_size = input_width * input_height * input_bytes_per_pixel;
+
+    // Bindings 0/1 write the depth and stencil planes into dst; binding 2
+    // reads the packed Z24S8 data from src (see unpack_z24s8_* shaders).
+    VkDescriptorBufferInfo buffers[] = {
+        {
+            .buffer = dst,
+            .offset = 0,
+            .range = depth_size,
+        },
+        {
+            .buffer = dst,
+            .offset = depth_size,
+            .range = stencil_size,
+        },
+        {
+            .buffer = src,
+            .offset = 0,
+            .range = input_size,
+        },
+    };
+    update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers));
+
+    // Select the unpack shader variant matching the host depth format.
+    if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
+        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
+                          r->compute.pipeline_unpack_d24s8);
+    } else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
+                          r->compute.pipeline_unpack_f32s8);
+    } else {
+        assert(!"Unsupported pack format");
+    }
+    vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
+                            r->compute.pipeline_layout, 0, 1,
+                            &r->compute.descriptor_sets[0], 0, NULL);
+
+    assert(output_width >= input_width);
+    uint32_t push_constants[2] = { input_width, output_width };
+    assert(sizeof(push_constants) == 8);
+    vkCmdPushConstants(cmd, r->compute.pipeline_layout,
+                       VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
+                       push_constants);
+
+    // One invocation per output pixel; local_size_x = 256 in the shaders.
+    size_t workgroup_size_in_units = 256;
+    size_t output_size_in_units = output_width * output_height;
+    assert(output_size_in_units % workgroup_size_in_units == 0);
+    size_t group_count = output_size_in_units / workgroup_size_in_units;
+
+    // FIXME: Check max group count
+
+    vkCmdDispatch(cmd, group_count, 1, 1);
+}
+
+/* Bring up compute support: descriptor machinery first, then one pipeline
+ * per depth/stencil pack/unpack shader variant. */
+void pgraph_vk_init_compute(PGRAPHState *pg)
+{
+    PGRAPHVkState *vk = pg->vk_renderer_state;
+
+    create_descriptor_pool(pg);
+    create_descriptor_set_layout(pg);
+    create_descriptor_sets(pg);
+    create_compute_pipeline_layout(pg);
+
+    vk->compute.pipeline_pack_d24s8 =
+        create_compute_pipeline(pg, pack_d24_unorm_s8_uint_to_z24s8_glsl);
+    vk->compute.pipeline_unpack_d24s8 =
+        create_compute_pipeline(pg, unpack_z24s8_to_d24_unorm_s8_uint_glsl);
+    vk->compute.pipeline_pack_f32s8 =
+        create_compute_pipeline(pg, pack_d32_sfloat_s8_uint_to_z24s8_glsl);
+    vk->compute.pipeline_unpack_f32s8 =
+        create_compute_pipeline(pg, unpack_z24s8_to_d32_sfloat_s8_uint_glsl);
+}
+
+/* Tear down compute support in the reverse of pgraph_vk_init_compute(). */
+void pgraph_vk_finalize_compute(PGRAPHState *pg)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    VkPipeline *pipelines[] = {
+        &r->compute.pipeline_pack_d24s8,
+        &r->compute.pipeline_unpack_d24s8,
+        &r->compute.pipeline_pack_f32s8,
+        &r->compute.pipeline_unpack_f32s8,
+    };
+
+    for (int i = 0; i < ARRAY_SIZE(pipelines); i++) {
+        vkDestroyPipeline(r->device, *pipelines[i], NULL);
+        /* Clear the handle stored in the compute state. The original
+         * assigned VK_NULL_HANDLE to the local pointer-array element,
+         * leaving the struct fields holding destroyed handles. */
+        *pipelines[i] = VK_NULL_HANDLE;
+    }
+
+    vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL);
+    r->compute.pipeline_layout = VK_NULL_HANDLE;
+
+    destroy_descriptor_sets(pg);
+    destroy_descriptor_set_layout(pg);
+    destroy_descriptor_pool(pg);
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c
new file mode 100644
index 0000000000..9df98666bf
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/surface.c
@@ -0,0 +1,1485 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * Based on GL implementation:
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a/nv2a_int.h"
+#include "hw/xbox/nv2a/pgraph/swizzle.h"
+#include "qemu/compiler.h"
+#include "ui/xemu-settings.h"
+#include "renderer.h"
+
+// Number of invalidated surfaces retained for image reuse before
+// prune_invalid_surfaces() destroys them.
+const int num_invalid_surfaces_to_keep = 10; // FIXME: Make automatic
+// Frames a surface may go unused before expire_old_surfaces() evicts it.
+const int max_surface_frame_time_delta = 5;
+
+// Change the render scale factor at runtime. Called from the main/UI
+// thread with the iothread lock held; it halts PFIFO, forces all dirty
+// surfaces to be downloaded to guest RAM, then flushes the renderer so
+// surfaces are recreated at the new scale, and finally resumes PFIFO.
+void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale)
+{
+    // Clamp to at least 1x; the renderer picks this up on flush.
+    g_config.display.quality.surface_scale = scale < 1 ? 1 : scale;
+
+    // Drop the iothread lock so the render thread can make progress
+    // while we wait on its events below.
+    qemu_mutex_unlock_iothread();
+
+    // Halt PFIFO processing before touching renderer state.
+    qemu_mutex_lock(&d->pfifo.lock);
+    qatomic_set(&d->pfifo.halt, true);
+    qemu_mutex_unlock(&d->pfifo.lock);
+
+    // FIXME: It's just flush
+    // Phase 1: request a download of all dirty surfaces and wait for it.
+    qemu_mutex_lock(&d->pgraph.lock);
+    qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
+    qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true);
+    qemu_mutex_unlock(&d->pgraph.lock);
+    qemu_mutex_lock(&d->pfifo.lock);
+    pfifo_kick(d);
+    qemu_mutex_unlock(&d->pfifo.lock);
+    qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
+
+    // Phase 2: request a full renderer flush and wait for completion.
+    qemu_mutex_lock(&d->pgraph.lock);
+    qemu_event_reset(&d->pgraph.flush_complete);
+    qatomic_set(&d->pgraph.flush_pending, true);
+    qemu_mutex_unlock(&d->pgraph.lock);
+    qemu_mutex_lock(&d->pfifo.lock);
+    pfifo_kick(d);
+    qemu_mutex_unlock(&d->pfifo.lock);
+    qemu_event_wait(&d->pgraph.flush_complete);
+
+    // Resume PFIFO processing.
+    qemu_mutex_lock(&d->pfifo.lock);
+    qatomic_set(&d->pfifo.halt, false);
+    pfifo_kick(d);
+    qemu_mutex_unlock(&d->pfifo.lock);
+
+    qemu_mutex_lock_iothread();
+}
+
+// Report the scale factor currently in effect for rendered surfaces.
+unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d)
+{
+    // FIXME: Move internal to renderer
+    PGRAPHState *pg = &d->pgraph;
+    return pg->surface_scale_factor;
+}
+
+// Re-read the configured surface scale, clamped to a minimum of 1x.
+void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg)
+{
+    pg->surface_scale_factor =
+        MAX(g_config.display.quality.surface_scale, 1);
+}
+
+// FIXME: Move to common
+// Derive the surface dimensions from the current shape: swizzled
+// surfaces are power-of-two sized via the log dimensions, linear
+// surfaces use the clip rectangle.
+static void get_surface_dimensions(PGRAPHState const *pg, unsigned int *width,
+                                   unsigned int *height)
+{
+    if (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
+        *width = 1 << pg->surface_shape.log_width;
+        *height = 1 << pg->surface_shape.log_height;
+    } else {
+        *width = pg->surface_shape.clip_width;
+        *height = pg->surface_shape.clip_height;
+    }
+}
+
+// FIXME: Move to common
+// The framebuffer is considered dirty when the surface shape changed
+// and at least one of the color/zeta formats is set.
+static bool framebuffer_dirty(PGRAPHState const *pg)
+{
+    bool any_format = pg->surface_shape.color_format ||
+                      pg->surface_shape.zeta_format;
+    bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape,
+                                sizeof(SurfaceShape)) != 0;
+    return shape_changed && any_format;
+}
+
+// Copy `height` rows from src to dst, honoring the two strides. When
+// strides differ, each row copies only the narrower stride's width.
+static void memcpy_image(void *dst, void const *src, int dst_stride,
+                         int src_stride, int height)
+{
+    // Fast path: equal strides means the image is one contiguous run.
+    if (dst_stride == src_stride) {
+        memcpy(dst, src, dst_stride * height);
+        return;
+    }
+
+    uint8_t *dst_ptr = (uint8_t *)dst;
+    // Keep the const qualifier through the cast (the original cast to
+    // plain `uint8_t *` silently dropped it).
+    uint8_t const *src_ptr = (uint8_t const *)src;
+
+    size_t copy_stride = MIN(src_stride, dst_stride);
+
+    for (int i = 0; i < height; i++) {
+        memcpy(dst_ptr, src_ptr, copy_stride);
+        dst_ptr += dst_stride;
+        src_ptr += src_stride;
+    }
+}
+
+// Read a rendered surface back from its Vulkan image into guest memory
+// at `pixels`, downscaling to native resolution, converting packed
+// depth-stencil formats via compute, and re-swizzling when required.
+static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface,
+                                       uint8_t *pixels)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD);
+
+    // If the surface was drawn in the still-open command buffer, flush
+    // it before copying out.
+    if (r->in_command_buffer &&
+        surface->draw_time >= r->command_buffer_start_time) {
+        pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN);
+    }
+
+    bool downscale = (pg->surface_scale_factor != 1);
+
+    trace_nv2a_pgraph_surface_download(
+        surface->color ? "COLOR" : "ZETA",
+        surface->swizzle ? "sz" : "lin", surface->vram_addr,
+        surface->width, surface->height, surface->pitch,
+        surface->fmt.bytes_per_pixel);
+
+    // Read surface into memory
+    uint8_t *gl_read_buf = pixels;
+
+    // Swizzled surfaces are read linearly into a temporary buffer and
+    // swizzled into `pixels` at the end.
+    uint8_t *swizzle_buf = pixels;
+    if (surface->swizzle) {
+        // FIXME: Swizzle in shader
+        assert(pg->surface_scale_factor == 1 || downscale);
+        swizzle_buf = (uint8_t *)g_malloc(surface->size);
+        gl_read_buf = swizzle_buf;
+    }
+
+    unsigned int scaled_width = surface->width,
+                 scaled_height = surface->height;
+    pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);
+
+    VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
+
+    pgraph_vk_transition_image_layout(
+        pg, cmd, surface->image, surface->host_fmt.vk_format,
+        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
+                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+    int num_copy_regions = 1;
+    VkBufferImageCopy copy_regions[2];
+    copy_regions[0] = (VkBufferImageCopy){
+        .imageSubresource.aspectMask = surface->color ?
+                                           VK_IMAGE_ASPECT_COLOR_BIT :
+                                           VK_IMAGE_ASPECT_DEPTH_BIT,
+        .imageSubresource.layerCount = 1,
+    };
+
+    // These combined depth-stencil formats have no direct guest-format
+    // equivalent; a compute shader repacks them to z24s8.
+    bool use_compute_to_convert_depth_stencil_format =
+        surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
+        surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT;
+
+    VkImage surface_image_loc;
+    if (downscale && !use_compute_to_convert_depth_stencil_format) {
+        // Blit the scaled image down to native size via the scratch
+        // image, then copy from the scratch image.
+        copy_regions[0].imageExtent =
+            (VkExtent3D){ surface->width, surface->height, 1 };
+
+        if (surface->image_scratch_current_layout !=
+            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+            pgraph_vk_transition_image_layout(
+                pg, cmd, surface->image_scratch, surface->host_fmt.vk_format,
+                surface->image_scratch_current_layout,
+                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+            surface->image_scratch_current_layout =
+                VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+        }
+
+        VkImageBlit blit_region = {
+            .srcSubresource.aspectMask = surface->host_fmt.aspect,
+            .srcSubresource.mipLevel = 0,
+            .srcSubresource.baseArrayLayer = 0,
+            .srcSubresource.layerCount = 1,
+            .srcOffsets[0] = (VkOffset3D){0, 0, 0},
+            .srcOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1},
+
+            .dstSubresource.aspectMask = surface->host_fmt.aspect,
+            .dstSubresource.mipLevel = 0,
+            .dstSubresource.baseArrayLayer = 0,
+            .dstSubresource.layerCount = 1,
+            .dstOffsets[0] = (VkOffset3D){0, 0, 0},
+            .dstOffsets[1] = (VkOffset3D){surface->width, surface->height, 1},
+        };
+
+        vkCmdBlitImage(cmd, surface->image,
+                       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                       surface->image_scratch,
+                       surface->image_scratch_current_layout, 1, &blit_region,
+                       surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
+
+        pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch,
+                                          surface->host_fmt.vk_format,
+                                          surface->image_scratch_current_layout,
+                                          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+        surface->image_scratch_current_layout =
+            VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+        surface_image_loc = surface->image_scratch;
+    } else {
+        copy_regions[0].imageExtent =
+            (VkExtent3D){ scaled_width, scaled_height, 1 };
+        surface_image_loc = surface->image;
+    }
+
+    // Stencil plane is copied as a second region, placed after the
+    // depth plane in the buffer.
+    if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
+        copy_regions[num_copy_regions++] = (VkBufferImageCopy){
+            .bufferOffset = scaled_width * scaled_height * 4,
+            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
+            .imageSubresource.layerCount = 1,
+            .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1},
+        };
+    }
+
+    int copy_buffer_idx = use_compute_to_convert_depth_stencil_format ?
+                              BUFFER_COMPUTE_DST :
+                              BUFFER_STAGING_DST;
+    VkBuffer copy_buffer = r->storage_buffers[copy_buffer_idx].buffer;
+
+    vkCmdCopyImageToBuffer(cmd, surface_image_loc,
+                           VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
+                           num_copy_regions, copy_regions);
+
+    // FIXME: Verify output of depth stencil conversion
+    // FIXME: Track current layout and only transition when required
+
+    if (use_compute_to_convert_depth_stencil_format) {
+        size_t bytes_per_pixel = 4;
+        size_t packed_size =
+            downscale ? (surface->width * surface->height * bytes_per_pixel) :
+                        (scaled_width * scaled_height * bytes_per_pixel);
+
+        // Make the image->buffer copy visible to the compute shader.
+        VkBufferMemoryBarrier pre_pack_barrier = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = copy_buffer,
+            .size = VK_WHOLE_SIZE
+        };
+        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL,
+                             1, &pre_pack_barrier, 0, NULL);
+
+        VkBuffer pack_buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer;
+        pgraph_vk_pack_depth_stencil(pg, surface, cmd, copy_buffer, pack_buffer,
+                                     downscale);
+
+        // Make the compute output visible to the final buffer copy.
+        VkBufferMemoryBarrier post_pack_barrier = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = pack_buffer,
+            .size = packed_size
+        };
+        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
+                             &post_pack_barrier, 0, NULL);
+
+        copy_buffer = r->storage_buffers[BUFFER_STAGING_DST].buffer;
+        VkBufferCopy buffer_copy_region = {
+            .size = packed_size,
+        };
+        vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region);
+    }
+
+    size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel *
+                                   surface->width * surface->height;
+    assert((downloaded_image_size) <=
+           r->storage_buffers[BUFFER_STAGING_DST].buffer_size);
+
+    // Return the surface image to its attachment layout.
+    pgraph_vk_transition_image_layout(
+        pg, cmd, surface->image, surface->host_fmt.vk_format,
+        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
+                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+
+    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1);
+    pgraph_vk_end_single_time_commands(pg, cmd);
+
+    void *mapped_memory_ptr;
+    VK_CHECK(vmaMapMemory(r->allocator,
+                          r->storage_buffers[BUFFER_STAGING_DST].allocation,
+                          &mapped_memory_ptr));
+
+    // FIXME: Swizzle in shader
+    // FIXME: Eliminate this extra copy if we need to swizzle
+    // FIXME: Use native buffer copy options for pitch adjust
+
+    bool no_conversion_necessary =
+        surface->color || use_compute_to_convert_depth_stencil_format ||
+        surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM;
+
+    assert(no_conversion_necessary);
+
+    // Pitch-adjusting copy from staging memory to the output buffer.
+    memcpy_image(gl_read_buf, mapped_memory_ptr, surface->pitch,
+                 surface->width * surface->fmt.bytes_per_pixel,
+                 surface->height);
+
+    vmaUnmapMemory(r->allocator,
+                   r->storage_buffers[BUFFER_STAGING_DST].allocation);
+
+    if (surface->swizzle) {
+        swizzle_rect(swizzle_buf, surface->width, surface->height, pixels,
+                     surface->pitch, surface->fmt.bytes_per_pixel);
+        nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE);
+        g_free(swizzle_buf);
+    }
+}
+
+// Write the surface contents back into guest VRAM if a download is
+// pending (or unconditionally when `force` is set), and mark the
+// affected VRAM range dirty for display and texture tracking.
+static void download_surface(NV2AState *d, SurfaceBinding *surface, bool force)
+{
+    if (!surface->download_pending && !force) {
+        return;
+    }
+
+    // FIXME: Respect write enable at last TOU?
+
+    download_surface_to_buffer(d, surface, d->vram_ptr + surface->vram_addr);
+
+    const int dirty_clients[] = { DIRTY_MEMORY_VGA, DIRTY_MEMORY_NV2A_TEX };
+    for (int i = 0; i < ARRAY_SIZE(dirty_clients); i++) {
+        memory_region_set_client_dirty(d->vram, surface->vram_addr,
+                                       surface->pitch * surface->height,
+                                       dirty_clients[i]);
+    }
+
+    surface->download_pending = false;
+    surface->draw_dirty = false;
+}
+
+// Block the calling thread until the render thread has downloaded this
+// surface. Queues a download request, kicks PFIFO so the render thread
+// services it, and waits on the completion event. No-op if the surface
+// has no pending drawn data.
+void pgraph_vk_wait_for_surface_download(SurfaceBinding *surface)
+{
+    NV2AState *d = g_nv2a;
+
+    if (qatomic_read(&surface->draw_dirty)) {
+        qemu_mutex_lock(&d->pfifo.lock);
+        // Reset the event before setting the request flags so the wait
+        // below cannot miss the wakeup.
+        qemu_event_reset(&d->pgraph.vk_renderer_state->downloads_complete);
+        qatomic_set(&surface->download_pending, true);
+        qatomic_set(&d->pgraph.vk_renderer_state->downloads_pending, true);
+        pfifo_kick(d);
+        qemu_mutex_unlock(&d->pfifo.lock);
+        qemu_event_wait(&d->pgraph.vk_renderer_state->downloads_complete);
+    }
+}
+
+// Render-thread half of pgraph_vk_wait_for_surface_download(): service
+// every surface with a pending download, then signal the waiter.
+void pgraph_vk_process_pending_downloads(NV2AState *d)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    SurfaceBinding *s;
+    QTAILQ_FOREACH(s, &r->surfaces, entry) {
+        download_surface(d, s, false);
+    }
+
+    qatomic_set(&r->downloads_pending, false);
+    qemu_event_set(&r->downloads_complete);
+}
+
+// Flush every surface with drawn-but-undownloaded data back to guest
+// RAM, then wake any thread waiting for the operation to finish.
+void pgraph_vk_download_dirty_surfaces(NV2AState *d)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    SurfaceBinding *s;
+    QTAILQ_FOREACH(s, &r->surfaces, entry) {
+        pgraph_vk_surface_download_if_dirty(d, s);
+    }
+
+    qatomic_set(&r->download_dirty_surfaces_pending, false);
+    qemu_event_set(&r->dirty_surfaces_download_complete);
+}
+
+// CPU access watchpoint handler for a surface's VRAM range: syncs GPU
+// output to RAM before the access, and flags writes for re-upload.
+static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
+                                    hwaddr len, bool write)
+{
+    SurfaceBinding *binding = opaque;
+
+    assert(addr >= binding->vram_addr);
+    hwaddr offset = addr - binding->vram_addr;
+    assert(offset < binding->size);
+
+    // The guest is about to observe memory the GPU has rendered to;
+    // download first.
+    if (qatomic_read(&binding->draw_dirty)) {
+        trace_nv2a_pgraph_surface_cpu_access(binding->vram_addr, offset);
+        pgraph_vk_wait_for_surface_download(binding);
+    }
+
+    // A CPU write invalidates the GPU copy; schedule a re-upload.
+    if (write && !qatomic_read(&binding->upload_pending)) {
+        trace_nv2a_pgraph_surface_cpu_access(binding->vram_addr, offset);
+        qatomic_set(&binding->upload_pending, true);
+    }
+}
+
+// Install surface_access_callback() over the surface's VRAM range (TCG
+// only). The pgraph lock is dropped before taking the iothread lock to
+// preserve lock ordering, then reacquired.
+static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface)
+{
+    if (tcg_enabled()) {
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock_iothread();
+        mem_access_callback_insert(qemu_get_cpu(0),
+                                   d->vram, surface->vram_addr, surface->size,
+                                   &surface->access_cb, &surface_access_callback,
+                                   surface);
+        qemu_mutex_unlock_iothread();
+        qemu_mutex_lock(&d->pgraph.lock);
+    }
+}
+
+// Remove the surface's CPU access watchpoint (TCG only), using the same
+// pgraph/iothread lock-order dance as register_cpu_access_callback().
+static void unregister_cpu_access_callback(NV2AState *d,
+                                           SurfaceBinding const *surface)
+{
+    if (tcg_enabled()) {
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock_iothread();
+        mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
+        qemu_mutex_unlock_iothread();
+        qemu_mutex_lock(&d->pgraph.lock);
+    }
+}
+
+// Attach the surface to the renderer's color or zeta slot and flag the
+// framebuffer for rebuild.
+static void bind_surface(PGRAPHVkState *r, SurfaceBinding *surface)
+{
+    SurfaceBinding **slot =
+        surface->color ? &r->color_binding : &r->zeta_binding;
+    *slot = surface;
+
+    r->framebuffer_dirty = true;
+}
+
+// Detach the color or zeta surface (if bound) and flag the framebuffer
+// for rebuild.
+static void unbind_surface(NV2AState *d, bool color)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    SurfaceBinding **slot = color ? &r->color_binding : &r->zeta_binding;
+    if (*slot != NULL) {
+        *slot = NULL;
+        r->framebuffer_dirty = true;
+    }
+}
+
+// Remove a surface from the active list and park it on the invalid list
+// (its Vulkan resources are kept for potential reuse). Unbinds it from
+// the framebuffer and removes its CPU access watchpoint.
+static void invalidate_surface(NV2AState *d, SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    trace_nv2a_pgraph_surface_invalidated(surface->vram_addr);
+
+    // FIXME: We may be reading from the surface in the current command buffer!
+    //        Add a detection to handle it. For now, finish to be safe.
+    pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_SURFACE_DOWN);
+
+    assert((!r->in_command_buffer ||
+            surface->draw_time < r->command_buffer_start_time) &&
+           "Surface evicted while in use!");
+
+    // Only a surface already marked for recreation may be unbound here.
+    if (surface == r->color_binding) {
+        assert(d->pgraph.surface_color.buffer_dirty);
+        unbind_surface(d, true);
+    }
+    if (surface == r->zeta_binding) {
+        assert(d->pgraph.surface_zeta.buffer_dirty);
+        unbind_surface(d, false);
+    }
+
+    unregister_cpu_access_callback(d, surface);
+
+    QTAILQ_REMOVE(&r->surfaces, surface, entry);
+    QTAILQ_INSERT_HEAD(&r->invalid_surfaces, surface, entry);
+}
+
+// Evict every active surface whose VRAM range intersects the new
+// surface's range, downloading any dirty contents first.
+static void invalidate_overlapping_surfaces(NV2AState *d,
+                                            SurfaceBinding const *surface)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    uintptr_t new_start = surface->vram_addr;
+    uintptr_t new_end = surface->vram_addr + surface->size - 1;
+
+    SurfaceBinding *s, *next;
+    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
+        uintptr_t s_end = s->vram_addr + s->size - 1;
+        // Ranges intersect iff each starts at or before the other's end.
+        if (s->vram_addr <= new_end && new_start <= s_end) {
+            trace_nv2a_pgraph_surface_evict_overlapping(
+                s->vram_addr, s->width, s->height,
+                s->pitch);
+            pgraph_vk_surface_download_if_dirty(d, s);
+            invalidate_surface(d, s);
+        }
+    }
+}
+
+// Insert a new surface binding into the active list. Overlapping
+// surfaces must be evicted before insertion so the new entry is the
+// sole owner of its VRAM range.
+static void surface_put(NV2AState *d, SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    assert(pgraph_vk_surface_get(d, surface->vram_addr) == NULL);
+
+    invalidate_overlapping_surfaces(d, surface);
+    register_cpu_access_callback(d, surface);
+
+    QTAILQ_INSERT_HEAD(&r->surfaces, surface, entry);
+}
+
+// Look up an active surface by its exact base VRAM address; NULL when
+// none matches.
+SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+    SurfaceBinding *found = NULL;
+
+    SurfaceBinding *s;
+    QTAILQ_FOREACH (s, &r->surfaces, entry) {
+        if (s->vram_addr == addr) {
+            found = s;
+            break;
+        }
+    }
+
+    return found;
+}
+
+// Look up the active surface whose [vram_addr, vram_addr + size) range
+// contains addr; NULL when none does.
+SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    SurfaceBinding *s;
+    QTAILQ_FOREACH (s, &r->surfaces, entry) {
+        if (addr < s->vram_addr) {
+            continue;
+        }
+        if (addr - s->vram_addr < s->size) {
+            return s;
+        }
+    }
+
+    return NULL;
+}
+
+// Attach human-readable debug names to the surface's image(s) for the
+// Vulkan debug-utils layer and the VMA allocator (visible in tools such
+// as RenderDoc and in VMA leak reports).
+static void set_surface_label(PGRAPHState *pg, SurfaceBinding const *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    g_autofree gchar *label = g_strdup_printf(
+        "Surface %" HWADDR_PRIx "h fmt:%s,%02xh %dx%d aa:%d",
+        surface->vram_addr, surface->color ? "Color" : "Zeta",
+        surface->color ? surface->shape.color_format :
+                         surface->shape.zeta_format,
+        surface->width, surface->height, pg->surface_shape.anti_aliasing);
+
+    VkDebugUtilsObjectNameInfoEXT name_info = {
+        .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
+        .objectType = VK_OBJECT_TYPE_IMAGE,
+        .objectHandle = (uint64_t)surface->image,
+        .pObjectName = label,
+    };
+
+    if (r->debug_utils_extension_enabled) {
+        vkSetDebugUtilsObjectNameEXT(r->device, &name_info);
+    }
+    vmaSetAllocationName(r->allocator, surface->allocation, label);
+
+    // The scratch image (present only when scaling) gets a derived name.
+    if (surface->image_scratch) {
+        g_autofree gchar *label_scratch =
+            g_strdup_printf("%s (scratch)", label);
+        name_info.objectHandle = (uint64_t)surface->image_scratch;
+        name_info.pObjectName = label_scratch;
+        if (r->debug_utils_extension_enabled) {
+            vkSetDebugUtilsObjectNameEXT(r->device, &name_info);
+        }
+        vmaSetAllocationName(r->allocator, surface->allocation_scratch,
+                             label_scratch);
+    }
+}
+
+// Allocate the Vulkan image (at scaled resolution), an optional
+// native-resolution scratch image for up/downscaling blits, and the
+// image view for a new surface, then transition the image to its
+// attachment layout.
+static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    unsigned int width = surface->width, height = surface->height;
+    pgraph_apply_scaling_factor(pg, &width, &height);
+
+    NV2A_VK_DPRINTF(
+        "Creating new surface image width=%d height=%d @ %08" HWADDR_PRIx,
+        width, height, surface->vram_addr);
+
+    VkImageCreateInfo image_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .extent.width = width,
+        .extent.height = height,
+        .extent.depth = 1,
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .format = surface->host_fmt.vk_format,
+        .tiling = VK_IMAGE_TILING_OPTIMAL,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+        // Sampled + transfer both ways, plus the format's attachment usage.
+        .usage = VK_IMAGE_USAGE_SAMPLED_BIT |
+                 VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT | surface->host_fmt.usage,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+    };
+
+    VmaAllocationCreateInfo alloc_create_info = {
+        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
+    };
+
+    VK_CHECK(vmaCreateImage(r->allocator, &image_create_info,
+                            &alloc_create_info, &surface->image,
+                            &surface->allocation, NULL));
+
+    // With scaling active, keep a native-resolution scratch image for
+    // the blit step during upload/download.
+    if (pg->surface_scale_factor > 1) {
+        VkImageCreateInfo scratch_image_create_info = image_create_info;
+        scratch_image_create_info.extent.width = surface->width;
+        scratch_image_create_info.extent.height = surface->height;
+        VK_CHECK(
+            vmaCreateImage(r->allocator, &scratch_image_create_info,
+                           &alloc_create_info, &surface->image_scratch,
+                           &surface->allocation_scratch, NULL));
+        surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+    } else {
+        surface->image_scratch = VK_NULL_HANDLE;
+        surface->allocation_scratch = VK_NULL_HANDLE;
+    }
+
+    VkImageViewCreateInfo image_view_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .image = surface->image,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = surface->host_fmt.vk_format,
+        .subresourceRange.aspectMask = surface->host_fmt.aspect,
+        .subresourceRange.levelCount = 1,
+        .subresourceRange.layerCount = 1,
+    };
+    VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
+                               &surface->image_view));
+
+    // FIXME: Go right into main command buffer
+    VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
+    pgraph_vk_transition_image_layout(
+        pg, cmd, surface->image, surface->host_fmt.vk_format,
+        VK_IMAGE_LAYOUT_UNDEFINED,
+        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
+                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+
+    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_3);
+    pgraph_vk_end_single_time_commands(pg, cmd);
+    nv2a_profile_inc_counter(NV2A_PROF_SURF_CREATE);
+}
+
+// Transfer ownership of all Vulkan resources from src to dst, leaving
+// src holding null handles so a later destroy of src is a no-op.
+static void migrate_surface_image(SurfaceBinding *dst, SurfaceBinding *src)
+{
+    dst->image = src->image;
+    src->image = VK_NULL_HANDLE;
+
+    dst->image_view = src->image_view;
+    src->image_view = VK_NULL_HANDLE;
+
+    dst->allocation = src->allocation;
+    src->allocation = VK_NULL_HANDLE;
+
+    dst->image_scratch = src->image_scratch;
+    src->image_scratch = VK_NULL_HANDLE;
+
+    dst->image_scratch_current_layout = src->image_scratch_current_layout;
+    src->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
+
+    dst->allocation_scratch = src->allocation_scratch;
+    src->allocation_scratch = VK_NULL_HANDLE;
+}
+
+// Release the surface's image view, image, and (when present) the
+// scaling scratch image.
+static void destroy_surface_image(PGRAPHVkState *r, SurfaceBinding *surface)
+{
+    // The scratch image only exists when surface scaling is active.
+    if (surface->image_scratch != VK_NULL_HANDLE) {
+        vmaDestroyImage(r->allocator, surface->image_scratch,
+                        surface->allocation_scratch);
+    }
+
+    vkDestroyImageView(r->device, surface->image_view, NULL);
+    vmaDestroyImage(r->allocator, surface->image, surface->allocation);
+}
+
+// Decide whether a retired surface's images can be recycled for
+// `target`: format, usage flags, and geometry must all match.
+// NOTE(review): function name carries a typo ("compatibile"); kept as-is
+// since additional callers may exist elsewhere in this file.
+static bool check_invalid_surface_is_compatibile(SurfaceBinding *surface,
+                                                 SurfaceBinding *target)
+{
+    if (surface->host_fmt.vk_format != target->host_fmt.vk_format) {
+        return false;
+    }
+    if (surface->host_fmt.usage != target->host_fmt.usage) {
+        return false;
+    }
+    return surface->width == target->width &&
+           surface->height == target->height &&
+           surface->pitch == target->pitch;
+}
+
+// Unlink and return the first retired surface whose resources can be
+// reused for `target`; NULL when none qualifies.
+static SurfaceBinding *
+get_any_compatible_invalid_surface(PGRAPHVkState *r, SurfaceBinding *target)
+{
+    SurfaceBinding *candidate, *next;
+    QTAILQ_FOREACH_SAFE(candidate, &r->invalid_surfaces, entry, next) {
+        if (!check_invalid_surface_is_compatibile(candidate, target)) {
+            continue;
+        }
+        QTAILQ_REMOVE(&r->invalid_surfaces, candidate, entry);
+        return candidate;
+    }
+
+    return NULL;
+}
+
+// Keep at most `keep` entries on the invalid list; destroy and free the
+// remainder.
+static void prune_invalid_surfaces(PGRAPHVkState *r, int keep)
+{
+    int seen = 0;
+
+    SurfaceBinding *s, *next;
+    QTAILQ_FOREACH_SAFE(s, &r->invalid_surfaces, entry, next) {
+        seen += 1;
+        if (seen > keep) {
+            QTAILQ_REMOVE(&r->invalid_surfaces, s, entry);
+            destroy_surface_image(r, s);
+            g_free(s);
+        }
+    }
+}
+
+// Evict surfaces that have not been touched for at least
+// max_surface_frame_time_delta frames, flushing dirty contents first.
+static void expire_old_surfaces(NV2AState *d)
+{
+    PGRAPHVkState *r = d->pgraph.vk_renderer_state;
+
+    SurfaceBinding *s, *next;
+    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
+        int age = d->pgraph.frame_time - s->frame_time;
+        if (age < max_surface_frame_time_delta) {
+            continue;
+        }
+        trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr);
+        pgraph_vk_surface_download_if_dirty(d, s);
+        invalidate_surface(d, s);
+    }
+}
+
+// Test whether surface s1 can stand in for s2. Format, pitch, and clip
+// origin must always be compatible; dimensions must match exactly in
+// strict mode, or s1 must merely be large enough otherwise.
+static bool check_surface_compatibility(SurfaceBinding const *s1,
+                                        SurfaceBinding const *s2, bool strict)
+{
+    if (s1->color != s2->color ||
+        s1->host_fmt.vk_format != s2->host_fmt.vk_format ||
+        s1->pitch != s2->pitch ||
+        s1->shape.clip_x > s2->shape.clip_x ||
+        s1->shape.clip_y > s2->shape.clip_y) {
+        return false;
+    }
+
+    if (strict) {
+        return (s1->width == s2->width) && (s1->height == s2->height);
+    }
+    return (s1->width >= s2->width) && (s1->height >= s2->height);
+}
+
+// Force a download of the surface to guest RAM, but only when it holds
+// rendered data that has not yet been flushed.
+void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface)
+{
+    if (!surface->draw_dirty) {
+        return;
+    }
+    download_surface(d, surface, true);
+}
+
+// Upload guest VRAM contents into the surface's Vulkan image:
+// unswizzles if needed, stages via a host-visible buffer, expands
+// packed depth-stencil formats with a compute shader, and upscales via
+// the scratch image when a scale factor is active.
+void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
+                                   bool force)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    if (!(surface->upload_pending || force)) {
+        return;
+    }
+
+    nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD);
+
+    pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); // FIXME: SURFACE_UP
+
+    trace_nv2a_pgraph_surface_upload(
+        surface->color ? "COLOR" : "ZETA",
+        surface->swizzle ? "sz" : "lin", surface->vram_addr,
+        surface->width, surface->height, surface->pitch,
+        surface->fmt.bytes_per_pixel);
+
+    surface->upload_pending = false;
+    surface->draw_time = pg->draw_time;
+
+    uint8_t *data = d->vram_ptr;
+    uint8_t *buf = data + surface->vram_addr;
+
+    g_autofree uint8_t *swizzle_buf = NULL;
+    uint8_t *gl_read_buf = NULL;
+
+    // Swizzled guest data is linearized into a temporary buffer first.
+    if (surface->swizzle) {
+        swizzle_buf = (uint8_t*)g_malloc(surface->size);
+        gl_read_buf = swizzle_buf;
+        unswizzle_rect(data + surface->vram_addr,
+                       surface->width, surface->height,
+                       swizzle_buf,
+                       surface->pitch,
+                       surface->fmt.bytes_per_pixel);
+        nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE);
+    } else {
+        gl_read_buf = buf;
+    }
+
+    // FIXME: Eliminate extra copies
+
+    VkBufferImageCopy regions[2];
+    int num_regions = 1;
+    regions[0] = (VkBufferImageCopy){
+        .imageSubresource.aspectMask = surface->color ?
+                                           VK_IMAGE_ASPECT_COLOR_BIT :
+                                           VK_IMAGE_ASPECT_DEPTH_BIT,
+        .imageSubresource.layerCount = 1,
+        .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 },
+    };
+
+    // Stencil plane travels as a second copy region.
+    if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
+        regions[num_regions++] = (VkBufferImageCopy){
+            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
+            .imageSubresource.layerCount = 1,
+            .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 },
+        };
+    }
+
+    size_t uploaded_image_size = surface->height * surface->width *
+                                 surface->fmt.bytes_per_pixel;
+
+    StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC];
+    assert(uploaded_image_size <= copy_buffer->buffer_size);
+
+    void *mapped_memory_ptr;
+    VK_CHECK(vmaMapMemory(r->allocator, copy_buffer->allocation,
+                          &mapped_memory_ptr));
+
+    // Packed depth-stencil formats are expanded by a compute shader.
+    bool use_compute_to_convert_depth_stencil_format =
+        surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
+        surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT;
+
+    bool no_conversion_necessary =
+        surface->color || surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM ||
+        use_compute_to_convert_depth_stencil_format;
+    assert(no_conversion_necessary);
+
+    // Pitch-adjusting copy from guest layout into the staging buffer.
+    memcpy_image(mapped_memory_ptr, gl_read_buf,
+                 surface->width * surface->fmt.bytes_per_pixel, surface->pitch,
+                 surface->height);
+
+    vmaUnmapMemory(r->allocator, copy_buffer->allocation);
+
+    VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
+
+    unsigned int scaled_width = surface->width, scaled_height = surface->height;
+    pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);
+
+    if (use_compute_to_convert_depth_stencil_format) {
+        size_t packed_size = uploaded_image_size;
+        VkBufferCopy buffer_copy_region = {
+            .size = packed_size,
+        };
+        vkCmdCopyBuffer(cmd, copy_buffer->buffer,
+                        r->storage_buffers[BUFFER_COMPUTE_DST].buffer, 1,
+                        &buffer_copy_region);
+
+        // Compute writes depth (4 bytes/px) then stencil (1 byte/px).
+        size_t num_pixels = scaled_width * scaled_height;
+        size_t unpacked_depth_image_size = num_pixels * 4;
+        size_t unpacked_stencil_image_size = num_pixels;
+        size_t unpacked_size =
+            unpacked_depth_image_size + unpacked_stencil_image_size;
+
+        // Make the staging copy visible to the compute shader.
+        VkBufferMemoryBarrier pre_unpack_barrier = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer,
+            .size = packed_size
+        };
+        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL,
+                             1, &pre_unpack_barrier, 0, NULL);
+
+        StorageBuffer *unpack_buffer = &r->storage_buffers[BUFFER_COMPUTE_SRC];
+        pgraph_vk_unpack_depth_stencil(
+            pg, surface, cmd, r->storage_buffers[BUFFER_COMPUTE_DST].buffer,
+            unpack_buffer->buffer);
+
+        // Make the compute output visible to the image copy.
+        VkBufferMemoryBarrier post_unpack_barrier = {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = unpack_buffer->buffer,
+            .size = unpacked_size
+        };
+        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
+                             &post_unpack_barrier, 0, NULL);
+
+        // Already scaled during compute. Adjust copy regions.
+        regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 };
+        regions[1].imageExtent = regions[0].imageExtent;
+        regions[1].bufferOffset = unpacked_depth_image_size;
+
+        copy_buffer = unpack_buffer;
+    }
+
+    // Non-compute paths upscale via the scratch image and a blit.
+    bool upscale = !use_compute_to_convert_depth_stencil_format &&
+                   pg->surface_scale_factor > 1;
+
+    if (upscale && surface->image_scratch_current_layout !=
+                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
+        pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch,
+                                          surface->host_fmt.vk_format,
+                                          surface->image_scratch_current_layout,
+                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+        surface->image_scratch_current_layout =
+            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+    }
+
+    pgraph_vk_transition_image_layout(
+        pg, cmd, surface->image, surface->host_fmt.vk_format,
+        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
+                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+
+    vkCmdCopyBufferToImage(cmd, copy_buffer->buffer,
+                           upscale ? surface->image_scratch : surface->image,
+                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions,
+                           regions);
+
+    if (upscale) {
+        pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch,
+                                          surface->host_fmt.vk_format,
+                                          surface->image_scratch_current_layout,
+                                          VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+        surface->image_scratch_current_layout =
+            VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+
+        // NOTE(review): shadows the outer scaled_width/scaled_height
+        // with identical values.
+        unsigned int scaled_width = surface->width,
+                     scaled_height = surface->height;
+        pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);
+
+        VkImageBlit blitRegion = {
+            .srcSubresource.aspectMask = surface->host_fmt.aspect,
+            .srcSubresource.mipLevel = 0,
+            .srcSubresource.baseArrayLayer = 0,
+            .srcSubresource.layerCount = 1,
+            .srcOffsets[0] = (VkOffset3D){0, 0, 0},
+            .srcOffsets[1] = (VkOffset3D){surface->width, surface->height, 1},
+
+            .dstSubresource.aspectMask = surface->host_fmt.aspect,
+            .dstSubresource.mipLevel = 0,
+            .dstSubresource.baseArrayLayer = 0,
+            .dstSubresource.layerCount = 1,
+            .dstOffsets[0] = (VkOffset3D){0, 0, 0},
+            .dstOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1},
+        };
+
+        vkCmdBlitImage(cmd, surface->image_scratch,
+                       surface->image_scratch_current_layout, surface->image,
+                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitRegion,
+                       surface->color ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
+    }
+
+    // Return the image to its attachment layout for rendering.
+    pgraph_vk_transition_image_layout(
+        pg, cmd, surface->image, surface->host_fmt.vk_format,
+        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
+                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+
+    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_2);
+    pgraph_vk_end_single_time_commands(pg, cmd);
+
+    surface->initialized = true;
+}
+
+static void compare_surfaces(SurfaceBinding const *a, SurfaceBinding const *b)
+{
+ #define DO_CMP(fld) \
+ if (a->fld != b->fld) \
+ trace_nv2a_pgraph_surface_compare_mismatch( \
+ #fld, (long int)a->fld, (long int)b->fld);
+ DO_CMP(shape.clip_x)
+ DO_CMP(shape.clip_width)
+ DO_CMP(shape.clip_y)
+ DO_CMP(shape.clip_height)
+ DO_CMP(fmt.bytes_per_pixel)
+ DO_CMP(host_fmt.vk_format)
+ DO_CMP(color)
+ DO_CMP(swizzle)
+ DO_CMP(vram_addr)
+ DO_CMP(width)
+ DO_CMP(height)
+ DO_CMP(pitch)
+ DO_CMP(size)
+ DO_CMP(dma_addr)
+ DO_CMP(dma_len)
+ DO_CMP(frame_time)
+ DO_CMP(draw_time)
+ #undef DO_CMP
+}
+
/*
 * Fill `target` with a complete surface-binding description for the
 * current color or zeta surface state, using the caller-supplied
 * width/height. Does not create any Vulkan resources; `initialized`
 * is left false so the first use will clear/upload the image.
 */
static void populate_surface_binding_target_sized(NV2AState *d, bool color,
                                                  unsigned int width,
                                                  unsigned int height,
                                                  SurfaceBinding *target)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    Surface *surface;
    hwaddr dma_address;
    BasicSurfaceFormatInfo fmt;
    SurfaceFormatInfo host_fmt;

    if (color) {
        surface = &pg->surface_color;
        dma_address = pg->dma_color;
        assert(pg->surface_shape.color_format != 0);
        assert(pg->surface_shape.color_format <
               ARRAY_SIZE(kelvin_surface_color_format_vk_map));
        fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format];
        host_fmt = kelvin_surface_color_format_vk_map[pg->surface_shape.color_format];
        // A zero host_bytes_per_pixel marks an unimplemented table entry.
        if (host_fmt.host_bytes_per_pixel == 0) {
            fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n",
                    pg->surface_shape.color_format);
            abort();
        }
    } else {
        surface = &pg->surface_zeta;
        dma_address = pg->dma_zeta;
        assert(pg->surface_shape.zeta_format != 0);
        assert(pg->surface_shape.zeta_format <
               ARRAY_SIZE(r->kelvin_surface_zeta_vk_map));
        fmt = kelvin_surface_zeta_format_map[pg->surface_shape.zeta_format];
        // Zeta map lives in renderer state because the host format is
        // chosen at init time based on device support (see
        // pgraph_vk_init_surfaces).
        host_fmt = r->kelvin_surface_zeta_vk_map[pg->surface_shape.zeta_format];
        // FIXME: Support float 16,24b float format surface
    }

    DMAObject dma = nv_dma_load(d, dma_address);
    // There's a bunch of bugs that could cause us to hit this function
    // at the wrong time and get a invalid dma object.
    // Check that it's sane.
    assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS);
    // assert(dma.address + surface->offset != 0);
    assert(surface->offset <= dma.limit);
    assert(surface->offset + surface->pitch * height <= dma.limit + 1);
    assert(surface->pitch % fmt.bytes_per_pixel == 0);
    assert((dma.address & ~0x07FFFFFF) == 0);

    // A zeta target inherits the shape of the bound color surface (if
    // any) so both attachments agree.
    target->shape = (color || !r->color_binding) ? pg->surface_shape :
                                                   r->color_binding->shape;
    target->fmt = fmt;
    target->host_fmt = host_fmt;
    target->color = color;
    target->swizzle =
        (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
    target->vram_addr = dma.address + surface->offset;
    target->width = width;
    target->height = height;
    target->pitch = surface->pitch;
    // Guest size: whole rows at `pitch`, but never less than the tightly
    // packed size for the given width.
    target->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel);
    target->upload_pending = true;
    target->download_pending = false;
    target->draw_dirty = false;
    target->dma_addr = dma.address;
    target->dma_len = dma.limit;
    target->frame_time = pg->frame_time;
    target->draw_time = pg->draw_time;
    target->cleared = false;

    target->initialized = false;
}
+
+static void populate_surface_binding_target(NV2AState *d, bool color,
+ SurfaceBinding *target)
+{
+ PGRAPHState *pg = &d->pgraph;
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ unsigned int width, height;
+
+ if (color || !r->color_binding) {
+ get_surface_dimensions(pg, &width, &height);
+ pgraph_apply_anti_aliasing_factor(pg, &width, &height);
+
+ // Since we determine surface dimensions based on the clipping
+ // rectangle, make sure to include the surface offset as well.
+ if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
+ width += pg->surface_shape.clip_x;
+ height += pg->surface_shape.clip_y;
+ }
+ } else {
+ width = r->color_binding->width;
+ height = r->color_binding->height;
+ }
+
+ populate_surface_binding_target_sized(d, color, width, height, target);
+}
+
/*
 * Synchronize one surface slot (color or zeta) with current PGRAPH state.
 *
 * upload == true:  (re)bind a surface for rendering, reusing a cached
 *                  compatible binding when possible, otherwise creating
 *                  a new one (possibly recycling an invalidated image).
 * upload == false: flush dirty draw results back to guest RAM when the
 *                  surface can no longer be monitored.
 */
static void update_surface_part(NV2AState *d, bool upload, bool color)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    SurfaceBinding target;
    populate_surface_binding_target(d, color, &target);

    Surface *pg_surface = color ? &pg->surface_color : &pg->surface_zeta;

    // Outside of TCG the dirty bitmap is the only way to observe guest
    // CPU writes into the surface region.
    bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty(
                                           d->vram, target.vram_addr,
                                           target.size, DIRTY_MEMORY_NV2A);

    if (upload && (pg_surface->buffer_dirty || mem_dirty)) {
        // FIXME: We don't need to be so aggressive flushing the command list
        // pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE);
        pgraph_vk_ensure_not_in_render_pass(pg);

        unbind_surface(d, color);

        SurfaceBinding *surface = pgraph_vk_surface_get(d, target.vram_addr);
        if (surface != NULL) {
            // FIXME: Support same color/zeta surface target? In the mean time,
            // if the surface we just found is currently bound, just unbind it.
            SurfaceBinding *other = (color ? r->zeta_binding
                                           : r->color_binding);
            if (surface == other) {
                NV2A_UNIMPLEMENTED("Same color & zeta surface offset");
                unbind_surface(d, !color);
            }
        }

        trace_nv2a_pgraph_surface_target(
            color ? "COLOR" : "ZETA", target.vram_addr,
            target.swizzle ? "sz" : "ln",
            pg->surface_shape.anti_aliasing,
            pg->surface_shape.clip_x,
            pg->surface_shape.clip_width, pg->surface_shape.clip_y,
            pg->surface_shape.clip_height);

        bool should_create = true;

        if (surface != NULL) {
            // Loose compatibility first; exact-size checks follow below.
            bool is_compatible =
                check_surface_compatibility(surface, &target, false);

            void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height,
                             const char *layout, uint32_t anti_aliasing,
                             uint32_t clip_x, uint32_t clip_width,
                             uint32_t clip_y, uint32_t clip_height,
                             uint32_t pitch) =
                surface->color ? trace_nv2a_pgraph_surface_match_color :
                                 trace_nv2a_pgraph_surface_match_zeta;

            trace_fn(surface->vram_addr, surface->width, surface->height,
                     surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing,
                     surface->shape.clip_x, surface->shape.clip_width,
                     surface->shape.clip_y, surface->shape.clip_height,
                     surface->pitch);

            assert(!(target.swizzle && pg->clearing));

#if 0
            if (surface->swizzle != target.swizzle) {
                // Clears should only be done on linear surfaces. Avoid
                // synchronization by allowing (1) a surface marked swizzled to
                // be cleared under the assumption the entire surface is
                // destined to be cleared and (2) a fully cleared linear surface
                // to be marked swizzled. Strictly match size to avoid
                // pathological cases.
                is_compatible &= (pg->clearing || surface->cleared) &&
                                 check_surface_compatibility(surface, &target, true);
                if (is_compatible) {
                    trace_nv2a_pgraph_surface_migrate_type(
                        target.swizzle ? "swizzled" : "linear");
                }
            }
#endif

            // Reusing a color surface of different exact size: reject if
            // it would overlap the zeta surface region in VRAM.
            if (is_compatible && color &&
                !check_surface_compatibility(surface, &target, true)) {
                SurfaceBinding zeta_entry;
                populate_surface_binding_target_sized(
                    d, !color, surface->width, surface->height, &zeta_entry);
                hwaddr color_end = surface->vram_addr + surface->size;
                hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size;
                is_compatible &= surface->vram_addr >= zeta_end ||
                                 zeta_entry.vram_addr >= color_end;
            }

            // A reused zeta surface must match the bound color surface's
            // dimensions exactly.
            if (is_compatible && !color && r->color_binding) {
                is_compatible &= (surface->width == r->color_binding->width) &&
                                 (surface->height == r->color_binding->height);
            }

            if (is_compatible) {
                // FIXME: Refactor
                pg->surface_binding_dim.width = surface->width;
                pg->surface_binding_dim.clip_x = surface->shape.clip_x;
                pg->surface_binding_dim.clip_width = surface->shape.clip_width;
                pg->surface_binding_dim.height = surface->height;
                pg->surface_binding_dim.clip_y = surface->shape.clip_y;
                pg->surface_binding_dim.clip_height = surface->shape.clip_height;
                surface->upload_pending |= mem_dirty;
                // Reusing a color surface may change dimensions, so force
                // the zeta slot to be re-evaluated as well.
                pg->surface_zeta.buffer_dirty |= color;
                should_create = false;
            } else {
                trace_nv2a_pgraph_surface_evict_reason(
                    "incompatible", surface->vram_addr);
                compare_surfaces(surface, &target);
                pgraph_vk_surface_download_if_dirty(d, surface);
                invalidate_surface(d, surface);
            }
        }

        if (should_create) {
            // Prefer recycling an invalidated surface's image over
            // allocating a new one.
            surface = get_any_compatible_invalid_surface(r, &target);
            if (surface) {
                migrate_surface_image(&target, surface);
            } else {
                surface = g_malloc(sizeof(SurfaceBinding));
                create_surface_image(pg, &target);
            }

            *surface = target;
            set_surface_label(pg, surface);
            surface_put(d, surface);

            // FIXME: Refactor
            pg->surface_binding_dim.width = target.width;
            pg->surface_binding_dim.clip_x = target.shape.clip_x;
            pg->surface_binding_dim.clip_width = target.shape.clip_width;
            pg->surface_binding_dim.height = target.height;
            pg->surface_binding_dim.clip_y = target.shape.clip_y;
            pg->surface_binding_dim.clip_height = target.shape.clip_height;

            // Zeta must match the new color dimensions; mark for rebind.
            if (color && r->zeta_binding &&
                (r->zeta_binding->width != target.width ||
                 r->zeta_binding->height != target.height)) {
                pg->surface_zeta.buffer_dirty = true;
            }
        }

        void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height,
                         const char *layout, uint32_t anti_aliasing,
                         uint32_t clip_x, uint32_t clip_width, uint32_t clip_y,
                         uint32_t clip_height, uint32_t pitch) =
            color ? (should_create ? trace_nv2a_pgraph_surface_create_color :
                                     trace_nv2a_pgraph_surface_hit_color) :
                    (should_create ? trace_nv2a_pgraph_surface_create_zeta :
                                     trace_nv2a_pgraph_surface_hit_zeta);
        trace_fn(surface->vram_addr, surface->width, surface->height,
                 surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing,
                 surface->shape.clip_x, surface->shape.clip_width,
                 surface->shape.clip_y, surface->shape.clip_height, surface->pitch);

        bind_surface(r, surface);
        pg_surface->buffer_dirty = false;
    }

    if (!upload && pg_surface->draw_dirty) {
        if (!tcg_enabled()) {
            // FIXME: Cannot monitor for reads/writes; flush now
            download_surface(d, color ? r->color_binding : r->zeta_binding,
                             true);
        }

        pg_surface->write_enabled_cache = false;
        pg_surface->draw_dirty = false;
    }
}
+
+// FIXME: Move to common?
// FIXME: Move to common
/*
 * Top-level surface synchronization entry point, called around draws
 * and clears.
 *
 * upload == true:  ensure color/zeta bindings exist and guest data is
 *                  uploaded before rendering.
 * upload == false: download dirty draw results for write-enabled
 *                  surfaces.
 */
void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write,
                              bool zeta_write)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    pg->surface_shape.z_format =
        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
                 NV_PGRAPH_SETUPRASTER_Z_FORMAT);

    // Clears always touch the surface regardless of write-enable masks.
    color_write = color_write &&
            (pg->clearing || pgraph_color_write_enabled(pg));
    zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg));

    if (upload) {
        bool fb_dirty = framebuffer_dirty(pg);
        if (fb_dirty) {
            memcpy(&pg->last_surface_shape, &pg->surface_shape,
                   sizeof(SurfaceShape));
            pg->surface_color.buffer_dirty = true;
            pg->surface_zeta.buffer_dirty = true;
        }

        if (pg->surface_color.buffer_dirty) {
            unbind_surface(d, true);
        }

        if (color_write) {
            update_surface_part(d, true, true);
        }

        if (pg->surface_zeta.buffer_dirty) {
            unbind_surface(d, false);
        }

        if (zeta_write) {
            update_surface_part(d, true, false);
        }
    } else {
        // Also flush if writes were enabled earlier this pass, even if
        // they are disabled right now (write_enabled_cache).
        if ((color_write || pg->surface_color.write_enabled_cache)
            && pg->surface_color.draw_dirty) {
            update_surface_part(d, false, true);
        }
        if ((zeta_write || pg->surface_zeta.write_enabled_cache)
            && pg->surface_zeta.draw_dirty) {
            update_surface_part(d, false, false);
        }
    }

    if (upload) {
        pg->draw_time++;
    }

    bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);

    if (r->color_binding) {
        r->color_binding->frame_time = pg->frame_time;
        if (upload) {
            pgraph_vk_upload_surface_data(d, r->color_binding, false);
            r->color_binding->draw_time = pg->draw_time;
            r->color_binding->swizzle = swizzle;
        }
    }

    if (r->zeta_binding) {
        r->zeta_binding->frame_time = pg->frame_time;
        if (upload) {
            pgraph_vk_upload_surface_data(d, r->zeta_binding, false);
            r->zeta_binding->draw_time = pg->draw_time;
            r->zeta_binding->swizzle = swizzle;
        }
    }

    // Sanity check color and zeta dimensions match
    if (r->color_binding && r->zeta_binding) {
        assert(r->color_binding->width == r->zeta_binding->width);
        assert(r->color_binding->height == r->zeta_binding->height);
    }

    expire_old_surfaces(d);
    prune_invalid_surfaces(r, num_invalid_surfaces_to_keep);
}
+
+static bool check_format_and_usage_supported(PGRAPHVkState *r, VkFormat format,
+ VkImageUsageFlags usage)
+{
+ VkPhysicalDeviceImageFormatInfo2 pdif2 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = usage,
+ };
+ VkImageFormatProperties2 props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+ };
+ VkResult result = vkGetPhysicalDeviceImageFormatProperties2(
+ r->physical_device, &pdif2, &props);
+ return result == VK_SUCCESS;
+}
+
+static bool check_surface_internal_formats_supported(
+ PGRAPHVkState *r, const SurfaceFormatInfo *fmts, size_t count)
+{
+ bool all_supported = true;
+ for (int i = 0; i < count; i++) {
+ const SurfaceFormatInfo *f = &fmts[i];
+ if (f->host_bytes_per_pixel) {
+ all_supported &=
+ check_format_and_usage_supported(r, f->vk_format, f->usage);
+ }
+ }
+ return all_supported;
+}
+
/*
 * One-time surface subsystem initialization: verify device support for
 * the color formats, select depth-stencil host formats, and set up the
 * surface cache lists and synchronization events.
 */
void pgraph_vk_init_surfaces(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    // Make sure all surface format types are supported. We don't expect issue
    // with these, and therefore have no fallback mechanism.
    bool color_formats_supported = check_surface_internal_formats_supported(
        r, kelvin_surface_color_format_vk_map,
        ARRAY_SIZE(kelvin_surface_color_format_vk_map));
    assert(color_formats_supported);

    // Check if the device supports preferred VK_FORMAT_D24_UNORM_S8_UINT
    // format, fall back to D32_SFLOAT_S8_UINT otherwise.
    r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = zeta_d16;
    if (check_surface_internal_formats_supported(r, &zeta_d24_unorm_s8_uint,
                                                 1)) {
        r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
            zeta_d24_unorm_s8_uint;
    } else if (check_surface_internal_formats_supported(
                   r, &zeta_d32_sfloat_s8_uint, 1)) {
        r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
            zeta_d32_sfloat_s8_uint;
    } else {
        assert(!"No suitable depth-stencil format supported");
    }

    QTAILQ_INIT(&r->surfaces);
    QTAILQ_INIT(&r->invalid_surfaces);

    r->downloads_pending = false;
    qemu_event_init(&r->downloads_complete, false);
    qemu_event_init(&r->dirty_surfaces_download_complete, false);

    r->color_binding = NULL;
    r->zeta_binding = NULL;
    r->framebuffer_dirty = true;

    pgraph_vk_reload_surface_scale_factor(pg); // FIXME: Move internal
}
+
+void pgraph_vk_finalize_surfaces(PGRAPHState *pg)
+{
+ pgraph_vk_surface_flush(container_of(pg, NV2AState, pgraph));
+}
+
/*
 * Unbind and invalidate every surface binding and reset dirty tracking.
 * Used at teardown and when the surface scale factor changes.
 */
void pgraph_vk_surface_flush(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    // Clear last surface shape to force recreation of buffers at next draw
    pg->surface_color.draw_dirty = false;
    pg->surface_zeta.draw_dirty = false;
    memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape));
    unbind_surface(d, true);
    unbind_surface(d, false);

    // invalidate_surface() moves entries to the invalid list, so iterate
    // with the _SAFE variant.
    SurfaceBinding *s, *next;
    QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
        invalidate_surface(d, s);
    }
    // Keep 0 invalid surfaces: release all recycled images now.
    prune_invalid_surfaces(r, 0);

    pgraph_vk_reload_surface_scale_factor(pg);
}
diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c
new file mode 100644
index 0000000000..10a4ccd2e4
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/texture.c
@@ -0,0 +1,1456 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * Based on GL implementation:
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/xbox/nv2a/pgraph/s3tc.h"
+#include "hw/xbox/nv2a/pgraph/swizzle.h"
+#include "qemu/fast-hash.h"
+#include "qemu/lru.h"
+#include "renderer.h"
+
+static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode);
+
/* Map kelvin texture dimensionality (1, 2 or 3) to the corresponding
 * Vulkan image type. Index 0 is unused padding. */
static const VkImageType dimensionality_to_vk_image_type[] = {
    0,
    VK_IMAGE_TYPE_1D,
    VK_IMAGE_TYPE_2D,
    VK_IMAGE_TYPE_3D,
};
/* Same mapping, for the image-view type. */
static const VkImageViewType dimensionality_to_vk_image_view_type[] = {
    0,
    VK_IMAGE_VIEW_TYPE_1D,
    VK_IMAGE_VIEW_TYPE_2D,
    VK_IMAGE_VIEW_TYPE_3D,
};
+
+static VkSamplerAddressMode lookup_texture_address_mode(int idx)
+{
+ assert(0 < idx && idx < ARRAY_SIZE(pgraph_texture_addr_vk_map));
+ return pgraph_texture_addr_vk_map[idx];
+}
+
+// FIXME: Move to common
+// FIXME: We can shrink the size of this structure
+// FIXME: Use simple allocator
/* One decoded mip level: host-format pixel data plus its dimensions. */
typedef struct TextureLevel {
    unsigned int width, height, depth;
    hwaddr vram_addr;
    void *decoded_data;   // host-format pixels, owned by the layout's creator
    size_t decoded_size;  // size of decoded_data in bytes
} TextureLevel;

/* One layer (cubemap face or the single 2D/3D layer); up to 16 mips. */
typedef struct TextureLayer {
    TextureLevel levels[16];
} TextureLayer;

/* Full decoded texture: up to 6 layers (cubemap) of mip chains. */
typedef struct TextureLayout {
    TextureLayer layers[6];
} TextureLayout;
+
+// FIXME: Move to common
+static enum S3TC_DECOMPRESS_FORMAT kelvin_format_to_s3tc_format(int color_format)
+{
+ switch (color_format) {
+ case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5:
+ return S3TC_DECOMPRESS_FORMAT_DXT1;
+ case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8:
+ return S3TC_DECOMPRESS_FORMAT_DXT3;
+ case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8:
+ return S3TC_DECOMPRESS_FORMAT_DXT5;
+ default:
+ assert(false);
+ }
+}
+
+// FIXME: Move to common
/*
 * Copy a `height`-row image between buffers with differing row strides.
 *
 * `min_stride` bytes are copied from each source row (the caller passes
 * the meaningful width in bytes); `src_stride`/`dst_stride` advance the
 * pointers between rows. The buffers must not overlap. `src` is now
 * const-qualified since it is never written through.
 */
static void memcpy_image(void *dst, const void *src, int min_stride,
                         int dst_stride, int src_stride, int height)
{
    uint8_t *dst_ptr = (uint8_t *)dst;
    const uint8_t *src_ptr = (const uint8_t *)src;

    for (int i = 0; i < height; i++) {
        memcpy(dst_ptr, src_ptr, min_stride);
        src_ptr += src_stride;
        dst_ptr += dst_stride;
    }
}
+
+// FIXME: Move to common
// FIXME: Move to common
/*
 * Compute the guest-memory size of one cubemap face, i.e. the stride
 * between consecutive faces in VRAM: the sum of all mip level sizes,
 * rounded up to NV2A_CUBEMAP_FACE_ALIGNMENT.
 */
static size_t get_cubemap_layer_size(PGRAPHState *pg, TextureShape s)
{
    BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format];
    bool is_compressed =
        pgraph_is_texture_format_compressed(pg, s.color_format);
    unsigned int block_size;  // only set/used when is_compressed

    unsigned int w = s.width, h = s.height;
    size_t length = 0;

    // Bordered swizzled textures store an enlarged image (min 16 texels).
    if (!f.linear && s.border) {
        w = MAX(16, w * 2);
        h = MAX(16, h * 2);
    }

    if (is_compressed) {
        // DXT1 blocks are 8 bytes; DXT3/DXT5 blocks are 16 bytes.
        block_size =
            s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ?
                8 :
                16;
    }

    for (int level = 0; level < s.levels; level++) {
        if (is_compressed) {
            // NOTE(review): w/4 and h/4 truncate for mips smaller than a
            // block; presumably matches hardware layout — confirm.
            length += w / 4 * h / 4 * block_size;
        } else {
            length += w * h * f.bytes_per_pixel;
        }

        w /= 2;
        h /= 2;
    }

    return ROUND_UP(length, NV2A_CUBEMAP_FACE_ALIGNMENT);
}
+
+// FIXME: Move to common
+// FIXME: More refactoring
+// FIXME: Possible parallelization of decoding
+// FIXME: Bounds checking
/*
 * Decode a guest texture into host-format pixel data.
 *
 * Reads the texture (and palette, if any) from VRAM, unswizzles and/or
 * converts each mip level of each layer, and returns a freshly
 * allocated TextureLayout describing the decoded data. The caller owns
 * the layout and every level's decoded_data buffer.
 */
static TextureLayout *get_texture_layout(PGRAPHState *pg, int texture_idx)
{
    NV2AState *d = container_of(pg, NV2AState, pgraph);
    TextureShape s = pgraph_get_texture_shape(pg, texture_idx);
    BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format];

    NV2A_VK_DGROUP_BEGIN("Texture %d: cubemap=%d, dimensionality=%d, color_format=0x%x, levels=%d, width=%d, height=%d, depth=%d border=%d, min_mipmap_level=%d, max_mipmap_level=%d, pitch=%d",
                         texture_idx,
                         s.cubemap,
                         s.dimensionality,
                         s.color_format,
                         s.levels,
                         s.width,
                         s.height,
                         s.depth,
                         s.border,
                         s.min_mipmap_level,
                         s.max_mipmap_level,
                         s.pitch
                         );

    // Sanity checks on below assumptions
    if (f.linear) {
        assert(s.dimensionality == 2);
    }
    if (s.cubemap) {
        assert(s.dimensionality == 2);
        assert(!f.linear);
    }
    assert(s.dimensionality > 1);

    const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx);
    void *texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset;

    size_t texture_palette_data_size;
    const hwaddr texture_palette_vram_offset =
        pgraph_get_texture_palette_phys_addr_length(pg, texture_idx,
                                                    &texture_palette_data_size);
    void *palette_data_ptr = (char *)d->vram_ptr + texture_palette_vram_offset;

    // Bordered swizzled textures store an enlarged image (min 16 texels
    // per axis); adjust the dimensions we read from guest memory.
    unsigned int adjusted_width = s.width, adjusted_height = s.height,
                 adjusted_pitch = s.pitch, adjusted_depth = s.depth;

    if (!f.linear && s.border) {
        adjusted_width = MAX(16, adjusted_width * 2);
        adjusted_height = MAX(16, adjusted_height * 2);
        adjusted_pitch = adjusted_width * (s.pitch / s.width);
        adjusted_depth = MAX(16, s.depth * 2);
    }

    TextureLayout *layout = g_malloc0(sizeof(TextureLayout));

    // --- Linear (pitch-ordered) 2D textures: single level, no swizzle ---
    if (f.linear) {
        assert(s.pitch % f.bytes_per_pixel == 0 && "Can't handle strides unaligned to pixels");

        size_t converted_size;
        uint8_t *converted = pgraph_convert_texture_data(
            s, texture_data_ptr, palette_data_ptr, adjusted_width,
            adjusted_height, 1, adjusted_pitch, 0, &converted_size);

        if (!converted) {
            // No conversion needed: repack rows tightly.
            int dst_stride = adjusted_width * f.bytes_per_pixel;
            assert(adjusted_width <= s.width);
            converted_size = dst_stride * adjusted_height;
            converted = g_malloc(converted_size);
            memcpy_image(converted, texture_data_ptr, adjusted_width * f.bytes_per_pixel, dst_stride,
                         adjusted_pitch, adjusted_height);
        }

        assert(s.levels == 1);
        layout->layers[0].levels[0] = (TextureLevel){
            .width = adjusted_width,
            .height = adjusted_height,
            .depth = 1,
            .decoded_size = converted_size,
            .decoded_data = converted,
        };

        NV2A_VK_DGROUP_END();
        return layout;
    }

    bool is_compressed = pgraph_is_texture_format_compressed(pg, s.color_format);
    size_t block_size = 0;
    if (is_compressed) {
        // DXT1 blocks are 8 bytes; DXT3/DXT5 blocks are 16 bytes.
        bool is_dxt1 =
            s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5;
        block_size = is_dxt1 ? 8 : 16;
    }

    // --- Swizzled / compressed 2D (and cubemap) textures ---
    if (s.dimensionality == 2) {
        hwaddr layer_size = 0;
        if (s.cubemap) {
            layer_size = get_cubemap_layer_size(pg, s);
        }

        const int num_layers = s.cubemap ? 6 : 1;
        for (int layer = 0; layer < num_layers; layer++) {
            unsigned int width = adjusted_width, height = adjusted_height;
            texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset +
                               layer * layer_size;

            for (int level = 0; level < s.levels; level++) {
                NV2A_VK_DPRINTF("Layer %d Level %d @ %x", layer, level, (int)((char*)texture_data_ptr - (char*)d->vram_ptr));

                width = MAX(width, 1);
                height = MAX(height, 1);
                if (is_compressed) {
                    // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size
                    unsigned int tex_width = width, tex_height = height;
                    unsigned int physical_width = (width + 3) & ~3,
                                 physical_height = (height + 3) & ~3;
                    // if (physical_width != width) {
                    //     glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width);
                    // }

                    // NOTE(review): assumes the decompressor output covers
                    // the virtual width x height in RGBA8 — confirm against
                    // s3tc_decompress_2d.
                    size_t converted_size = width * height * 4;
                    uint8_t *converted = s3tc_decompress_2d(
                        kelvin_format_to_s3tc_format(s.color_format),
                        texture_data_ptr, physical_width, physical_height);
                    assert(converted);

                    if (s.cubemap && adjusted_width != s.width) {
                        // FIXME: Consider preserving the border.
                        // There does not seem to be a way to reference the border
                        // texels in a cubemap, so they are discarded.

                        // glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4);
                        // glPixelStorei(GL_UNPACK_SKIP_ROWS, 4);
                        tex_width = s.width;
                        tex_height = s.height;
                        // if (physical_width == width) {
                        //     glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
                        // }

                        // FIXME: Crop by 4 pixels on each side
                    }

                    layout->layers[layer].levels[level] = (TextureLevel){
                        .width = tex_width,
                        .height = tex_height,
                        .depth = 1,
                        .decoded_size = converted_size,
                        .decoded_data = converted,
                    };

                    texture_data_ptr +=
                        physical_width / 4 * physical_height / 4 * block_size;
                } else {
                    unsigned int pitch = width * f.bytes_per_pixel;
                    unsigned int tex_width = width, tex_height = height;

                    size_t converted_size = height * pitch;
                    uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch);
                    unswizzle_rect(texture_data_ptr, width, height,
                                   unswizzled, pitch, f.bytes_per_pixel);

                    uint8_t *converted = pgraph_convert_texture_data(
                        s, unswizzled, palette_data_ptr, width, height, 1,
                        pitch, 0, &converted_size);

                    // On conversion, the unswizzled staging copy is no
                    // longer needed; otherwise it becomes the result.
                    if (converted) {
                        g_free(unswizzled);
                    } else {
                        converted = unswizzled;
                    }

                    if (s.cubemap && adjusted_width != s.width) {
                        // FIXME: Consider preserving the border.
                        // There does not seem to be a way to reference the border
                        // texels in a cubemap, so they are discarded.
                        // glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
                        tex_width = s.width;
                        tex_height = s.height;
                        // pixel_data += 4 * f.bytes_per_pixel + 4 * pitch;

                        // FIXME: Crop by 4 pixels on each side
                    }

                    layout->layers[layer].levels[level] = (TextureLevel){
                        .width = tex_width,
                        .height = tex_height,
                        .depth = 1,
                        .decoded_size = converted_size,
                        .decoded_data = converted,
                    };

                    texture_data_ptr += width * height * f.bytes_per_pixel;
                }

                width /= 2;
                height /= 2;
            }
        }
    } else if (s.dimensionality == 3) {
        // --- Swizzled / compressed 3D textures ---
        assert(!f.linear);
        unsigned int width = adjusted_width, height = adjusted_height,
                     depth = adjusted_depth;

        for (int level = 0; level < s.levels; level++) {
            if (is_compressed) {
                assert(width % 4 == 0 && height % 4 == 0 &&
                       "Compressed 3D texture virtual size");

                width = MAX(width, 4);
                height = MAX(height, 4);
                depth = MAX(depth, 1);

                size_t converted_size = width * height * depth * 4;
                uint8_t *converted = s3tc_decompress_3d(
                    kelvin_format_to_s3tc_format(s.color_format),
                    texture_data_ptr, width, height, depth);
                assert(converted);

                layout->layers[0].levels[level] = (TextureLevel){
                    .width = width,
                    .height = height,
                    .depth = depth,
                    .decoded_size = converted_size,
                    .decoded_data = converted,
                };

                texture_data_ptr += width / 4 * height / 4 * depth * block_size;
            } else {
                width = MAX(width, 1);
                height = MAX(height, 1);
                depth = MAX(depth, 1);

                unsigned int row_pitch = width * f.bytes_per_pixel;
                unsigned int slice_pitch = row_pitch * height;

                size_t unswizzled_size = slice_pitch * depth;
                uint8_t *unswizzled = g_malloc(unswizzled_size);
                unswizzle_box(texture_data_ptr, width, height, depth,
                              unswizzled, row_pitch, slice_pitch,
                              f.bytes_per_pixel);

                size_t converted_size;
                uint8_t *converted = pgraph_convert_texture_data(
                    s, unswizzled, palette_data_ptr, width, height, depth,
                    row_pitch, slice_pitch, &converted_size);

                if (converted) {
                    g_free(unswizzled);
                } else {
                    converted = unswizzled;
                    converted_size = unswizzled_size;
                }

                layout->layers[0].levels[level] = (TextureLevel){
                    .width = width,
                    .height = height,
                    .depth = depth,
                    .decoded_size = converted_size,
                    .decoded_data = converted,
                };

                texture_data_ptr += width * height * depth * f.bytes_per_pixel;
            }

            width /= 2;
            height /= 2;
            depth /= 2;
        }
    }

    NV2A_VK_DGROUP_END();
    return layout;
}
+
/* VRAM range (inclusive `end`) passed to the LRU visitor below. */
struct pgraph_texture_possibly_dirty_struct {
    hwaddr addr, end;
};

/*
 * LRU visitor: flag a cached texture as possibly dirty when its data or
 * palette range overlaps the range in `opaque`.
 */
static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque)
{
    struct pgraph_texture_possibly_dirty_struct *test = opaque;

    TextureBinding *tnode = container_of(node, TextureBinding, node);
    if (tnode->possibly_dirty) {
        // Already flagged; nothing to do.
        return;
    }

    // Overlap test with inclusive end addresses.
    uintptr_t k_tex_addr = tnode->key.texture_vram_offset;
    uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1;
    bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end);

    if (tnode->key.palette_length > 0) {
        uintptr_t k_pal_addr = tnode->key.palette_vram_offset;
        uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1;
        overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end);
    }

    tnode->possibly_dirty |= overlapping;
}
+
+void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d,
+ hwaddr addr, hwaddr size)
+{
+ hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1;
+ addr &= TARGET_PAGE_MASK;
+ assert(end <= memory_region_size(d->vram));
+
+ struct pgraph_texture_possibly_dirty_struct test = {
+ .addr = addr,
+ .end = end,
+ };
+
+ lru_visit_active(&d->pgraph.vk_renderer_state->texture_cache,
+ mark_textures_possibly_dirty_visitor,
+ &test);
+}
+
+static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size)
+{
+ hwaddr end = TARGET_PAGE_ALIGN(addr + size);
+ addr &= TARGET_PAGE_MASK;
+ assert(end < memory_region_size(d->vram));
+ return memory_region_test_and_clear_dirty(d->vram, addr, end - addr,
+ DIRTY_MEMORY_NV2A_TEX);
+}
+
+// Check if any of the pages spanned by the a texture are dirty.
+static bool check_texture_possibly_dirty(NV2AState *d,
+ hwaddr texture_vram_offset,
+ unsigned int length,
+ hwaddr palette_vram_offset,
+ unsigned int palette_length)
+{
+ bool possibly_dirty = false;
+ if (check_texture_dirty(d, texture_vram_offset, length)) {
+ possibly_dirty = true;
+ pgraph_vk_mark_textures_possibly_dirty(d, texture_vram_offset, length);
+ }
+ if (palette_length && check_texture_dirty(d, palette_vram_offset,
+ palette_length)) {
+ possibly_dirty = true;
+ pgraph_vk_mark_textures_possibly_dirty(d, palette_vram_offset,
+ palette_length);
+ }
+ return possibly_dirty;
+}
+
+// FIXME: Make sure we update sampler when data matches. Should we add filtering
+// options to the textureshape?
/*
 * Decode the guest texture and upload all layers/levels into the
 * binding's Vulkan image via the staging buffer, transitioning the
 * image to SHADER_READ_ONLY_OPTIMAL when done.
 */
static void upload_texture_image(PGRAPHState *pg, int texture_idx,
                                 TextureBinding *binding)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    TextureShape *state = &binding->key.state;
    VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format];

    nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD);

    // g_autofree releases the layout struct; the per-level decoded
    // buffers are freed explicitly at the bottom.
    g_autofree TextureLayout *layout = get_texture_layout(pg, texture_idx);
    const int num_layers = state->cubemap ? 6 : 1;

    // Calculate decoded texture data size
    size_t texture_data_size = 0;
    for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) {
        TextureLayer *layer = &layout->layers[layer_idx];
        for (int level_idx = 0; level_idx < state->levels; level_idx++) {
            size_t size = layer->levels[level_idx].decoded_size;
            assert(size);
            texture_data_size += size;
        }
    }

    // All levels must fit in the staging buffer at once.
    assert(texture_data_size <=
           r->storage_buffers[BUFFER_STAGING_SRC].buffer_size);

    // Copy texture data to mapped device buffer
    uint8_t *mapped_memory_ptr;

    VK_CHECK(vmaMapMemory(r->allocator,
                          r->storage_buffers[BUFFER_STAGING_SRC].allocation,
                          (void *)&mapped_memory_ptr));

    // One copy region per (layer, level) pair.
    int num_regions = num_layers * state->levels;
    g_autofree VkBufferImageCopy *regions =
        g_malloc0_n(num_regions, sizeof(VkBufferImageCopy));

    VkBufferImageCopy *region = regions;
    VkDeviceSize buffer_offset = 0;

    for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) {
        TextureLayer *layer = &layout->layers[layer_idx];
        NV2A_VK_DPRINTF("Layer %d", layer_idx);
        for (int level_idx = 0; level_idx < state->levels; level_idx++) {
            TextureLevel *level = &layer->levels[level_idx];
            NV2A_VK_DPRINTF(" - Level %d, w=%d h=%d d=%d @ %08" HWADDR_PRIx,
                            level_idx, level->width, level->height,
                            level->depth, buffer_offset);
            memcpy(mapped_memory_ptr + buffer_offset, level->decoded_data,
                   level->decoded_size);
            *region = (VkBufferImageCopy){
                .bufferOffset = buffer_offset,
                .bufferRowLength = 0, // Tightly packed
                .bufferImageHeight = 0,
                .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .imageSubresource.mipLevel = level_idx,
                .imageSubresource.baseArrayLayer = layer_idx,
                .imageSubresource.layerCount = 1,
                .imageOffset = (VkOffset3D){ 0, 0, 0 },
                .imageExtent =
                    (VkExtent3D){ level->width, level->height, level->depth },
            };
            buffer_offset += level->decoded_size;
            region++;
        }
    }
    assert(buffer_offset <= texture_data_size);
    vmaUnmapMemory(r->allocator,
                   r->storage_buffers[BUFFER_STAGING_SRC].allocation);

    // FIXME: Use nondraw. Need to fill and copy tex buffer at once
    VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);

    pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format,
                                      binding->current_layout,
                                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
    binding->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

    vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer,
                           binding->image, binding->current_layout,
                           num_regions, regions);

    pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format,
                                      binding->current_layout,
                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
    binding->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_4);
    pgraph_vk_end_single_time_commands(pg, cmd);

    // Release decoded texture data
    for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) {
        TextureLayer *layer = &layout->layers[layer_idx];
        for (int level_idx = 0; level_idx < state->levels; level_idx++) {
            g_free(layer->levels[level_idx].decoded_data);
        }
    }
}
+
/*
 * Copy a zeta (depth/stencil) surface into the image backing a texture
 * binding. The surface's depth and stencil aspects are first copied into a
 * storage buffer; for packed depth/stencil host formats, a compute pass then
 * packs the two planes back into the guest layout before the packed result is
 * uploaded into the texture image.
 */
static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface,
                                         TextureBinding *texture)
{
    assert(!surface->color);

    PGRAPHVkState *r = pg->vk_renderer_state;
    TextureShape *state = &texture->key.state;
    VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format];

    nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX);

    trace_nv2a_pgraph_surface_render_to_texture(
        surface->vram_addr, surface->width, surface->height);

    VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg);

    // Work in host (scaled) pixel dimensions throughout.
    unsigned int scaled_width = surface->width,
                 scaled_height = surface->height;
    pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height);

    // Make the surface readable as a transfer source. Since this is a zeta
    // surface (asserted above), the ternary always takes the depth/stencil
    // attachment branch here.
    pgraph_vk_transition_image_layout(
        pg, cmd, surface->image, surface->host_fmt.vk_format,
        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

    size_t copied_image_size =
        scaled_width * scaled_height * surface->host_fmt.host_bytes_per_pixel;
    size_t stencil_buffer_offset = 0;
    size_t stencil_buffer_size = 0;

    // Region 0: the depth aspect at buffer offset 0.
    int num_regions = 0;
    VkBufferImageCopy regions[2];
    regions[num_regions++] = (VkBufferImageCopy){
        .bufferOffset = 0,
        .bufferRowLength = 0, // Tightly packed
        .bufferImageHeight = 0, // Tightly packed
        .imageSubresource.aspectMask = surface->color ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT,
        .imageSubresource.mipLevel = 0,
        .imageSubresource.baseArrayLayer = 0,
        .imageSubresource.layerCount = 1,
        .imageOffset = (VkOffset3D){0, 0, 0},
        .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1},
    };

    // Region 1 (packed depth/stencil formats only): the stencil aspect,
    // placed after the depth plane. The 4-byte-per-texel offset matches the
    // buffer footprint of a depth-aspect copy for the D24/D32 formats this
    // path is restricted to below.
    if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) {
        stencil_buffer_offset = scaled_width * scaled_height * 4;
        stencil_buffer_size = scaled_width * scaled_height;
        copied_image_size += stencil_buffer_size;

        regions[num_regions++] = (VkBufferImageCopy){
            .bufferOffset = stencil_buffer_offset,
            .bufferRowLength = 0, // Tightly packed
            .bufferImageHeight = 0, // Tightly packed
            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
            .imageSubresource.mipLevel = 0,
            .imageSubresource.baseArrayLayer = 0,
            .imageSubresource.layerCount = 1,
            .imageOffset = (VkOffset3D){0, 0, 0},
            .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1},
        };
    }

    // Only the compute-based packed depth/stencil path is implemented; any
    // other zeta host format trips the assert.
    bool use_compute_to_convert_depth_stencil =
        surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
        surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT;
    assert(use_compute_to_convert_depth_stencil && "Unimplemented");

    StorageBuffer *dst_storage_buffer = &r->storage_buffers[BUFFER_COMPUTE_DST];
    assert(dst_storage_buffer->buffer_size >= copied_image_size);

    // Pull depth (and stencil) planes out of the surface image.
    vkCmdCopyImageToBuffer(
        cmd, surface->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        dst_storage_buffer->buffer,
        num_regions, regions);

    if (use_compute_to_convert_depth_stencil) {
        size_t packed_image_size = scaled_width * scaled_height * 4;

        // Transfer writes must be visible to the compute pack shader.
        VkBufferMemoryBarrier pre_pack_barrier = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer,
            .size = VK_WHOLE_SIZE
        };
        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL,
                             1, &pre_pack_barrier, 0, NULL);

        // Pack separate depth/stencil planes (COMPUTE_DST) into guest layout
        // (COMPUTE_SRC).
        pgraph_vk_pack_depth_stencil(
            pg, surface, cmd,
            r->storage_buffers[BUFFER_COMPUTE_DST].buffer,
            r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, false);

        // Compute writes must be visible to the final transfer back.
        VkBufferMemoryBarrier post_pack_barrier = {
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
            .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer,
            .size = packed_image_size
        };
        vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                             &post_pack_barrier, 0, NULL);

        pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format,
                                          texture->current_layout,
                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
        texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;

        // Upload the packed data into the texture image as a color copy.
        regions[0] = (VkBufferImageCopy){
            .bufferOffset = 0,
            .bufferRowLength = 0,
            .bufferImageHeight = 0,
            .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .imageSubresource.mipLevel = 0,
            .imageSubresource.baseArrayLayer = 0,
            .imageSubresource.layerCount = 1,
            .imageOffset = (VkOffset3D){ 0, 0, 0 },
            .imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 },
        };

        vkCmdCopyBufferToImage(
            cmd, r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, texture->image,
            VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, regions);
    }

    // Return the surface to its attachment layout (depth/stencil branch, per
    // the assert at entry).
    pgraph_vk_transition_image_layout(
        pg, cmd, surface->image, surface->host_fmt.vk_format,
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
        surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
                         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);

    // Leave the texture sampleable.
    pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format,
                                      texture->current_layout,
                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
    texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

    pgraph_vk_end_nondraw_commands(pg, cmd);

    // Record the surface draw time so callers can detect stale copies.
    texture->draw_time = surface->draw_time;
}
+
+// FIXME: Should be able to skip the copy and sample the original surface image
+static void copy_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface,
+ TextureBinding *texture)
+{
+ if (!surface->color) {
+ copy_zeta_surface_to_texture(pg, surface, texture);
+ return;
+ }
+
+ TextureShape *state = &texture->key.state;
+ VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format];
+
+ nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX);
+
+ trace_nv2a_pgraph_surface_render_to_texture(
+ surface->vram_addr, surface->width, surface->height);
+
+ VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg);
+
+ pgraph_vk_transition_image_layout(
+ pg, cmd, surface->image, surface->host_fmt.vk_format,
+ surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
+
+ pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format,
+ texture->current_layout,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+ texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
+
+ VkImageCopy region = {
+ .srcSubresource.aspectMask = surface->host_fmt.aspect,
+ .srcSubresource.layerCount = 1,
+ .dstSubresource.aspectMask = surface->host_fmt.aspect,
+ .dstSubresource.layerCount = 1,
+ .extent.width = surface->width,
+ .extent.height = surface->height,
+ .extent.depth = 1,
+ };
+ pgraph_apply_scaling_factor(pg, ®ion.extent.width,
+ ®ion.extent.height);
+ vkCmdCopyImage(cmd, surface->image,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture->image,
+ texture->current_layout, 1, ®ion);
+
+ pgraph_vk_transition_image_layout(
+ pg, cmd, surface->image, surface->host_fmt.vk_format,
+ VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+ surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL :
+ VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
+
+ pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format,
+ texture->current_layout,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+ texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+ pgraph_vk_end_nondraw_commands(pg, cmd);
+
+ texture->draw_time = surface->draw_time;
+}
+
+static bool check_surface_to_texture_compatiblity(const SurfaceBinding *surface,
+ const TextureShape *shape)
+{
+ // FIXME: Better checks/handling on formats and surface-texture compat
+
+ if ((!surface->swizzle && surface->pitch != shape->pitch) ||
+ surface->width != shape->width ||
+ surface->height != shape->height) {
+ return false;
+ }
+
+ int surface_fmt = surface->shape.color_format;
+ int texture_fmt = shape->color_format;
+
+ if (!surface->color) {
+ if (surface->shape.zeta_format == NV097_SET_SURFACE_FORMAT_ZETA_Z24S8) {
+ return true;
+ }
+ return false;
+ }
+
+ if (shape->cubemap) {
+ // FIXME: Support rendering surface to cubemap face
+ return false;
+ }
+
+ if (shape->levels > 1) {
+ // FIXME: Support rendering surface to mip levels
+ return false;
+ }
+
+ switch (surface_fmt) {
+ case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) {
+ case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true;
+ default: break;
+ }
+ break;
+ case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) {
+ case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true;
+ case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true;
+ default: break;
+ }
+ break;
+ case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) {
+ case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true;
+ case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true;
+ default: break;
+ }
+ break;
+ case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) {
+ case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true;
+ case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true;
+ case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true;
+ case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true;
+ default: break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ trace_nv2a_pgraph_surface_texture_compat_failed(
+ surface_fmt, texture_fmt);
+ return false;
+}
+
+static void create_dummy_texture(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ VkImageCreateInfo image_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+ .imageType = VK_IMAGE_TYPE_2D,
+ .extent.width = 16,
+ .extent.height = 16,
+ .extent.depth = 1,
+ .mipLevels = 1,
+ .arrayLayers = 1,
+ .format = VK_FORMAT_R8_UNORM,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+ .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
+ .samples = VK_SAMPLE_COUNT_1_BIT,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ .flags = 0,
+ };
+
+ VmaAllocationCreateInfo alloc_create_info = {
+ .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
+ };
+
+ VkImage texture_image;
+ VmaAllocation texture_allocation;
+
+ VK_CHECK(vmaCreateImage(r->allocator, &image_create_info,
+ &alloc_create_info, &texture_image,
+ &texture_allocation, NULL));
+
+ VkImageViewCreateInfo image_view_create_info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = texture_image,
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = VK_FORMAT_R8_UNORM,
+ .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .subresourceRange.baseMipLevel = 0,
+ .subresourceRange.levelCount = image_create_info.mipLevels,
+ .subresourceRange.baseArrayLayer = 0,
+ .subresourceRange.layerCount = image_create_info.arrayLayers,
+ .components = (VkComponentMapping){ VK_COMPONENT_SWIZZLE_R,
+ VK_COMPONENT_SWIZZLE_R,
+ VK_COMPONENT_SWIZZLE_R,
+ VK_COMPONENT_SWIZZLE_R },
+ };
+ VkImageView texture_image_view;
+ VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
+ &texture_image_view));
+
+ VkSamplerCreateInfo sampler_create_info = {
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = VK_FILTER_NEAREST,
+ .minFilter = VK_FILTER_NEAREST,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
+ .anisotropyEnable = VK_FALSE,
+ .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE,
+ .unnormalizedCoordinates = VK_FALSE,
+ .compareEnable = VK_FALSE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
+ };
+
+ VkSampler texture_sampler;
+ VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL,
+ &texture_sampler));
+
+ // Copy texture data to mapped device buffer
+ uint8_t *mapped_memory_ptr;
+ size_t texture_data_size =
+ image_create_info.extent.width * image_create_info.extent.height;
+
+ VK_CHECK(vmaMapMemory(r->allocator,
+ r->storage_buffers[BUFFER_STAGING_SRC].allocation,
+ (void *)&mapped_memory_ptr));
+ memset(mapped_memory_ptr, 0xff, texture_data_size);
+ vmaUnmapMemory(r->allocator,
+ r->storage_buffers[BUFFER_STAGING_SRC].allocation);
+
+ VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
+
+ pgraph_vk_transition_image_layout(
+ pg, cmd, texture_image, VK_FORMAT_R8_UNORM, VK_IMAGE_LAYOUT_UNDEFINED,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+
+ VkBufferImageCopy region = {
+ .bufferOffset = 0,
+ .bufferRowLength = 0,
+ .bufferImageHeight = 0,
+ .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .imageSubresource.mipLevel = 0,
+ .imageSubresource.baseArrayLayer = 0,
+ .imageSubresource.layerCount = 1,
+ .imageOffset = (VkOffset3D){ 0, 0, 0 },
+ .imageExtent = (VkExtent3D){ image_create_info.extent.width,
+ image_create_info.extent.height, 1 },
+ };
+ vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer,
+ texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ 1, ®ion);
+
+ pgraph_vk_transition_image_layout(pg, cmd, texture_image,
+ VK_FORMAT_R8_UNORM,
+ VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+ VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
+ pgraph_vk_end_single_time_commands(pg, cmd);
+
+ r->dummy_texture = (TextureBinding){
+ .key.scale = 1.0,
+ .image = texture_image,
+ .current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+ .allocation = texture_allocation,
+ .image_view = texture_image_view,
+ .sampler = texture_sampler,
+ };
+}
+
// Tear down the placeholder texture created by create_dummy_texture().
static void destroy_dummy_texture(PGRAPHVkState *r)
{
    texture_cache_release_node_resources(r, &r->dummy_texture);
}
+
+static void set_texture_label(PGRAPHState *pg, TextureBinding *texture)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ g_autofree gchar *label = g_strdup_printf(
+ "Texture %" HWADDR_PRIx "h fmt:%02xh %dx%dx%d lvls:%d",
+ texture->key.texture_vram_offset, texture->key.state.color_format,
+ texture->key.state.width, texture->key.state.height,
+ texture->key.state.depth, texture->key.state.levels);
+
+ VkDebugUtilsObjectNameInfoEXT name_info = {
+ .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT,
+ .objectType = VK_OBJECT_TYPE_IMAGE,
+ .objectHandle = (uint64_t)texture->image,
+ .pObjectName = label,
+ };
+
+ if (r->debug_utils_extension_enabled) {
+ vkSetDebugUtilsObjectNameEXT(r->device, &name_info);
+ }
+ vmaSetAllocationName(r->allocator, texture->allocation, label);
+}
+
/*
 * Create or reuse a host texture binding for NV2A texture stage
 * `texture_idx`, leaving the result in r->texture_bindings[texture_idx].
 *
 * Flow: derive the guest texture shape and a cache key; if an active surface
 * covers the texture address and is shape/format compatible, source the
 * texture from that surface (render-to-texture); otherwise hash guest memory
 * and upload it when the cached copy is stale or absent.
 */
static void create_texture(PGRAPHState *pg, int texture_idx)
{
    NV2A_VK_DGROUP_BEGIN("Creating texture %d", texture_idx);

    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;
    TextureShape state = pgraph_get_texture_shape(pg, texture_idx); // FIXME: Check for pad issues
    BasicColorFormatInfo f_basic = kelvin_color_format_info_map[state.color_format];

    const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx);
    size_t texture_palette_data_size;
    const hwaddr texture_palette_vram_offset =
        pgraph_get_texture_palette_phys_addr_length(pg, texture_idx,
                                                    &texture_palette_data_size);

    size_t texture_length = pgraph_get_texture_length(pg, &state);

    // Cache key: shape + VRAM location/length + palette location/length +
    // host scale factor. Zeroed first so padding bytes hash consistently.
    TextureKey key;
    memset(&key, 0, sizeof(key));
    key.state = state;
    key.texture_vram_offset = texture_vram_offset;
    key.texture_length = texture_length;
    key.palette_vram_offset = texture_palette_vram_offset;
    key.palette_length = texture_palette_data_size;
    key.scale = 1;

    // Palettized textures must also hash their palette data.
    bool is_indexed = (state.color_format ==
                       NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8);

    bool possibly_dirty = false;
    // NOTE(review): possibly_dirty_checked is never set to true anywhere in
    // this function, so the `!possibly_dirty_checked` test below is always
    // true — confirm whether a check was meant to set it.
    bool possibly_dirty_checked = false;
    bool surface_to_texture = false;

    // Check active surfaces to see if this texture was a render target
    SurfaceBinding *surface = pgraph_vk_surface_get(d, texture_vram_offset);
    if (surface && state.levels == 1) {
        surface_to_texture =
            check_surface_to_texture_compatiblity(surface, &state);

        if (surface_to_texture && surface->upload_pending) {
            pgraph_vk_upload_surface_data(d, surface, false);
        }
    }

    if (!surface_to_texture) {
        // FIXME: Restructure to support rendering surfaces to cubemap faces

        // Writeback any surfaces which this texture may index
        //
        // NOTE(review): tex_vram_end/surf_vram_end are inclusive end
        // addresses but are compared with >=, so ranges overlapping by
        // exactly one byte are treated as non-overlapping — confirm intended.
        hwaddr tex_vram_end = texture_vram_offset + texture_length - 1;
        QTAILQ_FOREACH(surface, &r->surfaces, entry) {
            hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
            bool overlapping = !(surface->vram_addr >= tex_vram_end
                                 || texture_vram_offset >= surf_vram_end);
            if (overlapping) {
                pgraph_vk_surface_download_if_dirty(d, surface);
            }
        }
    }

    // Surface-sourced textures are created at host (scaled) resolution.
    if (surface_to_texture && pg->surface_scale_factor > 1) {
        key.scale = pg->surface_scale_factor;
    }

    uint64_t key_hash = fast_hash((void*)&key, sizeof(key));
    LruNode *node = lru_lookup(&r->texture_cache, key_hash, &key);
    TextureBinding *snode = container_of(node, TextureBinding, node);
    // A populated node has a live image; freshly-initialized nodes are
    // VK_NULL_HANDLE (see texture_cache_entry_init).
    bool binding_found = snode->image != VK_NULL_HANDLE;

    if (binding_found) {
        NV2A_VK_DPRINTF("Cache hit");
        r->texture_bindings[texture_idx] = snode;
        possibly_dirty |= snode->possibly_dirty;
    } else {
        possibly_dirty = true;
    }

    if (!surface_to_texture && !possibly_dirty_checked) {
        possibly_dirty |= check_texture_possibly_dirty(
            d, texture_vram_offset, texture_length, texture_palette_vram_offset,
            texture_palette_data_size);
    }

    // Calculate hash of texture data, if necessary
    void *texture_data = (char*)d->vram_ptr + texture_vram_offset;
    void *palette_data = (char*)d->vram_ptr + texture_palette_vram_offset;

    uint64_t content_hash = 0;
    if (!surface_to_texture && possibly_dirty) {
        content_hash = fast_hash(texture_data, texture_length);
        if (is_indexed) {
            content_hash ^= fast_hash(palette_data, texture_palette_data_size);
        }
    }

    if (binding_found) {
        // Cache hit: refresh the existing image only when its source changed.
        if (surface_to_texture) {
            // FIXME: Add draw time tracking
            if (surface->draw_time != snode->draw_time) {
                copy_surface_to_texture(pg, surface, snode);
            }
        } else {
            if (possibly_dirty && content_hash != snode->hash) {
                upload_texture_image(pg, texture_idx, snode);
                snode->hash = content_hash;
            }
        }

        NV2A_VK_DGROUP_END();
        return;
    }

    NV2A_VK_DPRINTF("Cache miss");

    // Cache miss: populate the (freshly evicted or new) node.
    memcpy(&snode->key, &key, sizeof(key));
    snode->current_layout = VK_IMAGE_LAYOUT_UNDEFINED;
    snode->possibly_dirty = false;
    snode->hash = content_hash;

    VkColorFormatInfo vkf = kelvin_color_format_vk_map[state.color_format];
    assert(vkf.vk_format != 0);
    assert(0 < state.dimensionality);
    assert(state.dimensionality < ARRAY_SIZE(dimensionality_to_vk_image_type));
    assert(state.dimensionality <
           ARRAY_SIZE(dimensionality_to_vk_image_view_type));

    VkImageCreateInfo image_create_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
        .imageType = dimensionality_to_vk_image_type[state.dimensionality],
        .extent.width = state.width, // FIXME: Use adjusted size?
        .extent.height = state.height,
        .extent.depth = state.depth,
        // Linear formats never carry mipmaps.
        .mipLevels = f_basic.linear ? 1 : state.levels,
        .arrayLayers = state.cubemap ? 6 : 1,
        .format = vkf.vk_format,
        .tiling = VK_IMAGE_TILING_OPTIMAL,
        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .flags = (state.cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0),
    };

    if (surface_to_texture) {
        pgraph_apply_scaling_factor(pg, &image_create_info.extent.width,
                                    &image_create_info.extent.height);
    }

    VmaAllocationCreateInfo alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
    };

    VK_CHECK(vmaCreateImage(r->allocator, &image_create_info,
                            &alloc_create_info, &snode->image,
                            &snode->allocation, NULL));

    VkImageViewCreateInfo image_view_create_info = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .image = snode->image,
        .viewType = state.cubemap ?
                        VK_IMAGE_VIEW_TYPE_CUBE :
                        dimensionality_to_vk_image_view_type[state.dimensionality],
        .format = vkf.vk_format,
        .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
        .subresourceRange.baseMipLevel = 0,
        .subresourceRange.levelCount = image_create_info.mipLevels,
        .subresourceRange.baseArrayLayer = 0,
        .subresourceRange.layerCount = image_create_info.arrayLayers,
        .components = vkf.component_map,
    };

    VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
                               &snode->image_view));


    // Border color: use the exact guest ARGB value when the custom border
    // color extension is available, else approximate with a stock value.
    void *sampler_next_struct = NULL;

    VkSamplerCustomBorderColorCreateInfoEXT custom_border_color_create_info;
    VkBorderColor vk_border_color;
    uint32_t border_color_pack32 =
        pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4);

    if (r->custom_border_color_extension_enabled) {
        float border_color_rgba[4];
        pgraph_argb_pack32_to_rgba_float(border_color_pack32, border_color_rgba);

        custom_border_color_create_info =
            (VkSamplerCustomBorderColorCreateInfoEXT){
                .sType =
                    VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
                .customBorderColor.float32 = { border_color_rgba[0],
                                               border_color_rgba[1],
                                               border_color_rgba[2],
                                               border_color_rgba[3] },
                .format = image_view_create_info.format,
                .pNext = sampler_next_struct
            };

        vk_border_color = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT;
        sampler_next_struct = &custom_border_color_create_info;
    } else {
        // FIXME: Handle custom color in shader
        if (border_color_pack32 == 0x00000000) {
            vk_border_color = VK_BORDER_COLOR_INT_TRANSPARENT_BLACK;
        } else if (border_color_pack32 == 0xff000000) {
            vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_BLACK;
        } else {
            vk_border_color = VK_BORDER_COLOR_INT_OPAQUE_WHITE;
        }
    }

    uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4);
    if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED)
        NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED");
    if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED)
        NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED");
    if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED)
        NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED");
    if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED)
        NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED");

    unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG);
    assert(mag_filter < ARRAY_SIZE(pgraph_texture_mag_filter_vk_map));

    unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
    assert(min_filter < ARRAY_SIZE(pgraph_texture_min_filter_vk_map));

    // Nearest mip selection for linear formats, single-level images, or the
    // guest's explicit nearest-LOD filter modes.
    bool mipmap_nearest =
        f_basic.linear || image_create_info.mipLevels == 1 ||
        min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD ||
        min_filter == NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD;

    uint32_t address =
        pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + texture_idx * 4);

    VkSamplerCreateInfo sampler_create_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter = VK_FILTER_LINEAR, // FIXME
        .minFilter = VK_FILTER_LINEAR, // FIXME
        .addressModeU = lookup_texture_address_mode(
            GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU)),
        .addressModeV = lookup_texture_address_mode(
            GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV)),
        .addressModeW = lookup_texture_address_mode(
            GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP)),
        .anisotropyEnable = VK_FALSE,
        // .anisotropyEnable = VK_TRUE,
        // .maxAnisotropy = properties.limits.maxSamplerAnisotropy,
        .borderColor = vk_border_color,
        // Linear (non-swizzled) guest textures use unnormalized coordinates.
        .unnormalizedCoordinates = f_basic.linear ? VK_TRUE : VK_FALSE,
        .compareEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_ALWAYS,
        .mipmapMode = mipmap_nearest ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                       VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .minLod = 0.0,
        .maxLod = f_basic.linear ? 0.0 : image_create_info.mipLevels,
        .mipLodBias = 0.0,
        .pNext = sampler_next_struct,
    };

    VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL,
                             &snode->sampler));

    set_texture_label(pg, snode);

    r->texture_bindings[texture_idx] = snode;

    // Populate the image contents from the surface or from guest memory.
    if (surface_to_texture) {
        copy_surface_to_texture(pg, surface, snode);
    } else {
        upload_texture_image(pg, texture_idx, snode);
        snode->draw_time = 0;
    }

    NV2A_VK_DGROUP_END();
}
+
+static bool check_textures_dirty(PGRAPHState *pg)
+{
+ for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
+ if (pg->texture_dirty[i]) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static void update_timestamps(PGRAPHVkState *r)
+{
+ for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) {
+ if (r->texture_bindings[i]) {
+ r->texture_bindings[i]->submit_time = r->submit_count;
+ }
+ }
+}
+
/*
 * Bind host textures for all NV2A texture stages, creating or reusing cached
 * bindings as needed. Sets r->texture_bindings_changed when any binding was
 * (re)created.
 */
void pgraph_vk_bind_textures(NV2AState *d)
{
    NV2A_VK_DGROUP_BEGIN("%s", __func__);

    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    // FIXME: Check for modifications on bind fastpath (CPU hook)
    // FIXME: Mark textures that are sourced from surfaces so we can track them

    r->texture_bindings_changed = false;

    // Fast path: nothing dirty, keep the current bindings as-is.
    if (!check_textures_dirty(pg)) {
        NV2A_VK_DPRINTF("Not dirty");
        NV2A_VK_DGROUP_END();
        update_timestamps(r);
        return;
    }

    for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
        // Disabled stages sample from the dummy (all-white) texture.
        if (!pgraph_is_texture_enabled(pg, i)) {
            r->texture_bindings[i] = &r->dummy_texture;
            continue;
        }
        if (!pg->texture_dirty[i]) { // FIXME: Fails to check memory
            continue;
        }

        create_texture(pg, i);

        pg->texture_dirty[i] = false; // FIXME: Move to renderer?
    }

    r->texture_bindings_changed = true;
    update_timestamps(r);
    NV2A_VK_DGROUP_END();
}
+
+static void texture_cache_entry_init(Lru *lru, LruNode *node, void *state)
+{
+ TextureBinding *snode = container_of(node, TextureBinding, node);
+
+ snode->image = VK_NULL_HANDLE;
+ snode->allocation = VK_NULL_HANDLE;
+ snode->image_view = VK_NULL_HANDLE;
+ snode->sampler = VK_NULL_HANDLE;
+}
+
+static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode)
+{
+ vkDestroySampler(r->device, snode->sampler, NULL);
+ snode->sampler = VK_NULL_HANDLE;
+
+ vkDestroyImageView(r->device, snode->image_view, NULL);
+ snode->image_view = VK_NULL_HANDLE;
+
+ vmaDestroyImage(r->allocator, snode->image, snode->allocation);
+ snode->image = VK_NULL_HANDLE;
+ snode->allocation = VK_NULL_HANDLE;
+}
+
+static bool texture_cache_entry_pre_evict(Lru *lru, LruNode *node)
+{
+ PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache);
+ TextureBinding *snode = container_of(node, TextureBinding, node);
+
+ // FIXME: Simplify. We don't really need to check bindings
+
+
+ // Currently bound
+ for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) {
+ if (r->texture_bindings[i] == snode) {
+ return false;
+ }
+ }
+
+ // Used in command buffer
+ if (r->in_command_buffer && snode->submit_time == r->submit_count) {
+ return false;
+ }
+
+ return true;
+}
+
+static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
+{
+ PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache);
+ TextureBinding *snode = container_of(node, TextureBinding, node);
+ texture_cache_release_node_resources(r, snode);
+}
+
+static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
+{
+ TextureBinding *snode = container_of(node, TextureBinding, node);
+ return memcmp(&snode->key, key, sizeof(TextureKey));
+}
+
+static void texture_cache_init(PGRAPHVkState *r)
+{
+ const size_t texture_cache_size = 1024;
+ lru_init(&r->texture_cache);
+ r->texture_cache_entries = g_malloc_n(texture_cache_size, sizeof(TextureBinding));
+ assert(r->texture_cache_entries != NULL);
+ for (int i = 0; i < texture_cache_size; i++) {
+ lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node);
+ }
+ r->texture_cache.init_node = texture_cache_entry_init;
+ r->texture_cache.compare_nodes = texture_cache_entry_compare;
+ r->texture_cache.pre_node_evict = texture_cache_entry_pre_evict;
+ r->texture_cache.post_node_evict = texture_cache_entry_post_evict;
+}
+
+static void texture_cache_finalize(PGRAPHVkState *r)
+{
+ lru_flush(&r->texture_cache);
+ g_free(r->texture_cache_entries);
+ r->texture_cache_entries = NULL;
+}
+
+void pgraph_vk_trim_texture_cache(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ // FIXME: Allow specifying some amount to trim by
+
+ int num_to_evict = r->texture_cache.num_used / 4;
+ int num_evicted = 0;
+
+ while (num_to_evict-- && lru_try_evict_one(&r->texture_cache)) {
+ num_evicted += 1;
+ }
+
+ NV2A_VK_DPRINTF("Evicted %d textures, %d remain", num_evicted, r->texture_cache.num_used);
+}
+
+void pgraph_vk_init_textures(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ texture_cache_init(r);
+ create_dummy_texture(pg);
+}
+
+void pgraph_vk_finalize_textures(PGRAPHState *pg)
+{
+ PGRAPHVkState *r = pg->vk_renderer_state;
+
+ destroy_dummy_texture(r);
+ texture_cache_finalize(r);
+
+ for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
+ r->texture_bindings[i] = NULL;
+ }
+}
diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c
new file mode 100644
index 0000000000..6625520c65
--- /dev/null
+++ b/hw/xbox/nv2a/pgraph/vk/vertex.c
@@ -0,0 +1,312 @@
+/*
+ * Geforce NV2A PGRAPH Vulkan Renderer
+ *
+ * Copyright (c) 2024 Matt Borgerson
+ *
+ * Based on GL implementation:
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018-2024 Matt Borgerson
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+
+#include "renderer.h"
+
+/*
+ * Stage `size` bytes of index data in the index staging buffer and return
+ * the value reported by pgraph_vk_append_to_buffer() (presumably the
+ * buffer offset of the appended data — confirm against that helper).
+ * `&data`/`&size` form single-element arrays for the count=1 append.
+ */
+VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data,
+                                           VkDeviceSize size)
+{
+    nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
+    return pgraph_vk_append_to_buffer(pg, BUFFER_INDEX_STAGING, &data, &size, 1,
+                                      1);
+}
+
+/*
+ * Stage `count` spans of inline vertex data (data[i], sizes[i]) in the
+ * inline-vertex staging buffer; returns the append helper's result (the
+ * destination offset, presumably — confirm against that helper).
+ */
+VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data,
+                                                   VkDeviceSize *sizes,
+                                                   size_t count)
+{
+    nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
+    return pgraph_vk_append_to_buffer(pg, BUFFER_VERTEX_INLINE_STAGING, data,
+                                      sizes, count, 1);
+}
+
+/*
+ * Copy guest vertex RAM into the mapped vertex RAM buffer, tracking the
+ * uploaded region at 4 KiB-page granularity in `uploaded_bitmap`. If any
+ * page in the target range was already uploaded for the in-flight draw
+ * list, the draw is finished first so the overwrite cannot corrupt it.
+ */
+void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset,
+                                        void *data, VkDeviceSize size)
+{
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    // NOTE(review): size / 4096 truncates — a copy that is not a whole
+    // multiple of the page size marks fewer pages than it touches. Also,
+    // find_next_bit()'s search limit here is `nbits` (the page count of
+    // this copy), not `offset_bit + nbits`; for offsets beyond the first
+    // `nbits` pages the dirty check never fires — confirm this is intended.
+    size_t offset_bit = offset / 4096;
+    size_t nbits = size / 4096;
+    if (find_next_bit(r->uploaded_bitmap, nbits, offset_bit) < nbits) {
+        // Vertex data changed while building the draw list. Finish drawing
+        // before updating RAM buffer.
+        pgraph_vk_finish(pg, VK_FINISH_REASON_VERTEX_BUFFER_DIRTY);
+    }
+
+    nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1);
+    memcpy(r->storage_buffers[BUFFER_VERTEX_RAM].mapped + offset, data, size);
+
+    bitmap_set(r->uploaded_bitmap, offset_bit, nbits);
+}
+
+/*
+ * Queue a guest-RAM range for synchronization into the vertex RAM buffer.
+ *
+ * NOTE(review): despite the name, nothing is copied here — this only
+ * appends a MemorySyncRequirement to a fixed-size array (bounded by the
+ * assert) for later processing.
+ */
+static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    assert(r->num_vertex_ram_buffer_syncs <
+           ARRAY_SIZE(r->vertex_ram_buffer_syncs));
+    r->vertex_ram_buffer_syncs[r->num_vertex_ram_buffer_syncs++] =
+        (MemorySyncRequirement){ .addr = addr, .size = size };
+}
+
+/* Component-count (1-4) to VkFormat lookup tables, one per NV2A vertex
+ * data array type. Indexed with attr->count - 1. */
+
+/* TYPE_F: 32-bit floats. */
+static const VkFormat float_to_count[] = {
+    VK_FORMAT_R32_SFLOAT,
+    VK_FORMAT_R32G32_SFLOAT,
+    VK_FORMAT_R32G32B32_SFLOAT,
+    VK_FORMAT_R32G32B32A32_SFLOAT,
+};
+
+/* TYPE_UB_D3D / TYPE_UB_OGL: normalized unsigned bytes. */
+static const VkFormat ub_to_count[] = {
+    VK_FORMAT_R8_UNORM,
+    VK_FORMAT_R8G8_UNORM,
+    VK_FORMAT_R8G8B8_UNORM,
+    VK_FORMAT_R8G8B8A8_UNORM,
+};
+
+/* TYPE_S1: normalized signed 16-bit. */
+static const VkFormat s1_to_count[] = {
+    VK_FORMAT_R16_SNORM,
+    VK_FORMAT_R16G16_SNORM,
+    VK_FORMAT_R16G16B16_SNORM,
+    VK_FORMAT_R16G16B16A16_SNORM,
+};
+
+/* TYPE_S32K: scaled (non-normalized) signed 16-bit. */
+static const VkFormat s32k_to_count[] = {
+    VK_FORMAT_R16_SSCALED,
+    VK_FORMAT_R16G16_SSCALED,
+    VK_FORMAT_R16G16B16_SSCALED,
+    VK_FORMAT_R16G16B16A16_SSCALED,
+};
+
+/* Human-readable names for NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_*,
+ * used only by debug logging below. */
+static char const * const vertex_data_array_format_to_str[] = {
+    [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D] = "UB_D3D",
+    [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL] = "UB_OGL",
+    [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1] = "S1",
+    [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F] = "F",
+    [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K] = "S32K",
+    [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP] = "CMP",
+};
+
+/*
+ * Translate the NV2A vertex attribute state into Vulkan vertex input
+ * binding/attribute descriptions for elements [min_element, max_element].
+ *
+ * For each of the 16 attributes: disabled (count == 0) or zero-stride
+ * attributes are demoted to "uniform" attributes whose constant value is
+ * captured in attr->inline_value; all others get one Vulkan binding plus
+ * one attribute description, with formats chosen from the lookup tables
+ * above. Non-inline attributes additionally schedule a guest-RAM sync via
+ * update_memory_buffer(). Also rebuilds pg->compressed_attrs /
+ * uniform_attrs / swizzle_attrs bitmasks from scratch.
+ */
+void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
+                                      unsigned int max_element,
+                                      bool inline_data,
+                                      unsigned int inline_stride,
+                                      unsigned int provoking_element)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    unsigned int num_elements = max_element - min_element + 1;
+
+    if (inline_data) {
+        NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
+                             __func__, num_elements, inline_stride);
+    } else {
+        NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
+    }
+
+    // Rebuild all attribute classification masks from scratch.
+    pg->compressed_attrs = 0;
+    pg->uniform_attrs = 0;
+    pg->swizzle_attrs = 0;
+
+    r->num_active_vertex_attribute_descriptions = 0;
+    r->num_active_vertex_binding_descriptions = 0;
+
+    for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        VertexAttribute *attr = &pg->vertex_attributes[i];
+        NV2A_VK_DGROUP_BEGIN("[attr %02d] format=%s, count=%d, stride=%d", i,
+                             vertex_data_array_format_to_str[attr->format],
+                             attr->count, attr->stride);
+        // -1 marks "no Vulkan binding" for this attribute until proven
+        // otherwise below.
+        r->vertex_attribute_to_description_location[i] = -1;
+        if (!attr->count) {
+            // Disabled attribute: shader reads the constant inline value.
+            pg->uniform_attrs |= 1 << i;
+            NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}",
+                            attr->inline_value[0], attr->inline_value[1],
+                            attr->inline_value[2], attr->inline_value[3]);
+            NV2A_VK_DGROUP_END();
+            continue;
+        }
+
+        VkFormat vk_format;
+        bool needs_conversion = false;
+        bool d3d_swizzle = false;
+
+        // Select the VkFormat for this attribute's type/count pair.
+        switch (attr->format) {
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
+            assert(attr->count == 4);
+            d3d_swizzle = true;
+            /* fallthru */
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
+            assert(attr->count <= ARRAY_SIZE(ub_to_count));
+            vk_format = ub_to_count[attr->count - 1];
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
+            assert(attr->count <= ARRAY_SIZE(s1_to_count));
+            vk_format = s1_to_count[attr->count - 1];
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
+            assert(attr->count <= ARRAY_SIZE(float_to_count));
+            vk_format = float_to_count[attr->count - 1];
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
+            assert(attr->count <= ARRAY_SIZE(s32k_to_count));
+            vk_format = s32k_to_count[attr->count - 1];
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
+            // Packed format is fed to the shader as a raw int and unpacked
+            // there (see compressed_attrs below).
+            vk_format =
+                VK_FORMAT_R32_SINT; // VK_FORMAT_B10G11R11_UFLOAT_PACK32 ??
+            /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
+            assert(attr->count == 1);
+            needs_conversion = true;
+            break;
+        default:
+            fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
+            assert(false);
+            break;
+        }
+
+        nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND);
+        hwaddr attrib_data_addr;
+        size_t stride;
+
+        if (needs_conversion) {
+            pg->compressed_attrs |= (1 << i);
+        }
+        if (d3d_swizzle) {
+            pg->swizzle_attrs |= (1 << i);
+        }
+
+        hwaddr start = 0;
+        if (inline_data) {
+            // Inline arrays use offsets into the inline buffer directly.
+            attrib_data_addr = attr->inline_array_offset;
+            stride = inline_stride;
+        } else {
+            // DMA-sourced data: resolve the attribute's VRAM offset and
+            // schedule the touched range for synchronization.
+            hwaddr dma_len;
+            uint8_t *attr_data = (uint8_t *)nv_dma_map(
+                d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a,
+                &dma_len);
+            assert(attr->offset < dma_len);
+            attrib_data_addr = attr_data + attr->offset - d->vram_ptr;
+            stride = attr->stride;
+            start = attrib_data_addr + min_element * stride;
+            update_memory_buffer(d, start, num_elements * stride);
+        }
+
+        // Capture the provoking vertex's value as the attribute's inline
+        // value (used for flat shading / zero-stride attributes).
+        uint32_t provoking_element_index = provoking_element - min_element;
+        size_t element_size = attr->size * attr->count;
+        assert(element_size <= sizeof(attr->inline_value));
+        const uint8_t *last_entry;
+
+        if (inline_data) {
+            last_entry =
+                (uint8_t *)pg->inline_array + attr->inline_array_offset;
+        } else {
+            last_entry = d->vram_ptr + start;
+        }
+        if (!stride) {
+            // Stride of 0 indicates that only the first element should be
+            // used.
+            pg->uniform_attrs |= 1 << i;
+            pgraph_update_inline_value(attr, last_entry);
+            NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}",
+                            attr->inline_value[0], attr->inline_value[1],
+                            attr->inline_value[2], attr->inline_value[3]);
+            NV2A_VK_DGROUP_END();
+            continue;
+        }
+
+        NV2A_VK_DPRINTF("offset = %08" HWADDR_PRIx, attrib_data_addr);
+        last_entry += stride * provoking_element_index;
+        pgraph_update_inline_value(attr, last_entry);
+
+        // Allocate the next binding slot; binding index == slot index,
+        // one binding per active attribute.
+        r->vertex_attribute_to_description_location[i] =
+            r->num_active_vertex_binding_descriptions;
+
+        r->vertex_binding_descriptions
+            [r->num_active_vertex_binding_descriptions++] =
+            (VkVertexInputBindingDescription){
+                .binding = r->vertex_attribute_to_description_location[i],
+                .stride = stride,
+                .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+            };
+
+        r->vertex_attribute_descriptions
+            [r->num_active_vertex_attribute_descriptions++] =
+            (VkVertexInputAttributeDescription){
+                .binding = r->vertex_attribute_to_description_location[i],
+                .location = i,
+                .format = vk_format,
+            };
+
+        r->vertex_attribute_offsets[i] = attrib_data_addr;
+
+        NV2A_VK_DGROUP_END();
+    }
+
+    NV2A_VK_DGROUP_END();
+}
+
+/*
+ * Build Vulkan vertex input descriptions for the inline-buffer draw path.
+ *
+ * Each attribute with a populated inline buffer gets a binding with a
+ * fixed vec4 (4 floats) stride/format, and its inline_value is refreshed
+ * from the LAST element written to the inline buffer; all other attributes
+ * become uniform attributes. compressed_attrs and swizzle_attrs are
+ * cleared and never set here — inline buffers always hold converted vec4s.
+ */
+void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d)
+{
+    PGRAPHState *pg = &d->pgraph;
+    PGRAPHVkState *r = pg->vk_renderer_state;
+
+    pg->compressed_attrs = 0;
+    pg->uniform_attrs = 0;
+    pg->swizzle_attrs = 0;
+
+    r->num_active_vertex_attribute_descriptions = 0;
+    r->num_active_vertex_binding_descriptions = 0;
+
+    for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        VertexAttribute *attr = &pg->vertex_attributes[i];
+        if (attr->inline_buffer_populated) {
+            r->vertex_attribute_to_description_location[i] =
+                r->num_active_vertex_binding_descriptions;
+            r->vertex_binding_descriptions
+                [r->num_active_vertex_binding_descriptions++] =
+                (VkVertexInputBindingDescription){
+                    .binding =
+                        r->vertex_attribute_to_description_location[i],
+                    .stride = 4 * sizeof(float),
+                    .inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
+                };
+            r->vertex_attribute_descriptions
+                [r->num_active_vertex_attribute_descriptions++] =
+                (VkVertexInputAttributeDescription){
+                    .binding =
+                        r->vertex_attribute_to_description_location[i],
+                    .location = i,
+                    .format = VK_FORMAT_R32G32B32A32_SFLOAT,
+                };
+            // Snapshot the last-written vec4 as the attribute's inline
+            // value (provoking-vertex value for flat shading).
+            memcpy(attr->inline_value,
+                   attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
+                   sizeof(attr->inline_value));
+        } else {
+            r->vertex_attribute_to_description_location[i] = -1;
+            pg->uniform_attrs |= 1 << i;
+        }
+    }
+}
\ No newline at end of file
diff --git a/hw/xbox/nv2a/vsh.h b/hw/xbox/nv2a/pgraph/vsh.h
similarity index 92%
rename from hw/xbox/nv2a/vsh.h
rename to hw/xbox/nv2a/pgraph/vsh.h
index 18ef4bb5f2..405b6c9aa6 100644
--- a/hw/xbox/nv2a/vsh.h
+++ b/hw/xbox/nv2a/pgraph/vsh.h
@@ -21,7 +21,7 @@
#define HW_NV2A_VSH_H
#include
-#include "shaders_common.h"
+#include "qemu/mstring.h"
enum VshLight {
LIGHT_OFF,
@@ -130,11 +130,4 @@ typedef enum {
uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name);
-void vsh_translate(uint16_t version,
- const uint32_t *tokens,
- unsigned int length,
- bool z_perspective,
- MString *header, MString *body);
-
-
#endif
diff --git a/hw/xbox/nv2a/shaders.c b/hw/xbox/nv2a/shaders.c
deleted file mode 100644
index cafe326e93..0000000000
--- a/hw/xbox/nv2a/shaders.c
+++ /dev/null
@@ -1,1599 +0,0 @@
-/*
- * QEMU Geforce NV2A shader generator
- *
- * Copyright (c) 2015 espes
- * Copyright (c) 2015 Jannik Vogel
- * Copyright (c) 2020-2021 Matt Borgerson
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see .
- */
-
-#include "qemu/osdep.h"
-#include
-
-#include "shaders_common.h"
-#include "shaders.h"
-#include "nv2a_int.h"
-#include "ui/xemu-settings.h"
-#include "xemu-version.h"
-
-void mstring_append_fmt(MString *qstring, const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- mstring_append_va(qstring, fmt, ap);
- va_end(ap);
-}
-
-MString *mstring_from_fmt(const char *fmt, ...)
-{
- MString *ret = mstring_new();
- va_list ap;
- va_start(ap, fmt);
- mstring_append_va(ret, fmt, ap);
- va_end(ap);
-
- return ret;
-}
-
-void mstring_append_va(MString *qstring, const char *fmt, va_list va)
-{
- char scratch[256];
-
- va_list ap;
- va_copy(ap, va);
- const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap);
- va_end(ap);
-
- if (len == 0) {
- return;
- } else if (len < sizeof(scratch)) {
- mstring_append(qstring, scratch);
- return;
- }
-
- /* overflowed out scratch buffer, alloc and try again */
- char *buf = g_malloc(len + 1);
- va_copy(ap, va);
- vsnprintf(buf, len + 1, fmt, ap);
- va_end(ap);
-
- mstring_append(qstring, buf);
- g_free(buf);
-}
-
-GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode)
-{
- if (polygon_mode == POLY_MODE_POINT) {
- return GL_POINTS;
- }
-
- switch (primitive_mode) {
- case PRIM_TYPE_POINTS: return GL_POINTS;
- case PRIM_TYPE_LINES: return GL_LINES;
- case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP;
- case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP;
- case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES;
- case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP;
- case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN;
- case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY;
- case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY;
- case PRIM_TYPE_POLYGON:
- if (polygon_mode == POLY_MODE_LINE) {
- return GL_LINE_LOOP;
- } else if (polygon_mode == POLY_MODE_FILL) {
- return GL_TRIANGLE_FAN;
- }
-
- assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode");
- return 0;
- default:
- assert(!"Invalid primitive_mode");
- return 0;
- }
-}
-
-static MString* generate_geometry_shader(
- enum ShaderPolygonMode polygon_front_mode,
- enum ShaderPolygonMode polygon_back_mode,
- enum ShaderPrimitiveMode primitive_mode,
- GLenum *gl_primitive_mode,
- bool smooth_shading)
-{
- /* FIXME: Missing support for 2-sided-poly mode */
- assert(polygon_front_mode == polygon_back_mode);
- enum ShaderPolygonMode polygon_mode = polygon_front_mode;
-
- *gl_primitive_mode = get_gl_primitive_mode(polygon_mode, primitive_mode);
-
- /* POINT mode shouldn't require any special work */
- if (polygon_mode == POLY_MODE_POINT) {
- return NULL;
- }
-
- /* Handle LINE and FILL mode */
- const char *layout_in = NULL;
- const char *layout_out = NULL;
- const char *body = NULL;
- switch (primitive_mode) {
- case PRIM_TYPE_POINTS: return NULL;
- case PRIM_TYPE_LINES: return NULL;
- case PRIM_TYPE_LINE_LOOP: return NULL;
- case PRIM_TYPE_LINE_STRIP: return NULL;
- case PRIM_TYPE_TRIANGLES:
- if (polygon_mode == POLY_MODE_FILL) { return NULL; }
- assert(polygon_mode == POLY_MODE_LINE);
- layout_in = "layout(triangles) in;\n";
- layout_out = "layout(line_strip, max_vertices = 4) out;\n";
- body = " emit_vertex(0, 0);\n"
- " emit_vertex(1, 0);\n"
- " emit_vertex(2, 0);\n"
- " emit_vertex(0, 0);\n"
- " EndPrimitive();\n";
- break;
- case PRIM_TYPE_TRIANGLE_STRIP:
- if (polygon_mode == POLY_MODE_FILL) { return NULL; }
- assert(polygon_mode == POLY_MODE_LINE);
- layout_in = "layout(triangles) in;\n";
- layout_out = "layout(line_strip, max_vertices = 4) out;\n";
- /* Imagine a quad made of a tristrip, the comments tell you which
- * vertex we are using */
- body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n"
- " if (gl_PrimitiveIDIn == 0) {\n"
- " emit_vertex(0, 0);\n" /* bottom right */
- " }\n"
- " emit_vertex(1, 0);\n" /* top right */
- " emit_vertex(2, 0);\n" /* bottom left */
- " emit_vertex(0, 0);\n" /* bottom right */
- " } else {\n"
- " emit_vertex(2, 0);\n" /* bottom left */
- " emit_vertex(1, 0);\n" /* top left */
- " emit_vertex(0, 0);\n" /* top right */
- " }\n"
- " EndPrimitive();\n";
- break;
- case PRIM_TYPE_TRIANGLE_FAN:
- if (polygon_mode == POLY_MODE_FILL) { return NULL; }
- assert(polygon_mode == POLY_MODE_LINE);
- layout_in = "layout(triangles) in;\n";
- layout_out = "layout(line_strip, max_vertices = 4) out;\n";
- body = " if (gl_PrimitiveIDIn == 0) {\n"
- " emit_vertex(0, 0);\n"
- " }\n"
- " emit_vertex(1, 0);\n"
- " emit_vertex(2, 0);\n"
- " emit_vertex(0, 0);\n"
- " EndPrimitive();\n";
- break;
- case PRIM_TYPE_QUADS:
- layout_in = "layout(lines_adjacency) in;\n";
- if (polygon_mode == POLY_MODE_LINE) {
- layout_out = "layout(line_strip, max_vertices = 5) out;\n";
- body = " emit_vertex(0, 3);\n"
- " emit_vertex(1, 3);\n"
- " emit_vertex(2, 3);\n"
- " emit_vertex(3, 3);\n"
- " emit_vertex(0, 3);\n"
- " EndPrimitive();\n";
- } else if (polygon_mode == POLY_MODE_FILL) {
- layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
- body = " emit_vertex(3, 3);\n"
- " emit_vertex(0, 3);\n"
- " emit_vertex(2, 3);\n"
- " emit_vertex(1, 3);\n"
- " EndPrimitive();\n";
- } else {
- assert(false);
- return NULL;
- }
- break;
- case PRIM_TYPE_QUAD_STRIP:
- layout_in = "layout(lines_adjacency) in;\n";
- if (polygon_mode == POLY_MODE_LINE) {
- layout_out = "layout(line_strip, max_vertices = 5) out;\n";
- body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
- " if (gl_PrimitiveIDIn == 0) {\n"
- " emit_vertex(0, 3);\n"
- " }\n"
- " emit_vertex(1, 3);\n"
- " emit_vertex(3, 3);\n"
- " emit_vertex(2, 3);\n"
- " emit_vertex(0, 3);\n"
- " EndPrimitive();\n";
- } else if (polygon_mode == POLY_MODE_FILL) {
- layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
- body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
- " emit_vertex(0, 3);\n"
- " emit_vertex(1, 3);\n"
- " emit_vertex(2, 3);\n"
- " emit_vertex(3, 3);\n"
- " EndPrimitive();\n";
- } else {
- assert(false);
- return NULL;
- }
- break;
- case PRIM_TYPE_POLYGON:
- if (polygon_mode == POLY_MODE_LINE) {
- return NULL;
- }
- if (polygon_mode == POLY_MODE_FILL) {
- if (smooth_shading) {
- return NULL;
- }
- layout_in = "layout(triangles) in;\n";
- layout_out = "layout(triangle_strip, max_vertices = 3) out;\n";
- body = " emit_vertex(0, 2);\n"
- " emit_vertex(1, 2);\n"
- " emit_vertex(2, 2);\n"
- " EndPrimitive();\n";
- } else {
- assert(false);
- return NULL;
- }
- break;
-
- default:
- assert(false);
- return NULL;
- }
-
- /* generate a geometry shader to support deprecated primitive types */
- assert(layout_in);
- assert(layout_out);
- assert(body);
- MString* s = mstring_from_str("#version 330\n"
- "\n");
- mstring_append(s, layout_in);
- mstring_append(s, layout_out);
- mstring_append(s, "\n");
- if (smooth_shading) {
- mstring_append(s,
- STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH
- "\n"
- STRUCT_VERTEX_DATA_OUT_SMOOTH
- "\n"
- "void emit_vertex(int index, int _unused) {\n"
- " gl_Position = gl_in[index].gl_Position;\n"
- " gl_PointSize = gl_in[index].gl_PointSize;\n"
- " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
- " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
- " vtx_inv_w = v_vtx_inv_w[index];\n"
- " vtx_inv_w_flat = v_vtx_inv_w[index];\n"
- " vtxD0 = v_vtxD0[index];\n"
- " vtxD1 = v_vtxD1[index];\n"
- " vtxB0 = v_vtxB0[index];\n"
- " vtxB1 = v_vtxB1[index];\n"
- " vtxFog = v_vtxFog[index];\n"
- " vtxT0 = v_vtxT0[index];\n"
- " vtxT1 = v_vtxT1[index];\n"
- " vtxT2 = v_vtxT2[index];\n"
- " vtxT3 = v_vtxT3[index];\n"
- " EmitVertex();\n"
- "}\n");
- } else {
- mstring_append(s,
- STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT
- "\n"
- STRUCT_VERTEX_DATA_OUT_FLAT
- "\n"
- "void emit_vertex(int index, int provoking_index) {\n"
- " gl_Position = gl_in[index].gl_Position;\n"
- " gl_PointSize = gl_in[index].gl_PointSize;\n"
- " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
- " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
- " vtx_inv_w = v_vtx_inv_w[index];\n"
- " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n"
- " vtxD0 = v_vtxD0[provoking_index];\n"
- " vtxD1 = v_vtxD1[provoking_index];\n"
- " vtxB0 = v_vtxB0[provoking_index];\n"
- " vtxB1 = v_vtxB1[provoking_index];\n"
- " vtxFog = v_vtxFog[index];\n"
- " vtxT0 = v_vtxT0[index];\n"
- " vtxT1 = v_vtxT1[index];\n"
- " vtxT2 = v_vtxT2[index];\n"
- " vtxT3 = v_vtxT3[index];\n"
- " EmitVertex();\n"
- "}\n");
- }
-
- mstring_append(s, "\n"
- "void main() {\n");
- mstring_append(s, body);
- mstring_append(s, "}\n");
-
- return s;
-}
-
-static void append_skinning_code(MString* str, bool mix,
- unsigned int count, const char* type,
- const char* output, const char* input,
- const char* matrix, const char* swizzle)
-{
- if (count == 0) {
- mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n",
- type, output, input, matrix, swizzle);
- } else {
- mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type);
- if (mix) {
- /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */
- mstring_append(str, "{\n"
- " float weight_i;\n"
- " float weight_n = 1.0;\n");
- int i;
- for (i = 0; i < count; i++) {
- if (i < (count - 1)) {
- char c = "xyzw"[i];
- mstring_append_fmt(str, " weight_i = weight.%c;\n"
- " weight_n -= weight_i;\n",
- c);
- } else {
- mstring_append(str, " weight_i = weight_n;\n");
- }
- mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n",
- output, input, matrix, i, swizzle);
- }
- mstring_append(str, "}\n");
- } else {
- /* Individual weights */
- int i;
- for (i = 0; i < count; i++) {
- char c = "xyzw"[i];
- mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n",
- output, input, matrix, i, swizzle, c);
- }
- }
- }
-}
-
-#define GLSL_C(idx) "c[" stringify(idx) "]"
-#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]"
-
-#define GLSL_C_MAT4(idx) \
- "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \
- GLSL_C(idx+2) ", " GLSL_C(idx+3) ")"
-
-#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n"
-
-static void generate_fixed_function(const ShaderState *state,
- MString *header, MString *body)
-{
- int i, j;
-
- /* generate vertex shader mimicking fixed function */
- mstring_append(header,
-"#define position v0\n"
-"#define weight v1\n"
-"#define normal v2.xyz\n"
-"#define diffuse v3\n"
-"#define specular v4\n"
-"#define fogCoord v5.x\n"
-"#define pointSize v6\n"
-"#define backDiffuse v7\n"
-"#define backSpecular v8\n"
-"#define texture0 v9\n"
-"#define texture1 v10\n"
-"#define texture2 v11\n"
-"#define texture3 v12\n"
-"#define reserved1 v13\n"
-"#define reserved2 v14\n"
-"#define reserved3 v15\n"
-"\n"
-"uniform vec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n"
-"uniform vec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n"
-"uniform vec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n"
-"\n"
-GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0))
-GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0))
-"\n"
-GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0))
-GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1))
-GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2))
-GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3))
-"\n"
-GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0))
-GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1))
-GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2))
-GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3))
-"\n"
-GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0))
-GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1))
-GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2))
-GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3))
-"\n"
-GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0))
-GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1))
-GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2))
-GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3))
-"\n"
-GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0))
-GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1))
-GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2))
-GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3))
-"\n"
-GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0))
-GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1))
-GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2))
-GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3))
-"\n"
-GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP))
-"\n"
-"#define lightAmbientColor(i) "
- "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n"
-"#define lightDiffuseColor(i) "
- "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n"
-"#define lightSpecularColor(i) "
- "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n"
-"\n"
-"#define lightSpotFalloff(i) "
- "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n"
-"#define lightSpotDirection(i) "
- "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n"
-"\n"
-"#define lightLocalRange(i) "
- "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n"
-"\n"
-GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
-GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz")
-"\n"
-"uniform mat4 invViewport;\n"
-"\n");
-
- /* Skinning */
- unsigned int count;
- bool mix;
- switch (state->skinning) {
- case SKINNING_OFF:
- mix = false; count = 0; break;
- case SKINNING_1WEIGHTS:
- mix = true; count = 2; break;
- case SKINNING_2WEIGHTS2MATRICES:
- mix = false; count = 2; break;
- case SKINNING_2WEIGHTS:
- mix = true; count = 3; break;
- case SKINNING_3WEIGHTS3MATRICES:
- mix = false; count = 3; break;
- case SKINNING_3WEIGHTS:
- mix = true; count = 4; break;
- case SKINNING_4WEIGHTS4MATRICES:
- mix = false; count = 4; break;
- default:
- assert(false);
- break;
- }
- mstring_append_fmt(body, "/* Skinning mode %d */\n",
- state->skinning);
-
- append_skinning_code(body, mix, count, "vec4",
- "tPosition", "position",
- "modelViewMat", "xyzw");
- append_skinning_code(body, mix, count, "vec3",
- "tNormal", "vec4(normal, 0.0)",
- "invModelViewMat", "xyz");
-
- /* Normalization */
- if (state->normalization) {
- mstring_append(body, "tNormal = normalize(tNormal);\n");
- }
-
- /* Texgen */
- for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
- mstring_append_fmt(body, "/* Texgen for stage %d */\n",
- i);
- /* Set each component individually */
- /* FIXME: could be nicer if some channels share the same texgen */
- for (j = 0; j < 4; j++) {
- /* TODO: TexGen View Model missing! */
- char c = "xyzw"[j];
- char cSuffix = "STRQ"[j];
- switch (state->texgen[i][j]) {
- case TEXGEN_DISABLE:
- mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n",
- i, c, i, c);
- break;
- case TEXGEN_EYE_LINEAR:
- mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n",
- i, c, cSuffix, i);
- break;
- case TEXGEN_OBJECT_LINEAR:
- mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n",
- i, c, cSuffix, i);
- break;
- case TEXGEN_SPHERE_MAP:
- assert(j < 2); /* Channels S,T only! */
- mstring_append(body, "{\n");
- /* FIXME: u, r and m only have to be calculated once */
- mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
- //FIXME: tNormal before or after normalization? Always normalize?
- mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
-
- /* FIXME: This would consume 1 division fewer and *might* be
- * faster than length:
- * // [z=1/(2*x) => z=1/x*0.5]
- * vec3 ro = r + vec3(0.0, 0.0, 1.0);
- * float m = inversesqrt(dot(ro,ro))*0.5;
- */
-
- mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n");
- mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n",
- i, c, c);
- mstring_append(body, "}\n");
- break;
- case TEXGEN_REFLECTION_MAP:
- assert(j < 3); /* Channels S,T,R only! */
- mstring_append(body, "{\n");
- /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */
- mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
- mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
- mstring_append_fmt(body, " oT%d.%c = r.%c;\n",
- i, c, c);
- mstring_append(body, "}\n");
- break;
- case TEXGEN_NORMAL_MAP:
- assert(j < 3); /* Channels S,T,R only! */
- mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n",
- i, c, c);
- break;
- default:
- assert(false);
- break;
- }
- }
- }
-
- /* Apply texture matrices */
- for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
- if (state->texture_matrix_enable[i]) {
- mstring_append_fmt(body,
- "oT%d = oT%d * texMat%d;\n",
- i, i, i);
- }
- }
-
- /* Lighting */
- if (state->lighting) {
-
- //FIXME: Do 2 passes if we want 2 sided-lighting?
-
- static char alpha_source_diffuse[] = "diffuse.a";
- static char alpha_source_specular[] = "specular.a";
- static char alpha_source_material[] = "material_alpha";
- const char *alpha_source = alpha_source_diffuse;
- if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) {
- mstring_append(header, "uniform float material_alpha;\n");
- alpha_source = alpha_source_material;
- } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) {
- alpha_source = alpha_source_specular;
- }
-
- if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) {
- mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source);
- } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) {
- mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source);
- } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) {
- mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source);
- }
-
- mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n");
- if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) {
- mstring_append(body, "oD0.rgb += sceneAmbientColor;\n");
- } else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) {
- mstring_append(body, "oD0.rgb += diffuse.rgb;\n");
- } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) {
- mstring_append(body, "oD0.rgb += specular.rgb;\n");
- }
-
- mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n");
-
- for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
- if (state->light[i] == LIGHT_OFF) {
- continue;
- }
-
- /* FIXME: It seems that we only have to handle the surface colors if
- * they are not part of the material [= vertex colors].
- * If they are material the cpu will premultiply light
- * colors
- */
-
- mstring_append_fmt(body, "/* Light %d */ {\n", i);
-
- if (state->light[i] == LIGHT_LOCAL
- || state->light[i] == LIGHT_SPOT) {
-
- mstring_append_fmt(header,
- "uniform vec3 lightLocalPosition%d;\n"
- "uniform vec3 lightLocalAttenuation%d;\n",
- i, i);
- mstring_append_fmt(body,
- " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n"
- " float d = length(VP);\n"
-//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights?
- " VP = normalize(VP);\n"
- " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n"
- " + lightLocalAttenuation%d.y * d\n"
- " + lightLocalAttenuation%d.z * d * d);\n"
- " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */
- " float nDotVP = max(0.0, dot(tNormal, VP));\n"
- " float nDotHV = max(0.0, dot(tNormal, halfVector));\n",
- i, i, i, i);
-
- }
-
- switch(state->light[i]) {
- case LIGHT_INFINITE:
-
- /* lightLocalRange will be 1e+30 here */
-
- mstring_append_fmt(header,
- "uniform vec3 lightInfiniteHalfVector%d;\n"
- "uniform vec3 lightInfiniteDirection%d;\n",
- i, i);
- mstring_append_fmt(body,
- " float attenuation = 1.0;\n"
- " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n"
- " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n",
- i, i);
-
- /* FIXME: Do specular */
-
- /* FIXME: tBackDiffuse */
-
- break;
- case LIGHT_LOCAL:
- /* Everything done already */
- break;
- case LIGHT_SPOT:
- /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */
- mstring_append_fmt(body,
- " vec4 spotDir = lightSpotDirection(%d);\n"
- " float invScale = 1/length(spotDir.xyz);\n"
- " float cosHalfPhi = -invScale*spotDir.w;\n"
- " float cosHalfTheta = invScale + cosHalfPhi;\n"
- " float spotDirDotVP = dot(spotDir.xyz, VP);\n"
- " float rho = invScale*spotDirDotVP;\n"
- " if (rho > cosHalfTheta) {\n"
- " } else if (rho <= cosHalfPhi) {\n"
- " attenuation = 0.0;\n"
- " } else {\n"
- " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */
- " }\n",
- i);
- break;
- default:
- assert(false);
- break;
- }
-
- mstring_append_fmt(body,
- " float pf;\n"
- " if (nDotVP == 0.0) {\n"
- " pf = 0.0;\n"
- " } else {\n"
- " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n"
- " }\n"
- " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n"
- " vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n"
- " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n",
- i, i, i);
-
- mstring_append(body,
- " oD0.xyz += lightAmbient;\n");
-
- switch (state->diffuse_src) {
- case MATERIAL_COLOR_SRC_MATERIAL:
- mstring_append(body,
- " oD0.xyz += lightDiffuse;\n");
- break;
- case MATERIAL_COLOR_SRC_DIFFUSE:
- mstring_append(body,
- " oD0.xyz += diffuse.xyz * lightDiffuse;\n");
- break;
- case MATERIAL_COLOR_SRC_SPECULAR:
- mstring_append(body,
- " oD0.xyz += specular.xyz * lightDiffuse;\n");
- break;
- }
-
- mstring_append(body,
- " oD1.xyz += specular.xyz * lightSpecular;\n");
-
- mstring_append(body, "}\n");
- }
- } else {
- mstring_append(body, " oD0 = diffuse;\n");
- mstring_append(body, " oD1 = specular;\n");
- }
- mstring_append(body, " oB0 = backDiffuse;\n");
- mstring_append(body, " oB1 = backSpecular;\n");
-
- /* Fog */
- if (state->fog_enable) {
-
- /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */
- switch(state->foggen) {
- case FOGGEN_SPEC_ALPHA:
- /* FIXME: Do we have to clamp here? */
- mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n");
- break;
- case FOGGEN_RADIAL:
- mstring_append(body, " float fogDistance = length(tPosition.xyz);\n");
- break;
- case FOGGEN_PLANAR:
- case FOGGEN_ABS_PLANAR:
- mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n");
- if (state->foggen == FOGGEN_ABS_PLANAR) {
- mstring_append(body, " fogDistance = abs(fogDistance);\n");
- }
- break;
- case FOGGEN_FOG_X:
- mstring_append(body, " float fogDistance = fogCoord;\n");
- break;
- default:
- assert(false);
- break;
- }
-
- }
-
- /* If skinning is off the composite matrix already includes the MV matrix */
- if (state->skinning == SKINNING_OFF) {
- mstring_append(body, " tPosition = position;\n");
- }
-
- mstring_append(body,
- " oPos = invViewport * (tPosition * compositeMat);\n"
- " oPos.z = oPos.z * 2.0 - oPos.w;\n");
-
- /* FIXME: Testing */
- if (state->point_params_enable) {
- mstring_append_fmt(
- body,
- " float d_e = length(position * modelViewMat0);\n"
- " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n",
- state->point_params[0], state->point_params[1], state->point_params[2],
- state->point_params[6]);
- mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n",
- state->point_params[3], state->point_params[7],
- state->surface_scale_factor);
- } else {
- mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size,
- state->surface_scale_factor);
- }
-
- mstring_append(body,
- " if (oPos.w == 0.0 || isinf(oPos.w)) {\n"
- " vtx_inv_w = 1.0;\n"
- " } else {\n"
- " vtx_inv_w = 1.0 / oPos.w;\n"
- " }\n"
- " vtx_inv_w_flat = vtx_inv_w;\n");
-}
-
-static MString *generate_vertex_shader(const ShaderState *state,
- bool prefix_outputs)
-{
- int i;
- MString *header = mstring_from_str(
-"#version 400\n"
-"\n"
-"uniform vec4 clipRange;\n"
-"uniform vec2 surfaceSize;\n"
-"\n"
-/* All constants in 1 array declaration */
-"uniform vec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n"
-"\n"
-"uniform vec4 fogColor;\n"
-"uniform float fogParam[2];\n"
-"\n"
-
-GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG))
-GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT))
-GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT))
-GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT))
-GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT))
-
-"\n"
-"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n"
-"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n"
-"\n"
-"vec4 decompress_11_11_10(int cmp) {\n"
-" float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n"
-" float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n"
-" float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n"
-" return vec4(x, y, z, 1);\n"
-"}\n");
- if (prefix_outputs) {
- mstring_append(header, state->smooth_shading ?
- STRUCT_V_VERTEX_DATA_OUT_SMOOTH :
- STRUCT_V_VERTEX_DATA_OUT_FLAT);
- mstring_append(header,
- "#define vtx_inv_w v_vtx_inv_w\n"
- "#define vtx_inv_w_flat v_vtx_inv_w_flat\n"
- "#define vtxD0 v_vtxD0\n"
- "#define vtxD1 v_vtxD1\n"
- "#define vtxB0 v_vtxB0\n"
- "#define vtxB1 v_vtxB1\n"
- "#define vtxFog v_vtxFog\n"
- "#define vtxT0 v_vtxT0\n"
- "#define vtxT1 v_vtxT1\n"
- "#define vtxT2 v_vtxT2\n"
- "#define vtxT3 v_vtxT3\n"
- );
- } else {
- mstring_append(header, state->smooth_shading ?
- STRUCT_VERTEX_DATA_OUT_SMOOTH :
- STRUCT_VERTEX_DATA_OUT_FLAT);
- }
- mstring_append(header, "\n");
- for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
- if (state->compressed_attrs & (1 << i)) {
- mstring_append_fmt(header,
- "layout(location = %d) in int v%d_cmp;\n", i, i);
- } else {
- mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n",
- i, i);
- }
- }
- mstring_append(header, "\n");
-
- MString *body = mstring_from_str("void main() {\n");
-
- for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
- if (state->compressed_attrs & (1 << i)) {
- mstring_append_fmt(
- body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i);
- }
- }
-
- if (state->fixed_function) {
- generate_fixed_function(state, header, body);
- } else if (state->vertex_program) {
- vsh_translate(VSH_VERSION_XVS,
- (uint32_t*)state->program_data,
- state->program_length,
- state->z_perspective,
- header, body);
- } else {
- assert(false);
- }
-
-
- /* Fog */
-
- if (state->fog_enable) {
-
- if (state->vertex_program) {
- /* FIXME: Does foggen do something here? Let's do some tracking..
- *
- * "RollerCoaster Tycoon" has
- * state->vertex_program = true; state->foggen == FOGGEN_PLANAR
- * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z
- */
- mstring_append(body, " float fogDistance = oFog.x;\n");
- }
-
- /* FIXME: Do this per pixel? */
-
- switch (state->fog_mode) {
- case FOG_MODE_LINEAR:
- case FOG_MODE_LINEAR_ABS:
-
- /* f = (end - d) / (end - start)
- * fogParam[1] = -1 / (end - start)
- * fogParam[0] = 1 - end * fogParam[1];
- */
-
- mstring_append(body,
- " if (isinf(fogDistance)) {\n"
- " fogDistance = 0.0;\n"
- " }\n"
- );
- mstring_append(body, " float fogFactor = fogParam[0] + fogDistance * fogParam[1];\n");
- mstring_append(body, " fogFactor -= 1.0;\n");
- break;
- case FOG_MODE_EXP:
- mstring_append(body,
- " if (isinf(fogDistance)) {\n"
- " fogDistance = 0.0;\n"
- " }\n"
- );
- /* fallthru */
- case FOG_MODE_EXP_ABS:
-
- /* f = 1 / (e^(d * density))
- * fogParam[1] = -density / (2 * ln(256))
- * fogParam[0] = 1.5
- */
-
- mstring_append(body, " float fogFactor = fogParam[0] + exp2(fogDistance * fogParam[1] * 16.0);\n");
- mstring_append(body, " fogFactor -= 1.5;\n");
- break;
- case FOG_MODE_EXP2:
- case FOG_MODE_EXP2_ABS:
-
- /* f = 1 / (e^((d * density)^2))
- * fogParam[1] = -density / (2 * sqrt(ln(256)))
- * fogParam[0] = 1.5
- */
-
- mstring_append(body, " float fogFactor = fogParam[0] + exp2(-fogDistance * fogDistance * fogParam[1] * fogParam[1] * 32.0);\n");
- mstring_append(body, " fogFactor -= 1.5;\n");
- break;
- default:
- assert(false);
- break;
- }
- /* Calculate absolute for the modes which need it */
- switch (state->fog_mode) {
- case FOG_MODE_LINEAR_ABS:
- case FOG_MODE_EXP_ABS:
- case FOG_MODE_EXP2_ABS:
- mstring_append(body, " fogFactor = abs(fogFactor);\n");
- break;
- default:
- break;
- }
-
- mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n");
- } else {
- /* FIXME: Is the fog still calculated / passed somehow?!
- */
- mstring_append(body, " oFog.xyzw = vec4(1.0);\n");
- }
-
- /* Set outputs */
- const char *shade_model_mult = state->smooth_shading ? "vtx_inv_w" : "vtx_inv_w_flat";
- mstring_append_fmt(body, "\n"
- " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n"
- " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n"
- " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n"
- " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n"
- " vtxFog = oFog.x * vtx_inv_w;\n"
- " vtxT0 = oT0 * vtx_inv_w;\n"
- " vtxT1 = oT1 * vtx_inv_w;\n"
- " vtxT2 = oT2 * vtx_inv_w;\n"
- " vtxT3 = oT3 * vtx_inv_w;\n"
- " gl_Position = oPos;\n"
- " gl_PointSize = oPts.x;\n"
- " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near
- " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far
- "\n"
- "}\n",
- shade_model_mult,
- shade_model_mult,
- shade_model_mult,
- shade_model_mult);
-
-
- /* Return combined header + source */
- mstring_append(header, mstring_get_str(body));
- mstring_unref(body);
- return header;
-
-}
-
-static GLuint create_gl_shader(GLenum gl_shader_type,
- const char *code,
- const char *name)
-{
- GLint compiled = 0;
-
- NV2A_GL_DGROUP_BEGIN("Creating new %s", name);
-
- NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code);
-
- GLuint shader = glCreateShader(gl_shader_type);
- glShaderSource(shader, 1, &code, 0);
- glCompileShader(shader);
-
- /* Check it compiled */
- compiled = 0;
- glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
- if (!compiled) {
- GLchar* log;
- GLint log_length;
- glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
- log = g_malloc(log_length * sizeof(GLchar));
- glGetShaderInfoLog(shader, log_length, NULL, log);
- fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log);
- g_free(log);
-
- NV2A_GL_DGROUP_END();
- abort();
- }
-
- NV2A_GL_DGROUP_END();
-
- return shader;
-}
-
-void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state)
-{
- int i, j;
- char tmp[64];
-
- /* set texture samplers */
- for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
- char samplerName[16];
- snprintf(samplerName, sizeof(samplerName), "texSamp%d", i);
- GLint texSampLoc = glGetUniformLocation(binding->gl_program, samplerName);
- if (texSampLoc >= 0) {
- glUniform1i(texSampLoc, i);
- }
- }
-
- /* validate the program */
- glValidateProgram(binding->gl_program);
- GLint valid = 0;
- glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid);
- if (!valid) {
- GLchar log[1024];
- glGetProgramInfoLog(binding->gl_program, 1024, NULL, log);
- fprintf(stderr, "nv2a: shader validation failed: %s\n", log);
- abort();
- }
-
- /* lookup fragment shader uniforms */
- for (i = 0; i < 9; i++) {
- for (j = 0; j < 2; j++) {
- snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
- binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp);
- }
- }
- binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef");
- for (i = 1; i < NV2A_MAX_TEXTURES; i++) {
- snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
- binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
- binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
- binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- }
-
- for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
- snprintf(tmp, sizeof(tmp), "texScale%d", i);
- binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- }
-
- /* lookup vertex shader uniforms */
- for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) {
- snprintf(tmp, sizeof(tmp), "c[%d]", i);
- binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- }
- binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize");
- binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange");
- binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor");
- binding->fog_param_loc[0] = glGetUniformLocation(binding->gl_program, "fogParam[0]");
- binding->fog_param_loc[1] = glGetUniformLocation(binding->gl_program, "fogParam[1]");
-
- binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport");
- for (i = 0; i < NV2A_LTCTXA_COUNT; i++) {
- snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i);
- binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- }
- for (i = 0; i < NV2A_LTCTXB_COUNT; i++) {
- snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i);
- binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- }
- for (i = 0; i < NV2A_LTC1_COUNT; i++) {
- snprintf(tmp, sizeof(tmp), "ltc1[%d]", i);
- binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- }
- for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
- snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
- binding->light_infinite_half_vector_loc[i] =
- glGetUniformLocation(binding->gl_program, tmp);
- snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
- binding->light_infinite_direction_loc[i] =
- glGetUniformLocation(binding->gl_program, tmp);
-
- snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
- binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
- binding->light_local_attenuation_loc[i] =
- glGetUniformLocation(binding->gl_program, tmp);
- }
- for (i = 0; i < 8; i++) {
- snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i);
- binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
- }
-
- if (state->fixed_function) {
- binding->material_alpha_loc =
- glGetUniformLocation(binding->gl_program, "material_alpha");
- } else {
- binding->material_alpha_loc = -1;
- }
-}
-
-ShaderBinding *generate_shaders(const ShaderState *state)
-{
- char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
- if (previous_numeric_locale) {
- previous_numeric_locale = g_strdup(previous_numeric_locale);
- }
-
- /* Ensure numeric values are printed with '.' radix, no grouping */
- setlocale(LC_NUMERIC, "C");
- GLuint program = glCreateProgram();
-
- /* Create an optional geometry shader and find primitive type */
- GLenum gl_primitive_mode;
- MString* geometry_shader_code =
- generate_geometry_shader(state->polygon_front_mode,
- state->polygon_back_mode,
- state->primitive_mode,
- &gl_primitive_mode,
- state->smooth_shading);
- if (geometry_shader_code) {
- const char* geometry_shader_code_str =
- mstring_get_str(geometry_shader_code);
- GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER,
- geometry_shader_code_str,
- "geometry shader");
- glAttachShader(program, geometry_shader);
- mstring_unref(geometry_shader_code);
- }
-
- /* create the vertex shader */
- MString *vertex_shader_code =
- generate_vertex_shader(state, geometry_shader_code != NULL);
- GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER,
- mstring_get_str(vertex_shader_code),
- "vertex shader");
- glAttachShader(program, vertex_shader);
- mstring_unref(vertex_shader_code);
-
- /* generate a fragment shader from register combiners */
- MString *fragment_shader_code = psh_translate(state->psh);
- const char *fragment_shader_code_str =
- mstring_get_str(fragment_shader_code);
- GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER,
- fragment_shader_code_str,
- "fragment shader");
- glAttachShader(program, fragment_shader);
- mstring_unref(fragment_shader_code);
-
- /* link the program */
- glLinkProgram(program);
- GLint linked = 0;
- glGetProgramiv(program, GL_LINK_STATUS, &linked);
- if(!linked) {
- GLchar log[2048];
- glGetProgramInfoLog(program, 2048, NULL, log);
- fprintf(stderr, "nv2a: shader linking failed: %s\n", log);
- abort();
- }
-
- glUseProgram(program);
-
- ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding));
- ret->gl_program = program;
- ret->gl_primitive_mode = gl_primitive_mode;
-
- update_shader_constant_locations(ret, state);
-
- if (previous_numeric_locale) {
- setlocale(LC_NUMERIC, previous_numeric_locale);
- g_free(previous_numeric_locale);
- }
-
- return ret;
-}
-
-static const char *shader_gl_vendor = NULL;
-
-static void shader_create_cache_folder(void)
-{
- char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path());
- qemu_mkdir(shader_path);
- g_free(shader_path);
-}
-
-static char *shader_get_lru_cache_path(void)
-{
- return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path());
-}
-
-static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque)
-{
- FILE *lru_list_file = (FILE*) opaque;
- size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file);
- if (written != 1) {
- fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n",
- (unsigned long long) node->hash);
- }
-}
-
-void shader_write_cache_reload_list(PGRAPHState *pg)
-{
- if (!g_config.perf.cache_shaders) {
- qatomic_set(&pg->shader_cache_writeback_pending, false);
- qemu_event_set(&pg->shader_cache_writeback_complete);
- return;
- }
-
- char *shader_lru_path = shader_get_lru_cache_path();
- qemu_thread_join(&pg->shader_disk_thread);
-
- FILE *lru_list = qemu_fopen(shader_lru_path, "wb");
- g_free(shader_lru_path);
- if (!lru_list) {
- fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n");
- return;
- }
-
- lru_visit_active(&pg->shader_cache, shader_write_lru_list_entry_to_disk, lru_list);
- fclose(lru_list);
-
- lru_flush(&pg->shader_cache);
-
- qatomic_set(&pg->shader_cache_writeback_pending, false);
- qemu_event_set(&pg->shader_cache_writeback_complete);
-}
-
-bool shader_load_from_memory(ShaderLruNode *snode)
-{
- assert(glGetError() == GL_NO_ERROR);
-
- if (!snode->program) {
- return false;
- }
-
- GLuint gl_program = glCreateProgram();
- glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size);
- GLint gl_error = glGetError();
- if (gl_error != GL_NO_ERROR) {
- NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error);
- glDeleteProgram(gl_program);
- return false;
- }
-
- glValidateProgram(gl_program);
- GLint valid = 0;
- glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid);
- if (!valid) {
- GLchar log[1024];
- glGetProgramInfoLog(gl_program, 1024, NULL, log);
- NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log);
- glDeleteProgram(gl_program);
- return false;
- }
-
- glUseProgram(gl_program);
-
- ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding));
- binding->gl_program = gl_program;
- binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode,
- snode->state.primitive_mode);
- snode->binding = binding;
-
- g_free(snode->program);
- snode->program = NULL;
-
- update_shader_constant_locations(binding, &snode->state);
-
- return true;
-}
-
-static char *shader_get_bin_directory(uint64_t hash)
-{
- const char *cfg_dir = xemu_settings_get_base_path();
- uint64_t bin_mask = 0xffffUL << 48;
- char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx",
- cfg_dir, (hash & bin_mask) >> 48);
- return shader_bin_dir;
-}
-
-static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash)
-{
- uint64_t bin_mask = 0xffffUL << 48;
- return g_strdup_printf("%s/%012lx", shader_bin_dir,
- hash & (~bin_mask));
-}
-
-static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash)
-{
- char *shader_bin_dir = shader_get_bin_directory(hash);
- char *shader_path = shader_get_binary_path(shader_bin_dir, hash);
- char *cached_xemu_version = NULL;
- char *cached_gl_vendor = NULL;
- void *program_buffer = NULL;
-
- uint64_t cached_xemu_version_len;
- uint64_t gl_vendor_len;
- GLenum program_binary_format;
- ShaderState state;
- size_t shader_size;
-
- g_free(shader_bin_dir);
-
- qemu_mutex_lock(&pg->shader_cache_lock);
- if (lru_contains_hash(&pg->shader_cache, hash)) {
- qemu_mutex_unlock(&pg->shader_cache_lock);
- return;
- }
- qemu_mutex_unlock(&pg->shader_cache_lock);
-
- FILE *shader_file = qemu_fopen(shader_path, "rb");
- if (!shader_file) {
- goto error;
- }
-
- size_t nread;
- #define READ_OR_ERR(data, data_len) \
- do { \
- nread = fread(data, data_len, 1, shader_file); \
- if (nread != 1) { \
- fclose(shader_file); \
- goto error; \
- } \
- } while (0)
-
- READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len));
-
- cached_xemu_version = g_malloc(cached_xemu_version_len +1);
- READ_OR_ERR(cached_xemu_version, cached_xemu_version_len);
- if (strcmp(cached_xemu_version, xemu_version) != 0) {
- fclose(shader_file);
- goto error;
- }
-
- READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len));
-
- cached_gl_vendor = g_malloc(gl_vendor_len);
- READ_OR_ERR(cached_gl_vendor, gl_vendor_len);
- if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) {
- fclose(shader_file);
- goto error;
- }
-
- READ_OR_ERR(&program_binary_format, sizeof(program_binary_format));
- READ_OR_ERR(&state, sizeof(state));
- READ_OR_ERR(&shader_size, sizeof(shader_size));
-
- program_buffer = g_malloc(shader_size);
- READ_OR_ERR(program_buffer, shader_size);
-
- #undef READ_OR_ERR
-
- fclose(shader_file);
- g_free(shader_path);
- g_free(cached_xemu_version);
- g_free(cached_gl_vendor);
-
- qemu_mutex_lock(&pg->shader_cache_lock);
- LruNode *node = lru_lookup(&pg->shader_cache, hash, &state);
- ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
-
- /* If we happened to regenerate this shader already, then we may as well use the new one */
- if (snode->binding) {
- qemu_mutex_unlock(&pg->shader_cache_lock);
- return;
- }
-
- snode->program_format = program_binary_format;
- snode->program_size = shader_size;
- snode->program = program_buffer;
- snode->cached = true;
- qemu_mutex_unlock(&pg->shader_cache_lock);
- return;
-
-error:
- /* Delete the shader so it won't be loaded again */
- qemu_unlink(shader_path);
- g_free(shader_path);
- g_free(program_buffer);
- g_free(cached_xemu_version);
- g_free(cached_gl_vendor);
-}
-
-static void *shader_reload_lru_from_disk(void *arg)
-{
- if (!g_config.perf.cache_shaders) {
- return NULL;
- }
-
- PGRAPHState *pg = (PGRAPHState*) arg;
- char *shader_lru_path = shader_get_lru_cache_path();
-
- FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb");
- g_free(shader_lru_path);
- if (!lru_shaders_list) {
- return NULL;
- }
-
- uint64_t hash;
- while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) {
- shader_load_from_disk(pg, hash);
- }
-
- return NULL;
-}
-
-static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
-{
- ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
- memcpy(&snode->state, state, sizeof(ShaderState));
- snode->cached = false;
- snode->binding = NULL;
- snode->program = NULL;
- snode->save_thread = NULL;
-}
-
-static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
-{
- ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
-
- if (snode->save_thread) {
- qemu_thread_join(snode->save_thread);
- g_free(snode->save_thread);
- }
-
- if (snode->binding) {
- glDeleteProgram(snode->binding->gl_program);
- g_free(snode->binding);
- }
-
- if (snode->program) {
- g_free(snode->program);
- }
-
- snode->cached = false;
- snode->save_thread = NULL;
- snode->binding = NULL;
- snode->program = NULL;
- memset(&snode->state, 0, sizeof(ShaderState));
-}
-
-static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
-{
- ShaderLruNode *snode = container_of(node, ShaderLruNode, node);
- return memcmp(&snode->state, key, sizeof(ShaderState));
-}
-
-void shader_cache_init(PGRAPHState *pg)
-{
- if (!shader_gl_vendor) {
- shader_gl_vendor = (const char *) glGetString(GL_VENDOR);
- }
-
- shader_create_cache_folder();
-
- /* FIXME: Make this configurable */
- const size_t shader_cache_size = 50*1024;
- lru_init(&pg->shader_cache);
- pg->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode));
- assert(pg->shader_cache_entries != NULL);
- for (int i = 0; i < shader_cache_size; i++) {
- lru_add_free(&pg->shader_cache, &pg->shader_cache_entries[i].node);
- }
-
- pg->shader_cache.init_node = shader_cache_entry_init;
- pg->shader_cache.compare_nodes = shader_cache_entry_compare;
- pg->shader_cache.post_node_evict = shader_cache_entry_post_evict;
-
- qemu_thread_create(&pg->shader_disk_thread, "pgraph.shader_cache",
- shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE);
-}
-
-static void *shader_write_to_disk(void *arg)
-{
- ShaderLruNode *snode = (ShaderLruNode*) arg;
-
- char *shader_bin = shader_get_bin_directory(snode->node.hash);
- char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash);
-
- static uint64_t gl_vendor_len;
- if (gl_vendor_len == 0) {
- gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1);
- }
-
- static uint64_t xemu_version_len = 0;
- if (xemu_version_len == 0) {
- xemu_version_len = (uint64_t) (strlen(xemu_version) + 1);
- }
-
- qemu_mkdir(shader_bin);
- g_free(shader_bin);
-
- FILE *shader_file = qemu_fopen(shader_path, "wb");
- if (!shader_file) {
- goto error;
- }
-
- size_t written;
- #define WRITE_OR_ERR(data, data_size) \
- do { \
- written = fwrite(data, data_size, 1, shader_file); \
- if (written != 1) { \
- fclose(shader_file); \
- goto error; \
- } \
- } while (0)
-
- WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len));
- WRITE_OR_ERR(xemu_version, xemu_version_len);
-
- WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len));
- WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len);
-
- WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format));
- WRITE_OR_ERR(&snode->state, sizeof(snode->state));
-
- WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size));
- WRITE_OR_ERR(snode->program, snode->program_size);
-
- #undef WRITE_OR_ERR
-
- fclose(shader_file);
-
- g_free(shader_path);
- g_free(snode->program);
- snode->program = NULL;
-
- return NULL;
-
-error:
- fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path);
- qemu_unlink(shader_path);
- g_free(shader_path);
- g_free(snode->program);
- snode->program = NULL;
- return NULL;
-}
-
-void shader_cache_to_disk(ShaderLruNode *snode)
-{
- if (!snode->binding || snode->cached) {
- return;
- }
-
- GLint program_size;
- glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size);
-
- if (snode->program) {
- g_free(snode->program);
- snode->program = NULL;
- }
-
- /* program_size might be zero on some systems, if no binary formats are supported */
- if (program_size == 0) {
- return;
- }
-
- snode->program = g_malloc(program_size);
- GLsizei program_size_copied;
- glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied,
- &snode->program_format, snode->program);
- assert(glGetError() == GL_NO_ERROR);
-
- snode->program_size = program_size_copied;
- snode->cached = true;
-
- char name[24];
- snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash);
- snode->save_thread = g_malloc0(sizeof(QemuThread));
- qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE);
-}
diff --git a/hw/xbox/nv2a/shaders_common.h b/hw/xbox/nv2a/shaders_common.h
deleted file mode 100644
index ae2ba9f14d..0000000000
--- a/hw/xbox/nv2a/shaders_common.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * QEMU Geforce NV2A shader common definitions
- *
- * Copyright (c) 2015 espes
- * Copyright (c) 2015 Jannik Vogel
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see .
- */
-
-#ifndef HW_NV2A_SHADERS_COMMON_H
-#define HW_NV2A_SHADERS_COMMON_H
-
-#include "debug.h"
-
-#define DEF_VERTEX_DATA(qualifier, in_out, prefix, suffix) \
- "noperspective " in_out " float " prefix "vtx_inv_w" suffix ";\n" \
- "flat " in_out " float " prefix "vtx_inv_w_flat" suffix ";\n" \
- qualifier " " in_out " vec4 " prefix "vtxD0" suffix ";\n" \
- qualifier " " in_out " vec4 " prefix "vtxD1" suffix ";\n" \
- qualifier " " in_out " vec4 " prefix "vtxB0" suffix ";\n" \
- qualifier " " in_out " vec4 " prefix "vtxB1" suffix ";\n" \
- "noperspective " in_out " float " prefix "vtxFog" suffix ";\n" \
- "noperspective " in_out " vec4 " prefix "vtxT0" suffix ";\n" \
- "noperspective " in_out " vec4 " prefix "vtxT1" suffix ";\n" \
- "noperspective " in_out " vec4 " prefix "vtxT2" suffix ";\n" \
- "noperspective " in_out " vec4 " prefix "vtxT3" suffix ";\n"
-
-#define STRUCT_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "", "")
-#define STRUCT_VERTEX_DATA_IN_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "", "")
-#define STRUCT_V_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "v_", "")
-#define STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "v_", "[]")
-
-#define STRUCT_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "", "")
-#define STRUCT_VERTEX_DATA_IN_FLAT DEF_VERTEX_DATA("flat", "in", "", "")
-#define STRUCT_V_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "v_", "")
-#define STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT DEF_VERTEX_DATA("flat", "in", "v_", "[]")
-
-typedef struct {
- int ref;
- gchar *string;
-} MString;
-
-void mstring_append_fmt(MString *mstring, const char *fmt, ...);
-MString *mstring_from_fmt(const char *fmt, ...);
-void mstring_append_va(MString *mstring, const char *fmt, va_list va);
-
-static inline
-void mstring_ref(MString *mstr)
-{
- mstr->ref++;
-}
-
-static inline
-void mstring_unref(MString *mstr)
-{
- mstr->ref--;
- if (!mstr->ref) {
- g_free(mstr->string);
- g_free(mstr);
- }
-}
-
-static inline
-void mstring_append(MString *mstr, const char *str)
-{
- gchar *n = g_strconcat(mstr->string, str, NULL);
- g_free(mstr->string);
- mstr->string = n;
-}
-
-static inline
-void mstring_append_chr(MString *mstr, char chr)
-{
- mstring_append_fmt(mstr, "%c", chr);
-}
-
-static inline
-void mstring_append_int(MString *mstr, int val)
-{
- mstring_append_fmt(mstr, "%" PRId64, val);
-}
-
-static inline
-MString *mstring_new(void)
-{
- MString *mstr = g_malloc(sizeof(MString));
- mstr->ref = 1;
- mstr->string = g_strdup("");
- return mstr;
-}
-
-static inline
-MString *mstring_from_str(const char *str)
-{
- MString *mstr = g_malloc(sizeof(MString));
- mstr->ref = 1;
- mstr->string = g_strdup(str);
- return mstr;
-}
-
-static inline
-const gchar *mstring_get_str(MString *mstr)
-{
- return mstr->string;
-}
-
-static inline
-size_t mstring_get_length(MString *mstr)
-{
- return strlen(mstr->string);
-}
-
-
-#endif
diff --git a/hw/xbox/nv2a/lru.h b/include/qemu/lru.h
similarity index 87%
rename from hw/xbox/nv2a/lru.h
rename to include/qemu/lru.h
index c0dca7ec5d..b588270282 100644
--- a/hw/xbox/nv2a/lru.h
+++ b/include/qemu/lru.h
@@ -1,7 +1,7 @@
/*
* LRU object list
*
- * Copyright (c) 2021 Matt Borgerson
+ * Copyright (c) 2021-2024 Matt Borgerson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -42,6 +42,8 @@ typedef struct Lru Lru;
struct Lru {
QTAILQ_HEAD(, LruNode) global;
QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS];
+ int num_used;
+ int num_free;
/* Initialize a node. */
void (*init_node)(Lru *lru, LruNode *node, void *key);
@@ -67,6 +69,8 @@ void lru_init(Lru *lru)
lru->compare_nodes = NULL;
lru->pre_node_evict = NULL;
lru->post_node_evict = NULL;
+ lru->num_free = 0;
+ lru->num_used = 0;
}
static inline
@@ -74,6 +78,7 @@ void lru_add_free(Lru *lru, LruNode *node)
{
node->next_bin.tqe_circ.tql_prev = NULL;
QTAILQ_INSERT_TAIL(&lru->global, node, next_global);
+ lru->num_free += 1;
}
static inline
@@ -106,29 +111,51 @@ void lru_evict_node(Lru *lru, LruNode *node)
if (lru->post_node_evict) {
lru->post_node_evict(lru, node);
}
+
+ lru->num_used -= 1;
+ lru->num_free += 1;
+}
+
+static inline
+LruNode *lru_try_evict_one(Lru *lru)
+{
+ LruNode *found;
+
+ QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
+ if (lru_is_node_in_use(lru, found)
+ && (!lru->pre_node_evict || lru->pre_node_evict(lru, found))) {
+ lru_evict_node(lru, found);
+ return found;
+ }
+ }
+
+ return NULL;
}
static inline
LruNode *lru_evict_one(Lru *lru)
{
- LruNode *found;
-
- QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
- bool can_evict = true;
- if (lru_is_node_in_use(lru, found) && lru->pre_node_evict) {
- can_evict = lru->pre_node_evict(lru, found);
- }
- if (can_evict) {
- break;
- }
- }
+ LruNode *found = lru_try_evict_one(lru);
assert(found != NULL); /* No evictable node! */
- lru_evict_node(lru, found);
return found;
}
+static inline
+LruNode *lru_get_one_free(Lru *lru)
+{
+ LruNode *found;
+
+ QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
+ if (!lru_is_node_in_use(lru, found)) {
+ return found;
+ }
+ }
+
+ return lru_evict_one(lru);
+}
+
static inline
bool lru_contains_hash(Lru *lru, uint64_t hash)
{
@@ -160,12 +187,15 @@ LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key)
if (found) {
QTAILQ_REMOVE(&lru->bins[bin], found, next_bin);
} else {
- found = lru_evict_one(lru);
+ found = lru_get_one_free(lru);
found->hash = hash;
if (lru->init_node) {
lru->init_node(lru, found, key);
}
assert(found->hash == hash);
+
+ lru->num_used += 1;
+ lru->num_free -= 1;
}
QTAILQ_REMOVE(&lru->global, found, next_global);
diff --git a/include/qemu/mstring.h b/include/qemu/mstring.h
new file mode 100644
index 0000000000..567fd4cdf3
--- /dev/null
+++ b/include/qemu/mstring.h
@@ -0,0 +1,82 @@
+#ifndef MSTRING_H
+#define MSTRING_H
+
+#include "qemu/osdep.h"
+#include <stdarg.h>
+
+typedef struct {
+ int ref;
+ gchar *string;
+} MString;
+
+void mstring_append_fmt(MString *mstring, const char *fmt, ...);
+MString *mstring_from_fmt(const char *fmt, ...);
+void mstring_append_va(MString *mstring, const char *fmt, va_list va);
+
+static inline
+void mstring_ref(MString *mstr)
+{
+ mstr->ref++;
+}
+
+static inline
+void mstring_unref(MString *mstr)
+{
+ mstr->ref--;
+ if (!mstr->ref) {
+ g_free(mstr->string);
+ g_free(mstr);
+ }
+}
+
+static inline
+void mstring_append(MString *mstr, const char *str)
+{
+ gchar *n = g_strconcat(mstr->string, str, NULL);
+ g_free(mstr->string);
+ mstr->string = n;
+}
+
+static inline
+void mstring_append_chr(MString *mstr, char chr)
+{
+ mstring_append_fmt(mstr, "%c", chr);
+}
+
+static inline
+void mstring_append_int(MString *mstr, int val)
+{
+    mstring_append_fmt(mstr, "%d", val);
+}
+
+static inline
+MString *mstring_new(void)
+{
+ MString *mstr = g_malloc(sizeof(MString));
+ mstr->ref = 1;
+ mstr->string = g_strdup("");
+ return mstr;
+}
+
+static inline
+MString *mstring_from_str(const char *str)
+{
+ MString *mstr = g_malloc(sizeof(MString));
+ mstr->ref = 1;
+ mstr->string = g_strdup(str);
+ return mstr;
+}
+
+static inline
+const gchar *mstring_get_str(MString *mstr)
+{
+ return mstr->string;
+}
+
+static inline
+size_t mstring_get_length(MString *mstr)
+{
+ return strlen(mstr->string);
+}
+
+#endif
diff --git a/licenses/SPIRV-Reflect.license.txt b/licenses/SPIRV-Reflect.license.txt
new file mode 100644
index 0000000000..261eeb9e9f
--- /dev/null
+++ b/licenses/SPIRV-Reflect.license.txt
@@ -0,0 +1,201 @@
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/licenses/VulkanMemoryAllocator.license.txt b/licenses/VulkanMemoryAllocator.license.txt
new file mode 100644
index 0000000000..b9fff388f1
--- /dev/null
+++ b/licenses/VulkanMemoryAllocator.license.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/licenses/volk.license.txt b/licenses/volk.license.txt
new file mode 100644
index 0000000000..5a717f2678
--- /dev/null
+++ b/licenses/volk.license.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2018-2024 Arseny Kapoulkine
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/meson.build b/meson.build
index 8980f55a13..7c12d40fb5 100644
--- a/meson.build
+++ b/meson.build
@@ -1180,6 +1180,34 @@ if not get_option('opengl').auto() or have_system or have_vhost_user_gpu
link_args: config_host['EPOXY_LIBS'].split() + opengl_libs)
endif
+vulkan = not_found
+if targetos == 'windows'
+ vulkan = declare_dependency(
+ compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR', '-DVK_NO_PROTOTYPES'],
+ )
+ libglslang = declare_dependency(link_args: [
+ '-lglslang',
+ '-lMachineIndependent',
+ '-lGenericCodeGen',
+ '-lSPIRV',
+ '-lSPIRV-Tools',
+ '-lSPIRV-Tools-opt'
+ ])
+elif targetos == 'linux'
+ vulkan = dependency('vulkan')
+ libglslang = declare_dependency(link_args: [
+ '-lglslang',
+ '-lMachineIndependent',
+ '-lGenericCodeGen',
+ '-lSPIRV',
+ '-lSPIRV-Tools',
+ '-lSPIRV-Tools-opt'
+ ])
+endif
+
+subdir('thirdparty')
+
+
gbm = not_found
if (have_system or have_tools) and (virgl.found() or opengl.found())
gbm = dependency('gbm', method: 'pkg-config', required: false,
@@ -1931,6 +1959,7 @@ config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())
+config_host_data.set('CONFIG_VULKAN', vulkan.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
@@ -4054,6 +4083,7 @@ summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
summary_info += {'usb net redir': usbredir}
summary_info += {'OpenGL support (epoxy)': opengl}
+summary_info += {'Vulkan support': vulkan}
summary_info += {'GBM': gbm}
summary_info += {'libiscsi support': libiscsi}
summary_info += {'libnfs support': libnfs}
diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh
index 0496ebeb6c..5502c3bfd7 100755
--- a/scripts/archive-source.sh
+++ b/scripts/archive-source.sh
@@ -28,8 +28,12 @@ sub_file="${sub_tdir}/submodule.tar"
# different to the host OS.
submodules="dtc meson ui/keycodemapdb"
submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3"
-submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" # xemu extras
+
+# xemu extras
+submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig"
submodules="$submodules hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"
+submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect"
+
sub_deinit=""
function cleanup() {
diff --git a/scripts/gen-license.py b/scripts/gen-license.py
index b71d4ecd56..216f441f3c 100755
--- a/scripts/gen-license.py
+++ b/scripts/gen-license.py
@@ -228,7 +228,25 @@ Lib('fpng', 'https://github.com/richgel999/fpng',
Lib('nv2a_vsh_cpu', 'https://github.com/abaire/nv2a_vsh_cpu',
unlicense, 'https://raw.githubusercontent.com/abaire/nv2a_vsh_cpu/main/LICENSE',
ships_static=all_platforms,
- submodule=Submodule('hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu')
+ submodule=Submodule('hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu')
+ ),
+
+Lib('volk', 'https://github.com/zeux/volk',
+ mit, 'https://raw.githubusercontent.com/zeux/volk/master/LICENSE.md',
+ ships_static=all_platforms,
+ submodule=Submodule('thirdparty/volk')
+ ),
+
+Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator',
+ mit, 'https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/master/LICENSE.txt',
+ ships_static=all_platforms,
+ submodule=Submodule('thirdparty/VulkanMemoryAllocator')
+ ),
+
+Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect',
+ apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Reflect/main/LICENSE',
+ ships_static=all_platforms,
+ submodule=Submodule('thirdparty/SPIRV-Reflect')
),
#
@@ -344,6 +362,17 @@ Lib('miniz', 'https://github.com/richgel999/miniz',
ships_static={windows}, platform={windows},
version='2.1.0'
),
+
+Lib('glslang', 'https://github.com/KhronosGroup/glslang',
+ bsd_3clause, 'https://raw.githubusercontent.com/KhronosGroup/glslang/main/LICENSE.txt',
+ ships_static={windows}, platform={windows},
+ ),
+
+Lib('SPIRV-Tools', 'https://github.com/KhronosGroup/SPIRV-Tools',
+ apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Tools/main/LICENSE',
+ ships_static={windows}, platform={windows},
+ ),
+
]
def gen_license():
diff --git a/thirdparty/SPIRV-Reflect b/thirdparty/SPIRV-Reflect
new file mode 160000
index 0000000000..1d674a82d7
--- /dev/null
+++ b/thirdparty/SPIRV-Reflect
@@ -0,0 +1 @@
+Subproject commit 1d674a82d7e102ed0c02e64e036827db9e8b1a71
diff --git a/thirdparty/VulkanMemoryAllocator b/thirdparty/VulkanMemoryAllocator
new file mode 160000
index 0000000000..009ecd192c
--- /dev/null
+++ b/thirdparty/VulkanMemoryAllocator
@@ -0,0 +1 @@
+Subproject commit 009ecd192c1289c7529bff248a16cfe896254816
diff --git a/thirdparty/meson.build b/thirdparty/meson.build
new file mode 100644
index 0000000000..99ecbd2796
--- /dev/null
+++ b/thirdparty/meson.build
@@ -0,0 +1,12 @@
+if vulkan.found()
+
+libvma = static_library('vma', sources: 'vma.cc', include_directories: 'VulkanMemoryAllocator/include', dependencies: vulkan)
+vma = declare_dependency(include_directories: 'VulkanMemoryAllocator/include', link_with: libvma)
+
+libvolk = static_library('volk', sources: 'volk/volk.c', dependencies: vulkan)
+volk = declare_dependency(include_directories: 'volk', link_with: libvolk, dependencies: vulkan)
+
+libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan)
+spirv_reflect = declare_dependency(include_directories: 'SPIRV-Reflect', link_with: libspirv_reflect, dependencies: vulkan)
+
+endif
diff --git a/thirdparty/renderdoc_app.h b/thirdparty/renderdoc_app.h
index 7ee24b69ee..c01e05932e 100644
--- a/thirdparty/renderdoc_app.h
+++ b/thirdparty/renderdoc_app.h
@@ -1,7 +1,7 @@
/******************************************************************************
* The MIT License (MIT)
*
- * Copyright (c) 2019-2022 Baldur Karlsson
+ * Copyright (c) 2019-2024 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -35,7 +35,7 @@
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
#define RENDERDOC_CC __cdecl
-#elif defined(__linux__)
+#elif defined(__linux__) || defined(__FreeBSD__)
#define RENDERDOC_CC
#elif defined(__APPLE__)
#define RENDERDOC_CC
@@ -72,7 +72,8 @@ extern "C" {
// RenderDoc capture options
//
-typedef enum RENDERDOC_CaptureOption {
+typedef enum RENDERDOC_CaptureOption
+{
// Allow the application to enable vsync
//
// Default - enabled
@@ -214,6 +215,19 @@ typedef enum RENDERDOC_CaptureOption {
// necessary as directed by a RenderDoc developer.
eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12,
+ // Define a soft memory limit which some APIs may aim to keep overhead under where
+ // possible. Anything above this limit will where possible be saved directly to disk during
+ // capture.
+ // This will cause increased disk space use (which may cause a capture to fail if disk space is
+ // exhausted) as well as slower capture times.
+ //
+ // Not all memory allocations may be deferred like this so it is not a guarantee of a memory
+ // limit.
+ //
+ // Units are in MBs, suggested values would range from 200MB to 1000MB.
+ //
+ // Default - 0 Megabytes
+ eRENDERDOC_Option_SoftMemoryLimit = 13,
} RENDERDOC_CaptureOption;
// Sets an option that controls how RenderDoc behaves on capture.
@@ -233,7 +247,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_Capture
// If the option is invalid, -FLT_MAX is returned
typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt);
-typedef enum RENDERDOC_InputButton {
+typedef enum RENDERDOC_InputButton
+{
// '0' - '9' matches ASCII values
eRENDERDOC_Key_0 = 0x30,
eRENDERDOC_Key_1 = 0x31,
@@ -321,7 +336,8 @@ typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton
// If keys is NULL or num is 0, captures keys will be disabled
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num);
-typedef enum RENDERDOC_OverlayBits {
+typedef enum RENDERDOC_OverlayBits
+{
// This single bit controls whether the overlay is enabled or disabled globally
eRENDERDOC_Overlay_Enabled = 0x1,
@@ -452,6 +468,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTarget
// ignored and the others will be filled out.
typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch);
+// Requests that the replay UI show itself (if hidden or not the current top window). This can be
+// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle
+// showing the UI after making a capture.
+//
+// This will return 1 if the request was successfully passed on, though it's not guaranteed that
+// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current
+// target control connection to make such a request, or if there was another error
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)();
+
//////////////////////////////////////////////////////////////////////////
// Capturing functions
//
@@ -525,14 +550,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePoint
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
-// Requests that the replay UI show itself (if hidden or not the current top window). This can be
-// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle
-// showing the UI after making a capture.
+// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom
+// title to the capture produced which will be displayed in the UI.
//
-// This will return 1 if the request was successfully passed on, though it's not guaranteed that
-// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current
-// target control connection to make such a request, or if there was another error
-typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)();
+// If multiple captures are ongoing, this title will be applied to the first capture to end after
+// this call. The second capture to end will have no title, unless this function is called again.
+//
+// Calling this function has no effect if no capture is currently running, and if it is called
+// multiple times only the last title will be used.
+typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title);
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc API versions
@@ -547,7 +573,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)();
// Note that this means the API returned can be higher than the one you might have requested.
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned
// instead of 1.0.0. You can check this with the GetAPIVersion entry point
-typedef enum RENDERDOC_Version {
+typedef enum RENDERDOC_Version
+{
eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00
eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01
eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02
@@ -560,6 +587,7 @@ typedef enum RENDERDOC_Version {
eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01
eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02
eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00
+ eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00
} RENDERDOC_Version;
// API version changelog:
@@ -588,8 +616,10 @@ typedef enum RENDERDOC_Version {
// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening
// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option.
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected
+// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a
+// capture made with StartFrameCapture() or EndFrameCapture()
-typedef struct RENDERDOC_API_1_5_0
+typedef struct RENDERDOC_API_1_6_0
{
pRENDERDOC_GetAPIVersion GetAPIVersion;
@@ -664,19 +694,23 @@ typedef struct RENDERDOC_API_1_5_0
// new function in 1.5.0
pRENDERDOC_ShowReplayUI ShowReplayUI;
-} RENDERDOC_API_1_5_0;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_0;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_1;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_2;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_0;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_1;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_2;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_2_0;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_3_0;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_0;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_1;
-typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_2;
+ // new function in 1.6.0
+ pRENDERDOC_SetCaptureTitle SetCaptureTitle;
+} RENDERDOC_API_1_6_0;
+
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2;
+typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0;
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc API entry point
diff --git a/thirdparty/vma.cc b/thirdparty/vma.cc
new file mode 100644
index 0000000000..a2023d33b2
--- /dev/null
+++ b/thirdparty/vma.cc
@@ -0,0 +1,2 @@
+#define VMA_IMPLEMENTATION
+#include "vk_mem_alloc.h"
diff --git a/thirdparty/volk b/thirdparty/volk
new file mode 160000
index 0000000000..466085407d
--- /dev/null
+++ b/thirdparty/volk
@@ -0,0 +1 @@
+Subproject commit 466085407d5d2f50583fd663c1d65f93a7709d3e
diff --git a/ui/meson.build b/ui/meson.build
index 18bb7c97c1..75b82df927 100644
--- a/ui/meson.build
+++ b/ui/meson.build
@@ -40,10 +40,6 @@ xemu_cocoa = dependency('appleframeworks', modules: 'Cocoa')
xemu_ss.add(xemu_cocoa)
endif
-if 'CONFIG_LINUX' in config_host
-xemu_ss.add(gtk)
-endif
-
xemu_ss.add(when: 'CONFIG_LINUX', if_true: [gtk, files('xemu-os-utils-linux.c')])
xemu_ss.add(when: 'CONFIG_WIN32', if_true: files('xemu-os-utils-windows.c'))
xemu_ss.add(when: 'CONFIG_DARWIN', if_true: files('xemu-os-utils-macos.m'))
diff --git a/ui/xemu.c b/ui/xemu.c
index d0cec857f4..0d01f22460 100644
--- a/ui/xemu.c
+++ b/ui/xemu.c
@@ -426,6 +426,7 @@ static void handle_keydown(SDL_Event *ev)
{
int win;
struct sdl2_console *scon = get_scon_from_window(ev->key.windowID);
+ if (scon == NULL) return;
int gui_key_modifier_pressed = get_mod_state();
int gui_keysym = 0;
@@ -484,6 +485,7 @@ static void handle_keydown(SDL_Event *ev)
static void handle_keyup(SDL_Event *ev)
{
struct sdl2_console *scon = get_scon_from_window(ev->key.windowID);
+ if (!scon) return;
scon->ignore_hotkeys = false;
sdl2_process_key(scon, &ev->key);
@@ -944,7 +946,7 @@ static void sdl2_display_very_early_init(DisplayOptions *o)
fprintf(stderr, "GL_SHADING_LANGUAGE_VERSION: %s\n", glGetString(GL_SHADING_LANGUAGE_VERSION));
// Initialize offscreen rendering context now
- nv2a_gl_context_init();
+ nv2a_context_init();
SDL_GL_MakeCurrent(NULL, NULL);
// FIXME: atexit(sdl_cleanup);
diff --git a/ui/xui/main-menu.cc b/ui/xui/main-menu.cc
index 75b88cafb6..a9a6c6ec85 100644
--- a/ui/xui/main-menu.cc
+++ b/ui/xui/main-menu.cc
@@ -449,7 +449,15 @@ void MainMenuInputView::Draw()
void MainMenuDisplayView::Draw()
{
- SectionTitle("Quality");
+ SectionTitle("Renderer");
+ ChevronCombo("Backend", &g_config.display.renderer,
+ "Null\0"
+ "OpenGL\0"
+#ifdef CONFIG_VULKAN
+ "Vulkan\0"
+#endif
+ ,
+ "Select desired renderer implementation");
int rendering_scale = nv2a_get_surface_scale_factor() - 1;
if (ChevronCombo("Internal resolution scale", &rendering_scale,
"1x\0"
diff --git a/ui/xui/main.cc b/ui/xui/main.cc
index fd38aa4e7b..069a6282f9 100644
--- a/ui/xui/main.cc
+++ b/ui/xui/main.cc
@@ -216,7 +216,7 @@ void xemu_hud_render(void)
ImGui::NewFrame();
ProcessKeyboardShortcuts();
-#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC)
+#if defined(CONFIG_RENDERDOC)
if (g_capture_renderdoc_frame) {
nv2a_dbg_renderdoc_capture_frames(1);
g_capture_renderdoc_frame = false;
diff --git a/ui/xui/menubar.cc b/ui/xui/menubar.cc
index 2d1f48c604..bce0e7a0fb 100644
--- a/ui/xui/menubar.cc
+++ b/ui/xui/menubar.cc
@@ -71,8 +71,8 @@ void ProcessKeyboardShortcuts(void)
ActionScreenshot();
}
-#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC)
- if (ImGui::IsKeyPressed(ImGuiKey_F10)) {
+#ifdef CONFIG_RENDERDOC
+ if (ImGui::IsKeyPressed(ImGuiKey_F10) && nv2a_dbg_renderdoc_available()) {
nv2a_dbg_renderdoc_capture_frames(1);
}
#endif
@@ -203,7 +203,7 @@ void ShowMainMenu()
ImGui::MenuItem("Monitor", "~", &monitor_window.is_open);
ImGui::MenuItem("Audio", NULL, &apu_window.m_is_open);
ImGui::MenuItem("Video", NULL, &video_window.m_is_open);
-#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC)
+#ifdef CONFIG_RENDERDOC
if (nv2a_dbg_renderdoc_available()) {
ImGui::MenuItem("RenderDoc: Capture", NULL, &g_capture_renderdoc_frame);
}
diff --git a/util/meson.build b/util/meson.build
index 4269ef4e38..72ef1db2b5 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -59,6 +59,7 @@ util_ss.add(files('int128.c'))
util_ss.add(files('memalign.c'))
util_ss.add(when: 'CONFIG_WIN32', if_true: files('miniz/miniz.c'))
util_ss.add(files('fast-hash.c'))
+util_ss.add(files('mstring.c'))
if have_user
util_ss.add(files('selfmap.c'))
diff --git a/util/mstring.c b/util/mstring.c
new file mode 100644
index 0000000000..6cd0af7335
--- /dev/null
+++ b/util/mstring.c
@@ -0,0 +1,49 @@
+#include "qemu/osdep.h"
+#include "qemu/mstring.h"
+
+#include <stdarg.h>
+
+void mstring_append_fmt(MString *qstring, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ mstring_append_va(qstring, fmt, ap);
+ va_end(ap);
+}
+
+MString *mstring_from_fmt(const char *fmt, ...)
+{
+ MString *ret = mstring_new();
+ va_list ap;
+ va_start(ap, fmt);
+ mstring_append_va(ret, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+void mstring_append_va(MString *qstring, const char *fmt, va_list va)
+{
+ char scratch[256];
+
+ va_list ap;
+ va_copy(ap, va);
+ const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap);
+ va_end(ap);
+
+ if (len == 0) {
+ return;
+ } else if (len < sizeof(scratch)) {
+ mstring_append(qstring, scratch);
+ return;
+ }
+
+ /* overflowed out scratch buffer, alloc and try again */
+ char *buf = g_malloc(len + 1);
+ va_copy(ap, va);
+ vsnprintf(buf, len + 1, fmt, ap);
+ va_end(ap);
+
+ mstring_append(qstring, buf);
+ g_free(buf);
+}
diff --git a/xemu-version.c b/xemu-version.c
index 523d955760..f2e7a958e9 100644
--- a/xemu-version.c
+++ b/xemu-version.c
@@ -1,5 +1,8 @@
#include "xemu-version-macro.h"
+const int xemu_version_major = XEMU_VERSION_MAJOR;
+const int xemu_version_minor = XEMU_VERSION_MINOR;
+const int xemu_version_patch = XEMU_VERSION_PATCH;
const char *xemu_version = XEMU_VERSION;
const char *xemu_branch = XEMU_BRANCH;;
const char *xemu_commit = XEMU_COMMIT;
diff --git a/xemu-version.h b/xemu-version.h
index 484af8a9de..a1fe27fccb 100644
--- a/xemu-version.h
+++ b/xemu-version.h
@@ -1,6 +1,9 @@
#ifndef XEMU_VERSION_H
#define XEMU_VERSION_H
+extern const int xemu_version_major;
+extern const int xemu_version_minor;
+extern const int xemu_version_patch;
extern const char *xemu_version;
extern const char *xemu_branch;
extern const char *xemu_commit;