nv2a: Add Vulkan renderer

Author: Matt Borgerson, 2024-07-26 17:21:01 -07:00 (committed by mborgerson)
parent e639e0cdb7
commit a5385803db
114 changed files with 23349 additions and 10302 deletions


@ -71,8 +71,8 @@ IndentWidth: 4
AccessModifierOffset: -4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
-MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ?
-MacroBlockEnd: '.*_END$'
+#MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ?
+#MacroBlockEnd: '.*_END$'
MaxEmptyLinesToKeep: 2
#PenaltyBreakBeforeFirstCallParameter: 19
#PenaltyBreakComment: 300

.gitmodules

@ -82,9 +82,18 @@
[submodule "tomlplusplus"]
path = tomlplusplus
url = https://github.com/marzer/tomlplusplus
[submodule "hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"]
path = hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu
[submodule "hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu"]
path = hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu
url = https://github.com/abaire/nv2a_vsh_cpu.git
[submodule "ui/thirdparty/httplib"]
path = ui/thirdparty/httplib
url = https://github.com/yhirose/cpp-httplib
[submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"]
path = thirdparty/VulkanMemoryAllocator
url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
[submodule "thirdparty/volk"]
path = thirdparty/volk
url = https://github.com/zeux/volk
[submodule "thirdparty/SPIRV-Reflect"]
path = thirdparty/SPIRV-Reflect
url = https://github.com/KhronosGroup/SPIRV-Reflect


@ -130,6 +130,12 @@ input:
default: 18 # w
display:
renderer:
type: enum
values: ["NULL", OPENGL, VULKAN]
default: OPENGL
vulkan:
validation_layers: bool
quality:
surface_scale:
type: integer

configure

@ -237,7 +237,7 @@ else
git_submodules_action="ignore"
fi
git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"
git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect"
git="git"
# Don't accept a target_list environment variable.

debian/control

@ -16,6 +16,9 @@ Build-Depends: debhelper (>= 11),
libssl-dev,
libpcap-dev,
libslirp-dev,
glslang-dev,
libvulkan-dev,
Standards-Version: 3.9.8
Homepage: https://xemu.app
XS-Debian-Vcs-Browser: https://github.com/mborgerson/xemu


@ -1,8 +1,9 @@
/*
-* QEMU Geforce NV2A debug helpers
+* QEMU Geforce NV2A profiling and debug helpers
*
-* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2012 espes
+* Copyright (c) 2015 Jannik Vogel
+* Copyright (c) 2018-2023 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -18,8 +19,8 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef HW_NV2A_DEBUG_H
-#define HW_NV2A_DEBUG_H
+#ifndef HW_XBOX_NV2A_DEBUG_H
+#define HW_XBOX_NV2A_DEBUG_H
#include <stdint.h>
@ -36,54 +37,6 @@
# define NV2A_DPRINTF(format, ...) do { } while (0)
#endif
-// #define DEBUG_NV2A_GL
-#ifdef DEBUG_NV2A_GL
-#include <stdbool.h>
-#include "gl/gloffscreen.h"
-#include "config-host.h"
-void gl_debug_initialize(void);
-void gl_debug_message(bool cc, const char *fmt, ...);
-void gl_debug_group_begin(const char *fmt, ...);
-void gl_debug_group_end(void);
-void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
-void gl_debug_frame_terminator(void);
-# define NV2A_GL_DPRINTF(cc, format, ...) \
-gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__)
-# define NV2A_GL_DGROUP_BEGIN(format, ...) \
-gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__)
-# define NV2A_GL_DGROUP_END() \
-gl_debug_group_end()
-# define NV2A_GL_DLABEL(target, name, format, ...) \
-gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__)
-#define NV2A_GL_DFRAME_TERMINATOR() \
-gl_debug_frame_terminator()
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef CONFIG_RENDERDOC
-bool nv2a_dbg_renderdoc_available(void);
-void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
-#endif
-#ifdef __cplusplus
-}
-#endif
-#else
-# define NV2A_GL_DPRINTF(cc, format, ...) do { \
-if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \
-} while (0)
-# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0)
-# define NV2A_GL_DGROUP_END() do { } while (0)
-# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0)
-# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0)
-#endif
/* Debug prints to identify when unimplemented or unconfirmed features
* are being exercised. These cases likely result in graphical problems of
* varying degree, but should otherwise not crash the system. Enable this
@ -111,6 +64,22 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
#endif
#define NV2A_PROF_COUNTERS_XMAC \
_X(NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY) \
_X(NV2A_PROF_FINISH_SURFACE_CREATE) \
_X(NV2A_PROF_FINISH_SURFACE_DOWN) \
_X(NV2A_PROF_FINISH_NEED_BUFFER_SPACE) \
_X(NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY) \
_X(NV2A_PROF_FINISH_PRESENTING) \
_X(NV2A_PROF_FINISH_FLIP_STALL) \
_X(NV2A_PROF_FINISH_FLUSH) \
_X(NV2A_PROF_CLEAR) \
_X(NV2A_PROF_QUEUE_SUBMIT) \
_X(NV2A_PROF_QUEUE_SUBMIT_AUX) \
_X(NV2A_PROF_PIPELINE_NOTDIRTY) \
_X(NV2A_PROF_PIPELINE_GEN) \
_X(NV2A_PROF_PIPELINE_BIND) \
_X(NV2A_PROF_PIPELINE_MERGE) \
_X(NV2A_PROF_PIPELINE_RENDERPASSES) \
_X(NV2A_PROF_BEGIN_ENDS) \
_X(NV2A_PROF_DRAW_ARRAYS) \
_X(NV2A_PROF_INLINE_BUFFERS) \
@ -120,18 +89,26 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
_X(NV2A_PROF_SHADER_GEN) \
_X(NV2A_PROF_SHADER_BIND) \
_X(NV2A_PROF_SHADER_BIND_NOTDIRTY) \
_X(NV2A_PROF_SHADER_UBO_DIRTY) \
_X(NV2A_PROF_SHADER_UBO_NOTDIRTY) \
_X(NV2A_PROF_ATTR_BIND) \
_X(NV2A_PROF_TEX_UPLOAD) \
_X(NV2A_PROF_TEX_BIND) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_1) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_2) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_3) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_4) \
_X(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY) \
_X(NV2A_PROF_SURF_SWIZZLE) \
_X(NV2A_PROF_SURF_CREATE) \
_X(NV2A_PROF_SURF_DOWNLOAD) \
_X(NV2A_PROF_SURF_UPLOAD) \
_X(NV2A_PROF_SURF_TO_TEX) \
_X(NV2A_PROF_SURF_TO_TEX_FALLBACK) \
_X(NV2A_PROF_QUEUE_SUBMIT_1) \
_X(NV2A_PROF_QUEUE_SUBMIT_2) \
_X(NV2A_PROF_QUEUE_SUBMIT_3) \
_X(NV2A_PROF_QUEUE_SUBMIT_4) \
_X(NV2A_PROF_QUEUE_SUBMIT_5) \
enum NV2A_PROF_COUNTERS_ENUM {
#define _X(x) x,
@ -161,6 +138,21 @@ extern NV2AStats g_nv2a_stats;
const char *nv2a_profile_get_counter_name(unsigned int cnt);
int nv2a_profile_get_counter_value(unsigned int cnt);
void nv2a_profile_increment(void);
void nv2a_profile_flip_stall(void);
static inline void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt)
{
g_nv2a_stats.frame_working.counters[cnt] += 1;
}
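/* Usage sketch (illustrative, not part of this commit): backends record an
 * event by bumping the matching counter at the call site, e.g.
 *
 *     nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT);
 *
 * immediately before handing the work to the driver; the _X list above
 * generates both the enum and the printable counter names. */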
#ifdef CONFIG_RENDERDOC
void nv2a_dbg_renderdoc_init(void);
void *nv2a_dbg_renderdoc_get_api(void);
bool nv2a_dbg_renderdoc_available(void);
void nv2a_dbg_renderdoc_capture_frames(int num_frames);
extern int renderdoc_capture_frames;
#endif
#ifdef __cplusplus
}


@ -1,6 +0,0 @@
softmmu_ss.add([sdl, files(
'gloffscreen_common.c',
'gloffscreen_sdl.c',
)])
# gloffscreen_sdl.o-cflags := $(SDL_CFLAGS)


@ -1,27 +1,17 @@
specific_ss.add(files(
'nv2a.c',
'debug.c',
'pbus.c',
'pcrtc.c',
'pfb.c',
'pfifo.c',
'pgraph.c',
'pmc.c',
'pramdac.c',
'prmcio.c',
'prmdio.c',
'prmvio.c',
'psh.c',
'ptimer.c',
'pvideo.c',
'shaders.c',
'stubs.c',
'user.c',
'vsh.c',
'swizzle.c',
's3tc.c',
))
subdir('gl')
subdir('thirdparty')
specific_ss.add(nv2a_vsh_cpu)
subdir('pgraph')


@ -172,6 +172,16 @@ static void nv2a_get_offsets(VGACommonState *s,
*pline_compare = line_compare;
}
const uint8_t *nv2a_get_dac_palette(void)
{
return g_nv2a->puserdac.palette;
}
int nv2a_get_screen_off(void)
{
return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF;
}
static void nv2a_vga_gfx_update(void *opaque)
{
VGACommonState *vga = opaque;
@ -277,7 +287,7 @@ static void nv2a_reset(NV2AState *d)
}
memset(d->pfifo.regs, 0, sizeof(d->pfifo.regs));
-memset(d->pgraph.regs, 0, sizeof(d->pgraph.regs));
+memset(d->pgraph.regs_, 0, sizeof(d->pgraph.regs_));
memset(d->pvideo.regs, 0, sizeof(d->pvideo.regs));
d->pcrtc.start = 0;
@ -365,11 +375,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state)
if (state == RUN_STATE_SAVE_VM) {
nv2a_lock_fifo(d);
qatomic_set(&d->pfifo.halt, true);
-qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true);
-qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete);
+d->pgraph.renderer->ops.pre_savevm_trigger(d);
nv2a_unlock_fifo(d);
qemu_mutex_unlock_iothread();
-qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete);
+d->pgraph.renderer->ops.pre_savevm_wait(d);
qemu_mutex_lock_iothread();
nv2a_lock_fifo(d);
} else if (state == RUN_STATE_RESTORE_VM) {
@ -382,11 +391,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state)
nv2a_unlock_fifo(d);
} else if (state == RUN_STATE_SHUTDOWN) {
nv2a_lock_fifo(d);
-qatomic_set(&d->pgraph.shader_cache_writeback_pending, true);
-qemu_event_reset(&d->pgraph.shader_cache_writeback_complete);
+d->pgraph.renderer->ops.pre_shutdown_trigger(d);
nv2a_unlock_fifo(d);
qemu_mutex_unlock_iothread();
-qemu_event_wait(&d->pgraph.shader_cache_writeback_complete);
+d->pgraph.renderer->ops.pre_shutdown_wait(d);
qemu_mutex_lock_iothread();
}
}
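/* Editor's sketch (assumed shape; the real definition lives in the new
 * pgraph headers, which this diff excerpt does not show): the ops table
 * that keeps nv2a.c backend-agnostic across the OpenGL and Vulkan renderers:
 *
 *     typedef struct PGRAPHRendererOps {
 *         void (*pre_savevm_trigger)(NV2AState *d);
 *         void (*pre_savevm_wait)(NV2AState *d);
 *         void (*pre_shutdown_trigger)(NV2AState *d);
 *         void (*pre_shutdown_wait)(NV2AState *d);
 *         void (*process_pending)(NV2AState *d);
 *         void (*process_pending_reports)(NV2AState *d);
 *         ...
 *     } PGRAPHRendererOps;
 *
 * The trigger/wait split preserves the old behavior: trigger posts the
 * request while the FIFO is locked, wait blocks outside the iothread lock. */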
@ -515,9 +523,9 @@ static const VMStateDescription vmstate_nv2a = {
VMSTATE_UINT32(pgraph.inline_buffer_length, NV2AState), // fixme
VMSTATE_UINT32(pgraph.draw_arrays_length, NV2AState),
VMSTATE_UINT32(pgraph.draw_arrays_max_count, NV2AState),
-VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_start, NV2AState, 1250),
-VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_count, NV2AState, 1250),
-VMSTATE_UINT32_ARRAY(pgraph.regs, NV2AState, 0x2000),
+VMSTATE_INT32_ARRAY(pgraph.draw_arrays_start, NV2AState, 1250),
+VMSTATE_INT32_ARRAY(pgraph.draw_arrays_count, NV2AState, 1250),
+VMSTATE_UINT32_ARRAY(pgraph.regs_, NV2AState, 0x2000),
VMSTATE_UINT32(pmc.pending_interrupts, NV2AState),
VMSTATE_UINT32(pmc.enabled_interrupts, NV2AState),
VMSTATE_UINT32(pfifo.pending_interrupts, NV2AState),


@ -22,7 +22,7 @@
#define HW_NV2A_H
void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram);
-void nv2a_gl_context_init(void);
+void nv2a_context_init(void);
int nv2a_get_framebuffer_surface(void);
void nv2a_set_surface_scale_factor(unsigned int scale);
unsigned int nv2a_get_surface_scale_factor(void);


@ -44,25 +44,12 @@
#include "cpu.h"
#include "trace.h"
#include "swizzle.h"
#include "lru.h"
#include "gl/gloffscreen.h"
#include "nv2a.h"
#include "pgraph/pgraph.h"
#include "debug.h"
#include "shaders.h"
#include "nv2a_regs.h"
#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask))
#define SET_MASK(v, mask, val) \
({ \
const unsigned int __val = (val); \
const unsigned int __mask = (mask); \
(v) &= ~(__mask); \
(v) |= ((__val) << ctz32(__mask)) & (__mask); \
})
#define NV2A_DEVICE(obj) OBJECT_CHECK(NV2AState, (obj), "nv2a")
enum FIFOEngine {
@ -78,347 +65,6 @@ typedef struct DMAObject {
hwaddr limit;
} DMAObject;
typedef struct VertexAttribute {
bool dma_select;
hwaddr offset;
/* inline arrays are packed in order?
* Need to pass the offset to converted attributes */
unsigned int inline_array_offset;
float inline_value[4];
unsigned int format;
unsigned int size; /* size of the data type */
unsigned int count; /* number of components */
uint32_t stride;
bool needs_conversion;
float *inline_buffer;
bool inline_buffer_populated;
GLint gl_count;
GLenum gl_type;
GLboolean gl_normalize;
GLuint gl_inline_buffer;
} VertexAttribute;
typedef struct SurfaceFormatInfo {
unsigned int bytes_per_pixel;
GLint gl_internal_format;
GLenum gl_format;
GLenum gl_type;
GLenum gl_attachment;
} SurfaceFormatInfo;
typedef struct Surface {
bool draw_dirty;
bool buffer_dirty;
bool write_enabled_cache;
unsigned int pitch;
hwaddr offset;
} Surface;
typedef struct SurfaceShape {
unsigned int z_format;
unsigned int color_format;
unsigned int zeta_format;
unsigned int log_width, log_height;
unsigned int clip_x, clip_y;
unsigned int clip_width, clip_height;
unsigned int anti_aliasing;
} SurfaceShape;
typedef struct SurfaceBinding {
QTAILQ_ENTRY(SurfaceBinding) entry;
MemAccessCallback *access_cb;
hwaddr vram_addr;
SurfaceFormatInfo fmt;
SurfaceShape shape;
uintptr_t dma_addr;
uintptr_t dma_len;
bool color;
bool swizzle;
unsigned int width;
unsigned int height;
unsigned int pitch;
size_t size;
GLuint gl_buffer;
bool cleared;
int frame_time;
int draw_time;
bool draw_dirty;
bool download_pending;
bool upload_pending;
} SurfaceBinding;
typedef struct TextureShape {
bool cubemap;
unsigned int dimensionality;
unsigned int color_format;
unsigned int levels;
unsigned int width, height, depth;
bool border;
unsigned int min_mipmap_level, max_mipmap_level;
unsigned int pitch;
} TextureShape;
typedef struct TextureBinding {
GLenum gl_target;
GLuint gl_texture;
unsigned int refcnt;
int draw_time;
uint64_t data_hash;
unsigned int scale;
unsigned int min_filter;
unsigned int mag_filter;
unsigned int addru;
unsigned int addrv;
unsigned int addrp;
uint32_t border_color;
bool border_color_set;
} TextureBinding;
typedef struct TextureKey {
TextureShape state;
hwaddr texture_vram_offset;
hwaddr texture_length;
hwaddr palette_vram_offset;
hwaddr palette_length;
} TextureKey;
typedef struct TextureLruNode {
LruNode node;
TextureKey key;
TextureBinding *binding;
bool possibly_dirty;
} TextureLruNode;
typedef struct VertexKey {
size_t count;
GLuint gl_type;
GLboolean gl_normalize;
size_t stride;
hwaddr addr;
} VertexKey;
typedef struct VertexLruNode {
LruNode node;
VertexKey key;
GLuint gl_buffer;
bool initialized;
} VertexLruNode;
typedef struct KelvinState {
hwaddr object_instance;
} KelvinState;
typedef struct ContextSurfaces2DState {
hwaddr object_instance;
hwaddr dma_image_source;
hwaddr dma_image_dest;
unsigned int color_format;
unsigned int source_pitch, dest_pitch;
hwaddr source_offset, dest_offset;
} ContextSurfaces2DState;
typedef struct ImageBlitState {
hwaddr object_instance;
hwaddr context_surfaces;
unsigned int operation;
unsigned int in_x, in_y;
unsigned int out_x, out_y;
unsigned int width, height;
} ImageBlitState;
typedef struct BetaState {
hwaddr object_instance;
uint32_t beta;
} BetaState;
typedef struct QueryReport {
QSIMPLEQ_ENTRY(QueryReport) entry;
bool clear;
uint32_t parameter;
unsigned int query_count;
GLuint *queries;
} QueryReport;
typedef struct PGRAPHState {
QemuMutex lock;
uint32_t pending_interrupts;
uint32_t enabled_interrupts;
int frame_time;
int draw_time;
struct s2t_rndr {
GLuint fbo, vao, vbo, prog;
GLuint tex_loc, surface_size_loc;
} s2t_rndr;
struct disp_rndr {
GLuint fbo, vao, vbo, prog;
GLuint display_size_loc;
GLuint line_offset_loc;
GLuint tex_loc;
GLuint pvideo_tex;
GLint pvideo_enable_loc;
GLint pvideo_tex_loc;
GLint pvideo_in_pos_loc;
GLint pvideo_pos_loc;
GLint pvideo_scale_loc;
GLint pvideo_color_key_enable_loc;
GLint pvideo_color_key_loc;
GLint palette_loc[256];
} disp_rndr;
/* subchannels state we're not sure the location of... */
ContextSurfaces2DState context_surfaces_2d;
ImageBlitState image_blit;
KelvinState kelvin;
BetaState beta;
hwaddr dma_color, dma_zeta;
Surface surface_color, surface_zeta;
unsigned int surface_type;
SurfaceShape surface_shape;
SurfaceShape last_surface_shape;
QTAILQ_HEAD(, SurfaceBinding) surfaces;
SurfaceBinding *color_binding, *zeta_binding;
struct {
int clip_x;
int clip_width;
int clip_y;
int clip_height;
int width;
int height;
} surface_binding_dim; // FIXME: Refactor
hwaddr dma_a, dma_b;
Lru texture_cache;
TextureLruNode *texture_cache_entries;
bool texture_dirty[NV2A_MAX_TEXTURES];
TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
Lru shader_cache;
ShaderLruNode *shader_cache_entries;
ShaderBinding *shader_binding;
QemuMutex shader_cache_lock;
QemuThread shader_disk_thread;
bool texture_matrix_enable[NV2A_MAX_TEXTURES];
GLuint gl_framebuffer;
GLuint gl_display_buffer;
GLint gl_display_buffer_internal_format;
GLsizei gl_display_buffer_width;
GLsizei gl_display_buffer_height;
GLenum gl_display_buffer_format;
GLenum gl_display_buffer_type;
hwaddr dma_state;
hwaddr dma_notifies;
hwaddr dma_semaphore;
hwaddr dma_report;
hwaddr report_offset;
bool zpass_pixel_count_enable;
unsigned int zpass_pixel_count_result;
unsigned int gl_zpass_pixel_count_query_count;
GLuint *gl_zpass_pixel_count_queries;
QSIMPLEQ_HEAD(, QueryReport) report_queue;
hwaddr dma_vertex_a, dma_vertex_b;
uint32_t primitive_mode;
bool enable_vertex_program_write;
uint32_t vertex_state_shader_v0[4];
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
bool program_data_dirty;
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
/* lighting constant arrays */
uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
uint32_t ltc1[NV2A_LTC1_COUNT][4];
bool ltc1_dirty[NV2A_LTC1_COUNT];
float material_alpha;
// should figure out where these are in lighting context
float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
float light_infinite_direction[NV2A_MAX_LIGHTS][3];
float light_local_position[NV2A_MAX_LIGHTS][3];
float light_local_attenuation[NV2A_MAX_LIGHTS][3];
float point_params[8];
VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
uint16_t compressed_attrs;
Lru element_cache;
VertexLruNode *element_cache_entries;
unsigned int inline_array_length;
uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
GLuint gl_inline_array_buffer;
unsigned int inline_elements_length;
uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
unsigned int inline_buffer_length;
unsigned int draw_arrays_length;
unsigned int draw_arrays_min_start;
unsigned int draw_arrays_max_count;
/* FIXME: Unknown size, possibly endless, 1250 will do for now */
/* Keep in sync with size used in nv2a.c */
GLint gl_draw_arrays_start[1250];
GLsizei gl_draw_arrays_count[1250];
bool draw_arrays_prevent_connect;
GLuint gl_memory_buffer;
GLuint gl_vertex_array;
uint32_t regs[0x2000];
bool clearing;
bool waiting_for_nop;
bool waiting_for_flip;
bool waiting_for_context_switch;
bool downloads_pending;
bool download_dirty_surfaces_pending;
bool flush_pending;
bool gl_sync_pending;
bool shader_cache_writeback_pending;
QemuEvent downloads_complete;
QemuEvent dirty_surfaces_download_complete;
QemuEvent flush_complete;
QemuEvent gl_sync_complete;
QemuEvent shader_cache_writeback_complete;
unsigned int surface_scale_factor;
uint8_t *scale_buf;
} PGRAPHState;
typedef struct NV2AState {
/*< private >*/
PCIDevice parent_obj;
@ -512,9 +158,6 @@ typedef struct NV2ABlockInfo {
} NV2ABlockInfo;
extern const NV2ABlockInfo blocktable[NV_NUM_BLOCKS];
extern GloContext *g_nv2a_context_render;
extern GloContext *g_nv2a_context_display;
void nv2a_update_irq(NV2AState *d);
static inline
@ -566,20 +209,5 @@ DEFINE_PROTO(user)
DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address);
void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len);
void pgraph_init(NV2AState *d);
void pgraph_destroy(PGRAPHState *pg);
void pgraph_context_switch(NV2AState *d, unsigned int channel_id);
int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method,
uint32_t parameter, uint32_t *parameters,
size_t num_words_available, size_t max_lookahead_words,
bool inc);
void pgraph_gl_sync(NV2AState *d);
void pgraph_process_pending_reports(NV2AState *d);
void pgraph_process_pending_downloads(NV2AState *d);
void pgraph_download_dirty_surfaces(NV2AState *d);
void pgraph_flush(NV2AState *d);
void *pfifo_thread(void *arg);
void pfifo_kick(NV2AState *d);
#endif


@ -21,6 +21,17 @@
#ifndef HW_NV2A_REGS_H
#define HW_NV2A_REGS_H
#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask))
#define SET_MASK(v, mask, val) \
({ \
const unsigned int __val = (val); \
const unsigned int __mask = (mask); \
(v) &= ~(__mask); \
(v) |= ((__val) << ctz32(__mask)) & (__mask); \
})
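/* Worked example (illustrative): for a 4-bit field at bits 8..11,
 *
 *     uint32_t reg = 0x00000A00;
 *     GET_MASK(reg, 0x00000F00);       // yields 0xA
 *     SET_MASK(reg, 0x00000F00, 0x3);  // reg becomes 0x00000300
 *
 * ctz32(mask) supplies the shift amount, so field values never need to be
 * pre-shifted by callers. */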
#define NV_NUM_BLOCKS 21
#define NV_PMC 0 /* card master control */
#define NV_PBUS 1 /* bus control */


@ -95,23 +95,25 @@ void pfifo_kick(NV2AState *d)
qemu_cond_broadcast(&d->pfifo.fifo_cond);
}
-static bool pgraph_can_fifo_access(NV2AState *d) {
-return qatomic_read(&d->pgraph.regs[NV_PGRAPH_FIFO]) & NV_PGRAPH_FIFO_ACCESS;
+static bool can_fifo_access(NV2AState *d) {
+return qatomic_read(&d->pgraph.regs_[NV_PGRAPH_FIFO]) &
+NV_PGRAPH_FIFO_ACCESS;
}
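/* Editor's sketch (assumed, consistent with the call sites in this diff;
 * the real accessor lives in the pgraph headers, which are not shown):
 *
 *     static inline uint32_t pgraph_reg_r(PGRAPHState *pg, unsigned int r)
 *     {
 *         return pg->regs_[r];
 *     }
 *
 * Renaming regs to regs_ makes any leftover direct access fail to compile,
 * forcing every read and write through the accessors. */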
/* If NV097_FLIP_STALL was executed, check if the flip has completed.
* This will usually happen in the VSYNC interrupt handler.
*/
-static bool pgraph_is_flip_stall_complete(NV2AState *d)
+static bool is_flip_stall_complete(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n",
GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D),
GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D),
GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D));
uint32_t s = pgraph_reg_r(pg, NV_PGRAPH_SURFACE);
NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n",
GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D),
GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D),
GET_MASK(s, NV_PGRAPH_SURFACE_MODULO_3D));
uint32_t s = pg->regs[NV_PGRAPH_SURFACE];
if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D)
!= GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) {
return true;
@ -126,7 +128,7 @@ static bool pfifo_stall_for_flip(NV2AState *d)
if (qatomic_read(&d->pgraph.waiting_for_flip)) {
qemu_mutex_lock(&d->pgraph.lock);
-if (!pgraph_is_flip_stall_complete(d)) {
+if (!is_flip_stall_complete(d)) {
should_stall = true;
} else {
d->pgraph.waiting_for_flip = false;
@ -141,7 +143,7 @@ static bool pfifo_puller_should_stall(NV2AState *d)
{
return pfifo_stall_for_flip(d) || qatomic_read(&d->pgraph.waiting_for_nop) ||
qatomic_read(&d->pgraph.waiting_for_context_switch) ||
-!pgraph_can_fifo_access(d);
+!can_fifo_access(d);
}
static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
@ -187,7 +189,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
qemu_mutex_lock(&d->pgraph.lock);
// Switch contexts if necessary
-if (pgraph_can_fifo_access(d)) {
+if (can_fifo_access(d)) {
pgraph_context_switch(d, entry.channel_id);
if (!d->pgraph.waiting_for_context_switch) {
num_proc =
@ -221,7 +223,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
-if (pgraph_can_fifo_access(d)) {
+if (can_fifo_access(d)) {
num_proc =
pgraph_method(d, subchannel, method, parameter, parameters,
num_words_available, max_lookahead_words, inc);
@ -242,7 +244,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
static bool pfifo_pusher_should_stall(NV2AState *d)
{
-return !pgraph_can_fifo_access(d) ||
+return !can_fifo_access(d) ||
qatomic_read(&d->pgraph.waiting_for_nop);
}
@ -447,39 +449,11 @@ static void pfifo_run_pusher(NV2AState *d)
}
}
static void process_requests(NV2AState *d)
{
if (qatomic_read(&d->pgraph.downloads_pending) ||
qatomic_read(&d->pgraph.download_dirty_surfaces_pending) ||
qatomic_read(&d->pgraph.gl_sync_pending) ||
qatomic_read(&d->pgraph.flush_pending) ||
qatomic_read(&d->pgraph.shader_cache_writeback_pending)) {
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
if (qatomic_read(&d->pgraph.downloads_pending)) {
pgraph_process_pending_downloads(d);
}
if (qatomic_read(&d->pgraph.download_dirty_surfaces_pending)) {
pgraph_download_dirty_surfaces(d);
}
if (qatomic_read(&d->pgraph.gl_sync_pending)) {
pgraph_gl_sync(d);
}
if (qatomic_read(&d->pgraph.flush_pending)) {
pgraph_flush(d);
}
if (qatomic_read(&d->pgraph.shader_cache_writeback_pending)) {
shader_write_cache_reload_list(&d->pgraph);
}
qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock(&d->pfifo.lock);
}
}
void *pfifo_thread(void *arg)
{
NV2AState *d = (NV2AState *)arg;
-glo_set_current(g_nv2a_context_render);
+pgraph_init_thread(d);
rcu_register_thread();
@ -487,13 +461,13 @@ void *pfifo_thread(void *arg)
while (true) {
d->pfifo.fifo_kick = false;
-process_requests(d);
+d->pgraph.renderer->ops.process_pending(d);
if (!d->pfifo.halt) {
pfifo_run_pusher(d);
}
-pgraph_process_pending_reports(d);
+d->pgraph.renderer->ops.process_pending_reports(d);
if (!d->pfifo.fifo_kick) {
qemu_cond_broadcast(&d->pfifo.fifo_idle_cond);

File diff suppressed because it is too large.


@ -0,0 +1,84 @@
/*
* Geforce NV2A PGRAPH Renderdoc Helpers
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include <stdint.h>
#include <stdbool.h>
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"
#include "hw/xbox/nv2a/debug.h"
#ifdef _WIN32
#include <libloaderapi.h>
#else
#include <dlfcn.h>
#endif
static RENDERDOC_API_1_6_0 *rdoc_api = NULL;
int renderdoc_capture_frames = 0;
void nv2a_dbg_renderdoc_init(void)
{
if (rdoc_api) {
return;
}
#ifdef _WIN32
HMODULE renderdoc = GetModuleHandleA("renderdoc.dll");
if (renderdoc) {
pRENDERDOC_GetAPI RENDERDOC_GetAPI =
(pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI");
#else
void *renderdoc = dlopen(
#ifdef __APPLE__
"librenderdoc.dylib",
#else
"librenderdoc.so",
#endif
RTLD_LAZY);
if (renderdoc) {
pRENDERDOC_GetAPI RENDERDOC_GetAPI =
(pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI");
#endif // _WIN32
int ret =
RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api);
assert(ret == 1 && "Failed to retrieve RenderDoc API.");
} else {
/* Note: dlerror() is POSIX-only; guard it so the Win32 branch
 * (GetModuleHandleA above) still compiles. */
#ifdef _WIN32
fprintf(stderr, "Error: Failed to open renderdoc library\n");
#else
fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", dlerror());
#endif
}
}
void *nv2a_dbg_renderdoc_get_api(void)
{
return (void*)rdoc_api;
}
bool nv2a_dbg_renderdoc_available(void)
{
return rdoc_api != NULL;
}
void nv2a_dbg_renderdoc_capture_frames(int num_frames)
{
renderdoc_capture_frames += num_frames;
}
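/* Usage sketch (illustrative): a debugger front end can request a capture
 * with
 *
 *     if (nv2a_dbg_renderdoc_available()) {
 *         nv2a_dbg_renderdoc_capture_frames(1);
 *     }
 *
 * and the renderer starts/ends the capture at its frame terminator. */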


@ -0,0 +1,174 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
#include "renderer.h"
// TODO: Optimize. Ideally this should all be done via OpenGL.
void pgraph_gl_image_blit(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d;
ImageBlitState *image_blit = &pg->image_blit;
BetaState *beta = &pg->beta;
pgraph_gl_surface_update(d, false, true, true);
assert(context_surfaces->object_instance == image_blit->context_surfaces);
unsigned int bytes_per_pixel;
switch (context_surfaces->color_format) {
case NV062_SET_COLOR_FORMAT_LE_Y8:
bytes_per_pixel = 1;
break;
case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
bytes_per_pixel = 2;
break;
case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
case NV062_SET_COLOR_FORMAT_LE_Y32:
bytes_per_pixel = 4;
break;
default:
fprintf(stderr, "Unknown blit surface format: 0x%x\n",
context_surfaces->color_format);
assert(false);
break;
}
hwaddr source_dma_len, dest_dma_len;
uint8_t *source = (uint8_t *)nv_dma_map(
d, context_surfaces->dma_image_source, &source_dma_len);
assert(context_surfaces->source_offset < source_dma_len);
source += context_surfaces->source_offset;
uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest,
&dest_dma_len);
assert(context_surfaces->dest_offset < dest_dma_len);
dest += context_surfaces->dest_offset;
hwaddr source_addr = source - d->vram_ptr;
hwaddr dest_addr = dest - d->vram_ptr;
SurfaceBinding *surf_src = pgraph_gl_surface_get(d, source_addr);
if (surf_src) {
pgraph_gl_surface_download_if_dirty(d, surf_src);
}
SurfaceBinding *surf_dest = pgraph_gl_surface_get(d, dest_addr);
if (surf_dest) {
if (image_blit->height < surf_dest->height ||
image_blit->width < surf_dest->width) {
pgraph_gl_surface_download_if_dirty(d, surf_dest);
} else {
// The blit will completely replace the surface so any pending
// download should be discarded.
surf_dest->download_pending = false;
surf_dest->draw_dirty = false;
}
surf_dest->upload_pending = true;
pg->draw_time++;
}
hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch +
image_blit->in_x * bytes_per_pixel;
hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch +
image_blit->out_x * bytes_per_pixel;
hwaddr source_size =
(image_blit->height - 1) * context_surfaces->source_pitch +
image_blit->width * bytes_per_pixel;
hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch +
image_blit->width * bytes_per_pixel;
/* FIXME: What does hardware do in this case? */
assert(source_addr + source_offset + source_size <=
memory_region_size(d->vram));
assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram));
uint8_t *source_row = source + source_offset;
uint8_t *dest_row = dest + dest_offset;
if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
// NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY");
for (unsigned int y = 0; y < image_blit->height; y++) {
memmove(dest_row, source_row, image_blit->width * bytes_per_pixel);
source_row += context_surfaces->source_pitch;
dest_row += context_surfaces->dest_pitch;
}
} else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) {
// NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND");
uint32_t max_beta_mult = 0x7f80;
uint32_t beta_mult = beta->beta >> 16;
uint32_t inv_beta_mult = max_beta_mult - beta_mult;
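/* Fixed-point lerp: each channel becomes
 * (src * beta_mult + dest * inv_beta_mult) / max_beta_mult,
 * with 0x7f80 playing the role of beta == 1.0. */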
for (unsigned int y = 0; y < image_blit->height; y++) {
for (unsigned int x = 0; x < image_blit->width; x++) {
for (unsigned int ch = 0; ch < 3; ch++) {
uint32_t a = source_row[x * 4 + ch] * beta_mult;
uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult;
dest_row[x * 4 + ch] = (a + b) / max_beta_mult;
}
}
source_row += context_surfaces->source_pitch;
dest_row += context_surfaces->dest_pitch;
}
} else {
fprintf(stderr, "Unknown blit operation: 0x%x\n",
image_blit->operation);
assert(false && "Unknown blit operation");
}
NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr);
bool needs_alpha_patching;
uint8_t alpha_override;
switch (context_surfaces->color_format) {
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
needs_alpha_patching = true;
alpha_override = 0xff;
break;
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
needs_alpha_patching = true;
alpha_override = 0;
break;
default:
needs_alpha_patching = false;
alpha_override = 0;
}
if (needs_alpha_patching) {
dest_row = dest + dest_offset;
for (unsigned int y = 0; y < image_blit->height; y++) {
for (unsigned int x = 0; x < image_blit->width; x++) {
dest_row[x * 4 + 3] = alpha_override;
}
dest_row += context_surfaces->dest_pitch;
}
}
dest_addr += dest_offset;
memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
DIRTY_MEMORY_VGA);
memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
DIRTY_MEMORY_NV2A_TEX);
}


@ -0,0 +1,322 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H
#define HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "gloffscreen.h"
static const GLenum pgraph_texture_min_filter_gl_map[] = {
0,
GL_NEAREST,
GL_LINEAR,
GL_NEAREST_MIPMAP_NEAREST,
GL_LINEAR_MIPMAP_NEAREST,
GL_NEAREST_MIPMAP_LINEAR,
GL_LINEAR_MIPMAP_LINEAR,
GL_LINEAR,
};
static const GLenum pgraph_texture_mag_filter_gl_map[] = {
0,
GL_NEAREST,
GL_LINEAR,
0,
GL_LINEAR /* TODO: Convolution filter... */
};
static const GLenum pgraph_texture_addr_gl_map[] = {
0,
GL_REPEAT,
GL_MIRRORED_REPEAT,
GL_CLAMP_TO_EDGE,
GL_CLAMP_TO_BORDER,
GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */
};
static const GLenum pgraph_blend_factor_gl_map[] = {
GL_ZERO,
GL_ONE,
GL_SRC_COLOR,
GL_ONE_MINUS_SRC_COLOR,
GL_SRC_ALPHA,
GL_ONE_MINUS_SRC_ALPHA,
GL_DST_ALPHA,
GL_ONE_MINUS_DST_ALPHA,
GL_DST_COLOR,
GL_ONE_MINUS_DST_COLOR,
GL_SRC_ALPHA_SATURATE,
0,
GL_CONSTANT_COLOR,
GL_ONE_MINUS_CONSTANT_COLOR,
GL_CONSTANT_ALPHA,
GL_ONE_MINUS_CONSTANT_ALPHA,
};
static const GLenum pgraph_blend_equation_gl_map[] = {
GL_FUNC_SUBTRACT,
GL_FUNC_REVERSE_SUBTRACT,
GL_FUNC_ADD,
GL_MIN,
GL_MAX,
GL_FUNC_REVERSE_SUBTRACT,
GL_FUNC_ADD,
};
/* FIXME
static const GLenum pgraph_blend_logicop_map[] = {
GL_CLEAR,
GL_AND,
GL_AND_REVERSE,
GL_COPY,
GL_AND_INVERTED,
GL_NOOP,
GL_XOR,
GL_OR,
GL_NOR,
GL_EQUIV,
GL_INVERT,
GL_OR_REVERSE,
GL_COPY_INVERTED,
GL_OR_INVERTED,
GL_NAND,
GL_SET,
};
*/
static const GLenum pgraph_cull_face_gl_map[] = {
0,
GL_FRONT,
GL_BACK,
GL_FRONT_AND_BACK
};
static const GLenum pgraph_depth_func_gl_map[] = {
GL_NEVER,
GL_LESS,
GL_EQUAL,
GL_LEQUAL,
GL_GREATER,
GL_NOTEQUAL,
GL_GEQUAL,
GL_ALWAYS,
};
static const GLenum pgraph_stencil_func_gl_map[] = {
GL_NEVER,
GL_LESS,
GL_EQUAL,
GL_LEQUAL,
GL_GREATER,
GL_NOTEQUAL,
GL_GEQUAL,
GL_ALWAYS,
};
static const GLenum pgraph_stencil_op_gl_map[] = {
0,
GL_KEEP,
GL_ZERO,
GL_REPLACE,
GL_INCR,
GL_DECR,
GL_INVERT,
GL_INCR_WRAP,
GL_DECR_WRAP,
};
typedef struct ColorFormatInfo {
unsigned int bytes_per_pixel;
bool linear;
GLint gl_internal_format;
GLenum gl_format;
GLenum gl_type;
GLenum gl_swizzle_mask[4];
bool depth;
} ColorFormatInfo;
static const ColorFormatInfo kelvin_color_format_gl_map[66] = {
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] =
{1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
{GL_RED, GL_RED, GL_RED, GL_ONE}},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] =
{1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
{GL_RED, GL_RED, GL_RED, GL_RED}},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] =
{2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] =
{2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] =
{2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] =
{2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] =
{4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] =
{4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
/* paletted texture */
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] =
{1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] =
{4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA},
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] =
{4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA},
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] =
{4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] =
{2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] =
{2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] =
{4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] =
{1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
{GL_RED, GL_RED, GL_RED, GL_ONE}},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] =
{2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
{GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] =
{1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
{GL_ONE, GL_ONE, GL_ONE, GL_RED}},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] =
{2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
{GL_RED, GL_RED, GL_RED, GL_GREEN}},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] =
{1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
{GL_RED, GL_RED, GL_RED, GL_RED}},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] =
{2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] =
{2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] =
{4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] =
{1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
{GL_ONE, GL_ONE, GL_ONE, GL_RED}},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] =
{2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
{GL_RED, GL_RED, GL_RED, GL_GREEN}},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] =
{2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] =
{2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
{GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] =
{2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
{GL_GREEN, GL_RED, GL_RED, GL_GREEN}},
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] =
{2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] =
{2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
/* Additional information is passed to the pixel shader via the swizzle:
* RED: The depth value.
* GREEN: 0 for 16-bit, 1 for 24-bit
* BLUE: 0 for fixed, 1 for float
*/
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] =
{2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
{GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] =
{4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
{GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] =
/* FIXME: Uses fixed-point format to match surface format hack below. */
{4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
{GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] =
{2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
{GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] =
{2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT,
{GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] =
{2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT,
{GL_RED, GL_RED, GL_RED, GL_ONE}},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] =
{4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] =
{4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] =
{4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] =
{4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] =
{4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] =
{4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}
};
typedef struct SurfaceFormatInfo {
unsigned int bytes_per_pixel;
GLint gl_internal_format;
GLenum gl_format;
GLenum gl_type;
GLenum gl_attachment;
} SurfaceFormatInfo;
static const SurfaceFormatInfo kelvin_surface_color_format_gl_map[] = {
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
{2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
{2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
{4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
{4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
// FIXME: Map channel color
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
{1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
{2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0},
};
static const SurfaceFormatInfo kelvin_surface_zeta_float_format_gl_map[] = {
[NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
{2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT},
[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
/* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for
* now just emulate this with fixed-point Z24S8. Possible compat
* improvement with custom conversion.
*/
{4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
};
static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_gl_map[] = {
[NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
{2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT},
[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
{4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
};
#endif


@ -1,5 +1,5 @@
/*
-* QEMU Geforce NV2A debug helpers
+* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2012 espes
@ -18,6 +18,7 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
#include "debug.h"
#ifdef DEBUG_NV2A_GL
@ -28,15 +29,8 @@
#include <assert.h>
#ifdef CONFIG_RENDERDOC
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"
-#ifdef _WIN32
-#include <libloaderapi.h>
-#else
-#include <dlfcn.h>
-#endif
-static RENDERDOC_API_1_1_2 *rdoc_api = NULL;
-static int32_t renderdoc_capture_frames = 0;
#endif
#define CHECK_GL_ERROR() do { \
@ -74,31 +68,7 @@ void gl_debug_initialize(void)
}
#ifdef CONFIG_RENDERDOC
-const char *renderdoc_lib;
-void* renderdoc;
-#ifdef __APPLE__
-renderdoc_lib = "librenderdoc.dylib";
-#elif _WIN32
-renderdoc_lib = "renderdoc.dll";
-#else
-renderdoc_lib = "librenderdoc.so";
-#endif
-#ifdef _WIN32
-renderdoc = GetModuleHandleA(renderdoc_lib);
-if (renderdoc) {
-pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress(
-renderdoc, "RENDERDOC_GetAPI");
-#else
-renderdoc = dlopen(renderdoc_lib, RTLD_NOW | RTLD_NOLOAD);
-if (renderdoc) {
-pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym(
-renderdoc, "RENDERDOC_GetAPI");
-#endif
-int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2,
-(void **)&rdoc_api);
-assert(ret == 1 && "Failed to retrieve RenderDoc API.");
-}
+nv2a_dbg_renderdoc_init();
#endif
}
@ -179,7 +149,10 @@ void gl_debug_frame_terminator(void)
CHECK_GL_ERROR();
#ifdef CONFIG_RENDERDOC
-if (rdoc_api) {
+if (nv2a_dbg_renderdoc_available()) {
+RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api();
if (rdoc_api->IsTargetControlConnected()) {
if (rdoc_api->IsFrameCapturing()) {
rdoc_api->EndFrameCapture(NULL, NULL);
@ -190,7 +163,7 @@ void gl_debug_frame_terminator(void)
error);
}
}
-if (renderdoc_capture_frames) {
+if (renderdoc_capture_frames > 0) {
rdoc_api->StartFrameCapture(NULL, NULL);
GLenum error = glGetError();
if (error != GL_NO_ERROR) {
@ -203,22 +176,10 @@ void gl_debug_frame_terminator(void)
}
}
#endif
-if (!has_GL_GREMEDY_frame_terminator) {
-return;
+if (has_GL_GREMEDY_frame_terminator) {
+glFrameTerminatorGREMEDY();
+CHECK_GL_ERROR();
}
-glFrameTerminatorGREMEDY();
-CHECK_GL_ERROR();
}
-#ifdef CONFIG_RENDERDOC
-bool nv2a_dbg_renderdoc_available(void) {
-return rdoc_api != NULL;
-}
-void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames) {
-renderdoc_capture_frames = num_frames;
-}
-#endif
#endif // DEBUG_NV2A_GL


@ -0,0 +1,60 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2012 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H
#define HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H
// #define DEBUG_NV2A_GL
#ifdef DEBUG_NV2A_GL
#include <stdbool.h>
#include "gloffscreen.h"
#include "config-host.h"
void gl_debug_initialize(void);
void gl_debug_message(bool cc, const char *fmt, ...);
void gl_debug_group_begin(const char *fmt, ...);
void gl_debug_group_end(void);
void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
void gl_debug_frame_terminator(void);
# define NV2A_GL_DPRINTF(cc, format, ...) \
gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__)
# define NV2A_GL_DGROUP_BEGIN(format, ...) \
gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__)
# define NV2A_GL_DGROUP_END() \
gl_debug_group_end()
# define NV2A_GL_DLABEL(target, name, format, ...) \
gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__)
#define NV2A_GL_DFRAME_TERMINATOR() \
gl_debug_frame_terminator()
#else
# define NV2A_GL_DPRINTF(cc, format, ...) do { \
if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \
} while (0)
# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0)
# define NV2A_GL_DGROUP_END() do { } while (0)
# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0)
# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0)
#endif
#endif


@ -0,0 +1,407 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/pgraph/util.h"
#include "renderer.h"
#include <math.h>
void pgraph_gl_init_display_renderer(NV2AState *d)
{
struct PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
glGenTextures(1, &r->gl_display_buffer);
r->gl_display_buffer_internal_format = 0;
r->gl_display_buffer_width = 0;
r->gl_display_buffer_height = 0;
r->gl_display_buffer_format = 0;
r->gl_display_buffer_type = 0;
const char *vs =
"#version 330\n"
"void main()\n"
"{\n"
" float x = -1.0 + float((gl_VertexID & 1) << 2);\n"
" float y = -1.0 + float((gl_VertexID & 2) << 1);\n"
" gl_Position = vec4(x, y, 0, 1);\n"
"}\n";
/* FIXME: improve interlace handling, pvideo */
const char *fs =
"#version 330\n"
"uniform sampler2D tex;\n"
"uniform bool pvideo_enable;\n"
"uniform sampler2D pvideo_tex;\n"
"uniform vec2 pvideo_in_pos;\n"
"uniform vec4 pvideo_pos;\n"
"uniform vec3 pvideo_scale;\n"
"uniform bool pvideo_color_key_enable;\n"
"uniform vec4 pvideo_color_key;\n"
"uniform vec2 display_size;\n"
"uniform float line_offset;\n"
"layout(location = 0) out vec4 out_Color;\n"
"void main()\n"
"{\n"
" vec2 texCoord = gl_FragCoord.xy/display_size;\n"
" float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
" texCoord.y = 1 + rel*(texCoord.y - 1);"
" out_Color.rgba = texture(tex, texCoord);\n"
" if (pvideo_enable) {\n"
" vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
" vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
" bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
" greaterThan(screenCoord, output_region.zw));\n"
" if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
" vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
" vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
" in_st.y *= -1.0;\n"
" out_Color.rgba = texture(pvideo_tex, in_st);\n"
" }\n"
" }\n"
"}\n";
r->disp_rndr.prog = pgraph_gl_compile_shader(vs, fs);
r->disp_rndr.tex_loc = glGetUniformLocation(r->disp_rndr.prog, "tex");
r->disp_rndr.pvideo_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_enable");
r->disp_rndr.pvideo_tex_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_tex");
r->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_in_pos");
r->disp_rndr.pvideo_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_pos");
r->disp_rndr.pvideo_scale_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_scale");
r->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key_enable");
r->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key");
r->disp_rndr.display_size_loc = glGetUniformLocation(r->disp_rndr.prog, "display_size");
r->disp_rndr.line_offset_loc = glGetUniformLocation(r->disp_rndr.prog, "line_offset");
glGenVertexArrays(1, &r->disp_rndr.vao);
glBindVertexArray(r->disp_rndr.vao);
glGenBuffers(1, &r->disp_rndr.vbo);
glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
glGenFramebuffers(1, &r->disp_rndr.fbo);
glGenTextures(1, &r->disp_rndr.pvideo_tex);
assert(glGetError() == GL_NO_ERROR);
}
static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data,
unsigned int width,
unsigned int height,
unsigned int pitch)
{
uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4);
int x, y;
for (y = 0; y < height; y++) {
const uint8_t *line = &data[y * pitch];
const uint32_t row_offset = y * width;
for (x = 0; x < width; x++) {
uint8_t *pixel = &converted_data[(row_offset + x) * 4];
convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]);
pixel[3] = 255;
}
}
return converted_data;
}
static float pvideo_calculate_scale(unsigned int din_dout,
unsigned int output_size)
{
float calculated_in = din_dout * (output_size - 1);
calculated_in = floorf(calculated_in / (1 << 20) + 0.5f);
return (calculated_in + 1.0f) / output_size;
}
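/* Worked example (illustrative): with out_width = 640 and ds_dx = 0x200000
 * (2.0 in the hardware's DIN/DOUT fixed-point ratio, unity being 1 << 20),
 * calculated_in = round(2.0 * 639) = 1278 and the function returns
 * 1279 / 640 ~= 2.0, i.e. the input advances two texels per output pixel. */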
static void render_display_pvideo_overlay(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
// FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
// Many games seem to pass this value when initializing or tearing down
// PVIDEO. On its own, this generally does not result in the overlay being
// hidden; however, there are certain games (e.g., Ultimate Beach Soccer)
// that use an unknown mechanism to hide the overlay without explicitly
// stopping it.
// Since the value seems to be set to 0xFFFFFFFF only in cases where the
// content is not valid, it is probably good enough to treat it as an
// implicit stop.
bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
&& d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
glUniform1ui(r->disp_rndr.pvideo_enable_loc, enabled);
if (!enabled) {
return;
}
hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
int in_width =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
int in_height =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
NV_PVIDEO_POINT_IN_S);
int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
NV_PVIDEO_POINT_IN_T);
int in_pitch =
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
int in_color =
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
unsigned int out_width =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
unsigned int out_height =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
float scale_x = 1.0f;
float scale_y = 1.0f;
unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
scale_x = pvideo_calculate_scale(ds_dx, out_width);
}
if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
scale_y = pvideo_calculate_scale(dt_dy, out_height);
}
// On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
// in them being capped to the output size, content is not scaled. This is
// particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
// during initialization or teardown.
if (in_width > out_width) {
in_width = floorf((float)out_width * scale_x + 0.5f);
}
if (in_height > out_height) {
in_height = floorf((float)out_height * scale_y + 0.5f);
}
/* TODO: support other color formats */
assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
unsigned int out_x =
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
unsigned int out_y =
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
unsigned int color_key_enabled =
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
glUniform1ui(r->disp_rndr.pvideo_color_key_enable_loc,
color_key_enabled);
// TODO: Verify that masking off the top byte is correct.
// SeaBlade sets a color key of 0x80000000 but the texture passed into the
// shader is cleared to 0 alpha.
unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
glUniform4f(r->disp_rndr.pvideo_color_key_loc,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
assert(offset + in_pitch * in_height <= limit);
hwaddr end = base + offset + in_pitch * in_height;
assert(end <= memory_region_size(d->vram));
pgraph_apply_scaling_factor(pg, &out_x, &out_y);
pgraph_apply_scaling_factor(pg, &out_width, &out_height);
// Translate for the GL viewport origin.
out_y = MAX(r->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
glActiveTexture(GL_TEXTURE0 + 1);
glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
d->vram_ptr + base + offset, in_width, in_height, in_pitch);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
GL_UNSIGNED_BYTE, tex_rgba);
g_free(tex_rgba);
glUniform1i(r->disp_rndr.pvideo_tex_loc, 1);
glUniform2f(r->disp_rndr.pvideo_in_pos_loc, in_s, in_t);
glUniform4f(r->disp_rndr.pvideo_pos_loc,
out_x, out_y, out_width, out_height);
glUniform3f(r->disp_rndr.pvideo_scale_loc,
scale_x, scale_y, 1.0f / pg->surface_scale_factor);
}
static void render_display(NV2AState *d, SurfaceBinding *surface)
{
struct PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
unsigned int width, height;
uint32_t pline_offset, pstart_addr, pline_compare;
d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
int line_offset = surface->pitch / pline_offset;
/* Adjust viewport height for interlaced mode, used only in 1080i */
if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
height *= 2;
}
pgraph_apply_scaling_factor(pg, &width, &height);
glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer);
bool recreate = (
surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format
|| width != r->gl_display_buffer_width
|| height != r->gl_display_buffer_height
|| surface->fmt.gl_format != r->gl_display_buffer_format
|| surface->fmt.gl_type != r->gl_display_buffer_type
);
if (recreate) {
/* XXX: There's apparently a bug in some Intel OpenGL drivers for
 * Windows that will leak this texture when it's orphaned after use in
 * another context, regardless of which thread it's created or released
 * on.
*
* Driver: 27.20.100.8729 9/11/2020 W10 x64
* Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423
*/
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format;
r->gl_display_buffer_width = width;
r->gl_display_buffer_height = height;
r->gl_display_buffer_format = surface->fmt.gl_format;
r->gl_display_buffer_type = surface->fmt.gl_type;
glTexImage2D(GL_TEXTURE_2D, 0,
r->gl_display_buffer_internal_format,
r->gl_display_buffer_width,
r->gl_display_buffer_height,
0,
r->gl_display_buffer_format,
r->gl_display_buffer_type,
NULL);
}
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D, r->gl_display_buffer, 0);
GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
glDrawBuffers(1, DrawBuffers);
assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
glBindVertexArray(r->disp_rndr.vao);
glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
glUseProgram(r->disp_rndr.prog);
glProgramUniform1i(r->disp_rndr.prog, r->disp_rndr.tex_loc, 0);
glUniform2f(r->disp_rndr.display_size_loc, width, height);
glUniform1f(r->disp_rndr.line_offset_loc, line_offset);
render_display_pvideo_overlay(d);
glViewport(0, 0, width, height);
glColorMask(true, true, true, true);
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDisable(GL_STENCIL_TEST);
glDisable(GL_CULL_FACE);
glDisable(GL_DEPTH_TEST);
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLES, 0, 3);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
GL_TEXTURE_2D, 0, 0);
}
static void gl_fence(void)
{
GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
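/* Block (up to 5 seconds) until all previously submitted GL commands
 * have completed. */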
int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT,
(GLuint64)(5000000000));
assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED);
glDeleteSync(fence);
}
void pgraph_gl_sync(NV2AState *d)
{
uint32_t pline_offset, pstart_addr, pline_compare;
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset);
if (surface == NULL) {
qemu_event_set(&d->pgraph.sync_complete);
return;
}
/* FIXME: Sanity check surface dimensions */
/* Wait for queued commands to complete */
pgraph_gl_upload_surface_data(d, surface, !tcg_enabled());
gl_fence();
assert(glGetError() == GL_NO_ERROR);
/* Render framebuffer in display context */
glo_set_current(g_nv2a_context_display);
render_display(d, surface);
gl_fence();
assert(glGetError() == GL_NO_ERROR);
/* Switch back to original context */
glo_set_current(g_nv2a_context_render);
qatomic_set(&d->pgraph.sync_pending, false);
qemu_event_set(&d->pgraph.sync_complete);
}
int pgraph_gl_get_framebuffer_surface(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
qemu_mutex_lock(&d->pfifo.lock);
// FIXME: Possible race condition with pgraph, consider lock
uint32_t pline_offset, pstart_addr, pline_compare;
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset);
if (surface == NULL || !surface->color) {
qemu_mutex_unlock(&d->pfifo.lock);
return 0;
}
assert(surface->color);
assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
assert(surface->fmt.gl_format == GL_RGBA
|| surface->fmt.gl_format == GL_RGB
|| surface->fmt.gl_format == GL_BGR
|| surface->fmt.gl_format == GL_BGRA
);
surface->frame_time = pg->frame_time;
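/* Kick the PFIFO/PGRAPH worker and wait for it to render the surface into
 * the display buffer via pgraph_gl_sync(). */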
qemu_event_reset(&d->pgraph.sync_complete);
qatomic_set(&pg->sync_pending, true);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
qemu_event_wait(&d->pgraph.sync_complete);
return r->gl_display_buffer;
}

View File

@@ -0,0 +1,528 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/fast-hash.h"
#include "hw/xbox/nv2a/nv2a_int.h"
#include "debug.h"
#include "renderer.h"
void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
NV2A_DPRINTF("---------PRE CLEAR ------\n");
pg->clearing = true;
GLbitfield gl_mask = 0;
bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
bool write_zeta =
(parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
if (write_zeta) {
GLint gl_clear_stencil;
GLfloat gl_clear_depth;
pgraph_get_clear_depth_stencil_value(pg, &gl_clear_depth,
&gl_clear_stencil);
if (parameter & NV097_CLEAR_SURFACE_Z) {
gl_mask |= GL_DEPTH_BUFFER_BIT;
glDepthMask(GL_TRUE);
glClearDepth(gl_clear_depth);
}
if (parameter & NV097_CLEAR_SURFACE_STENCIL) {
gl_mask |= GL_STENCIL_BUFFER_BIT;
glStencilMask(0xff);
glClearStencil(gl_clear_stencil);
}
}
if (write_color) {
gl_mask |= GL_COLOR_BUFFER_BIT;
glColorMask((parameter & NV097_CLEAR_SURFACE_R)
? GL_TRUE : GL_FALSE,
(parameter & NV097_CLEAR_SURFACE_G)
? GL_TRUE : GL_FALSE,
(parameter & NV097_CLEAR_SURFACE_B)
? GL_TRUE : GL_FALSE,
(parameter & NV097_CLEAR_SURFACE_A)
? GL_TRUE : GL_FALSE);
GLfloat rgba[4];
pgraph_get_clear_color(pg, rgba);
glClearColor(rgba[0], rgba[1], rgba[2], rgba[3]);
}
pgraph_gl_surface_update(d, true, write_color, write_zeta);
/* FIXME: Needs confirmation */
unsigned int xmin =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN);
unsigned int xmax =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX);
unsigned int ymin =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN);
unsigned int ymax =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX);
NV2A_DPRINTF(
"------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n",
parameter, xmin, ymin, xmax, ymax,
d->pgraph.regs_[NV_PGRAPH_COLORCLEARVALUE]);
unsigned int scissor_width = xmax - xmin + 1,
scissor_height = ymax - ymin + 1;
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
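/* Flip Y: PGRAPH clear rectangles are top-down, GL scissor coordinates are
 * bottom-up. */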
ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin,
xmin + scissor_width - 1, ymin + scissor_height - 1);
bool full_clear = !xmin && !ymin &&
scissor_width >= pg->surface_binding_dim.width &&
scissor_height >= pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &xmin, &ymin);
pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
/* FIXME: Respect window clip?!?! */
glEnable(GL_SCISSOR_TEST);
glScissor(xmin, ymin, scissor_width, scissor_height);
/* Dither */
/* FIXME: Maybe also disable it here? Also GL implementation dependent. */
if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_DITHERENABLE) {
glEnable(GL_DITHER);
} else {
glDisable(GL_DITHER);
}
glClear(gl_mask);
glDisable(GL_SCISSOR_TEST);
pgraph_gl_set_surface_dirty(pg, write_color, write_zeta);
if (r->color_binding) {
r->color_binding->cleared = full_clear && write_color;
}
if (r->zeta_binding) {
r->zeta_binding->cleared = full_clear && write_zeta;
}
pg->clearing = false;
}
void pgraph_gl_draw_begin(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", pg->primitive_mode);
uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
bool stencil_test =
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
bool is_nop_draw = !(color_write || depth_test || stencil_test);
pgraph_gl_surface_update(d, true, true, depth_test || stencil_test);
if (is_nop_draw) {
return;
}
assert(r->color_binding || r->zeta_binding);
pgraph_gl_bind_textures(d);
pgraph_gl_bind_shaders(pg);
glColorMask(mask_red, mask_green, mask_blue, mask_alpha);
glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE));
glStencilMask(GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE));
if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
glEnable(GL_BLEND);
uint32_t sfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
NV_PGRAPH_BLEND_SFACTOR);
uint32_t dfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
NV_PGRAPH_BLEND_DFACTOR);
assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
glBlendFunc(pgraph_blend_factor_gl_map[sfactor],
pgraph_blend_factor_gl_map[dfactor]);
uint32_t equation = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
NV_PGRAPH_BLEND_EQN);
assert(equation < ARRAY_SIZE(pgraph_blend_equation_gl_map));
glBlendEquation(pgraph_blend_equation_gl_map[equation]);
uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
float gl_blend_color[4];
pgraph_argb_pack32_to_rgba_float(blend_color, gl_blend_color);
glBlendColor(gl_blend_color[0], gl_blend_color[1], gl_blend_color[2],
gl_blend_color[3]);
} else {
glDisable(GL_BLEND);
}
/* Face culling */
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
& NV_PGRAPH_SETUPRASTER_CULLENABLE) {
uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
NV_PGRAPH_SETUPRASTER_CULLCTRL);
assert(cull_face < ARRAY_SIZE(pgraph_cull_face_gl_map));
glCullFace(pgraph_cull_face_gl_map[cull_face]);
glEnable(GL_CULL_FACE);
} else {
glDisable(GL_CULL_FACE);
}
/* Clipping */
glEnable(GL_CLIP_DISTANCE0);
glEnable(GL_CLIP_DISTANCE1);
/* Front-face select */
glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
& NV_PGRAPH_SETUPRASTER_FRONTFACE
? GL_CCW : GL_CW);
/* Polygon offset */
/* FIXME: GL implementation-specific, maybe do this in VS? */
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) {
glEnable(GL_POLYGON_OFFSET_FILL);
} else {
glDisable(GL_POLYGON_OFFSET_FILL);
}
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) {
glEnable(GL_POLYGON_OFFSET_LINE);
} else {
glDisable(GL_POLYGON_OFFSET_LINE);
}
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) {
glEnable(GL_POLYGON_OFFSET_POINT);
} else {
glDisable(GL_POLYGON_OFFSET_POINT);
}
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
(NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
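/* The zoffset registers hold raw IEEE-754 float bits; reinterpret them
 * here (relying on -fno-strict-aliasing, as is standard in QEMU builds). */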
uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
GLfloat zfactor = *(float*)&zfactor_u32;
uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
GLfloat zbias = *(float*)&zbias_u32;
glPolygonOffset(zfactor, zbias);
}
/* Depth testing */
if (depth_test) {
glEnable(GL_DEPTH_TEST);
uint32_t depth_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
NV_PGRAPH_CONTROL_0_ZFUNC);
assert(depth_func < ARRAY_SIZE(pgraph_depth_func_gl_map));
glDepthFunc(pgraph_depth_func_gl_map[depth_func]);
} else {
glDisable(GL_DEPTH_TEST);
}
if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
glEnable(GL_DEPTH_CLAMP);
} else {
glDisable(GL_DEPTH_CLAMP);
}
if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
NV_PGRAPH_CONTROL_3_SHADEMODE) ==
NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) {
glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
}
if (stencil_test) {
glEnable(GL_STENCIL_TEST);
uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_REF);
uint32_t func_mask = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_gl_map));
assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_gl_map));
glStencilFunc(
pgraph_stencil_func_gl_map[stencil_func],
stencil_ref,
func_mask);
glStencilOp(
pgraph_stencil_op_gl_map[op_fail],
pgraph_stencil_op_gl_map[op_zfail],
pgraph_stencil_op_gl_map[op_zpass]);
} else {
glDisable(GL_STENCIL_TEST);
}
/* Dither */
/* FIXME: GL implementation dependent */
if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
NV_PGRAPH_CONTROL_0_DITHERENABLE) {
glEnable(GL_DITHER);
} else {
glDisable(GL_DITHER);
}
glEnable(GL_PROGRAM_POINT_SIZE);
bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), NV_PGRAPH_ANTIALIASING_ENABLE);
/* Edge Antialiasing */
if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
glEnable(GL_LINE_SMOOTH);
} else {
glDisable(GL_LINE_SMOOTH);
}
if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
glEnable(GL_POLYGON_SMOOTH);
} else {
glDisable(GL_POLYGON_SMOOTH);
}
unsigned int vp_width = pg->surface_binding_dim.width,
vp_height = pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
glViewport(0, 0, vp_width, vp_height);
/* Surface clip */
/* FIXME: Consider moving to PSH w/ window clip */
unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x,
ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y;
unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
ymax = ymin + pg->surface_shape.clip_height - 1;
unsigned int scissor_width = xmax - xmin + 1,
scissor_height = ymax - ymin + 1;
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
pgraph_apply_scaling_factor(pg, &xmin, &ymin);
pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
glEnable(GL_SCISSOR_TEST);
glScissor(xmin, ymin, scissor_width, scissor_height);
/* Visibility testing */
if (pg->zpass_pixel_count_enable) {
r->gl_zpass_pixel_count_query_count++;
r->gl_zpass_pixel_count_queries = (GLuint*)g_realloc(
r->gl_zpass_pixel_count_queries,
sizeof(GLuint) * r->gl_zpass_pixel_count_query_count);
GLuint gl_query;
glGenQueries(1, &gl_query);
r->gl_zpass_pixel_count_queries[
r->gl_zpass_pixel_count_query_count - 1] = gl_query;
glBeginQuery(GL_SAMPLES_PASSED, gl_query);
}
}
void pgraph_gl_draw_end(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
bool stencil_test =
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
bool is_nop_draw = !(color_write || depth_test || stencil_test);
if (is_nop_draw) {
// FIXME: Check PGRAPH register 0x880.
// HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
// check that will raise an exception in the case that a draw should
// modify the color and/or zeta buffer but the target(s) are masked
// off. This check only seems to trigger during fragment
// processing; it is legal to attempt a draw that is entirely
// clipped regardless of 0x880. See xemu#635 for context.
return;
}
pgraph_gl_flush_draw(d);
/* End of visibility testing */
if (pg->zpass_pixel_count_enable) {
nv2a_profile_inc_counter(NV2A_PROF_QUERY);
glEndQuery(GL_SAMPLES_PASSED);
}
pg->draw_time++;
if (r->color_binding && pgraph_color_write_enabled(pg)) {
r->color_binding->draw_time = pg->draw_time;
}
if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
r->zeta_binding->draw_time = pg->draw_time;
}
pgraph_gl_set_surface_dirty(pg, color_write, depth_test || stencil_test);
NV2A_GL_DGROUP_END();
}
void pgraph_gl_flush_draw(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
if (!(r->color_binding || r->zeta_binding)) {
return;
}
assert(r->shader_binding);
if (pg->draw_arrays_length) {
NV2A_GL_DPRINTF(false, "Draw Arrays");
nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);
assert(pg->inline_elements_length == 0);
assert(pg->inline_buffer_length == 0);
assert(pg->inline_array_length == 0);
pgraph_gl_bind_vertex_attributes(d, pg->draw_arrays_min_start,
pg->draw_arrays_max_count - 1,
false, 0,
pg->draw_arrays_max_count - 1);
glMultiDrawArrays(r->shader_binding->gl_primitive_mode,
pg->draw_arrays_start,
pg->draw_arrays_count,
pg->draw_arrays_length);
} else if (pg->inline_elements_length) {
NV2A_GL_DPRINTF(false, "Inline Elements");
nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);
assert(pg->inline_buffer_length == 0);
assert(pg->inline_array_length == 0);
uint32_t min_element = (uint32_t)-1;
uint32_t max_element = 0;
for (int i=0; i < pg->inline_elements_length; i++) {
max_element = MAX(pg->inline_elements[i], max_element);
min_element = MIN(pg->inline_elements[i], min_element);
}
pgraph_gl_bind_vertex_attributes(
d, min_element, max_element, false, 0,
pg->inline_elements[pg->inline_elements_length - 1]);
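/* Cache index buffers by content hash so identical inline element lists
 * reuse the same GL buffer instead of being re-uploaded. */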
VertexKey k;
memset(&k, 0, sizeof(VertexKey));
k.count = pg->inline_elements_length;
k.gl_type = GL_UNSIGNED_INT;
k.gl_normalize = GL_FALSE;
k.stride = sizeof(uint32_t);
uint64_t h = fast_hash((uint8_t*)pg->inline_elements,
pg->inline_elements_length * 4);
LruNode *node = lru_lookup(&r->element_cache, h, &k);
VertexLruNode *found = container_of(node, VertexLruNode, node);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer);
if (!found->initialized) {
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4);
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
pg->inline_elements_length * 4,
pg->inline_elements, GL_STATIC_DRAW);
found->initialized = true;
} else {
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY);
}
glDrawElements(r->shader_binding->gl_primitive_mode,
pg->inline_elements_length, GL_UNSIGNED_INT,
(void *)0);
} else if (pg->inline_buffer_length) {
NV2A_GL_DPRINTF(false, "Inline Buffer");
nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
assert(pg->inline_array_length == 0);
if (pg->compressed_attrs) {
pg->compressed_attrs = 0;
pgraph_gl_bind_shaders(pg);
}
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attr = &pg->vertex_attributes[i];
if (attr->inline_buffer_populated) {
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_buffer[i]);
glBufferData(GL_ARRAY_BUFFER,
pg->inline_buffer_length * sizeof(float) * 4,
attr->inline_buffer, GL_STREAM_DRAW);
glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(i);
attr->inline_buffer_populated = false;
memcpy(attr->inline_value,
attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
sizeof(attr->inline_value));
} else {
glDisableVertexAttribArray(i);
glVertexAttrib4fv(i, attr->inline_value);
}
}
glDrawArrays(r->shader_binding->gl_primitive_mode,
0, pg->inline_buffer_length);
} else if (pg->inline_array_length) {
NV2A_GL_DPRINTF(false, "Inline Array");
nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);
unsigned int index_count = pgraph_gl_bind_inline_array(d);
glDrawArrays(r->shader_binding->gl_primitive_mode,
0, index_count);
} else {
NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END");
NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
}
}

View File

@@ -0,0 +1,12 @@
specific_ss.add([sdl, gloffscreen, files(
'blit.c',
'debug.c',
'display.c',
'draw.c',
'renderer.c',
'reports.c',
'shaders.c',
'surface.c',
'texture.c',
'vertex.c',
)])

View File

@@ -0,0 +1,201 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/pgraph/pgraph.h"
#include "debug.h"
#include "renderer.h"
GloContext *g_nv2a_context_render;
GloContext *g_nv2a_context_display;
static void nv2a_gl_context_init(void)
{
g_nv2a_context_render = glo_context_create();
g_nv2a_context_display = glo_context_create();
}
static void pgraph_gl_init_thread(NV2AState *d)
{
glo_set_current(g_nv2a_context_render);
}
static void pgraph_gl_deinit(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
glo_set_current(g_nv2a_context_render);
pgraph_gl_deinit_surfaces(pg);
pgraph_gl_deinit_shader_cache(pg);
pgraph_gl_deinit_texture_cache(pg);
glo_set_current(NULL);
glo_context_destroy(g_nv2a_context_render);
glo_context_destroy(g_nv2a_context_display);
}
static void pgraph_gl_flip_stall(NV2AState *d)
{
NV2A_GL_DFRAME_TERMINATOR();
glFinish();
}
static void pgraph_gl_flush(NV2AState *d)
{
pgraph_gl_surface_flush(d);
pgraph_gl_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
pgraph_gl_update_entire_memory_buffer(d);
/* FIXME: Flush more? */
qatomic_set(&d->pgraph.flush_pending, false);
qemu_event_set(&d->pgraph.flush_complete);
}
static void pgraph_gl_process_pending(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
if (qatomic_read(&r->downloads_pending) ||
qatomic_read(&r->download_dirty_surfaces_pending) ||
qatomic_read(&d->pgraph.sync_pending) ||
qatomic_read(&d->pgraph.flush_pending) ||
qatomic_read(&r->shader_cache_writeback_pending)) {
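/* Drop pfifo.lock before taking pgraph.lock to keep the lock order
 * consistent and avoid deadlocking against the PGRAPH thread. */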
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
if (qatomic_read(&r->downloads_pending)) {
pgraph_gl_process_pending_downloads(d);
}
if (qatomic_read(&r->download_dirty_surfaces_pending)) {
pgraph_gl_download_dirty_surfaces(d);
}
if (qatomic_read(&d->pgraph.sync_pending)) {
pgraph_gl_sync(d);
}
if (qatomic_read(&d->pgraph.flush_pending)) {
pgraph_gl_flush(d);
}
if (qatomic_read(&r->shader_cache_writeback_pending)) {
pgraph_gl_shader_write_cache_reload_list(&d->pgraph);
}
qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock(&d->pfifo.lock);
}
}
static void pgraph_gl_pre_savevm_trigger(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
qatomic_set(&r->download_dirty_surfaces_pending, true);
qemu_event_reset(&r->dirty_surfaces_download_complete);
}
static void pgraph_gl_pre_savevm_wait(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
qemu_event_wait(&r->dirty_surfaces_download_complete);
}
static void pgraph_gl_pre_shutdown_trigger(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
qatomic_set(&r->shader_cache_writeback_pending, true);
qemu_event_reset(&r->shader_cache_writeback_complete);
}
static void pgraph_gl_pre_shutdown_wait(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
qemu_event_wait(&r->shader_cache_writeback_complete);
}
static void pgraph_gl_init(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
pg->gl_renderer_state = g_malloc(sizeof(PGRAPHGLState));
/* fire up opengl */
glo_set_current(g_nv2a_context_render);
#ifdef DEBUG_NV2A_GL
gl_debug_initialize();
#endif
/* DXT textures */
assert(glo_check_extension("GL_EXT_texture_compression_s3tc"));
/* Internal RGB565 texture format */
assert(glo_check_extension("GL_ARB_ES2_compatibility"));
pgraph_gl_init_surfaces(pg);
pgraph_gl_init_reports(d);
pgraph_gl_init_texture_cache(d);
pgraph_gl_init_vertex_cache(d);
pgraph_gl_init_shader_cache(pg);
glo_set_current(g_nv2a_context_display);
pgraph_gl_init_display_renderer(d);
glo_set_current(NULL);
}
static PGRAPHRenderer pgraph_gl_renderer = {
.type = CONFIG_DISPLAY_RENDERER_OPENGL,
.name = "OpenGL",
.ops = {
.init = pgraph_gl_init,
.early_context_init = nv2a_gl_context_init,
.init_thread = pgraph_gl_init_thread,
.finalize = pgraph_gl_deinit,
.clear_report_value = pgraph_gl_clear_report_value,
.clear_surface = pgraph_gl_clear_surface,
.draw_begin = pgraph_gl_draw_begin,
.draw_end = pgraph_gl_draw_end,
.flip_stall = pgraph_gl_flip_stall,
.flush_draw = pgraph_gl_flush_draw,
.get_report = pgraph_gl_get_report,
.image_blit = pgraph_gl_image_blit,
.pre_savevm_trigger = pgraph_gl_pre_savevm_trigger,
.pre_savevm_wait = pgraph_gl_pre_savevm_wait,
.pre_shutdown_trigger = pgraph_gl_pre_shutdown_trigger,
.pre_shutdown_wait = pgraph_gl_pre_shutdown_wait,
.process_pending = pgraph_gl_process_pending,
.process_pending_reports = pgraph_gl_process_pending_reports,
.surface_update = pgraph_gl_surface_update,
.set_surface_scale_factor = pgraph_gl_set_surface_scale_factor,
.get_surface_scale_factor = pgraph_gl_get_surface_scale_factor,
.get_framebuffer_surface = pgraph_gl_get_framebuffer_surface,
}
};
static void __attribute__((constructor)) register_renderer(void)
{
pgraph_renderer_register(&pgraph_gl_renderer);
}

View File

@@ -0,0 +1,283 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H
#define HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/queue.h"
#include "qemu/lru.h"
#include "hw/hw.h"
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "hw/xbox/nv2a/pgraph/surface.h"
#include "hw/xbox/nv2a/pgraph/texture.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "gloffscreen.h"
#include "constants.h"
typedef struct SurfaceBinding {
QTAILQ_ENTRY(SurfaceBinding) entry;
MemAccessCallback *access_cb;
hwaddr vram_addr;
SurfaceShape shape;
uintptr_t dma_addr;
uintptr_t dma_len;
bool color;
bool swizzle;
unsigned int width;
unsigned int height;
unsigned int pitch;
size_t size;
bool cleared;
int frame_time;
int draw_time;
bool draw_dirty;
bool download_pending;
bool upload_pending;
GLuint gl_buffer;
SurfaceFormatInfo fmt;
} SurfaceBinding;
typedef struct TextureBinding {
unsigned int refcnt;
int draw_time;
uint64_t data_hash;
unsigned int scale;
unsigned int min_filter;
unsigned int mag_filter;
unsigned int addru;
unsigned int addrv;
unsigned int addrp;
uint32_t border_color;
bool border_color_set;
GLenum gl_target;
GLuint gl_texture;
} TextureBinding;
typedef struct ShaderBinding {
GLuint gl_program;
GLenum gl_primitive_mode;
GLint psh_constant_loc[9][2];
GLint alpha_ref_loc;
GLint bump_mat_loc[NV2A_MAX_TEXTURES];
GLint bump_scale_loc[NV2A_MAX_TEXTURES];
GLint bump_offset_loc[NV2A_MAX_TEXTURES];
GLint tex_scale_loc[NV2A_MAX_TEXTURES];
GLint surface_size_loc;
GLint clip_range_loc;
GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
GLint inv_viewport_loc;
GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
GLint ltc1_loc[NV2A_LTC1_COUNT];
GLint fog_color_loc;
GLint fog_param_loc;
GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS];
GLint light_local_position_loc[NV2A_MAX_LIGHTS];
GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
GLint clip_region_loc[8];
GLint material_alpha_loc;
} ShaderBinding;
typedef struct ShaderLruNode {
LruNode node;
bool cached;
void *program;
size_t program_size;
GLenum program_format;
ShaderState state;
ShaderBinding *binding;
QemuThread *save_thread;
} ShaderLruNode;
typedef struct VertexKey {
size_t count;
size_t stride;
hwaddr addr;
GLboolean gl_normalize;
GLuint gl_type;
} VertexKey;
typedef struct VertexLruNode {
LruNode node;
VertexKey key;
bool initialized;
GLuint gl_buffer;
} VertexLruNode;
typedef struct TextureKey {
TextureShape state;
hwaddr texture_vram_offset;
hwaddr texture_length;
hwaddr palette_vram_offset;
hwaddr palette_length;
} TextureKey;
typedef struct TextureLruNode {
LruNode node;
TextureKey key;
TextureBinding *binding;
bool possibly_dirty;
} TextureLruNode;
typedef struct QueryReport {
QSIMPLEQ_ENTRY(QueryReport) entry;
bool clear;
uint32_t parameter;
unsigned int query_count;
GLuint *queries;
} QueryReport;
typedef struct PGRAPHGLState {
GLuint gl_framebuffer;
GLuint gl_display_buffer;
GLint gl_display_buffer_internal_format;
GLsizei gl_display_buffer_width;
GLsizei gl_display_buffer_height;
GLenum gl_display_buffer_format;
GLenum gl_display_buffer_type;
Lru element_cache;
VertexLruNode *element_cache_entries;
GLuint gl_inline_array_buffer;
GLuint gl_memory_buffer;
GLuint gl_vertex_array;
GLuint gl_inline_buffer[NV2A_VERTEXSHADER_ATTRIBUTES];
QTAILQ_HEAD(, SurfaceBinding) surfaces;
SurfaceBinding *color_binding, *zeta_binding;
bool downloads_pending;
QemuEvent downloads_complete;
bool download_dirty_surfaces_pending;
QemuEvent dirty_surfaces_download_complete; // common
TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
Lru texture_cache;
TextureLruNode *texture_cache_entries;
Lru shader_cache;
ShaderLruNode *shader_cache_entries;
ShaderBinding *shader_binding;
QemuMutex shader_cache_lock;
QemuThread shader_disk_thread;
unsigned int zpass_pixel_count_result;
unsigned int gl_zpass_pixel_count_query_count;
GLuint *gl_zpass_pixel_count_queries;
QSIMPLEQ_HEAD(, QueryReport) report_queue;
bool shader_cache_writeback_pending;
QemuEvent shader_cache_writeback_complete;
struct s2t_rndr {
GLuint fbo, vao, vbo, prog;
GLuint tex_loc, surface_size_loc;
} s2t_rndr;
struct disp_rndr {
GLuint fbo, vao, vbo, prog;
GLuint display_size_loc;
GLuint line_offset_loc;
GLuint tex_loc;
GLuint pvideo_tex;
GLint pvideo_enable_loc;
GLint pvideo_tex_loc;
GLint pvideo_in_pos_loc;
GLint pvideo_pos_loc;
GLint pvideo_scale_loc;
GLint pvideo_color_key_enable_loc;
GLint pvideo_color_key_loc;
GLint palette_loc[256];
} disp_rndr;
} PGRAPHGLState;
extern GloContext *g_nv2a_context_render;
extern GloContext *g_nv2a_context_display;
unsigned int pgraph_gl_bind_inline_array(NV2AState *d);
void pgraph_gl_bind_shaders(PGRAPHState *pg);
void pgraph_gl_bind_textures(NV2AState *d);
void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element);
bool pgraph_gl_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape);
GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src);
void pgraph_gl_deinit_shader_cache(PGRAPHState *pg);
void pgraph_gl_deinit_surfaces(PGRAPHState *pg);
void pgraph_gl_deinit_texture_cache(PGRAPHState *pg);
void pgraph_gl_download_dirty_surfaces(NV2AState *d);
void pgraph_gl_clear_report_value(NV2AState *d);
void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter);
void pgraph_gl_draw_begin(NV2AState *d);
void pgraph_gl_draw_end(NV2AState *d);
void pgraph_gl_flush_draw(NV2AState *d);
void pgraph_gl_get_report(NV2AState *d, uint32_t parameter);
void pgraph_gl_image_blit(NV2AState *d);
void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size);
void pgraph_gl_process_pending_reports(NV2AState *d);
void pgraph_gl_surface_flush(NV2AState *d);
void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write);
void pgraph_gl_sync(NV2AState *d);
void pgraph_gl_update_entire_memory_buffer(NV2AState *d);
void pgraph_gl_init_display_renderer(NV2AState *d);
void pgraph_gl_init_reports(NV2AState *d);
void pgraph_gl_init_shader_cache(PGRAPHState *pg);
void pgraph_gl_init_surfaces(PGRAPHState *pg);
void pgraph_gl_init_texture_cache(NV2AState *d);
void pgraph_gl_init_vertex_cache(NV2AState *d);
void pgraph_gl_process_pending_downloads(NV2AState *d);
void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg);
void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit);
void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta);
void pgraph_gl_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface);
SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr);
SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr);
void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *e);
void pgraph_gl_unbind_surface(NV2AState *d, bool color);
void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force);
void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode);
bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode);
void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg);
void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale);
unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d);
int pgraph_gl_get_framebuffer_surface(NV2AState *d);
#endif

View File

@@ -0,0 +1,111 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <hw/xbox/nv2a/nv2a_int.h>
#include "renderer.h"
static void process_pending_report(NV2AState *d, QueryReport *report)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
if (report->clear) {
r->zpass_pixel_count_result = 0;
return;
}
uint8_t type = GET_MASK(report->parameter, NV097_GET_REPORT_TYPE);
assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
/* FIXME: Multisampling affects this (on both OpenGL and the Xbox GPU);
 * not sure if CLEARs also count.
 */
/* FIXME: What about clipping regions etc? */
for (int i = 0; i < report->query_count; i++) {
GLuint gl_query_result = 0;
glGetQueryObjectuiv(report->queries[i], GL_QUERY_RESULT, &gl_query_result);
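/* Undo supersampling: rendering at N x native scale multiplies the number
 * of passed samples by N^2. */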
gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor;
r->zpass_pixel_count_result += gl_query_result;
}
if (report->query_count) {
glDeleteQueries(report->query_count, report->queries);
g_free(report->queries);
}
pgraph_write_zpass_pixel_cnt_report(d, report->parameter, r->zpass_pixel_count_result);
}
void pgraph_gl_process_pending_reports(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
QueryReport *report, *next;
QSIMPLEQ_FOREACH_SAFE(report, &r->report_queue, entry, next) {
process_pending_report(d, report);
QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry);
g_free(report);
}
}
void pgraph_gl_clear_report_value(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
/* FIXME: Does this have a value in parameter? And does this also modify
 * the report memory block?
 */
if (r->gl_zpass_pixel_count_query_count) {
glDeleteQueries(r->gl_zpass_pixel_count_query_count,
r->gl_zpass_pixel_count_queries);
r->gl_zpass_pixel_count_query_count = 0;
}
QueryReport *report = g_malloc(sizeof(QueryReport));
report->clear = true;
QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry);
}
void pgraph_gl_init_reports(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
QSIMPLEQ_INIT(&r->report_queue);
}
void pgraph_gl_get_report(NV2AState *d, uint32_t parameter)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
QueryReport *report = g_malloc(sizeof(QueryReport));
report->clear = false;
report->parameter = parameter;
report->query_count = r->gl_zpass_pixel_count_query_count;
report->queries = r->gl_zpass_pixel_count_queries;
QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry);
r->gl_zpass_pixel_count_query_count = 0;
r->gl_zpass_pixel_count_queries = NULL;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,819 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/fast-hash.h"
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/pgraph/swizzle.h"
#include "hw/xbox/nv2a/pgraph/s3tc.h"
#include "hw/xbox/nv2a/pgraph/texture.h"
#include "debug.h"
#include "renderer.h"
static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
static void texture_binding_destroy(gpointer data);
struct pgraph_texture_possibly_dirty_struct {
hwaddr addr, end;
};
static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque)
{
struct pgraph_texture_possibly_dirty_struct *test =
(struct pgraph_texture_possibly_dirty_struct *)opaque;
struct TextureLruNode *tnode = container_of(node, TextureLruNode, node);
if (tnode->binding == NULL || tnode->possibly_dirty) {
return;
}
uintptr_t k_tex_addr = tnode->key.texture_vram_offset;
uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1;
bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end);
if (tnode->key.palette_length > 0) {
uintptr_t k_pal_addr = tnode->key.palette_vram_offset;
uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1;
overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end);
}
tnode->possibly_dirty |= overlapping;
}
void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d,
hwaddr addr, hwaddr size)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
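/* Dirty tracking is page-granular, so expand [addr, addr + size) to whole
 * target pages. */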
hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1;
addr &= TARGET_PAGE_MASK;
assert(end <= memory_region_size(d->vram));
struct pgraph_texture_possibly_dirty_struct test = {
.addr = addr,
.end = end,
};
lru_visit_active(&r->texture_cache,
mark_textures_possibly_dirty_visitor,
&test);
}
static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size)
{
hwaddr end = TARGET_PAGE_ALIGN(addr + size);
addr &= TARGET_PAGE_MASK;
assert(end < memory_region_size(d->vram));
return memory_region_test_and_clear_dirty(d->vram, addr, end - addr,
DIRTY_MEMORY_NV2A_TEX);
}
// Check if any of the pages spanned by a texture are dirty.
static bool check_texture_possibly_dirty(NV2AState *d,
hwaddr texture_vram_offset,
unsigned int length,
hwaddr palette_vram_offset,
unsigned int palette_length)
{
bool possibly_dirty = false;
if (check_texture_dirty(d, texture_vram_offset, length)) {
possibly_dirty = true;
pgraph_gl_mark_textures_possibly_dirty(d, texture_vram_offset, length);
}
if (palette_length && check_texture_dirty(d, palette_vram_offset,
palette_length)) {
possibly_dirty = true;
pgraph_gl_mark_textures_possibly_dirty(d, palette_vram_offset,
palette_length);
}
return possibly_dirty;
}
static void apply_texture_parameters(TextureBinding *binding,
const BasicColorFormatInfo *f,
unsigned int dimensionality,
unsigned int filter,
unsigned int address,
bool is_bordered,
uint32_t border_color)
{
unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG);
unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU);
unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV);
unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP);
if (f->linear) {
/* Sometimes games try to set mipmap min filters on linear textures;
 * this could indicate a bug... */
switch (min_filter) {
case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD:
case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD:
min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0;
break;
case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD:
case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD:
min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0;
break;
}
}
if (min_filter != binding->min_filter) {
glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER,
pgraph_texture_min_filter_gl_map[min_filter]);
binding->min_filter = min_filter;
}
if (mag_filter != binding->mag_filter) {
glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER,
pgraph_texture_mag_filter_gl_map[mag_filter]);
binding->mag_filter = mag_filter;
}
/* Texture wrapping */
assert(addru < ARRAY_SIZE(pgraph_texture_addr_gl_map));
if (addru != binding->addru) {
glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S,
pgraph_texture_addr_gl_map[addru]);
binding->addru = addru;
}
bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
if (dimensionality > 1) {
if (addrv != binding->addrv) {
assert(addrv < ARRAY_SIZE(pgraph_texture_addr_gl_map));
glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T,
pgraph_texture_addr_gl_map[addrv]);
binding->addrv = addrv;
}
needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
}
if (dimensionality > 2) {
if (addrp != binding->addrp) {
assert(addrp < ARRAY_SIZE(pgraph_texture_addr_gl_map));
glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R,
pgraph_texture_addr_gl_map[addrp]);
binding->addrp = addrp;
}
needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
}
if (!is_bordered && needs_border_color) {
if (!binding->border_color_set || binding->border_color != border_color) {
/* FIXME: Color channels might be wrong order */
GLfloat gl_border_color[4];
pgraph_argb_pack32_to_rgba_float(border_color, gl_border_color);
glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR,
gl_border_color);
binding->border_color_set = true;
binding->border_color = border_color;
}
}
}
void pgraph_gl_bind_textures(NV2AState *d)
{
int i;
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
NV2A_GL_DGROUP_BEGIN("%s", __func__);
for (i=0; i<NV2A_MAX_TEXTURES; i++) {
bool enabled = pgraph_is_texture_enabled(pg, i);
/* FIXME: What happens if texture is disabled but stage is active? */
glActiveTexture(GL_TEXTURE0 + i);
if (!enabled) {
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
glBindTexture(GL_TEXTURE_RECTANGLE, 0);
glBindTexture(GL_TEXTURE_1D, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glBindTexture(GL_TEXTURE_3D, 0);
continue;
}
uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4);
uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4);
uint32_t border_color = pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + i*4);
/* Check for unsupported features */
if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED");
if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED");
if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED");
if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED");
TextureShape state = pgraph_get_texture_shape(pg, i);
hwaddr texture_vram_offset, palette_vram_offset;
size_t length, palette_length;
length = pgraph_get_texture_length(pg, &state);
texture_vram_offset = pgraph_get_texture_phys_addr(pg, i);
palette_vram_offset = pgraph_get_texture_palette_phys_addr_length(pg, i, &palette_length);
assert((texture_vram_offset + length) < memory_region_size(d->vram));
assert((palette_vram_offset + palette_length)
< memory_region_size(d->vram));
bool is_indexed = (state.color_format ==
NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8);
bool possibly_dirty = false;
bool possibly_dirty_checked = false;
SurfaceBinding *surface = pgraph_gl_surface_get(d, texture_vram_offset);
TextureBinding *tbind = r->texture_binding[i];
if (!pg->texture_dirty[i] && tbind) {
bool reusable = false;
if (surface && tbind->draw_time == surface->draw_time) {
reusable = true;
} else if (!surface) {
possibly_dirty = check_texture_possibly_dirty(
d,
texture_vram_offset,
length,
palette_vram_offset,
is_indexed ? palette_length : 0);
possibly_dirty_checked = true;
reusable = !possibly_dirty;
}
if (reusable) {
glBindTexture(r->texture_binding[i]->gl_target,
r->texture_binding[i]->gl_texture);
apply_texture_parameters(r->texture_binding[i],
&kelvin_color_format_info_map[state.color_format],
state.dimensionality,
filter,
address,
state.border,
border_color);
continue;
}
}
/*
* Check active surfaces to see if this texture was a render target
*/
bool surf_to_tex = false;
if (surface != NULL) {
surf_to_tex = pgraph_gl_check_surface_to_texture_compatibility(
surface, &state);
if (surf_to_tex && surface->upload_pending) {
pgraph_gl_upload_surface_data(d, surface, false);
}
}
if (!surf_to_tex) {
// FIXME: Restructure to support rendering surfaces to cubemap faces
// Writeback any surfaces which this texture may index
hwaddr tex_vram_end = texture_vram_offset + length - 1;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
bool overlapping = !(surface->vram_addr >= tex_vram_end
|| texture_vram_offset >= surf_vram_end);
if (overlapping) {
pgraph_gl_surface_download_if_dirty(d, surface);
}
}
}
TextureKey key;
memset(&key, 0, sizeof(TextureKey));
key.state = state;
key.texture_vram_offset = texture_vram_offset;
key.texture_length = length;
if (is_indexed) {
key.palette_vram_offset = palette_vram_offset;
key.palette_length = palette_length;
}
// Search for existing texture binding in cache
uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key));
LruNode *found = lru_lookup(&r->texture_cache,
tex_binding_hash, &key);
TextureLruNode *key_out = container_of(found, TextureLruNode, node);
possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty;
if (!surf_to_tex && !possibly_dirty_checked) {
possibly_dirty |= check_texture_possibly_dirty(
d,
texture_vram_offset,
length,
palette_vram_offset,
is_indexed ? palette_length : 0);
}
// Calculate hash of texture data, if necessary
void *texture_data = (char*)d->vram_ptr + texture_vram_offset;
void *palette_data = (char*)d->vram_ptr + palette_vram_offset;
uint64_t tex_data_hash = 0;
if (!surf_to_tex && possibly_dirty) {
tex_data_hash = fast_hash(texture_data, length);
if (is_indexed) {
tex_data_hash ^= fast_hash(palette_data, palette_length);
}
}
// Free existing binding, if texture data has changed
bool must_destroy = (key_out->binding != NULL)
&& possibly_dirty
&& (key_out->binding->data_hash != tex_data_hash);
if (must_destroy) {
texture_binding_destroy(key_out->binding);
key_out->binding = NULL;
}
if (key_out->binding == NULL) {
// Must create the texture
key_out->binding = generate_texture(state, texture_data, palette_data);
key_out->binding->data_hash = tex_data_hash;
key_out->binding->scale = 1;
} else {
// Saved an upload! Reuse existing texture in graphics memory.
glBindTexture(key_out->binding->gl_target,
key_out->binding->gl_texture);
}
key_out->possibly_dirty = false;
TextureBinding *binding = key_out->binding;
binding->refcnt++;
if (surf_to_tex && binding->draw_time < surface->draw_time) {
trace_nv2a_pgraph_surface_render_to_texture(
surface->vram_addr, surface->width, surface->height);
pgraph_gl_render_surface_to_texture(d, surface, binding, &state, i);
binding->draw_time = surface->draw_time;
if (binding->gl_target == GL_TEXTURE_RECTANGLE) {
binding->scale = pg->surface_scale_factor;
} else {
binding->scale = 1;
}
}
apply_texture_parameters(binding,
&kelvin_color_format_info_map[state.color_format],
state.dimensionality,
filter,
address,
state.border,
border_color);
if (r->texture_binding[i]) {
if (r->texture_binding[i]->gl_target != binding->gl_target) {
glBindTexture(r->texture_binding[i]->gl_target, 0);
}
texture_binding_destroy(r->texture_binding[i]);
}
r->texture_binding[i] = binding;
pg->texture_dirty[i] = false;
}
NV2A_GL_DGROUP_END();
}
static enum S3TC_DECOMPRESS_FORMAT
gl_internal_format_to_s3tc_enum(GLint gl_internal_format)
{
switch (gl_internal_format) {
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
return S3TC_DECOMPRESS_FORMAT_DXT1;
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
return S3TC_DECOMPRESS_FORMAT_DXT3;
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
return S3TC_DECOMPRESS_FORMAT_DXT5;
default:
assert(!"Invalid format");
}
}
static void upload_gl_texture(GLenum gl_target,
const TextureShape s,
const uint8_t *texture_data,
const uint8_t *palette_data)
{
ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD);
unsigned int adjusted_width = s.width;
unsigned int adjusted_height = s.height;
unsigned int adjusted_pitch = s.pitch;
unsigned int adjusted_depth = s.depth;
if (!f.linear && s.border) {
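/* Swizzled textures with borders store 4 border texels on each edge, so
 * the stored dimensions are double the logical size (minimum 16). */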
adjusted_width = MAX(16, adjusted_width * 2);
adjusted_height = MAX(16, adjusted_height * 2);
adjusted_pitch = adjusted_width * (s.pitch / s.width);
adjusted_depth = MAX(16, s.depth * 2);
}
switch(gl_target) {
case GL_TEXTURE_1D:
assert(false);
break;
case GL_TEXTURE_RECTANGLE: {
/* Can't handle strides unaligned to pixels */
assert(s.pitch % f.bytes_per_pixel == 0);
uint8_t *converted = pgraph_convert_texture_data(
s, texture_data, palette_data, adjusted_width, adjusted_height, 1,
adjusted_pitch, 0, NULL);
glPixelStorei(GL_UNPACK_ROW_LENGTH,
converted ? 0 : adjusted_pitch / f.bytes_per_pixel);
glTexImage2D(gl_target, 0, f.gl_internal_format,
adjusted_width, adjusted_height, 0,
f.gl_format, f.gl_type,
converted ? converted : texture_data);
if (converted) {
g_free(converted);
}
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
break;
}
case GL_TEXTURE_2D:
case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
unsigned int width = adjusted_width, height = adjusted_height;
int level;
for (level = 0; level < s.levels; level++) {
width = MAX(width, 1);
height = MAX(height, 1);
if (f.gl_format == 0) { /* compressed */
// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size
unsigned int block_size =
f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ?
8 : 16;
unsigned int physical_width = (width + 3) & ~3,
physical_height = (height + 3) & ~3;
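/* e.g. a 2x2 DXT1 mip still occupies one full 4x4 block (8 bytes). */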
if (physical_width != width) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width);
}
uint8_t *converted = s3tc_decompress_2d(
gl_internal_format_to_s3tc_enum(f.gl_internal_format),
texture_data, physical_width, physical_height);
unsigned int tex_width = width;
unsigned int tex_height = height;
if (s.cubemap && adjusted_width != s.width) {
// FIXME: Consider preserving the border.
// There does not seem to be a way to reference the border
// texels in a cubemap, so they are discarded.
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4);
glPixelStorei(GL_UNPACK_SKIP_ROWS, 4);
tex_width = s.width;
tex_height = s.height;
if (physical_width == width) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
}
}
glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0,
GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, converted);
g_free(converted);
if (physical_width != width) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
if (s.cubemap && adjusted_width != s.width) {
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
if (physical_width == width) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
}
texture_data +=
physical_width / 4 * physical_height / 4 * block_size;
} else {
unsigned int pitch = width * f.bytes_per_pixel;
uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch);
unswizzle_rect(texture_data, width, height,
unswizzled, pitch, f.bytes_per_pixel);
uint8_t *converted = pgraph_convert_texture_data(
s, unswizzled, palette_data, width, height, 1, pitch, 0,
NULL);
uint8_t *pixel_data = converted ? converted : unswizzled;
unsigned int tex_width = width;
unsigned int tex_height = height;
if (s.cubemap && adjusted_width != s.width) {
// FIXME: Consider preserving the border.
// There does not seem to be a way to reference the border
// texels in a cubemap, so they are discarded.
glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
tex_width = s.width;
tex_height = s.height;
pixel_data += 4 * f.bytes_per_pixel + 4 * pitch;
}
glTexImage2D(gl_target, level, f.gl_internal_format, tex_width,
tex_height, 0, f.gl_format, f.gl_type,
pixel_data);
if (s.cubemap && s.border) {
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
if (converted) {
g_free(converted);
}
g_free(unswizzled);
texture_data += width * height * f.bytes_per_pixel;
}
width /= 2;
height /= 2;
}
break;
}
case GL_TEXTURE_3D: {
unsigned int width = adjusted_width;
unsigned int height = adjusted_height;
unsigned int depth = adjusted_depth;
assert(f.linear == false);
int level;
for (level = 0; level < s.levels; level++) {
if (f.gl_format == 0) { /* compressed */
assert(width % 4 == 0 && height % 4 == 0 &&
"Compressed 3D texture virtual size");
width = MAX(width, 4);
height = MAX(height, 4);
depth = MAX(depth, 1);
unsigned int block_size;
if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
block_size = 8;
} else {
block_size = 16;
}
size_t texture_size = width/4 * height/4 * depth * block_size;
uint8_t *converted = s3tc_decompress_3d(
gl_internal_format_to_s3tc_enum(f.gl_internal_format),
texture_data, width, height, depth);
glTexImage3D(gl_target, level, GL_RGBA8,
width, height, depth, 0,
GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
converted);
g_free(converted);
texture_data += texture_size;
} else {
width = MAX(width, 1);
height = MAX(height, 1);
depth = MAX(depth, 1);
unsigned int row_pitch = width * f.bytes_per_pixel;
unsigned int slice_pitch = row_pitch * height;
uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth);
unswizzle_box(texture_data, width, height, depth, unswizzled,
row_pitch, slice_pitch, f.bytes_per_pixel);
uint8_t *converted = pgraph_convert_texture_data(
s, unswizzled, palette_data, width, height, depth,
row_pitch, slice_pitch, NULL);
glTexImage3D(gl_target, level, f.gl_internal_format,
width, height, depth, 0,
f.gl_format, f.gl_type,
converted ? converted : unswizzled);
if (converted) {
g_free(converted);
}
g_free(unswizzled);
texture_data += width * height * depth * f.bytes_per_pixel;
}
width /= 2;
height /= 2;
depth /= 2;
}
break;
}
default:
assert(false);
break;
}
}
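The compressed branch above relies on the block-compression rule referenced in the Microsoft link: each mip level is stored at its physical (block-aligned) size, not its virtual size. A minimal sketch of that computation, assuming 8-byte DXT1 blocks and 16-byte DXT3/5 blocks (the helper name is hypothetical, not part of this code):
static size_t compressed_level_bytes(unsigned int width, unsigned int height,
                                     bool is_dxt1)
{
    unsigned int block_size = is_dxt1 ? 8 : 16;   /* bytes per 4x4 block */
    unsigned int physical_width = (width + 3) & ~3;
    unsigned int physical_height = (height + 3) & ~3;
    /* e.g. a 2x2 DXT1 level still occupies one full 8-byte block */
    return (size_t)(physical_width / 4) * (physical_height / 4) * block_size;
}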
static TextureBinding* generate_texture(const TextureShape s,
const uint8_t *texture_data,
const uint8_t *palette_data)
{
ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
/* Create a new opengl texture */
GLuint gl_texture;
glGenTextures(1, &gl_texture);
GLenum gl_target;
if (s.cubemap) {
assert(f.linear == false);
assert(s.dimensionality == 2);
gl_target = GL_TEXTURE_CUBE_MAP;
} else {
if (f.linear) {
/* linear textures use unnormalised texcoords.
* GL_TEXTURE_RECTANGLE_ARB conveniently also does, but
* does not allow repeat and mirror wrap modes.
* (or mipmapping, but xbox d3d says 'Non swizzled and non
* compressed textures cannot be mip mapped.')
* Not sure if that'll be an issue. */
/* FIXME: GLSL 330 provides us with textureSize()! Use that? */
gl_target = GL_TEXTURE_RECTANGLE;
assert(s.dimensionality == 2);
} else {
switch(s.dimensionality) {
case 1: gl_target = GL_TEXTURE_1D; break;
case 2: gl_target = GL_TEXTURE_2D; break;
case 3: gl_target = GL_TEXTURE_3D; break;
default:
assert(false);
break;
}
}
}
glBindTexture(gl_target, gl_texture);
NV2A_GL_DLABEL(GL_TEXTURE, gl_texture,
"offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, "
"width: %d, height: %d, depth: %d",
texture_data - g_nv2a->vram_ptr,
s.color_format, f.linear ? "" : " (SZ)",
s.dimensionality, s.cubemap ? " (Cubemap)" : "",
s.width, s.height, s.depth);
if (gl_target == GL_TEXTURE_CUBE_MAP) {
ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
unsigned int block_size;
if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
block_size = 8;
} else {
block_size = 16;
}
size_t length = 0;
unsigned int w = s.width;
unsigned int h = s.height;
if (!f.linear && s.border) {
w = MAX(16, w * 2);
h = MAX(16, h * 2);
}
int level;
for (level = 0; level < s.levels; level++) {
if (f.gl_format == 0) {
length += w/4 * h/4 * block_size;
} else {
length += w * h * f.bytes_per_pixel;
}
w /= 2;
h /= 2;
}
length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X,
s, texture_data + 0 * length, palette_data);
upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
s, texture_data + 1 * length, palette_data);
upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
s, texture_data + 2 * length, palette_data);
upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
s, texture_data + 3 * length, palette_data);
upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
s, texture_data + 4 * length, palette_data);
upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
s, texture_data + 5 * length, palette_data);
} else {
upload_gl_texture(gl_target, s, texture_data, palette_data);
}
/* Linear textures don't support mipmapping */
if (!f.linear) {
glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL,
s.min_mipmap_level);
glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL,
s.levels - 1);
}
if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0
|| f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) {
glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA,
(const GLint *)f.gl_swizzle_mask);
}
TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding));
ret->gl_target = gl_target;
ret->gl_texture = gl_texture;
ret->refcnt = 1;
ret->draw_time = 0;
ret->data_hash = 0;
ret->min_filter = 0xFFFFFFFF;
ret->mag_filter = 0xFFFFFFFF;
ret->addru = 0xFFFFFFFF;
ret->addrv = 0xFFFFFFFF;
ret->addrp = 0xFFFFFFFF;
ret->border_color_set = false;
return ret;
}
static void texture_binding_destroy(gpointer data)
{
TextureBinding *binding = (TextureBinding *)data;
assert(binding->refcnt > 0);
binding->refcnt--;
if (binding->refcnt == 0) {
glDeleteTextures(1, &binding->gl_texture);
g_free(binding);
}
}
/* functions for texture LRU cache */
static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key)
{
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
memcpy(&tnode->key, key, sizeof(TextureKey));
tnode->binding = NULL;
tnode->possibly_dirty = false;
}
static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
{
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
if (tnode->binding) {
texture_binding_destroy(tnode->binding);
tnode->binding = NULL;
tnode->possibly_dirty = false;
}
}
static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
return memcmp(&tnode->key, key, sizeof(TextureKey));
}
void pgraph_gl_init_texture_cache(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
const size_t texture_cache_size = 512;
lru_init(&r->texture_cache);
r->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode));
assert(r->texture_cache_entries != NULL);
for (int i = 0; i < texture_cache_size; i++) {
lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node);
}
r->texture_cache.init_node = texture_cache_entry_init;
r->texture_cache.compare_nodes = texture_cache_entry_compare;
r->texture_cache.post_node_evict = texture_cache_entry_post_evict;
}
void pgraph_gl_deinit_texture_cache(PGRAPHState *pg)
{
PGRAPHGLState *r = pg->gl_renderer_state;
// Clear out texture cache
lru_flush(&r->texture_cache);
free(r->texture_cache_entries);
}
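For context, a fetch against this cache might look like the sketch below. It assumes util/lru.h exposes an lru_lookup(lru, hash, key) that calls compare_nodes and, on a miss, recycles the least-recently-used entry (running post_node_evict and init_node); the helper name is hypothetical:
static TextureBinding *texture_cache_get(PGRAPHGLState *r, TextureKey *key,
                                         uint64_t hash)
{
    LruNode *node = lru_lookup(&r->texture_cache, hash, key);
    TextureLruNode *tnode = container_of(node, TextureLruNode, node);
    /* binding == NULL means init_node just ran: caller must upload */
    return tnode->binding;
}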

View File

@ -0,0 +1,283 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_regs.h"
#include <hw/xbox/nv2a/nv2a_int.h>
#include "debug.h"
#include "renderer.h"
static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size,
bool quick)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
hwaddr end = TARGET_PAGE_ALIGN(addr + size);
addr &= TARGET_PAGE_MASK;
assert(end < memory_region_size(d->vram));
static hwaddr last_addr, last_end;
if (quick && (addr >= last_addr) && (end <= last_end)) {
return;
}
last_addr = addr;
last_end = end;
size = end - addr;
if (memory_region_test_and_clear_dirty(d->vram, addr, size,
DIRTY_MEMORY_NV2A)) {
glBufferSubData(GL_ARRAY_BUFFER, addr, size,
d->vram_ptr + addr);
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1);
}
}
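To make the alignment above concrete, a worked example assuming a 4 KiB target page size (TARGET_PAGE_SIZE is target-dependent):
/*
 * addr = 0x12345, size = 0x10:
 *   end   = TARGET_PAGE_ALIGN(0x12355) = 0x13000
 *   addr &= TARGET_PAGE_MASK           = 0x12000
 *   size  = end - addr                 = 0x1000
 * so the dirty test and glBufferSubData cover the whole page
 * [0x12000, 0x13000).
 */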
void pgraph_gl_update_entire_memory_buffer(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr);
}
void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
unsigned int max_element, bool inline_data,
unsigned int inline_stride,
unsigned int provoking_element)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
bool updated_memory_buffer = false;
unsigned int num_elements = max_element - min_element + 1;
if (inline_data) {
NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
__func__, num_elements, inline_stride);
} else {
NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
}
pg->compressed_attrs = 0;
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attr = &pg->vertex_attributes[i];
if (!attr->count) {
glDisableVertexAttribArray(i);
glVertexAttrib4fv(i, attr->inline_value);
continue;
}
NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n",
attr->format, attr->count, attr->stride);
GLint gl_count = attr->count;
GLenum gl_type;
GLboolean gl_normalize;
bool needs_conversion = false;
switch (attr->format) {
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
gl_type = GL_UNSIGNED_BYTE;
gl_normalize = GL_TRUE;
// http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt
gl_count = GL_BGRA;
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
gl_type = GL_UNSIGNED_BYTE;
gl_normalize = GL_TRUE;
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
gl_type = GL_SHORT;
gl_normalize = GL_TRUE;
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
gl_type = GL_FLOAT;
gl_normalize = GL_FALSE;
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
gl_type = GL_SHORT;
gl_normalize = GL_FALSE;
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
/* 3 signed, normalized components packed in 32-bits. (11,11,10) */
gl_type = GL_INT;
assert(attr->count == 1);
needs_conversion = true;
break;
default:
fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
assert(false);
break;
}
nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND);
hwaddr attrib_data_addr;
size_t stride;
if (needs_conversion) {
pg->compressed_attrs |= (1 << i);
}
hwaddr start = 0;
if (inline_data) {
glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer);
attrib_data_addr = attr->inline_array_offset;
stride = inline_stride;
} else {
hwaddr dma_len;
uint8_t *attr_data = (uint8_t *)nv_dma_map(
d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a,
&dma_len);
assert(attr->offset < dma_len);
attrib_data_addr = attr_data + attr->offset - d->vram_ptr;
stride = attr->stride;
start = attrib_data_addr + min_element * stride;
update_memory_buffer(d, start, num_elements * stride,
updated_memory_buffer);
updated_memory_buffer = true;
}
uint32_t provoking_element_index = provoking_element - min_element;
size_t element_size = attr->size * attr->count;
assert(element_size <= sizeof(attr->inline_value));
const uint8_t *last_entry;
if (inline_data) {
last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset;
} else {
last_entry = d->vram_ptr + start;
}
if (!stride) {
// Stride of 0 indicates that only the first element should be
// used.
pgraph_update_inline_value(attr, last_entry);
glDisableVertexAttribArray(i);
glVertexAttrib4fv(i, attr->inline_value);
continue;
}
if (needs_conversion) {
glVertexAttribIPointer(i, gl_count, gl_type, stride,
(void *)attrib_data_addr);
} else {
glVertexAttribPointer(i, gl_count, gl_type, gl_normalize, stride,
(void *)attrib_data_addr);
}
glEnableVertexAttribArray(i);
last_entry += stride * provoking_element_index;
pgraph_update_inline_value(attr, last_entry);
}
NV2A_GL_DGROUP_END();
}
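The CMP case above leaves the packed value untouched and lets the vertex shader unpack it (see decompress_11_11_10 in the GLSL further below). An equivalent CPU-side decode, shown only to document the (11, 11, 10) layout (hypothetical helper, unused by this code):
static void decode_cmp_11_11_10(uint32_t cmp, float out[4])
{
    /* sign-extend each field by shifting it up to bit 31, then back down */
    int32_t x = ((int32_t)(cmp << 21)) >> 21; /* bits  0-10 */
    int32_t y = ((int32_t)(cmp << 10)) >> 21; /* bits 11-21 */
    int32_t z = ((int32_t)cmp) >> 22;         /* bits 22-31 */
    out[0] = x / 1023.0f;
    out[1] = y / 1023.0f;
    out[2] = z / 511.0f;
    out[3] = 1.0f;
}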
unsigned int pgraph_gl_bind_inline_array(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
unsigned int offset = 0;
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attr = &pg->vertex_attributes[i];
if (attr->count == 0) {
continue;
}
/* FIXME: Double check */
offset = ROUND_UP(offset, attr->size);
attr->inline_array_offset = offset;
NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n",
i, attr->size, attr->count);
offset += attr->size * attr->count;
offset = ROUND_UP(offset, attr->size);
}
unsigned int vertex_size = offset;
unsigned int index_count = pg->inline_array_length*4 / vertex_size;
NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer);
glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t),
NULL, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array);
pgraph_gl_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size,
index_count-1);
return index_count;
}
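As a worked example of the packing above, take a hypothetical draw with two enabled attributes, a 3-float position (size 4, count 3) and a 4-ubyte color (size 1, count 4):
/*
 * position: offset = ROUND_UP(0, 4)  = 0;  next = 0 + 4*3 = 12
 * color:    offset = ROUND_UP(12, 1) = 12; next = 12 + 1*4 = 16
 * vertex_size = 16, so an inline_array_length of 24 words (96 bytes)
 * yields index_count = 96 / 16 = 6 vertices.
 */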
static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key)
{
VertexLruNode *vnode = container_of(node, VertexLruNode, node);
memcpy(&vnode->key, key, sizeof(struct VertexKey));
vnode->initialized = false;
}
static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
VertexLruNode *vnode = container_of(node, VertexLruNode, node);
return memcmp(&vnode->key, key, sizeof(VertexKey));
}
void pgraph_gl_init_vertex_cache(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
const size_t element_cache_size = 50*1024;
lru_init(&r->element_cache);
r->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode));
assert(r->element_cache_entries != NULL);
GLuint element_cache_buffers[element_cache_size];
glGenBuffers(element_cache_size, element_cache_buffers);
for (int i = 0; i < element_cache_size; i++) {
r->element_cache_entries[i].gl_buffer = element_cache_buffers[i];
lru_add_free(&r->element_cache, &r->element_cache_entries[i].node);
}
r->element_cache.init_node = vertex_cache_entry_init;
r->element_cache.compare_nodes = vertex_cache_entry_compare;
GLint max_vertex_attributes;
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes);
assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES);
glGenBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer);
glGenBuffers(1, &r->gl_inline_array_buffer);
glGenBuffers(1, &r->gl_memory_buffer);
glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram),
NULL, GL_DYNAMIC_DRAW);
glGenVertexArrays(1, &r->gl_vertex_array);
glBindVertexArray(r->gl_vertex_array);
assert(glGetError() == GL_NO_ERROR);
}

View File

@ -0,0 +1,58 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "common.h"
MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array)
{
const char *flat_s = "flat";
const char *noperspective_s = "noperspective";
const char *qualifier_s = smooth ? noperspective_s : flat_s;
const char *qualifiers[11] = {
noperspective_s, flat_s, qualifier_s, qualifier_s,
qualifier_s, qualifier_s, noperspective_s, noperspective_s,
noperspective_s, noperspective_s, noperspective_s
};
const char *in_out_s = in ? "in" : "out";
const char *float_s = "float";
const char *vec4_s = "vec4";
const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s,
float_s, vec4_s, vec4_s, vec4_s, vec4_s };
const char *prefix_s = prefix ? "v_" : "";
const char *names[11] = {
"vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1",
"vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3",
};
const char *suffix_s = array ? "[]" : "";
for (int i = 0; i < 11; i++) {
if (location) {
mstring_append_fmt(out, "layout(location = %d) ", i);
}
mstring_append_fmt(out, "%s %s %s %s%s%s;\n",
qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s);
}
return out;
}
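For reference, with location=true, smooth=false, in=true, prefix=true, array=true (the configuration used for a geometry shader's inputs), the loop above emits:
/*
 * layout(location = 0) noperspective in float v_vtx_inv_w[];
 * layout(location = 1) flat in float v_vtx_inv_w_flat[];
 * layout(location = 2) flat in vec4 v_vtxD0[];
 * ...
 */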

View File

@ -0,0 +1,38 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_COMMON_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_COMMON_H
#include "qemu/mstring.h"
#include <stdbool.h>
#define GLSL_C(idx) "c[" stringify(idx) "]"
#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]"
#define GLSL_C_MAT4(idx) \
"mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \
GLSL_C(idx+2) ", " GLSL_C(idx+3) ")"
#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n"
MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array);
#endif
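Because QEMU's stringify() expands its argument before stringizing it, these macros bake the numeric register indices straight into the generated source. For example, assuming NV_IGRAPH_XF_XFCTX_CMAT0 were 4 (hypothetical value):
/*
 * GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0))
 * would emit the GLSL preamble line:
 *   #define compositeMat mat4(c[4], c[4+1], c[4+2], c[4+3])
 */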

View File

@ -0,0 +1,228 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "common.h"
#include "geom.h"
MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
enum ShaderPolygonMode polygon_back_mode,
enum ShaderPrimitiveMode primitive_mode,
bool smooth_shading,
bool vulkan)
{
/* FIXME: Missing support for 2-sided-poly mode */
assert(polygon_front_mode == polygon_back_mode);
enum ShaderPolygonMode polygon_mode = polygon_front_mode;
/* POINT mode shouldn't require any special work */
if (polygon_mode == POLY_MODE_POINT) {
return NULL;
}
/* Handle LINE and FILL mode */
const char *layout_in = NULL;
const char *layout_out = NULL;
const char *body = NULL;
switch (primitive_mode) {
case PRIM_TYPE_POINTS: return NULL;
case PRIM_TYPE_LINES: return NULL;
case PRIM_TYPE_LINE_LOOP: return NULL;
case PRIM_TYPE_LINE_STRIP: return NULL;
case PRIM_TYPE_TRIANGLES:
if (polygon_mode == POLY_MODE_FILL) { return NULL; }
assert(polygon_mode == POLY_MODE_LINE);
layout_in = "layout(triangles) in;\n";
layout_out = "layout(line_strip, max_vertices = 4) out;\n";
body = " emit_vertex(0, 0);\n"
" emit_vertex(1, 0);\n"
" emit_vertex(2, 0);\n"
" emit_vertex(0, 0);\n"
" EndPrimitive();\n";
break;
case PRIM_TYPE_TRIANGLE_STRIP:
if (polygon_mode == POLY_MODE_FILL) { return NULL; }
assert(polygon_mode == POLY_MODE_LINE);
layout_in = "layout(triangles) in;\n";
layout_out = "layout(line_strip, max_vertices = 4) out;\n";
/* Imagine a quad built from a triangle strip; the comments indicate
* which corner of the quad each emitted vertex corresponds to. */
body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n"
" if (gl_PrimitiveIDIn == 0) {\n"
" emit_vertex(0, 0);\n" /* bottom right */
" }\n"
" emit_vertex(1, 0);\n" /* top right */
" emit_vertex(2, 0);\n" /* bottom left */
" emit_vertex(0, 0);\n" /* bottom right */
" } else {\n"
" emit_vertex(2, 0);\n" /* bottom left */
" emit_vertex(1, 0);\n" /* top left */
" emit_vertex(0, 0);\n" /* top right */
" }\n"
" EndPrimitive();\n";
break;
case PRIM_TYPE_TRIANGLE_FAN:
if (polygon_mode == POLY_MODE_FILL) { return NULL; }
assert(polygon_mode == POLY_MODE_LINE);
layout_in = "layout(triangles) in;\n";
layout_out = "layout(line_strip, max_vertices = 4) out;\n";
body = " if (gl_PrimitiveIDIn == 0) {\n"
" emit_vertex(0, 0);\n"
" }\n"
" emit_vertex(1, 0);\n"
" emit_vertex(2, 0);\n"
" emit_vertex(0, 0);\n"
" EndPrimitive();\n";
break;
case PRIM_TYPE_QUADS:
layout_in = "layout(lines_adjacency) in;\n";
if (polygon_mode == POLY_MODE_LINE) {
layout_out = "layout(line_strip, max_vertices = 5) out;\n";
body = " emit_vertex(0, 3);\n"
" emit_vertex(1, 3);\n"
" emit_vertex(2, 3);\n"
" emit_vertex(3, 3);\n"
" emit_vertex(0, 3);\n"
" EndPrimitive();\n";
} else if (polygon_mode == POLY_MODE_FILL) {
layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
body = " emit_vertex(3, 3);\n"
" emit_vertex(0, 3);\n"
" emit_vertex(2, 3);\n"
" emit_vertex(1, 3);\n"
" EndPrimitive();\n";
} else {
assert(false);
return NULL;
}
break;
case PRIM_TYPE_QUAD_STRIP:
layout_in = "layout(lines_adjacency) in;\n";
if (polygon_mode == POLY_MODE_LINE) {
layout_out = "layout(line_strip, max_vertices = 5) out;\n";
body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
" if (gl_PrimitiveIDIn == 0) {\n"
" emit_vertex(0, 3);\n"
" }\n"
" emit_vertex(1, 3);\n"
" emit_vertex(3, 3);\n"
" emit_vertex(2, 3);\n"
" emit_vertex(0, 3);\n"
" EndPrimitive();\n";
} else if (polygon_mode == POLY_MODE_FILL) {
layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
" emit_vertex(0, 3);\n"
" emit_vertex(1, 3);\n"
" emit_vertex(2, 3);\n"
" emit_vertex(3, 3);\n"
" EndPrimitive();\n";
} else {
assert(false);
return NULL;
}
break;
case PRIM_TYPE_POLYGON:
if (polygon_mode == POLY_MODE_LINE) {
return NULL;
}
if (polygon_mode == POLY_MODE_FILL) {
if (smooth_shading) {
return NULL;
}
layout_in = "layout(triangles) in;\n";
layout_out = "layout(triangle_strip, max_vertices = 3) out;\n";
body = " emit_vertex(0, 2);\n"
" emit_vertex(1, 2);\n"
" emit_vertex(2, 2);\n"
" EndPrimitive();\n";
} else {
assert(false);
return NULL;
}
break;
default:
assert(false);
return NULL;
}
/* generate a geometry shader to support deprecated primitive types */
assert(layout_in);
assert(layout_out);
assert(body);
MString *s = mstring_new();
mstring_append_fmt(s, "#version %d\n\n", vulkan ? 450 : 400);
mstring_append(s, layout_in);
mstring_append(s, layout_out);
mstring_append(s, "\n");
pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, true, true, true);
pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, false, false, false);
if (smooth_shading) {
mstring_append(s,
"void emit_vertex(int index, int _unused) {\n"
" gl_Position = gl_in[index].gl_Position;\n"
" gl_PointSize = gl_in[index].gl_PointSize;\n"
// " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
// " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
" vtx_inv_w = v_vtx_inv_w[index];\n"
" vtx_inv_w_flat = v_vtx_inv_w[index];\n"
" vtxD0 = v_vtxD0[index];\n"
" vtxD1 = v_vtxD1[index];\n"
" vtxB0 = v_vtxB0[index];\n"
" vtxB1 = v_vtxB1[index];\n"
" vtxFog = v_vtxFog[index];\n"
" vtxT0 = v_vtxT0[index];\n"
" vtxT1 = v_vtxT1[index];\n"
" vtxT2 = v_vtxT2[index];\n"
" vtxT3 = v_vtxT3[index];\n"
" EmitVertex();\n"
"}\n");
} else {
mstring_append(s,
"void emit_vertex(int index, int provoking_index) {\n"
" gl_Position = gl_in[index].gl_Position;\n"
" gl_PointSize = gl_in[index].gl_PointSize;\n"
// " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
// " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
" vtx_inv_w = v_vtx_inv_w[index];\n"
" vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n"
" vtxD0 = v_vtxD0[provoking_index];\n"
" vtxD1 = v_vtxD1[provoking_index];\n"
" vtxB0 = v_vtxB0[provoking_index];\n"
" vtxB1 = v_vtxB1[provoking_index];\n"
" vtxFog = v_vtxFog[index];\n"
" vtxT0 = v_vtxT0[index];\n"
" vtxT1 = v_vtxT1[index];\n"
" vtxT2 = v_vtxT2[index];\n"
" vtxT3 = v_vtxT3[index];\n"
" EmitVertex();\n"
"}\n");
}
mstring_append(s, "\n"
"void main() {\n");
mstring_append(s, body);
mstring_append(s, "}\n");
return s;
}
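A minimal usage sketch for the OpenGL backend; the GL compile boilerplate is generic, and the mstring_unref() release at the end is assumed from the MString API rather than shown in this commit:
MString *geom = pgraph_gen_geom_glsl(POLY_MODE_FILL, POLY_MODE_FILL,
                                     PRIM_TYPE_QUADS, false,
                                     false /* vulkan */);
if (geom) { /* NULL means no geometry stage is needed */
    GLuint shader = glCreateShader(GL_GEOMETRY_SHADER);
    const char *src = mstring_get_str(geom);
    glShaderSource(shader, 1, &src, NULL);
    glCompileShader(shader);
    mstring_unref(geom);
}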

View File

@ -0,0 +1,34 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H
#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
enum ShaderPolygonMode polygon_back_mode,
enum ShaderPrimitiveMode primitive_mode,
bool smooth_shading,
bool vulkan);
#endif

View File

@ -0,0 +1,8 @@
specific_ss.add([files(
'common.c',
'geom.c',
'psh.c',
'vsh.c',
'vsh-ff.c',
'vsh-prog.c',
)])

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2013 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2021 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* Based on:
* Cxbx, PixelShader.cpp
@ -34,9 +34,9 @@
#include <stdbool.h>
#include <stdint.h>
#include "qapi/qmp/qstring.h"
#include "shaders_common.h"
#include "common.h"
#include "hw/xbox/nv2a/debug.h"
#include "hw/xbox/nv2a/pgraph/psh.h"
#include "psh.h"
/*
@ -575,7 +575,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s
return NULL;
case PS_TEXTUREMODES_PROJECT2D:
return state->rect_tex[i] ? sampler2DRect : sampler2D;
return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
case PS_TEXTUREMODES_BUMPENVMAP:
case PS_TEXTUREMODES_BUMPENVMAP_LUM:
@ -584,12 +584,15 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s
fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode);
assert(!"Shadow map support not implemented for this mode");
}
return state->rect_tex[i] ? sampler2DRect : sampler2D;
return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
case PS_TEXTUREMODES_PROJECT3D:
case PS_TEXTUREMODES_DOT_STR_3D:
if (state->tex_x8y24[i] && state->vulkan) {
return "usampler2D";
}
if (state->shadow_map[i]) {
return state->rect_tex[i] ? sampler2DRect : sampler2D;
return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
}
return sampler3D;
@ -634,12 +637,28 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa
return;
}
mstring_append_fmt(vars,
"pT%d.xy *= texScale%d;\n"
"vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n",
i, i, i, i, i);
mstring_append_fmt(vars, "pT%d.xy *= texScale%d;\n", i, i);
const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func];
if (ps->state.rect_tex[i] && ps->state.vulkan) {
if (ps->state.tex_x8y24[i]) {
mstring_append_fmt(
vars,
"uvec4 t%d_depth_raw = texture(texSamp%d, pT%d.xy/pT%d.w);\n", i, i, i, i);
mstring_append_fmt(
vars,
"vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);",
i, i);
} else {
mstring_append_fmt(
vars,
"vec4 t%d_depth = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i,
i, i, i);
}
} else {
mstring_append_fmt(
vars, "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", i, i,
i);
}
// Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float.
if (compare_z) {
@ -685,18 +704,69 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars,
var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1], ps->state.border_inv_real_size[i][2]);
}
static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex)
{
// FIXME: Convolution for 2D textures
// FIXME: Quincunx
assert(ps->state.rect_tex[tex]);
if (ps->state.vulkan) {
mstring_append_fmt(vars,
"vec4 t%d = vec4(0.0);\n"
"for (int i = 0; i < 9; i++) {\n"
" vec2 texCoord = pT%d.xy/pT%d.w + convolution3x3[i];\n"
" t%d += textureLod(texSamp%d, texCoord, 0) * gaussian3x3[i];\n"
"}\n", tex, tex, tex, tex, tex);
} else {
mstring_append_fmt(vars,
"vec4 t%d = vec4(0.0);\n"
"for (int i = 0; i < 9; i++) {\n"
" vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i], 0);\n"
" t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n"
"}\n", tex, tex, tex, tex, tex);
}
}
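The two branches perform equivalent lookups: by GLSL semantics textureProj(s, p.xyw) is texture(s, p.xy / p.w), and the Vulkan path simply spells out the divide and pins LOD 0, since rect samplers (and their projective lookup) are unavailable there:
/*
 * textureProj(texSamp, pT.xyw)        == texture(texSamp, pT.xy / pT.w)
 * textureLod(texSamp, pT.xy/pT.w, 0)  -- same lookup with the projection
 *                                        made explicit and LOD fixed at 0
 */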
static MString* psh_convert(struct PixelShader *ps)
{
int i;
const char *u = ps->state.vulkan ? "" : "uniform "; // FIXME: Remove
MString *preflight = mstring_new();
mstring_append(preflight, ps->state.smooth_shading ?
STRUCT_VERTEX_DATA_IN_SMOOTH :
STRUCT_VERTEX_DATA_IN_FLAT);
mstring_append(preflight, "\n");
mstring_append(preflight, "out vec4 fragColor;\n");
mstring_append(preflight, "\n");
mstring_append(preflight, "uniform vec4 fogColor;\n");
pgraph_get_glsl_vtx_header(preflight, ps->state.vulkan,
ps->state.smooth_shading, true, false, false);
if (ps->state.vulkan) {
mstring_append_fmt(preflight,
"layout(location = 0) out vec4 fragColor;\n"
"layout(binding = %d, std140) uniform PshUniforms {\n", PSH_UBO_BINDING);
} else {
mstring_append_fmt(preflight,
"layout(location = 0) out vec4 fragColor;\n");
}
mstring_append_fmt(preflight, "%sfloat alphaRef;\n"
"%svec4 fogColor;\n"
"%sivec4 clipRegion[8];\n",
u, u, u);
for (int i = 0; i < 4; i++) {
mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n"
"%sfloat bumpScale%d;\n"
"%sfloat bumpOffset%d;\n"
"%sfloat texScale%d;\n",
u, i, u, i, u, i, u, i);
}
for (int i = 0; i < 9; i++) {
for (int j = 0; j < 2; j++) {
mstring_append_fmt(preflight, "%svec4 c%d_%d;\n", u, j, i);
}
}
if (ps->state.vulkan) {
mstring_append(preflight, "};\n");
}
const char *dotmap_funcs[] = {
"dotmap_zero_to_one",
@ -766,22 +836,12 @@ static MString* psh_convert(struct PixelShader *ps)
" vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0),\n"
" vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0),\n"
" vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0));\n"
"vec4 gaussianFilter2DRectProj(sampler2DRect sampler, vec3 texCoord) {\n"
" vec4 sum = vec4(0.0);\n"
" for (int i = 0; i < 9; i++) {\n"
" sum += gaussian3x3[i]*textureProj(sampler,\n"
" texCoord + vec3(convolution3x3[i], 0.0));\n"
" }\n"
" return sum;\n"
"}\n"
);
/* Window Clipping */
MString *clip = mstring_new();
mstring_append(preflight, "uniform ivec4 clipRegion[8];\n");
mstring_append_fmt(clip, "/* Window-clip (%s) */\n",
ps->state.window_clip_exclusive ?
"Exclusive" : "Inclusive");
mstring_append_fmt(clip, "/* Window-clip (%slusive) */\n",
ps->state.window_clip_exclusive ? "Exc" : "Inc");
if (!ps->state.window_clip_exclusive) {
mstring_append(clip, "bool clipContained = false;\n");
}
@ -856,23 +916,27 @@ static MString* psh_convert(struct PixelShader *ps)
if (ps->state.shadow_map[i]) {
psh_append_shadowmap(ps, i, false, vars);
} else {
const char *lookup = "textureProj";
if ((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN)
|| (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX)) {
/* FIXME: Quincunx looks better than Linear and costs less than
* Gaussian, but Gaussian should be plenty fast so use it for
* now.
*/
if (ps->state.rect_tex[i]) {
lookup = "gaussianFilter2DRectProj";
} else {
NV2A_UNIMPLEMENTED("Convolution for 2D textures");
}
}
apply_border_adjustment(ps, vars, i, "pT%d");
mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i);
mstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, pT%d.xyw);\n",
i, lookup, i, i);
if (ps->state.rect_tex[i]) {
if ((ps->state.conv_tex[i] ==
CONVOLUTION_FILTER_GAUSSIAN) ||
(ps->state.conv_tex[i] ==
CONVOLUTION_FILTER_QUINCUNX)) {
apply_convolution_filter(ps, vars, i);
} else {
if (ps->state.vulkan) {
mstring_append_fmt(vars, "vec4 t%d = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n",
i, i, i, i);
} else {
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n",
i, i, i);
}
}
} else {
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n",
i, i, i);
}
}
break;
}
@ -880,6 +944,7 @@ static MString* psh_convert(struct PixelShader *ps)
if (ps->state.shadow_map[i]) {
psh_append_shadowmap(ps, i, true, vars);
} else {
assert(!ps->state.rect_tex[i]);
apply_border_adjustment(ps, vars, i, "pT%d");
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n",
i, i, i);
@ -906,7 +971,6 @@ static MString* psh_convert(struct PixelShader *ps)
}
case PS_TEXTUREMODES_BUMPENVMAP:
assert(i >= 1);
mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
if (ps->state.snorm_tex[ps->input_tex[i]]) {
/* Input color channels already signed (FIXME: May not always want signed textures in this case) */
@ -925,9 +989,6 @@ static MString* psh_convert(struct PixelShader *ps)
break;
case PS_TEXTUREMODES_BUMPENVMAP_LUM:
assert(i >= 1);
mstring_append_fmt(preflight, "uniform float bumpScale%d;\n", i);
mstring_append_fmt(preflight, "uniform float bumpOffset%d;\n", i);
mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
if (ps->state.snorm_tex[ps->input_tex[i]]) {
/* Input color channels already signed (FIXME: May not always want signed textures in this case) */
@ -1060,8 +1121,10 @@ static MString* psh_convert(struct PixelShader *ps)
break;
}
mstring_append_fmt(preflight, "uniform float texScale%d;\n", i);
if (sampler_type != NULL) {
if (ps->state.vulkan) {
mstring_append_fmt(preflight, "layout(binding = %d) ", PSH_TEX_BINDING + i);
}
mstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i);
/* As this means a texture fetch does happen, do alphakill */
@ -1091,7 +1154,6 @@ static MString* psh_convert(struct PixelShader *ps)
}
if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) {
mstring_append_fmt(preflight, "uniform float alphaRef;\n");
if (ps->state.alpha_func == ALPHA_FUNC_NEVER) {
mstring_append(ps->code, "discard;\n");
} else {
@ -1112,10 +1174,6 @@ static MString* psh_convert(struct PixelShader *ps)
}
}
for (i = 0; i < ps->num_const_refs; i++) {
mstring_append_fmt(preflight, "uniform vec4 %s;\n", ps->const_refs[i]);
}
for (i = 0; i < ps->num_var_refs; i++) {
mstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]);
if (strcmp(ps->var_refs[i], "r0") == 0) {
@ -1128,7 +1186,7 @@ static MString* psh_convert(struct PixelShader *ps)
}
MString *final = mstring_new();
mstring_append(final, "#version 330\n\n");
mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 450 : 400);
mstring_append(final, mstring_get_str(preflight));
mstring_append(final, "void main() {\n");
mstring_append(final, mstring_get_str(clip));
@ -1175,7 +1233,7 @@ static void parse_combiner_output(uint32_t value, struct OutputInfo *out)
out->cd_alphablue = flags & 0x40;
}
MString *psh_translate(const PshState state)
MString *pgraph_gen_psh_glsl(const PshState state)
{
int i;
struct PixelShader ps;

View File

@ -0,0 +1,41 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2013 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* Based on:
* Cxbx, PixelShader.cpp
* Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
* Kingofc <kingofc@freenet.de>
* Xeon, XBD3DPixelShader.cpp
* Copyright (c) 2003 _SF_
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H
#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
// FIXME: Move to struct
#define PSH_UBO_BINDING 1
#define PSH_TEX_BINDING 2
MString *pgraph_gen_psh_glsl(const PshState state);
#endif
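These binding numbers have to agree with the descriptor set layout the Vulkan renderer creates; a sketch of matching layout entries (an assumed shape for illustration, not this commit's actual setup code):
VkDescriptorSetLayoutBinding bindings[1 + 4];
bindings[0] = (VkDescriptorSetLayoutBinding){
    .binding = PSH_UBO_BINDING,
    .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    .descriptorCount = 1,
    .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
for (int i = 0; i < 4; i++) {
    bindings[1 + i] = (VkDescriptorSetLayoutBinding){
        .binding = PSH_TEX_BINDING + i,
        .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
        .descriptorCount = 1,
        .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
    };
}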

View File

@ -0,0 +1,497 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "common.h"
#include "vsh-ff.h"
static void append_skinning_code(MString* str, bool mix,
unsigned int count, const char* type,
const char* output, const char* input,
const char* matrix, const char* swizzle);
void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
MString *body, MString *uniforms)
{
int i, j;
const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
/* generate vertex shader mimicking fixed function */
mstring_append(header,
"#define position v0\n"
"#define weight v1\n"
"#define normal v2.xyz\n"
"#define diffuse v3\n"
"#define specular v4\n"
"#define fogCoord v5.x\n"
"#define pointSize v6\n"
"#define backDiffuse v7\n"
"#define backSpecular v8\n"
"#define texture0 v9\n"
"#define texture1 v10\n"
"#define texture2 v11\n"
"#define texture3 v12\n"
"#define reserved1 v13\n"
"#define reserved2 v14\n"
"#define reserved3 v15\n"
"\n");
mstring_append_fmt(uniforms,
"%svec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n"
"%svec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n"
"%svec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n", u, u, u
);
mstring_append(header,
"\n"
GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0))
GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0))
"\n"
GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0))
GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1))
GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2))
GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3))
"\n"
GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0))
GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1))
GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2))
GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3))
"\n"
GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0))
GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1))
GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2))
GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3))
"\n"
GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0))
GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1))
GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2))
GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3))
"\n"
GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0))
GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1))
GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2))
GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3))
"\n"
GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0))
GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1))
GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2))
GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3))
"\n"
GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP))
"\n"
"#define lightAmbientColor(i) "
"ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n"
"#define lightDiffuseColor(i) "
"ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n"
"#define lightSpecularColor(i) "
"ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n"
"\n"
"#define lightSpotFalloff(i) "
"ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n"
"#define lightSpotDirection(i) "
"ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n"
"\n"
"#define lightLocalRange(i) "
"ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n"
"\n"
GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz")
"\n"
);
mstring_append_fmt(uniforms,
"%smat4 invViewport;\n", u);
/* Skinning */
unsigned int count;
bool mix;
switch (state->skinning) {
case SKINNING_OFF:
mix = false; count = 0; break;
case SKINNING_1WEIGHTS:
mix = true; count = 2; break;
case SKINNING_2WEIGHTS2MATRICES:
mix = false; count = 2; break;
case SKINNING_2WEIGHTS:
mix = true; count = 3; break;
case SKINNING_3WEIGHTS3MATRICES:
mix = false; count = 3; break;
case SKINNING_3WEIGHTS:
mix = true; count = 4; break;
case SKINNING_4WEIGHTS4MATRICES:
mix = false; count = 4; break;
default:
assert(false);
break;
}
mstring_append_fmt(body, "/* Skinning mode %d */\n",
state->skinning);
append_skinning_code(body, mix, count, "vec4",
"tPosition", "position",
"modelViewMat", "xyzw");
append_skinning_code(body, mix, count, "vec3",
"tNormal", "vec4(normal, 0.0)",
"invModelViewMat", "xyz");
/* Normalization */
if (state->normalization) {
mstring_append(body, "tNormal = normalize(tNormal);\n");
}
/* Texgen */
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
mstring_append_fmt(body, "/* Texgen for stage %d */\n",
i);
/* Set each component individually */
/* FIXME: could be nicer if some channels share the same texgen */
for (j = 0; j < 4; j++) {
/* TODO: TexGen View Model missing! */
char c = "xyzw"[j];
char cSuffix = "STRQ"[j];
switch (state->texgen[i][j]) {
case TEXGEN_DISABLE:
mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n",
i, c, i, c);
break;
case TEXGEN_EYE_LINEAR:
mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n",
i, c, cSuffix, i);
break;
case TEXGEN_OBJECT_LINEAR:
mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n",
i, c, cSuffix, i);
break;
case TEXGEN_SPHERE_MAP:
assert(j < 2); /* Channels S,T only! */
mstring_append(body, "{\n");
/* FIXME: u, r and m only have to be calculated once */
mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
//FIXME: tNormal before or after normalization? Always normalize?
mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
/* FIXME: This would consume 1 division fewer and *might* be
* faster than length:
* // [z=1/(2*x) => z=1/x*0.5]
* vec3 ro = r + vec3(0.0, 0.0, 1.0);
* float m = inversesqrt(dot(ro,ro))*0.5;
*/
mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n");
mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n",
i, c, c);
mstring_append(body, "}\n");
break;
case TEXGEN_REFLECTION_MAP:
assert(j < 3); /* Channels S,T,R only! */
mstring_append(body, "{\n");
/* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */
mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
mstring_append_fmt(body, " oT%d.%c = r.%c;\n",
i, c, c);
mstring_append(body, "}\n");
break;
case TEXGEN_NORMAL_MAP:
assert(j < 3); /* Channels S,T,R only! */
mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n",
i, c, c);
break;
default:
assert(false);
break;
}
}
}
/* Apply texture matrices */
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
if (state->texture_matrix_enable[i]) {
mstring_append_fmt(body,
"oT%d = oT%d * texMat%d;\n",
i, i, i);
}
}
/* Lighting */
if (state->lighting) {
//FIXME: Do 2 passes if we want 2 sided-lighting?
static char alpha_source_diffuse[] = "diffuse.a";
static char alpha_source_specular[] = "specular.a";
static char alpha_source_material[] = "material_alpha";
const char *alpha_source = alpha_source_diffuse;
if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) {
mstring_append_fmt(uniforms, "%sfloat material_alpha;\n", u);
alpha_source = alpha_source_material;
} else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) {
alpha_source = alpha_source_specular;
}
if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) {
mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source);
} else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) {
mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source);
} else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) {
mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source);
}
mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n");
if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) {
mstring_append(body, "oD0.rgb += sceneAmbientColor;\n");
} else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) {
mstring_append(body, "oD0.rgb += diffuse.rgb;\n");
} else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) {
mstring_append(body, "oD0.rgb += specular.rgb;\n");
}
mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n");
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
if (state->light[i] == LIGHT_OFF) {
continue;
}
/* FIXME: It seems we only need to handle the surface colors here when
* they are not part of the material [i.e. vertex colors]. If they come
* from the material, the CPU will have premultiplied the light
* colors.
*/
mstring_append_fmt(body, "/* Light %d */ {\n", i);
if (state->light[i] == LIGHT_LOCAL
|| state->light[i] == LIGHT_SPOT) {
mstring_append_fmt(uniforms,
"%svec3 lightLocalPosition%d;\n"
"%svec3 lightLocalAttenuation%d;\n",
u, i, u, i);
mstring_append_fmt(body,
" vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n"
" float d = length(VP);\n"
//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights?
" VP = normalize(VP);\n"
" float attenuation = 1.0 / (lightLocalAttenuation%d.x\n"
" + lightLocalAttenuation%d.y * d\n"
" + lightLocalAttenuation%d.z * d * d);\n"
" vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */
" float nDotVP = max(0.0, dot(tNormal, VP));\n"
" float nDotHV = max(0.0, dot(tNormal, halfVector));\n",
i, i, i, i);
}
switch(state->light[i]) {
case LIGHT_INFINITE:
/* lightLocalRange will be 1e+30 here */
mstring_append_fmt(uniforms,
"%svec3 lightInfiniteHalfVector%d;\n"
"%svec3 lightInfiniteDirection%d;\n",
u, i, u, i);
mstring_append_fmt(body,
" float attenuation = 1.0;\n"
" float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n"
" float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n",
i, i);
/* FIXME: Do specular */
/* FIXME: tBackDiffuse */
break;
case LIGHT_LOCAL:
/* Everything done already */
break;
case LIGHT_SPOT:
/* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */
mstring_append_fmt(body,
" vec4 spotDir = lightSpotDirection(%d);\n"
" float invScale = 1/length(spotDir.xyz);\n"
" float cosHalfPhi = -invScale*spotDir.w;\n"
" float cosHalfTheta = invScale + cosHalfPhi;\n"
" float spotDirDotVP = dot(spotDir.xyz, VP);\n"
" float rho = invScale*spotDirDotVP;\n"
" if (rho > cosHalfTheta) {\n"
" } else if (rho <= cosHalfPhi) {\n"
" attenuation = 0.0;\n"
" } else {\n"
" attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */
" }\n",
i);
break;
default:
assert(false);
break;
}
mstring_append_fmt(body,
" float pf;\n"
" if (nDotVP == 0.0) {\n"
" pf = 0.0;\n"
" } else {\n"
" pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n"
" }\n"
" vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n"
" vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n"
" vec3 lightSpecular = lightSpecularColor(%d) * pf;\n",
i, i, i);
mstring_append(body,
" oD0.xyz += lightAmbient;\n");
switch (state->diffuse_src) {
case MATERIAL_COLOR_SRC_MATERIAL:
mstring_append(body,
" oD0.xyz += lightDiffuse;\n");
break;
case MATERIAL_COLOR_SRC_DIFFUSE:
mstring_append(body,
" oD0.xyz += diffuse.xyz * lightDiffuse;\n");
break;
case MATERIAL_COLOR_SRC_SPECULAR:
mstring_append(body,
" oD0.xyz += specular.xyz * lightDiffuse;\n");
break;
}
mstring_append(body,
" oD1.xyz += specular.xyz * lightSpecular;\n");
mstring_append(body, "}\n");
}
} else {
mstring_append(body, " oD0 = diffuse;\n");
mstring_append(body, " oD1 = specular;\n");
}
mstring_append(body, " oB0 = backDiffuse;\n");
mstring_append(body, " oB1 = backSpecular;\n");
/* Fog */
if (state->fog_enable) {
/* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */
switch(state->foggen) {
case FOGGEN_SPEC_ALPHA:
/* FIXME: Do we have to clamp here? */
mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n");
break;
case FOGGEN_RADIAL:
mstring_append(body, " float fogDistance = length(tPosition.xyz);\n");
break;
case FOGGEN_PLANAR:
case FOGGEN_ABS_PLANAR:
mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n");
if (state->foggen == FOGGEN_ABS_PLANAR) {
mstring_append(body, " fogDistance = abs(fogDistance);\n");
}
break;
case FOGGEN_FOG_X:
mstring_append(body, " float fogDistance = fogCoord;\n");
break;
default:
assert(false);
break;
}
}
/* If skinning is off the composite matrix already includes the MV matrix */
if (state->skinning == SKINNING_OFF) {
mstring_append(body, " tPosition = position;\n");
}
mstring_append(body,
" oPos = invViewport * (tPosition * compositeMat);\n"
);
if (state->vulkan) {
mstring_append(body, " oPos.y *= -1;\n");
} else {
mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n");
}
/* FIXME: Testing */
if (state->point_params_enable) {
mstring_append_fmt(
body,
" float d_e = length(position * modelViewMat0);\n"
" oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n",
state->point_params[0], state->point_params[1], state->point_params[2],
state->point_params[6]);
mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n",
state->point_params[3], state->point_params[7],
state->surface_scale_factor);
} else {
mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size,
state->surface_scale_factor);
}
mstring_append(body,
" if (oPos.w == 0.0 || isinf(oPos.w)) {\n"
" vtx_inv_w = 1.0;\n"
" } else {\n"
" vtx_inv_w = 1.0 / oPos.w;\n"
" }\n"
" vtx_inv_w_flat = vtx_inv_w;\n");
}
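The point-parameters branch above mirrors the D3D distance-attenuation model; evaluated on the CPU it reads (p[] = state->point_params, indexed as in the format strings):
/*
 * d_e  = length(position * modelViewMat0)               eye-space distance
 * size = 1/sqrt(p[0] + p[1]*d_e + p[2]*d_e*d_e) + p[6]
 * size = min(size*p[3] + p[7], 64.0) * surface_scale_factor
 */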
static void append_skinning_code(MString* str, bool mix,
unsigned int count, const char* type,
const char* output, const char* input,
const char* matrix, const char* swizzle)
{
if (count == 0) {
mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n",
type, output, input, matrix, swizzle);
} else {
mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type);
if (mix) {
/* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */
mstring_append(str, "{\n"
" float weight_i;\n"
" float weight_n = 1.0;\n");
int i;
for (i = 0; i < count; i++) {
if (i < (count - 1)) {
char c = "xyzw"[i];
mstring_append_fmt(str, " weight_i = weight.%c;\n"
" weight_n -= weight_i;\n",
c);
} else {
mstring_append(str, " weight_i = weight_n;\n");
}
mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n",
output, input, matrix, i, swizzle);
}
mstring_append(str, "}\n");
} else {
/* Individual weights */
int i;
for (i = 0; i < count; i++) {
char c = "xyzw"[i];
mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n",
output, input, matrix, i, swizzle, c);
}
}
}
}
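As an example of the mixed-weight path, SKINNING_2WEIGHTS (mix=true, count=3) generates the GL_WEIGHT_SUM_UNITY_ARB-style expansion below, where the final weight is derived so all weights sum to one:
/*
 * vec4 tPosition = vec4(0.0);
 * {
 *  float weight_i;
 *  float weight_n = 1.0;
 *  weight_i = weight.x;
 *  weight_n -= weight_i;
 *  tPosition += (position * modelViewMat0).xyzw * weight_i;
 *  weight_i = weight.y;
 *  weight_n -= weight_i;
 *  tPosition += (position * modelViewMat1).xyzw * weight_i;
 *  weight_i = weight_n;
 *  tPosition += (position * modelViewMat2).xyzw * weight_i;
 * }
 */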

View File

@ -0,0 +1,31 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H
#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
MString *body, MString *uniforms);
#endif

View File

@ -1,5 +1,5 @@
/*
* QEMU Geforce NV2A vertex shader translation
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2014 Jannik Vogel
* Copyright (c) 2012 espes
@ -32,8 +32,9 @@
#include <stdbool.h>
#include <assert.h>
#include "shaders_common.h"
#include "vsh.h"
#include "hw/xbox/nv2a/pgraph/vsh.h"
#include "common.h"
#include "vsh-prog.h"
#define VSH_D3DSCM_CORRECTION 96
@ -794,10 +795,11 @@ static const char* vsh_header =
" return t;\n"
"}\n";
void vsh_translate(uint16_t version,
void pgraph_gen_vsh_prog_glsl(uint16_t version,
const uint32_t *tokens,
unsigned int length,
bool z_perspective,
bool vulkan,
MString *header, MString *body)
{
@ -843,14 +845,30 @@ void vsh_translate(uint16_t version,
* TODO: the pixel-center co-ordinate differences should handled
*/
" oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n"
" oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;\n"
);
);
if (vulkan) {
mstring_append(body,
" oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n");
} else {
mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) "
"/ surfaceSize.y;\n");
}
if (z_perspective) {
mstring_append(body, " oPos.z = oPos.w;\n");
}
mstring_append(body,
" if (clipRange.y != clipRange.x) {\n");
if (vulkan) {
mstring_append(body, " oPos.z /= clipRange.y;\n");
} else {
mstring_append(body,
" oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y "
"- clipRange.x)) - 1;\n");
}
mstring_append(body,
" if (clipRange.y != clipRange.x) {\n"
" oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y - clipRange.x)) - 1;\n"
" }\n"
/* Correct for the perspective divide */

View File

@ -0,0 +1,35 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2014 Jannik Vogel
* Copyright (c) 2012 espes
*
* Based on:
* Cxbx, VertexShader.cpp
* Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
* Kingofc <kingofc@freenet.de>
* Dxbx, uPushBuffer.pas
* Copyright (c) 2007 Shadow_tj, PatrickvL
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H

#include <stdbool.h>
#include <stdint.h>
#include "qemu/mstring.h"
void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens,
unsigned int length, bool z_perspective,
bool vulkan, MString *header, MString *body);
#endif

View File

@ -0,0 +1,274 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "common.h"
#include "vsh.h"
#include "vsh-ff.h"
#include "vsh-prog.h"
#include <stdbool.h>
MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs)
{
int i;
MString *output = mstring_new();
mstring_append_fmt(output, "#version %d\n\n", state->vulkan ? 450 : 400);
MString *header = mstring_from_str("");
MString *uniforms = mstring_from_str("");
const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
mstring_append_fmt(uniforms,
"%svec4 clipRange;\n"
"%svec2 surfaceSize;\n"
"%svec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n"
"%svec2 fogParam;\n",
u, u, u, u
);
mstring_append(header,
GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG))
GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT))
GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT))
GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT))
GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT))
"\n"
"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n"
"\n"
"vec4 decompress_11_11_10(int cmp) {\n"
" float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n"
" float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n"
" float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n"
" return vec4(x, y, z, 1);\n"
"}\n");
pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading,
false, prefix_outputs, false);
if (prefix_outputs) {
mstring_append(header,
"#define vtx_inv_w v_vtx_inv_w\n"
"#define vtx_inv_w_flat v_vtx_inv_w_flat\n"
"#define vtxD0 v_vtxD0\n"
"#define vtxD1 v_vtxD1\n"
"#define vtxB0 v_vtxB0\n"
"#define vtxB1 v_vtxB1\n"
"#define vtxFog v_vtxFog\n"
"#define vtxT0 v_vtxT0\n"
"#define vtxT1 v_vtxT1\n"
"#define vtxT2 v_vtxT2\n"
"#define vtxT3 v_vtxT3\n"
);
}
mstring_append(header, "\n");
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
bool is_uniform = state->uniform_attrs & (1 << i);
bool is_compressed = state->compressed_attrs & (1 << i);
assert(!(is_uniform && is_compressed));
if (is_uniform) {
mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i);
} else {
if (is_compressed) {
mstring_append_fmt(header,
"layout(location = %d) in int v%d_cmp;\n", i, i);
} else if (state->swizzle_attrs & (1 << i)) {
mstring_append_fmt(header, "layout(location = %d) in vec4 v%d_sw;\n",
i, i);
} else {
mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n",
i, i);
}
}
}
mstring_append(header, "\n");
MString *body = mstring_from_str("void main() {\n");
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
if (state->compressed_attrs & (1 << i)) {
mstring_append_fmt(
body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i);
}
if (state->swizzle_attrs & (1 << i)) {
mstring_append_fmt(body, "vec4 v%d = v%d_sw.bgra;\n", i, i);
}
}
if (state->fixed_function) {
pgraph_gen_vsh_ff_glsl(state, header, body, uniforms);
} else if (state->vertex_program) {
pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS,
(uint32_t *)state->program_data,
state->program_length, state->z_perspective,
state->vulkan, header, body);
} else {
assert(false);
}
/* Fog */
if (state->fog_enable) {
if (state->vertex_program) {
/* FIXME: Does foggen do something here? Let's do some tracking..
*
* "RollerCoaster Tycoon" has
* state->vertex_program = true; state->foggen == FOGGEN_PLANAR
* but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z
*/
mstring_append(body, " float fogDistance = oFog.x;\n");
}
/* FIXME: Do this per pixel? */
switch (state->fog_mode) {
case FOG_MODE_LINEAR:
case FOG_MODE_LINEAR_ABS:
/* f = (end - d) / (end - start)
* fogParam.y = -1 / (end - start)
* fogParam.x = 1 - end * fogParam.y;
*/
mstring_append(body,
" if (isinf(fogDistance)) {\n"
" fogDistance = 0.0;\n"
" }\n"
);
mstring_append(body, " float fogFactor = fogParam.x + fogDistance * fogParam.y;\n");
mstring_append(body, " fogFactor -= 1.0;\n");
break;
case FOG_MODE_EXP:
mstring_append(body,
" if (isinf(fogDistance)) {\n"
" fogDistance = 0.0;\n"
" }\n"
);
/* fallthru */
case FOG_MODE_EXP_ABS:
/* f = 1 / (e^(d * density))
* fogParam.y = -density / (2 * ln(256))
* fogParam.x = 1.5
*/
mstring_append(body, " float fogFactor = fogParam.x + exp2(fogDistance * fogParam.y * 16.0);\n");
mstring_append(body, " fogFactor -= 1.5;\n");
break;
case FOG_MODE_EXP2:
case FOG_MODE_EXP2_ABS:
/* f = 1 / (e^((d * density)^2))
* fogParam.y = -density / (2 * sqrt(ln(256)))
* fogParam.x = 1.5
*/
mstring_append(body, " float fogFactor = fogParam.x + exp2(-fogDistance * fogDistance * fogParam.y * fogParam.y * 32.0);\n");
mstring_append(body, " fogFactor -= 1.5;\n");
break;
default:
assert(false);
break;
}
/* Calculate absolute for the modes which need it */
switch (state->fog_mode) {
case FOG_MODE_LINEAR_ABS:
case FOG_MODE_EXP_ABS:
case FOG_MODE_EXP2_ABS:
mstring_append(body, " fogFactor = abs(fogFactor);\n");
break;
default:
break;
}
mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n");
} else {
/* FIXME: Is the fog still calculated / passed somehow?!
*/
mstring_append(body, " oFog.xyzw = vec4(1.0);\n");
}
/* Set outputs */
const char *shade_model_mult = state->smooth_shading ? "vtx_inv_w" : "vtx_inv_w_flat";
mstring_append_fmt(body, "\n"
" vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n"
" vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n"
" vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n"
" vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n"
" vtxFog = oFog.x * vtx_inv_w;\n"
" vtxT0 = oT0 * vtx_inv_w;\n"
" vtxT1 = oT1 * vtx_inv_w;\n"
" vtxT2 = oT2 * vtx_inv_w;\n"
" vtxT3 = oT3 * vtx_inv_w;\n"
" gl_Position = oPos;\n"
" gl_PointSize = oPts.x;\n"
// " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near
// " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far
"\n"
"}\n",
shade_model_mult,
shade_model_mult,
shade_model_mult,
shade_model_mult);
/* Return combined header + source */
if (state->vulkan) {
mstring_append_fmt(
output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n",
VSH_UBO_BINDING, mstring_get_str(uniforms));
// FIXME: Only needed for vk, for gl we use glVertexAttrib
mstring_append_fmt(output,
"layout(push_constant) uniform PushConstants {\n"
"vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n"
"};\n\n");
} else {
mstring_append(
output, mstring_get_str(uniforms));
}
mstring_append(output, mstring_get_str(header));
mstring_unref(uniforms);
mstring_unref(header);
mstring_append(output, mstring_get_str(body));
mstring_unref(body);
return output;
}
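The only structural difference between the two backends' generated code is the uniform interface: for Vulkan the uniforms are wrapped in a std140 UBO bound at VSH_UBO_BINDING and the inline vertex-attribute values travel as push constants, while for GL plain uniform declarations are emitted (the u prefix above). Roughly, the emitted preambles look like this (illustrative, assuming NV2A_VERTEXSHADER_CONSTANTS expands to 192 and NV2A_VERTEXSHADER_ATTRIBUTES to 16):

/* Vulkan (VSH_UBO_BINDING == 0):
 *   layout(binding = 0, std140) uniform VshUniforms {
 *       vec4 clipRange;
 *       vec2 surfaceSize;
 *       vec4 c[192];
 *       vec2 fogParam;
 *   };
 *   layout(push_constant) uniform PushConstants {
 *       vec4 inlineValue[16];
 *   };
 *
 * OpenGL:
 *   uniform vec4 clipRange;
 *   uniform vec2 surfaceSize;
 *   uniform vec4 c[192];
 *   uniform vec2 fogParam;
 */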

View File

@ -0,0 +1,33 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H
#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
// FIXME: Move to struct
#define VSH_UBO_BINDING 0
MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs);
#endif

View File

@ -0,0 +1,19 @@
specific_ss.add(files(
'pgraph.c',
'profile.c',
'rdi.c',
's3tc.c',
'shaders.c',
'swizzle.c',
'texture.c',
'vertex.c',
))
if have_renderdoc
specific_ss.add(files('debug_renderdoc.c'))
endif
subdir('thirdparty')
subdir('null')
subdir('gl')
subdir('glsl')
subdir('vk')
specific_ss.add(nv2a_vsh_cpu)

View File

@ -0,0 +1,3 @@
specific_ss.add([sdl, files(
'renderer.c',
)])

View File

@ -0,0 +1,146 @@
/*
* Geforce NV2A PGRAPH Null Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "hw/hw.h"
#include "hw/xbox/nv2a/nv2a_int.h"
static void pgraph_null_sync(NV2AState *d)
{
qatomic_set(&d->pgraph.sync_pending, false);
qemu_event_set(&d->pgraph.sync_complete);
}
static void pgraph_null_flush(NV2AState *d)
{
qatomic_set(&d->pgraph.flush_pending, false);
qemu_event_set(&d->pgraph.flush_complete);
}
static void pgraph_null_process_pending(NV2AState *d)
{
if (qatomic_read(&d->pgraph.sync_pending) ||
    qatomic_read(&d->pgraph.flush_pending)) {
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
if (qatomic_read(&d->pgraph.sync_pending)) {
pgraph_null_sync(d);
}
if (qatomic_read(&d->pgraph.flush_pending)) {
pgraph_null_flush(d);
}
qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock(&d->pfifo.lock);
}
}
static void pgraph_null_clear_report_value(NV2AState *d)
{
}
static void pgraph_null_clear_surface(NV2AState *d, uint32_t parameter)
{
}
static void pgraph_null_draw_begin(NV2AState *d)
{
}
static void pgraph_null_draw_end(NV2AState *d)
{
}
static void pgraph_null_flip_stall(NV2AState *d)
{
}
static void pgraph_null_flush_draw(NV2AState *d)
{
}
static void pgraph_null_get_report(NV2AState *d, uint32_t parameter)
{
pgraph_write_zpass_pixel_cnt_report(d, parameter, 0);
}
static void pgraph_null_image_blit(NV2AState *d)
{
}
static void pgraph_null_pre_savevm_trigger(NV2AState *d)
{
}
static void pgraph_null_pre_savevm_wait(NV2AState *d)
{
}
static void pgraph_null_pre_shutdown_trigger(NV2AState *d)
{
}
static void pgraph_null_pre_shutdown_wait(NV2AState *d)
{
}
static void pgraph_null_process_pending_reports(NV2AState *d)
{
}
static void pgraph_null_surface_update(NV2AState *d, bool upload,
bool color_write, bool zeta_write)
{
}
static void pgraph_null_init(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
pg->null_renderer_state = NULL;
}
static PGRAPHRenderer pgraph_null_renderer = {
.type = CONFIG_DISPLAY_RENDERER_NULL,
.name = "Null",
.ops = {
.init = pgraph_null_init,
.clear_report_value = pgraph_null_clear_report_value,
.clear_surface = pgraph_null_clear_surface,
.draw_begin = pgraph_null_draw_begin,
.draw_end = pgraph_null_draw_end,
.flip_stall = pgraph_null_flip_stall,
.flush_draw = pgraph_null_flush_draw,
.get_report = pgraph_null_get_report,
.image_blit = pgraph_null_image_blit,
.pre_savevm_trigger = pgraph_null_pre_savevm_trigger,
.pre_savevm_wait = pgraph_null_pre_savevm_wait,
.pre_shutdown_trigger = pgraph_null_pre_shutdown_trigger,
.pre_shutdown_wait = pgraph_null_pre_shutdown_wait,
.process_pending = pgraph_null_process_pending,
.process_pending_reports = pgraph_null_process_pending_reports,
.surface_update = pgraph_null_surface_update,
}
};
static void __attribute__((constructor)) register_renderer(void)
{
pgraph_renderer_register(&pgraph_null_renderer);
}
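Backends self-register from constructors before main() runs, so the core never links against a specific renderer; the active one is selected at init time by its CONFIG_DISPLAY_RENDERER type. Note also the lock order in pgraph_null_process_pending() above: the pfifo lock is dropped before the pgraph lock is taken, an ordering every backend has to mirror to avoid deadlocking the pfifo thread. A hypothetical skeleton for a further backend (all names illustrative):

/* Hypothetical additional backend -- sketch only. */
static void pgraph_foo_init(NV2AState *d)
{
    /* allocate backend state and hang it off d->pgraph */
}

static PGRAPHRenderer pgraph_foo_renderer = {
    .type = CONFIG_DISPLAY_RENDERER_NULL, /* a real backend adds an enum value */
    .name = "Foo",
    .ops = {
        .init = pgraph_foo_init,
        /* sync/flush handshake as in pgraph_null_process_pending() */
    },
};

static void __attribute__((constructor)) register_foo_renderer(void)
{
    pgraph_renderer_register(&pgraph_foo_renderer);
}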

2874 hw/xbox/nv2a/pgraph/pgraph.c Normal file

File diff suppressed because it is too large

View File

@ -0,0 +1,383 @@
/*
* QEMU Geforce NV2A PGRAPH internal definitions
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_H
#define HW_XBOX_NV2A_PGRAPH_H
#include "xemu-config.h"
#include "qemu/osdep.h"
#include "qemu/bitmap.h"
#include "qemu/units.h"
#include "qemu/thread.h"
#include "cpu.h"
#include "shaders.h"
#include "surface.h"
#include "util.h"
typedef struct NV2AState NV2AState;
typedef struct PGRAPHNullState PGRAPHNullState;
typedef struct PGRAPHGLState PGRAPHGLState;
typedef struct PGRAPHVkState PGRAPHVkState;
typedef struct VertexAttribute {
bool dma_select;
hwaddr offset;
/* inline arrays are packed in order?
* Need to pass the offset to converted attributes */
unsigned int inline_array_offset;
float inline_value[4];
unsigned int format;
unsigned int size; /* size of the data type */
unsigned int count; /* number of components */
uint32_t stride;
bool needs_conversion;
float *inline_buffer;
bool inline_buffer_populated;
} VertexAttribute;
typedef struct Surface {
bool draw_dirty;
bool buffer_dirty;
bool write_enabled_cache;
unsigned int pitch;
hwaddr offset;
} Surface;
typedef struct KelvinState {
hwaddr object_instance;
} KelvinState;
typedef struct ContextSurfaces2DState {
hwaddr object_instance;
hwaddr dma_image_source;
hwaddr dma_image_dest;
unsigned int color_format;
unsigned int source_pitch, dest_pitch;
hwaddr source_offset, dest_offset;
} ContextSurfaces2DState;
typedef struct ImageBlitState {
hwaddr object_instance;
hwaddr context_surfaces;
unsigned int operation;
unsigned int in_x, in_y;
unsigned int out_x, out_y;
unsigned int width, height;
} ImageBlitState;
typedef struct BetaState {
hwaddr object_instance;
uint32_t beta;
} BetaState;
typedef struct PGRAPHRenderer {
CONFIG_DISPLAY_RENDERER type;
const char *name;
struct {
void (*early_context_init)(void);
void (*init)(NV2AState *d);
void (*init_thread)(NV2AState *d);
void (*finalize)(NV2AState *d);
void (*clear_report_value)(NV2AState *d);
void (*clear_surface)(NV2AState *d, uint32_t parameter);
void (*draw_begin)(NV2AState *d);
void (*draw_end)(NV2AState *d);
void (*flip_stall)(NV2AState *d);
void (*flush_draw)(NV2AState *d);
void (*get_report)(NV2AState *d, uint32_t parameter);
void (*image_blit)(NV2AState *d);
void (*pre_savevm_trigger)(NV2AState *d);
void (*pre_savevm_wait)(NV2AState *d);
void (*pre_shutdown_trigger)(NV2AState *d);
void (*pre_shutdown_wait)(NV2AState *d);
void (*process_pending)(NV2AState *d);
void (*process_pending_reports)(NV2AState *d);
void (*surface_flush)(NV2AState *d);
void (*surface_update)(NV2AState *d, bool upload, bool color_write, bool zeta_write);
void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale);
unsigned int (*get_surface_scale_factor)(NV2AState *d);
int (*get_framebuffer_surface)(NV2AState *d);
} ops;
} PGRAPHRenderer;
typedef struct PGRAPHState {
QemuMutex lock;
uint32_t pending_interrupts;
uint32_t enabled_interrupts;
int frame_time;
int draw_time;
/* subchannels state we're not sure the location of... */
ContextSurfaces2DState context_surfaces_2d;
ImageBlitState image_blit;
KelvinState kelvin;
BetaState beta;
hwaddr dma_color, dma_zeta;
Surface surface_color, surface_zeta;
unsigned int surface_type;
SurfaceShape surface_shape;
SurfaceShape last_surface_shape;
struct {
int clip_x;
int clip_width;
int clip_y;
int clip_height;
int width;
int height;
} surface_binding_dim; // FIXME: Refactor
hwaddr dma_a, dma_b;
bool texture_dirty[NV2A_MAX_TEXTURES];
bool texture_matrix_enable[NV2A_MAX_TEXTURES];
hwaddr dma_state;
hwaddr dma_notifies;
hwaddr dma_semaphore;
hwaddr dma_report;
hwaddr report_offset;
bool zpass_pixel_count_enable;
hwaddr dma_vertex_a, dma_vertex_b;
uint32_t primitive_mode;
bool enable_vertex_program_write; // FIXME: Not used anywhere???
uint32_t vertex_state_shader_v0[4];
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
bool program_data_dirty;
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
/* lighting constant arrays */
uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
uint32_t ltc1[NV2A_LTC1_COUNT][4];
bool ltc1_dirty[NV2A_LTC1_COUNT];
float material_alpha;
// should figure out where these are in lighting context
float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
float light_infinite_direction[NV2A_MAX_LIGHTS][3];
float light_local_position[NV2A_MAX_LIGHTS][3];
float light_local_attenuation[NV2A_MAX_LIGHTS][3];
float point_params[8];
VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
uint16_t compressed_attrs;
uint16_t uniform_attrs;
uint16_t swizzle_attrs;
unsigned int inline_array_length;
uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
unsigned int inline_elements_length;
uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
unsigned int inline_buffer_length;
unsigned int draw_arrays_length;
unsigned int draw_arrays_min_start;
unsigned int draw_arrays_max_count;
/* FIXME: Unknown size, possibly endless, 1250 will do for now */
/* Keep in sync with size used in nv2a.c */
int32_t draw_arrays_start[1250];
int32_t draw_arrays_count[1250];
bool draw_arrays_prevent_connect;
uint32_t regs_[0x2000];
DECLARE_BITMAP(regs_dirty, 0x2000 / sizeof(uint32_t));
bool clearing;
bool waiting_for_nop;
bool waiting_for_flip;
bool waiting_for_context_switch;
bool flush_pending;
QemuEvent flush_complete;
bool sync_pending;
QemuEvent sync_complete;
unsigned int surface_scale_factor;
uint8_t *scale_buf;
const PGRAPHRenderer *renderer;
union {
PGRAPHNullState *null_renderer_state;
PGRAPHGLState *gl_renderer_state;
PGRAPHVkState *vk_renderer_state;
};
} PGRAPHState;
void pgraph_init(NV2AState *d);
void pgraph_init_thread(NV2AState *d);
void pgraph_destroy(PGRAPHState *pg);
void pgraph_context_switch(NV2AState *d, unsigned int channel_id);
int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method,
uint32_t parameter, uint32_t *parameters,
size_t num_words_available, size_t max_lookahead_words,
bool inc);
void pgraph_check_within_begin_end_block(PGRAPHState *pg);
void *pfifo_thread(void *arg);
void pfifo_kick(NV2AState *d);
void pgraph_renderer_register(const PGRAPHRenderer *renderer);
// FIXME: Move from here
extern NV2AState *g_nv2a;
// FIXME: Add new function pgraph_is_texture_sampler_active()
static inline uint32_t pgraph_reg_r(PGRAPHState *pg, unsigned int r)
{
assert(r % 4 == 0);
return pg->regs_[r];
}
static inline void pgraph_reg_w(PGRAPHState *pg, unsigned int r, uint32_t v)
{
assert(r % 4 == 0);
if (pg->regs_[r] != v) {
bitmap_set(pg->regs_dirty, r / sizeof(uint32_t), 1);
}
pg->regs_[r] = v;
}
void pgraph_clear_dirty_reg_map(PGRAPHState *pg);
static inline bool pgraph_is_reg_dirty(PGRAPHState *pg, unsigned int reg)
{
return test_bit(reg / sizeof(uint32_t), pg->regs_dirty);
}
static inline bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage)
{
assert(stage < NV2A_MAX_TEXTURES);
uint32_t mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (stage * 5)) & 0x1F;
return mode != 0 && mode != 4;// && mode != 0x11 && mode != 0x0a && mode != 0x09 && mode != 5;
}
static inline bool pgraph_is_texture_enabled(PGRAPHState *pg, int texture_idx)
{
uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + texture_idx*4);
return // pgraph_is_texture_stage_active(pg, texture_idx) &&
GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE);
}
static inline bool pgraph_is_texture_format_compressed(PGRAPHState *pg, int color_format)
{
return color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ||
color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8 ||
color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8;
}
static inline bool pgraph_color_write_enabled(PGRAPHState *pg)
{
return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & (
NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE
| NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE
| NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE
| NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE);
}
static inline bool pgraph_zeta_write_enabled(PGRAPHState *pg)
{
return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & (
NV_PGRAPH_CONTROL_0_ZWRITEENABLE
| NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE);
}
static inline void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg,
unsigned int *width,
unsigned int *height)
{
switch (pg->surface_shape.anti_aliasing) {
case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1:
break;
case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2:
if (width) { *width *= 2; }
break;
case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4:
if (width) { *width *= 2; }
if (height) { *height *= 2; }
break;
default:
assert(false);
break;
}
}
static inline void pgraph_apply_scaling_factor(PGRAPHState *pg,
unsigned int *width,
unsigned int *height)
{
*width *= pg->surface_scale_factor;
*height *= pg->surface_scale_factor;
}
void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]);
void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, int *stencil);
/* Vertex */
void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr);
void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg);
void pgraph_reset_inline_buffers(PGRAPHState *pg);
void pgraph_reset_draw_arrays(PGRAPHState *pg);
void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data);
/* RDI */
uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select,
unsigned int address);
void pgraph_rdi_write(PGRAPHState *pg, unsigned int select,
unsigned int address, uint32_t val);
static inline void pgraph_argb_pack32_to_rgba_float(uint32_t argb, float *rgba)
{
rgba[0] = ((argb >> 16) & 0xFF) / 255.0f; /* red */
rgba[1] = ((argb >> 8) & 0xFF) / 255.0f; /* green */
rgba[2] = (argb & 0xFF) / 255.0f; /* blue */
rgba[3] = ((argb >> 24) & 0xFF) / 255.0f; /* alpha */
}
void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, uint32_t result);
#endif
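pgraph_reg_w() flips a bit in regs_dirty only when a write actually changes a register's value, so render backends can poll for state changes cheaply and reset the map once it has been consumed. An illustrative consumer (not from the commit):

/* Illustrative only -- assumes the declarations above are in scope. */
static void example_rebuild_combiners_if_needed(PGRAPHState *pg)
{
    if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECTL)) {
        uint32_t ctl = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL);
        /* ... re-derive combiner state from ctl ... */
        (void)ctl;
    }
    /* typically once per draw, after all dirty registers are serviced: */
    pgraph_clear_dirty_reg_map(pg);
}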

View File

@ -0,0 +1,74 @@
/*
* QEMU Geforce NV2A profiling helpers
*
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "../nv2a_int.h"
NV2AStats g_nv2a_stats;
void nv2a_profile_increment(void)
{
int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
const int64_t fps_update_interval = 250000;
g_nv2a_stats.last_flip_time = now;
static int64_t frame_count = 0;
frame_count++;
static int64_t ts = 0;
int64_t delta = now - ts;
if (delta >= fps_update_interval) {
g_nv2a_stats.increment_fps = frame_count * 1000000 / delta;
ts = now;
frame_count = 0;
}
}
void nv2a_profile_flip_stall(void)
{
int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000;
g_nv2a_stats.frame_working.mspf = render_time;
g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] =
g_nv2a_stats.frame_working;
g_nv2a_stats.frame_ptr =
(g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES;
g_nv2a_stats.frame_count++;
memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working));
}
const char *nv2a_profile_get_counter_name(unsigned int cnt)
{
const char *default_names[NV2A_PROF__COUNT] = {
#define _X(x) stringify(x),
NV2A_PROF_COUNTERS_XMAC
#undef _X
};
assert(cnt < NV2A_PROF__COUNT);
return default_names[cnt] + 10; /* 'NV2A_PROF_' */
}
int nv2a_profile_get_counter_value(unsigned int cnt)
{
assert(cnt < NV2A_PROF__COUNT);
unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) %
NV2A_PROF_NUM_FRAMES;
return g_nv2a_stats.frame_history[idx].counters[cnt];
}
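Counters accumulate in frame_working and are committed into a NV2A_PROF_NUM_FRAMES-deep ring on each flip stall, so nv2a_profile_get_counter_value() always reads the most recently completed frame (frame_ptr - 1, modulo the ring size). A sketch of the producing side, assuming an increment helper of roughly this shape lives alongside these functions:

/* Assumed shape of the per-frame counter increment used by renderers. */
static inline void nv2a_profile_inc_counter(unsigned int cnt)
{
    assert(cnt < NV2A_PROF__COUNT);
    g_nv2a_stats.frame_working.counters[cnt]++;
}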

View File

@ -20,7 +20,8 @@
#ifndef HW_NV2A_PSH_H
#define HW_NV2A_PSH_H
#include "shaders_common.h"
#include <stdint.h>
#include <stdbool.h>
enum PshAlphaFunc {
ALPHA_FUNC_NEVER,
@ -51,6 +52,8 @@ enum ConvolutionFilter {
};
typedef struct PshState {
bool vulkan;
/* fragment shader - register combiner stuff */
uint32_t combiner_control;
uint32_t shader_stage_program;
@ -67,6 +70,7 @@ typedef struct PshState {
bool compare_mode[4][4];
bool alphakill[4];
enum ConvolutionFilter conv_tex[4];
bool tex_x8y24[4];
float border_logical_size[4][3];
float border_inv_real_size[4][3];
@ -82,6 +86,4 @@ typedef struct PshState {
bool smooth_shading;
} PshState;
MString *psh_translate(const PshState state);
#endif

60 hw/xbox/nv2a/pgraph/rdi.c Normal file
View File

@ -0,0 +1,60 @@
/*
* QEMU Geforce NV2A implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "../nv2a_int.h"
uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select,
unsigned int address)
{
uint32_t r = 0;
switch (select) {
case RDI_INDEX_VTX_CONSTANTS0:
case RDI_INDEX_VTX_CONSTANTS1:
assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
r = pg->vsh_constants[address / 4][3 - address % 4];
break;
default:
fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n",
select, address);
assert(false);
break;
}
return r;
}
void pgraph_rdi_write(PGRAPHState *pg, unsigned int select,
unsigned int address, uint32_t val)
{
switch (select) {
case RDI_INDEX_VTX_CONSTANTS0:
case RDI_INDEX_VTX_CONSTANTS1:
assert(false); /* Untested */
assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
pg->vsh_constants_dirty[address / 4] |=
(val != pg->vsh_constants[address / 4][3 - address % 4]);
pg->vsh_constants[address / 4][3 - address % 4] = val;
break;
default:
NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n",
select, address, val);
break;
}
}
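The 3 - address % 4 term reverses component order: the RDI window exposes each 128-bit constant with its four 32-bit words swapped end to end. A worked decode of the indexing above:

/*   address 0 -> vsh_constants[0][3]
 *   address 1 -> vsh_constants[0][2]
 *   address 2 -> vsh_constants[0][1]
 *   address 3 -> vsh_constants[0][0]
 *   address 4 -> vsh_constants[1][3]
 */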

View File

@ -1,5 +1,5 @@
/*
* QEMU texture decompression routines
* S3TC Texture Decompression
*
* Copyright (c) 2020 Wilhelm Kovatch
*
@ -25,13 +25,9 @@
#include "qemu/osdep.h"
#include "s3tc.h"
static inline void decode_bc1_colors(uint16_t c0,
uint16_t c1,
uint8_t r[4],
uint8_t g[4],
uint8_t b[4],
uint8_t a[16],
bool transparent)
static void decode_bc1_colors(uint16_t c0, uint16_t c1, uint8_t r[4],
uint8_t g[4], uint8_t b[4], uint8_t a[16],
bool transparent)
{
r[0] = ((c0 & 0xF800) >> 8) * 0xFF / 0xF8,
g[0] = ((c0 & 0x07E0) >> 3) * 0xFF / 0xFC,
@ -66,15 +62,10 @@ static inline void decode_bc1_colors(uint16_t c0,
}
}
static inline void write_block_to_texture(uint8_t *converted_data,
uint32_t indices,
int i, int j, int width,
int z_pos_factor,
uint8_t r[4],
uint8_t g[4],
uint8_t b[4],
uint8_t a[16],
bool separate_alpha)
static void write_block_to_texture(uint8_t *converted_data, uint32_t indices,
int i, int j, int width, int z_pos_factor,
uint8_t r[4], uint8_t g[4], uint8_t b[4],
uint8_t a[16], bool separate_alpha)
{
int x0 = i * 4,
y0 = j * 4;
@ -89,16 +80,18 @@ static inline void write_block_to_texture(uint8_t *converted_data,
int xy_index = y_index + x - x0;
uint8_t index = (indices >> 2 * xy_index) & 0x03;
uint8_t alpha_index = separate_alpha ? xy_index : index;
uint32_t color = (r[index] << 24) | (g[index] << 16) | (b[index] << 8) | a[alpha_index];
*(uint32_t*)(converted_data + (z_plus_y_pos_factor + x) * 4) = color;
uint8_t *p = converted_data + (z_plus_y_pos_factor + x) * 4;
*p++ = r[index];
*p++ = g[index];
*p++ = b[index];
*p++ = a[alpha_index];
}
}
}
static inline void decompress_dxt1_block(const uint8_t block_data[8],
uint8_t *converted_data,
int i, int j, int width,
int z_pos_factor)
static void decompress_dxt1_block(const uint8_t block_data[8],
uint8_t *converted_data, int i, int j,
int width, int z_pos_factor)
{
uint16_t c0 = ((uint16_t*)block_data)[0],
c1 = ((uint16_t*)block_data)[1];
@ -111,10 +104,9 @@ static inline void decompress_dxt1_block(const uint8_t block_data[8],
r, g, b, a, false);
}
static inline void decompress_dxt3_block(const uint8_t block_data[16],
uint8_t *converted_data,
int i, int j, int width,
int z_pos_factor)
static void decompress_dxt3_block(const uint8_t block_data[16],
uint8_t *converted_data, int i, int j,
int width, int z_pos_factor)
{
uint16_t c0 = ((uint16_t*)block_data)[4],
c1 = ((uint16_t*)block_data)[5];
@ -132,10 +124,9 @@ static inline void decompress_dxt3_block(const uint8_t block_data[16],
r, g, b, a, true);
}
static inline void decompress_dxt5_block(const uint8_t block_data[16],
uint8_t *converted_data,
int i, int j, int width,
int z_pos_factor)
static void decompress_dxt5_block(const uint8_t block_data[16],
uint8_t *converted_data, int i, int j,
int width, int z_pos_factor)
{
uint16_t c0 = ((uint16_t*)block_data)[4],
c1 = ((uint16_t*)block_data)[5];
@ -173,11 +164,9 @@ static inline void decompress_dxt5_block(const uint8_t block_data[16],
r, g, b, a, true);
}
uint8_t *decompress_3d_texture_data(GLint color_format,
const uint8_t *data,
unsigned int width,
unsigned int height,
unsigned int depth)
uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format,
const uint8_t *data, unsigned int width,
unsigned int height, unsigned int depth)
{
assert((width > 0) && (width % 4 == 0));
assert((height > 0) && (height % 4 == 0));
@ -196,13 +185,13 @@ uint8_t *decompress_3d_texture_data(GLint color_format,
int sub_block_index = block_index * block_depth + slice;
int z_pos_factor = (k * block_depth + slice) * width * height;
if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) {
decompress_dxt1_block(data + 8 * sub_block_index, converted_data,
i, j, width, z_pos_factor);
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) {
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) {
decompress_dxt3_block(data + 16 * sub_block_index, converted_data,
i, j, width, z_pos_factor);
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) {
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) {
decompress_dxt5_block(data + 16 * sub_block_index, converted_data,
i, j, width, z_pos_factor);
} else {
@ -216,8 +205,9 @@ uint8_t *decompress_3d_texture_data(GLint color_format,
return converted_data;
}
uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
unsigned int width, unsigned int height)
uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format,
const uint8_t *data, unsigned int width,
unsigned int height)
{
assert((width > 0) && (width % 4 == 0));
assert((height > 0) && (height % 4 == 0));
@ -226,13 +216,13 @@ uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
for (int j = 0; j < num_blocks_y; j++) {
for (int i = 0; i < num_blocks_x; i++) {
int block_index = j * num_blocks_x + i;
if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) {
decompress_dxt1_block(data + 8 * block_index,
converted_data, i, j, width, 0);
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) {
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) {
decompress_dxt3_block(data + 16 * block_index,
converted_data, i, j, width, 0);
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) {
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) {
decompress_dxt5_block(data + 16 * block_index,
converted_data, i, j, width, 0);
} else {

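Use of the now backend-neutral entry points is straightforward; a sketch (the g_free() pairing assumes the decoder allocates with g_malloc(), as the other conversion paths in this commit do):

/* Sketch only: decode one 2D DXT1 image to 4-byte-per-pixel RGBA. */
static uint8_t *example_decode_dxt1(const uint8_t *blocks,
                                    unsigned int w, unsigned int h)
{
    /* w and h must be positive multiples of 4 (asserted by the decoder);
     * the caller releases the returned buffer with g_free(). */
    return s3tc_decompress_2d(S3TC_DECOMPRESS_FORMAT_DXT1, blocks, w, h);
}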
View File

@ -1,5 +1,5 @@
/*
* QEMU texture decompression routines
* S3TC Texture Decompression
*
* Copyright (c) 2020 Wilhelm Kovatch
*
@ -22,18 +22,23 @@
* THE SOFTWARE.
*/
#ifndef S3TC_H
#define S3TC_H
#ifndef HW_XBOX_NV2A_PGRAPH_S3TC_H
#define HW_XBOX_NV2A_PGRAPH_S3TC_H
#include "gl/gloffscreen.h"
#include <stdint.h>
uint8_t *decompress_3d_texture_data(GLint color_format,
const uint8_t *data,
unsigned int width,
unsigned int height,
unsigned int depth);
enum S3TC_DECOMPRESS_FORMAT {
S3TC_DECOMPRESS_FORMAT_DXT1,
S3TC_DECOMPRESS_FORMAT_DXT3,
S3TC_DECOMPRESS_FORMAT_DXT5,
};
uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
unsigned int width, unsigned int height);
uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format,
const uint8_t *data, unsigned int width,
unsigned int height, unsigned int depth);
uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format,
const uint8_t *data, unsigned int width,
unsigned int height);
#endif

View File

@ -0,0 +1,295 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/debug.h"
#include "texture.h"
#include "pgraph.h"
#include "shaders.h"
ShaderState pgraph_get_shader_state(PGRAPHState *pg)
{
bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
NV_PGRAPH_CSV0_D_MODE) == 2;
bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
NV_PGRAPH_CSV0_D_MODE) == 0;
int program_start = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START);
pg->program_data_dirty = false;
ShaderState state;
// We will hash it, so make sure any padding is zeroed
memset(&state, 0, sizeof(ShaderState));
state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN;
state.surface_scale_factor = pg->surface_scale_factor;
state.compressed_attrs = pg->compressed_attrs;
state.uniform_attrs = pg->uniform_attrs;
state.swizzle_attrs = pg->swizzle_attrs;
/* register combiner stuff */
state.psh.vulkan = state.vulkan;
state.psh.window_clip_exclusive =
pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE;
state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL);
state.psh.shader_stage_program = pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG);
state.psh.other_stage_input = pgraph_reg_r(pg, NV_PGRAPH_SHADERCTL);
state.psh.final_inputs_0 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG0);
state.psh.final_inputs_1 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG1);
state.psh.alpha_test =
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ALPHAFUNC);
state.psh.point_sprite = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE;
state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_SHADOWCTL), NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC);
state.fixed_function = fixed_function;
/* fixed function stuff */
if (fixed_function) {
state.skinning = (enum VshSkinning)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
NV_PGRAPH_CSV0_D_SKIN);
state.lighting =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING);
state.normalization =
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE;
/* color material */
state.emission_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION);
state.ambient_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT);
state.diffuse_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE);
state.specular_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR);
}
/* vertex program stuff */
state.vertex_program = vertex_program;
state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;
state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
NV_PGRAPH_CSV0_D_POINTPARAMSENABLE);
state.point_size =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE), NV097_SET_POINT_SIZE_V) / 8.0f;
if (state.point_params_enable) {
for (int i = 0; i < 8; i++) {
state.point_params[i] = pg->point_params[i];
}
}
/* geometry shader stuff */
state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode;
state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_FRONTFACEMODE);
state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_BACKFACEMODE);
state.smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
NV_PGRAPH_CONTROL_3_SHADEMODE) ==
NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;
state.psh.smooth_shading = state.smooth_shading;
state.program_length = 0;
if (vertex_program) {
// copy in vertex program tokens
for (int i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH;
i++) {
uint32_t *cur_token = (uint32_t *)&pg->program_data[i];
memcpy(&state.program_data[state.program_length], cur_token,
VSH_TOKEN_SIZE * sizeof(uint32_t));
state.program_length++;
if (vsh_get_field(cur_token, FLD_FINAL)) {
break;
}
}
}
/* Texgen */
for (int i = 0; i < 4; i++) {
unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B;
for (int j = 0; j < 4; j++) {
unsigned int masks[] = {
(i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S,
(i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T,
(i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R,
(i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q
};
state.texgen[i][j] =
(enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]);
}
}
/* Fog */
state.fog_enable =
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE;
if (state.fog_enable) {
/*FIXME: Use CSV0_D? */
state.fog_mode = (enum VshFogMode)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_FOG_MODE);
state.foggen = (enum VshFoggen)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
NV_PGRAPH_CSV0_D_FOGGENMODE);
} else {
/* FIXME: Do we still pass the fogmode? */
state.fog_mode = (enum VshFogMode)0;
state.foggen = (enum VshFoggen)0;
}
/* Texture matrices */
for (int i = 0; i < 4; i++) {
state.texture_matrix_enable[i] = pg->texture_matrix_enable[i];
}
/* Lighting */
if (state.lighting) {
for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
state.light[i] = (enum VshLight)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2));
}
}
/* Copy content of enabled combiner stages */
int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
for (int i = 0; i < num_stages; i++) {
state.psh.rgb_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4);
state.psh.rgb_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4);
state.psh.alpha_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4);
state.psh.alpha_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4);
// constant_0[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
// constant_1[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
}
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
state.psh.compare_mode[i][j] =
(pgraph_reg_r(pg, NV_PGRAPH_SHADERCLIPMODE) >> (4 * i + j)) & 1;
}
uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i * 4);
bool enabled = pgraph_is_texture_stage_active(pg, i) &&
(ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE);
if (!enabled) {
continue;
}
state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN;
uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4);
unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR);
BasicColorFormatInfo f = kelvin_color_format_info_map[color_format];
state.psh.rect_tex[i] = f.linear;
state.psh.tex_x8y24[i] = color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED ||
color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT;
uint32_t border_source =
GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
state.psh.border_logical_size[i][0] = 0.0f;
state.psh.border_logical_size[i][1] = 0.0f;
state.psh.border_logical_size[i][2] = 0.0f;
if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) {
if (!f.linear && !cubemap) {
// The actual texture will be (at least) double the reported
// size and shifted by a 4 texel border but texture coordinates
// will still be relative to the reported size.
unsigned int reported_width =
1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
unsigned int reported_height =
1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
unsigned int reported_depth =
1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
state.psh.border_logical_size[i][0] = reported_width;
state.psh.border_logical_size[i][1] = reported_height;
state.psh.border_logical_size[i][2] = reported_depth;
if (reported_width < 8) {
state.psh.border_inv_real_size[i][0] = 0.0625f;
} else {
state.psh.border_inv_real_size[i][0] =
1.0f / (reported_width * 2.0f);
}
if (reported_height < 8) {
state.psh.border_inv_real_size[i][1] = 0.0625f;
} else {
state.psh.border_inv_real_size[i][1] =
1.0f / (reported_height * 2.0f);
}
if (reported_depth < 8) {
state.psh.border_inv_real_size[i][2] = 0.0625f;
} else {
state.psh.border_inv_real_size[i][2] =
1.0f / (reported_depth * 2.0f);
}
} else {
NV2A_UNIMPLEMENTED(
"Border source texture with linear %d cubemap %d", f.linear,
cubemap);
}
}
/* Keep track of whether texture data has been loaded as signed
* normalized integers or not. This dictates whether or not we will need
* to re-map in fragment shader for certain texture modes (e.g.
* bumpenvmap).
*
* FIXME: When signed texture data is loaded as unsigned and remapped in
* fragment shader, there may be interpolation artifacts. Fix this to
* support signed textures more appropriately.
*/
#if 0 // FIXME
state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM)
|| (f.gl_internal_format == GL_RG8_SNORM);
#endif
state.psh.shadow_map[i] = f.depth;
uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i * 4);
unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED;
/* FIXME: We do not distinguish between min and mag when
* performing convolution. Just use it if specified for min (common AA
* case).
*/
if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) {
int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL);
assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX ||
k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3);
kernel = (enum ConvolutionFilter)k;
}
state.psh.conv_tex[i] = kernel;
}
return state;
}
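Because the struct is memset-zeroed before being filled, padding bytes are deterministic and the whole ShaderState can serve as a byte-wise cache key; the GL backend's shader LRU is keyed on exactly this. A hypothetical probe (the real lookup lives in the backends):

/* Hypothetical -- example_hash() and example_cache_lookup() do not exist. */
static void *example_probe_shader_cache(PGRAPHState *pg)
{
    ShaderState state = pgraph_get_shader_state(pg);
    uint64_t key = example_hash(&state, sizeof(state));
    return example_cache_lookup(key, &state);
}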

View File

@ -18,17 +18,14 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_NV2A_SHADERS_H
#define HW_NV2A_SHADERS_H
#ifndef HW_XBOX_NV2A_PGRAPH_SHADERS_H
#define HW_XBOX_NV2A_PGRAPH_SHADERS_H
#include "qemu/thread.h"
#include "qapi/qmp/qstring.h"
#include "gl/gloffscreen.h"
#include <stdint.h>
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "nv2a_regs.h"
#include "vsh.h"
#include "psh.h"
#include "lru.h"
enum ShaderPrimitiveMode {
PRIM_TYPE_INVALID,
@ -57,10 +54,13 @@ enum MaterialColorSource {
};
typedef struct ShaderState {
bool vulkan;
unsigned int surface_scale_factor;
PshState psh;
uint16_t compressed_attrs;
uint16_t uniform_attrs;
uint16_t swizzle_attrs;
bool texture_matrix_enable[4];
enum VshTexgen texgen[4][4];
@ -101,61 +101,8 @@ typedef struct ShaderState {
bool smooth_shading;
} ShaderState;
typedef struct ShaderBinding {
GLuint gl_program;
GLenum gl_primitive_mode;
GLint psh_constant_loc[9][2];
GLint alpha_ref_loc;
GLint bump_mat_loc[NV2A_MAX_TEXTURES];
GLint bump_scale_loc[NV2A_MAX_TEXTURES];
GLint bump_offset_loc[NV2A_MAX_TEXTURES];
GLint tex_scale_loc[NV2A_MAX_TEXTURES];
GLint surface_size_loc;
GLint clip_range_loc;
GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
GLint inv_viewport_loc;
GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
GLint ltc1_loc[NV2A_LTC1_COUNT];
GLint fog_color_loc;
GLint fog_param_loc[2];
GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS];
GLint light_local_position_loc[NV2A_MAX_LIGHTS];
GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
GLint clip_region_loc[8];
GLint material_alpha_loc;
} ShaderBinding;
typedef struct ShaderLruNode {
LruNode node;
bool cached;
void *program;
size_t program_size;
GLenum program_format;
ShaderState state;
ShaderBinding *binding;
QemuThread *save_thread;
} ShaderLruNode;
typedef struct PGRAPHState PGRAPHState;
GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode);
void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state);
ShaderBinding *generate_shaders(const ShaderState *state);
void shader_cache_init(PGRAPHState *pg);
void shader_write_cache_reload_list(PGRAPHState *pg);
bool shader_load_from_memory(ShaderLruNode *snode);
void shader_cache_to_disk(ShaderLruNode *snode);
ShaderState pgraph_get_shader_state(PGRAPHState *pg);
#endif

View File

@ -0,0 +1,35 @@
/*
* QEMU Geforce NV2A implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_SURFACE_H
#define HW_XBOX_NV2A_PGRAPH_SURFACE_H
typedef struct SurfaceShape {
unsigned int z_format;
unsigned int color_format;
unsigned int zeta_format;
unsigned int log_width, log_height;
unsigned int clip_x, clip_y;
unsigned int clip_width, clip_height;
unsigned int anti_aliasing;
} SurfaceShape;
#endif

View File

@ -18,8 +18,10 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_SWIZZLE_H
#define HW_XBOX_SWIZZLE_H
#ifndef HW_XBOX_NV2A_PGRAPH_SWIZZLE_H
#define HW_XBOX_NV2A_PGRAPH_SWIZZLE_H
#include <stdint.h>
void swizzle_box(
const uint8_t *src_buf,

View File

@ -0,0 +1,405 @@
/*
* QEMU Geforce NV2A implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
#include "texture.h"
#include "util.h"
const BasicColorFormatInfo kelvin_color_format_info_map[66] = {
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { 1, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { 1, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { 1, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { 4, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { 1, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { 1, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { 1, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { 4, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { 1, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { 2, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { 2, false, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { 4, true,
true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { 4, true,
true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { 2, true,
true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { 2, true,
true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { 2, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { 4, false },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { 4, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { 4, true },
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { 4, true },
};
hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx)
{
NV2AState *d = container_of(pg, NV2AState, pgraph);
int i = texture_idx;
uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4);
unsigned int dma_select =
GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA);
hwaddr offset = pgraph_reg_r(pg, NV_PGRAPH_TEXOFFSET0 + i*4);
hwaddr dma_len;
uint8_t *texture_data;
if (dma_select) {
texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len);
} else {
texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len);
}
assert(offset < dma_len);
texture_data += offset;
return texture_data - d->vram_ptr;
}
hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length)
{
NV2AState *d = container_of(pg, NV2AState, pgraph);
int i = texture_idx;
uint32_t palette = pgraph_reg_r(pg, NV_PGRAPH_TEXPALETTE0 + i*4);
bool palette_dma_select =
GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA);
unsigned int palette_length_index =
GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH);
unsigned int palette_offset =
palette & NV_PGRAPH_TEXPALETTE0_OFFSET;
unsigned int palette_length = 0;
switch (palette_length_index) {
case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break;
case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break;
case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break;
case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break;
default: assert(false); break;
}
if (length) {
*length = palette_length;
}
hwaddr palette_dma_len;
uint8_t *palette_data;
if (palette_dma_select) {
palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len);
} else {
palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len);
}
assert(palette_offset < palette_dma_len);
palette_data += palette_offset;
return palette_data - d->vram_ptr;
}
size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape)
{
BasicColorFormatInfo f = kelvin_color_format_info_map[shape->color_format];
size_t length = 0;
if (f.linear) {
assert(shape->cubemap == false);
assert(shape->dimensionality == 2);
length = shape->height * shape->pitch;
} else {
if (shape->dimensionality >= 2) {
unsigned int w = shape->width, h = shape->height;
int level;
if (!pgraph_is_texture_format_compressed(pg, shape->color_format)) {
for (level = 0; level < shape->levels; level++) {
w = MAX(w, 1);
h = MAX(h, 1);
length += w * h * f.bytes_per_pixel;
w /= 2;
h /= 2;
}
} else {
/* Compressed textures are a bit different */
unsigned int block_size =
shape->color_format ==
NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ?
8 : 16;
for (level = 0; level < shape->levels; level++) {
w = MAX(w, 1);
h = MAX(h, 1);
unsigned int phys_w = (w + 3) & ~3,
phys_h = (h + 3) & ~3;
length += phys_w/4 * phys_h/4 * block_size;
w /= 2;
h /= 2;
}
}
if (shape->cubemap) {
assert(shape->dimensionality == 2);
length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
length *= 6;
}
if (shape->dimensionality >= 3) {
length *= shape->depth;
}
}
}
return length;
}
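A worked instance of the compressed branch, for a 2D DXT1 texture (8-byte blocks) with a 32x32 base and levels = 3:

/* level 0: 32x32 -> (32/4)*(32/4) = 64 blocks -> 512 bytes
 * level 1: 16x16 -> 16 blocks                 -> 128 bytes
 * level 2:  8x8  ->  4 blocks                 ->  32 bytes
 * total  : 672 bytes; a cubemap would round this up to
 * NV2A_CUBEMAP_FACE_ALIGNMENT and multiply by 6.
 */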
TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx)
{
int i = texture_idx;
uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i*4);
uint32_t ctl_1 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL1_0 + i*4);
uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4);
#if DEBUG_NV2A
uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4);
uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4);
#endif
unsigned int min_mipmap_level =
GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP);
unsigned int max_mipmap_level =
GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP);
unsigned int pitch =
GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH);
bool cubemap =
GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
unsigned int dimensionality =
GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY);
int tex_mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (texture_idx * 5)) & 0x1F;
if (tex_mode == 0x02) {
assert(pgraph_is_texture_enabled(pg, texture_idx));
// assert(state.dimensionality == 3);
// OVERRIDE
// dimensionality = 3;
}
unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR);
unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS);
unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
unsigned int rect_width =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4),
NV_PGRAPH_TEXIMAGERECT0_WIDTH);
unsigned int rect_height =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4),
NV_PGRAPH_TEXIMAGERECT0_HEIGHT);
#ifdef DEBUG_NV2A
unsigned int lod_bias =
GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS);
#endif
unsigned int border_source = GET_MASK(fmt,
NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
NV2A_DPRINTF(" texture %d is format 0x%x, "
"off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s),"
" filter %x %x, levels %d-%d %d bias %d\n",
i, color_format, address,
rect_width, rect_height,
1 << log_width, 1 << log_height, 1 << log_depth,
pitch,
cubemap ? "; cubemap" : "",
GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN),
GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG),
min_mipmap_level, max_mipmap_level, levels,
lod_bias);
assert(color_format < ARRAY_SIZE(kelvin_color_format_info_map));
BasicColorFormatInfo f = kelvin_color_format_info_map[color_format];
if (f.bytes_per_pixel == 0) {
fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n",
color_format);
abort();
}
unsigned int width, height, depth;
if (f.linear) {
assert(dimensionality == 2);
width = rect_width;
height = rect_height;
depth = 1;
} else {
width = 1 << log_width;
height = 1 << log_height;
depth = 1 << log_depth;
pitch = 0;
levels = MIN(levels, max_mipmap_level + 1);
/* Discard mipmap levels that would be smaller than 1x1.
* FIXME: Is this actually needed?
*
* >> Level 0: 32 x 4
* Level 1: 16 x 2
* Level 2: 8 x 1
* Level 3: 4 x 1
* Level 4: 2 x 1
* Level 5: 1 x 1
*/
levels = MIN(levels, MAX(log_width, log_height) + 1);
assert(levels > 0);
if (dimensionality == 3) {
/* FIXME: What about 3D mipmaps? */
if (log_width < 2 || log_height < 2) {
/* Base level is smaller than 4x4... */
levels = 1;
} else {
levels = MIN(levels, MIN(log_width, log_height) - 1);
}
}
min_mipmap_level = MIN(levels-1, min_mipmap_level);
max_mipmap_level = MIN(levels-1, max_mipmap_level);
}
TextureShape shape;
// We will hash it, so make sure any padding is zero
memset(&shape, 0, sizeof(shape));
shape.cubemap = cubemap;
shape.dimensionality = dimensionality;
shape.color_format = color_format;
shape.levels = levels;
shape.width = width;
shape.height = height;
shape.depth = depth;
shape.min_mipmap_level = min_mipmap_level;
shape.max_mipmap_level = max_mipmap_level;
shape.pitch = pitch;
shape.border = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR;
return shape;
}
uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data,
const uint8_t *palette_data,
unsigned int width, unsigned int height,
unsigned int depth, unsigned int row_pitch,
unsigned int slice_pitch,
size_t *converted_size)
{
size_t size = 0;
uint8_t *converted_data;
if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) {
size = width * height * depth * 4;
converted_data = g_malloc(size);
const uint8_t *src = data;
uint32_t *dst = (uint32_t *)converted_data;
for (int z = 0; z < depth; z++) {
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
uint8_t index = src[y * row_pitch + x];
uint32_t color = *(uint32_t *)(palette_data + index * 4);
*dst++ = color;
}
}
src += slice_pitch;
}
} else if (s.color_format ==
NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 ||
s.color_format ==
NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) {
// TODO: Investigate whether a non-1 depth is possible.
// Generally the hardware asserts when attempting to use volumetric
// textures in linear formats.
assert(depth == 1); /* FIXME */
// FIXME: only valid if control0 register allows for colorspace
// conversion
size = width * height * 4;
converted_data = g_malloc(size);
uint8_t *pixel = converted_data;
for (int y = 0; y < height; y++) {
const uint8_t *line = &data[y * row_pitch * depth];
for (int x = 0; x < width; x++, pixel += 4) {
if (s.color_format ==
NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) {
convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1],
&pixel[2]);
} else {
convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1],
&pixel[2]);
}
pixel[3] = 255;
}
}
} else if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) {
assert(depth == 1); /* FIXME */
size = width * height * 3;
converted_data = g_malloc(size);
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++) {
uint16_t rgb655 = *(uint16_t *)(data + y * row_pitch + x * 2);
int8_t *pixel = (int8_t *)&converted_data[(y * width + x) * 3];
/* Maps 5 bit G and B signed value range to 8 bit
* signed values. R is probably unsigned.
*/
rgb655 ^= (1 << 9) | (1 << 4);
pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F;
pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80;
pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80;
}
}
} else {
return NULL;
}
if (converted_size) {
*converted_size = size;
}
return converted_data;
}
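/* Usage sketch (caller-side names are assumptions, not from this change):
 * the helper returns a g_malloc'ed buffer for formats that need CPU-side
 * conversion, or NULL when the data can be uploaded as-is.
 */
#if 0 /* illustrative only */
size_t converted_size = 0;
uint8_t *converted =
    pgraph_convert_texture_data(shape, texture_data, palette_data, width,
                                height, 1, row_pitch, 0, &converted_size);
if (converted) {
    upload_texture(converted, converted_size); /* hypothetical upload step */
    g_free(converted);
} else {
    upload_texture(texture_data, texture_length);
}
#endif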

View File

@ -0,0 +1,67 @@
/*
* QEMU Geforce NV2A implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_TEXTURE_H
#define HW_XBOX_NV2A_PGRAPH_TEXTURE_H
#include "qemu/osdep.h"
#include "cpu.h"
#include <stdbool.h>
#include <stdint.h>
#include "hw/xbox/nv2a/nv2a_regs.h"
typedef struct PGRAPHState PGRAPHState;
typedef struct TextureShape {
bool cubemap;
unsigned int dimensionality;
unsigned int color_format;
unsigned int levels;
unsigned int width, height, depth;
bool border;
unsigned int min_mipmap_level, max_mipmap_level;
unsigned int pitch;
} TextureShape;
typedef struct BasicColorFormatInfo {
unsigned int bytes_per_pixel;
bool linear;
bool depth;
} BasicColorFormatInfo;
extern const BasicColorFormatInfo kelvin_color_format_info_map[66];
uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data,
const uint8_t *palette_data,
unsigned int width, unsigned int height,
unsigned int depth, unsigned int row_pitch,
unsigned int slice_pitch,
size_t *converted_size);
hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx);
hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length);
TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx);
size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape);
#endif

View File

@ -1,7 +1,7 @@
/*
* Offscreen OpenGL abstraction layer -- SDL based
*
* Copyright (c) 2018-2021 Matt Borgerson
* Copyright (c) 2018-2024 Matt Borgerson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal

View File

@ -10,3 +10,9 @@ libnv2a_vsh_cpu = static_library('nv2a_vsh_cpu',
include_directories: ['.', 'nv2a_vsh_cpu/src'])
nv2a_vsh_cpu = declare_dependency(link_with: libnv2a_vsh_cpu,
include_directories: ['nv2a_vsh_cpu/src'])
libgloffscreen = static_library('libgloffscreen',
sources: files('gloffscreen/common.c', 'gloffscreen/sdl.c'),
dependencies: sdl)
gloffscreen = declare_dependency(link_with: libgloffscreen,
include_directories: ['gloffscreen'])

View File

@ -0,0 +1,86 @@
/*
* QEMU Geforce NV2A implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_UTIL_H
#define HW_XBOX_NV2A_PGRAPH_UTIL_H
static const float f16_max = 511.9375f;
static const float f24_max = 1.0E30;
/* 16 bit to [0.0, F16_MAX = 511.9375] */
static inline
float convert_f16_to_float(uint16_t f16) {
if (f16 == 0x0000) { return 0.0; }
uint32_t i = (f16 << 11) + 0x3C000000;
return *(float*)&i;
}
/* 24 bit to [0.0, F24_MAX] */
static inline
float convert_f24_to_float(uint32_t f24) {
assert(!(f24 >> 24));
f24 &= 0xFFFFFF;
if (f24 == 0x000000) { return 0.0; }
uint32_t i = f24 << 7;
return *(float*)&i;
}
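/* Worked examples (illustrative): both formats are positive-only floats laid
 * out so that a shift-and-bias produces an IEEE-754 single. 1.0f (bit
 * pattern 0x3F800000) corresponds to convert_f16_to_float(0x7000), since
 * (0x7000 << 11) + 0x3C000000 == 0x3F800000, and to
 * convert_f24_to_float(0x7F0000), since 0x7F0000 << 7 == 0x3F800000.
 */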
static inline
uint8_t cliptobyte(int x)
{
return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 255 : x));
}
static inline
void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix,
uint8_t *r, uint8_t *g, uint8_t* b) {
int c, d, e;
c = (int)line[ix * 2] - 16;
if (ix % 2) {
d = (int)line[ix * 2 - 1] - 128;
e = (int)line[ix * 2 + 1] - 128;
} else {
d = (int)line[ix * 2 + 1] - 128;
e = (int)line[ix * 2 + 3] - 128;
}
*r = cliptobyte((298 * c + 409 * e + 128) >> 8);
*g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
*b = cliptobyte((298 * c + 516 * d + 128) >> 8);
}
static inline
void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix,
uint8_t *r, uint8_t *g, uint8_t* b) {
int c, d, e;
c = (int)line[ix * 2 + 1] - 16;
if (ix % 2) {
d = (int)line[ix * 2 - 2] - 128;
e = (int)line[ix * 2 + 0] - 128;
} else {
d = (int)line[ix * 2 + 0] - 128;
e = (int)line[ix * 2 + 2] - 128;
}
*r = cliptobyte((298 * c + 409 * e + 128) >> 8);
*g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
*b = cliptobyte((298 * c + 516 * d + 128) >> 8);
}
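/* Both helpers apply the widely used 8-bit fixed-point BT.601 expansion
 * (coefficients 298/409/208/100/516 with +128 rounding). Sanity check:
 * nominal white, Y=235 and Cb=Cr=128, gives c=219 and d=e=0, so every
 * channel evaluates to (298 * 219 + 128) >> 8 = 255.
 */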
#endif

View File

@ -0,0 +1,131 @@
/*
* QEMU Geforce NV2A implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data)
{
assert(attr->count <= 4);
attr->inline_value[0] = 0.0f;
attr->inline_value[1] = 0.0f;
attr->inline_value[2] = 0.0f;
attr->inline_value[3] = 1.0f;
switch (attr->format) {
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
for (uint32_t i = 0; i < attr->count; ++i) {
attr->inline_value[i] = (float)data[i] / 255.0f;
}
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: {
const int16_t *val = (const int16_t *) data;
for (uint32_t i = 0; i < attr->count; ++i, ++val) {
attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f);
}
break;
}
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
memcpy(attr->inline_value, data, attr->size * attr->count);
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: {
const int16_t *val = (const int16_t *) data;
for (uint32_t i = 0; i < attr->count; ++i, ++val) {
attr->inline_value[i] = (float)*val;
}
break;
}
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: {
/* 3 signed, normalized components packed in 32-bits. (11,11,10) */
const int32_t val = *(const int32_t *)data;
int32_t x = val & 0x7FF;
if (x & 0x400) {
x |= 0xFFFFF800;
}
int32_t y = (val >> 11) & 0x7FF;
if (y & 0x400) {
y |= 0xFFFFF800;
}
int32_t z = (val >> 22) & 0x7FF;
if (z & 0x200) {
z |= 0xFFFFFC00;
}
attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f);
attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f);
attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f);
break;
}
default:
fprintf(stderr, "Unknown vertex attribute type: for format 0x%x\n",
attr->format);
assert(!"Unsupported attribute type");
break;
}
}
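/* Worked example for the CMP case (illustrative): the packed dword 0x00000401
 * decodes to x = 0x401, sign-extended to -1023, with y = z = 0, yielding an
 * inline value of (-1.0f, 0.0f, 0.0f, 1.0f) after normalization.
 */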
void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr)
{
VertexAttribute *attribute = &pg->vertex_attributes[attr];
if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) {
return;
}
/* Now upload the previous attribute value */
attribute->inline_buffer_populated = true;
for (int i = 0; i < pg->inline_buffer_length; i++) {
memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value,
sizeof(float) * 4);
}
}
void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg)
{
pgraph_check_within_begin_end_block(pg);
assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH);
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attribute = &pg->vertex_attributes[i];
if (attribute->inline_buffer_populated) {
memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4],
attribute->inline_value, sizeof(float) * 4);
}
}
pg->inline_buffer_length++;
}
void pgraph_reset_inline_buffers(PGRAPHState *pg)
{
pg->inline_elements_length = 0;
pg->inline_array_length = 0;
pg->inline_buffer_length = 0;
pgraph_reset_draw_arrays(pg);
}
void pgraph_reset_draw_arrays(PGRAPHState *pg)
{
pg->draw_arrays_length = 0;
pg->draw_arrays_min_start = -1;
pg->draw_arrays_max_count = 0;
pg->draw_arrays_prevent_connect = false;
}

View File

@ -0,0 +1,177 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* Based on GL implementation:
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
#include "renderer.h"
void pgraph_vk_image_blit(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d;
ImageBlitState *image_blit = &pg->image_blit;
BetaState *beta = &pg->beta;
pgraph_vk_surface_update(d, false, true, true);
assert(context_surfaces->object_instance == image_blit->context_surfaces);
unsigned int bytes_per_pixel;
switch (context_surfaces->color_format) {
case NV062_SET_COLOR_FORMAT_LE_Y8:
bytes_per_pixel = 1;
break;
case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
bytes_per_pixel = 2;
break;
case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
case NV062_SET_COLOR_FORMAT_LE_Y32:
bytes_per_pixel = 4;
break;
default:
fprintf(stderr, "Unknown blit surface format: 0x%x\n",
context_surfaces->color_format);
assert(false);
break;
}
hwaddr source_dma_len, dest_dma_len;
uint8_t *source = (uint8_t *)nv_dma_map(
d, context_surfaces->dma_image_source, &source_dma_len);
assert(context_surfaces->source_offset < source_dma_len);
source += context_surfaces->source_offset;
uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest,
&dest_dma_len);
assert(context_surfaces->dest_offset < dest_dma_len);
dest += context_surfaces->dest_offset;
hwaddr source_addr = source - d->vram_ptr;
hwaddr dest_addr = dest - d->vram_ptr;
SurfaceBinding *surf_src = pgraph_vk_surface_get(d, source_addr);
if (surf_src) {
pgraph_vk_surface_download_if_dirty(d, surf_src);
}
SurfaceBinding *surf_dest = pgraph_vk_surface_get(d, dest_addr);
if (surf_dest) {
if (image_blit->height < surf_dest->height ||
image_blit->width < surf_dest->width) {
pgraph_vk_surface_download_if_dirty(d, surf_dest);
} else {
// The blit will completely replace the surface so any pending
// download should be discarded.
surf_dest->download_pending = false;
surf_dest->draw_dirty = false;
}
surf_dest->upload_pending = true;
pg->draw_time++;
}
hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch +
image_blit->in_x * bytes_per_pixel;
hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch +
image_blit->out_x * bytes_per_pixel;
hwaddr source_size =
(image_blit->height - 1) * context_surfaces->source_pitch +
image_blit->width * bytes_per_pixel;
hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch +
image_blit->width * bytes_per_pixel;
/* FIXME: What does hardware do in this case? */
assert(source_addr + source_offset + source_size <=
memory_region_size(d->vram));
assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram));
uint8_t *source_row = source + source_offset;
uint8_t *dest_row = dest + dest_offset;
if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
// NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY");
for (unsigned int y = 0; y < image_blit->height; y++) {
memmove(dest_row, source_row, image_blit->width * bytes_per_pixel);
source_row += context_surfaces->source_pitch;
dest_row += context_surfaces->dest_pitch;
}
} else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) {
// NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND");
uint32_t max_beta_mult = 0x7f80;
uint32_t beta_mult = beta->beta >> 16;
uint32_t inv_beta_mult = max_beta_mult - beta_mult;
for (unsigned int y = 0; y < image_blit->height; y++) {
for (unsigned int x = 0; x < image_blit->width; x++) {
for (unsigned int ch = 0; ch < 3; ch++) {
uint32_t a = source_row[x * 4 + ch] * beta_mult;
uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult;
dest_row[x * 4 + ch] = (a + b) / max_beta_mult;
}
}
source_row += context_surfaces->source_pitch;
dest_row += context_surfaces->dest_pitch;
}
} else {
fprintf(stderr, "Unknown blit operation: 0x%x\n",
image_blit->operation);
assert(false && "Unknown blit operation");
}
NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr);
bool needs_alpha_patching;
uint8_t alpha_override;
switch (context_surfaces->color_format) {
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
needs_alpha_patching = true;
alpha_override = 0xff;
break;
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
needs_alpha_patching = true;
alpha_override = 0;
break;
default:
needs_alpha_patching = false;
alpha_override = 0;
}
if (needs_alpha_patching) {
dest_row = dest + dest_offset;
for (unsigned int y = 0; y < image_blit->height; y++) {
for (unsigned int x = 0; x < image_blit->width; x++) {
dest_row[x * 4 + 3] = alpha_override;
}
dest_row += context_surfaces->dest_pitch;
}
}
dest_addr += dest_offset;
memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
DIRTY_MEMORY_VGA);
memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
DIRTY_MEMORY_NV2A_TEX);
}

View File

@ -0,0 +1,206 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
#include <vulkan/vulkan_core.h>
static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = buffer->buffer_size,
.usage = buffer->usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
VK_CHECK(vmaCreateBuffer(r->allocator, &buffer_create_info,
&buffer->alloc_info, &buffer->buffer,
&buffer->allocation, NULL));
}
static void destroy_buffer(PGRAPHState *pg, StorageBuffer *buffer)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vmaDestroyBuffer(r->allocator, buffer->buffer, buffer->allocation);
buffer->buffer = VK_NULL_HANDLE;
buffer->allocation = VK_NULL_HANDLE;
}
void pgraph_vk_init_buffers(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
// FIXME: Profile buffer sizes
VmaAllocationCreateInfo host_alloc_create_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
};
VmaAllocationCreateInfo device_alloc_create_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
};
r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){
.alloc_info = host_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.buffer_size = 4096 * 4096 * 4,
};
r->storage_buffers[BUFFER_STAGING_SRC] = (StorageBuffer){
.alloc_info = host_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.buffer_size = r->storage_buffers[BUFFER_STAGING_DST].buffer_size,
};
r->storage_buffers[BUFFER_COMPUTE_DST] = (StorageBuffer){
.alloc_info = device_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.buffer_size = (1024 * 10) * (1024 * 10) * 8,
};
r->storage_buffers[BUFFER_COMPUTE_SRC] = (StorageBuffer){
.alloc_info = device_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.buffer_size = r->storage_buffers[BUFFER_COMPUTE_DST].buffer_size,
};
r->storage_buffers[BUFFER_INDEX] = (StorageBuffer){
.alloc_info = device_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
.buffer_size = sizeof(pg->inline_elements) * 100,
};
r->storage_buffers[BUFFER_INDEX_STAGING] = (StorageBuffer){
.alloc_info = host_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.buffer_size = r->storage_buffers[BUFFER_INDEX].buffer_size,
};
// FIXME: Don't assume that we can render with host mapped buffer
r->storage_buffers[BUFFER_VERTEX_RAM] = (StorageBuffer){
.alloc_info = host_alloc_create_info,
.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.buffer_size = memory_region_size(d->vram),
};
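    // One tracking bit per 4 KiB page of guest VRAM (granularity inferred
    // from the divisor below).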
r->bitmap_size = memory_region_size(d->vram) / 4096;
r->uploaded_bitmap = bitmap_new(r->bitmap_size);
bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);
r->storage_buffers[BUFFER_VERTEX_INLINE] = (StorageBuffer){
.alloc_info = device_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
.buffer_size = NV2A_VERTEXSHADER_ATTRIBUTES * NV2A_MAX_BATCH_LENGTH *
4 * sizeof(float) * 10,
};
r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING] = (StorageBuffer){
.alloc_info = host_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.buffer_size = r->storage_buffers[BUFFER_VERTEX_INLINE].buffer_size,
};
r->storage_buffers[BUFFER_UNIFORM] = (StorageBuffer){
.alloc_info = device_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
.buffer_size = 8 * 1024 * 1024,
};
r->storage_buffers[BUFFER_UNIFORM_STAGING] = (StorageBuffer){
.alloc_info = host_alloc_create_info,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
.buffer_size = r->storage_buffers[BUFFER_UNIFORM].buffer_size,
};
for (int i = 0; i < BUFFER_COUNT; i++) {
create_buffer(pg, &r->storage_buffers[i]);
}
// FIXME: Add fallback path for device using host mapped memory
int buffers_to_map[] = { BUFFER_VERTEX_RAM,
BUFFER_INDEX_STAGING,
BUFFER_VERTEX_INLINE_STAGING,
BUFFER_UNIFORM_STAGING };
for (int i = 0; i < ARRAY_SIZE(buffers_to_map); i++) {
VK_CHECK(vmaMapMemory(
r->allocator, r->storage_buffers[buffers_to_map[i]].allocation,
(void **)&r->storage_buffers[buffers_to_map[i]].mapped));
}
}
void pgraph_vk_finalize_buffers(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
for (int i = 0; i < BUFFER_COUNT; i++) {
if (r->storage_buffers[i].mapped) {
vmaUnmapMemory(r->allocator, r->storage_buffers[i].allocation);
}
destroy_buffer(pg, &r->storage_buffers[i]);
}
g_free(r->uploaded_bitmap);
r->uploaded_bitmap = NULL;
}
bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index,
VkDeviceSize size,
VkDeviceAddress alignment)
{
PGRAPHVkState *r = pg->vk_renderer_state;
StorageBuffer *b = &r->storage_buffers[index];
return (ROUND_UP(b->buffer_offset, alignment) + size) <= b->buffer_size;
}
VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data,
VkDeviceSize *sizes, size_t count,
VkDeviceAddress alignment)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDeviceSize total_size = 0;
for (int i = 0; i < count; i++) {
total_size += sizes[i];
}
assert(pgraph_vk_buffer_has_space_for(pg, index, total_size, alignment));
StorageBuffer *b = &r->storage_buffers[index];
VkDeviceSize starting_offset = ROUND_UP(b->buffer_offset, alignment);
assert(b->mapped);
for (int i = 0; i < count; i++) {
b->buffer_offset = ROUND_UP(b->buffer_offset, alignment);
memcpy(b->mapped + b->buffer_offset, data[i], sizes[i]);
b->buffer_offset += sizes[i];
}
return starting_offset;
}
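/* Usage sketch (variable names illustrative): stage two runs of vertex data
 * with 16-byte alignment and receive the base offset of the first run.
 */
#if 0
void *chunks[] = { positions, colors };
VkDeviceSize sizes[] = { positions_size, colors_size };
VkDeviceSize base = pgraph_vk_append_to_buffer(
    pg, BUFFER_VERTEX_INLINE_STAGING, chunks, sizes, 2, 16);
#endif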

View File

@ -0,0 +1,119 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
static void create_command_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
QueueFamilyIndices indices =
pgraph_vk_find_queue_families(r->physical_device);
VkCommandPoolCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = indices.queue_family,
};
VK_CHECK(
vkCreateCommandPool(r->device, &create_info, NULL, &r->command_pool));
}
static void destroy_command_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyCommandPool(r->device, r->command_pool, NULL);
}
static void create_command_buffers(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkCommandBufferAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.commandPool = r->command_pool,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = ARRAY_SIZE(r->command_buffers),
};
VK_CHECK(
vkAllocateCommandBuffers(r->device, &alloc_info, r->command_buffers));
r->command_buffer = r->command_buffers[0];
r->aux_command_buffer = r->command_buffers[1];
}
static void destroy_command_buffers(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkFreeCommandBuffers(r->device, r->command_pool,
ARRAY_SIZE(r->command_buffers), r->command_buffers);
r->command_buffer = VK_NULL_HANDLE;
r->aux_command_buffer = VK_NULL_HANDLE;
}
VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(!r->in_aux_command_buffer);
r->in_aux_command_buffer = true;
VkCommandBufferBeginInfo begin_info = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
};
VK_CHECK(vkBeginCommandBuffer(r->aux_command_buffer, &begin_info));
return r->aux_command_buffer;
}
void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->in_aux_command_buffer);
VK_CHECK(vkEndCommandBuffer(cmd));
VkSubmitInfo submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
.commandBufferCount = 1,
.pCommandBuffers = &cmd,
};
VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE));
nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_AUX);
VK_CHECK(vkQueueWaitIdle(r->queue));
r->in_aux_command_buffer = false;
}
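/* Typical one-shot pattern (sketch; the buffer copy is illustrative):
 */
#if 0
VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
vkCmdCopyBuffer(cmd, staging_buffer, device_buffer, 1, &copy_region);
pgraph_vk_end_single_time_commands(pg, cmd); /* submits and waits idle */
#endif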
void pgraph_vk_init_command_buffers(PGRAPHState *pg)
{
create_command_pool(pg);
create_command_buffers(pg);
}
void pgraph_vk_finalize_command_buffers(PGRAPHState *pg)
{
destroy_command_buffers(pg);
destroy_command_pool(pg);
}

View File

@ -0,0 +1,418 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H
#define HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include <vulkan/vulkan.h>
static const VkFilter pgraph_texture_min_filter_vk_map[] = {
0,
VK_FILTER_NEAREST,
VK_FILTER_LINEAR,
VK_FILTER_NEAREST,
VK_FILTER_LINEAR,
VK_FILTER_NEAREST,
VK_FILTER_LINEAR,
VK_FILTER_LINEAR,
};
static const VkFilter pgraph_texture_mag_filter_vk_map[] = {
0,
VK_FILTER_NEAREST,
VK_FILTER_LINEAR,
0,
VK_FILTER_LINEAR /* TODO: Convolution filter... */
};
static const VkSamplerAddressMode pgraph_texture_addr_vk_map[] = {
0,
VK_SAMPLER_ADDRESS_MODE_REPEAT,
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */
};
static const VkBlendFactor pgraph_blend_factor_vk_map[] = {
VK_BLEND_FACTOR_ZERO,
VK_BLEND_FACTOR_ONE,
VK_BLEND_FACTOR_SRC_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
VK_BLEND_FACTOR_SRC_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
VK_BLEND_FACTOR_DST_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
VK_BLEND_FACTOR_DST_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
VK_BLEND_FACTOR_SRC_ALPHA_SATURATE,
0,
VK_BLEND_FACTOR_CONSTANT_COLOR,
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
VK_BLEND_FACTOR_CONSTANT_ALPHA,
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
};
static const VkBlendOp pgraph_blend_equation_vk_map[] = {
VK_BLEND_OP_SUBTRACT,
VK_BLEND_OP_REVERSE_SUBTRACT,
VK_BLEND_OP_ADD,
VK_BLEND_OP_MIN,
VK_BLEND_OP_MAX,
VK_BLEND_OP_REVERSE_SUBTRACT,
VK_BLEND_OP_ADD,
};
/* FIXME
static const GLenum pgraph_blend_logicop_map[] = {
GL_CLEAR,
GL_AND,
GL_AND_REVERSE,
GL_COPY,
GL_AND_INVERTED,
GL_NOOP,
GL_XOR,
GL_OR,
GL_NOR,
GL_EQUIV,
GL_INVERT,
GL_OR_REVERSE,
GL_COPY_INVERTED,
GL_OR_INVERTED,
GL_NAND,
GL_SET,
};
*/
static const VkCullModeFlags pgraph_cull_face_vk_map[] = {
0,
VK_CULL_MODE_FRONT_BIT,
VK_CULL_MODE_BACK_BIT,
VK_CULL_MODE_FRONT_AND_BACK,
};
static const VkCompareOp pgraph_depth_func_vk_map[] = {
VK_COMPARE_OP_NEVER,
VK_COMPARE_OP_LESS,
VK_COMPARE_OP_EQUAL,
VK_COMPARE_OP_LESS_OR_EQUAL,
VK_COMPARE_OP_GREATER,
VK_COMPARE_OP_NOT_EQUAL,
VK_COMPARE_OP_GREATER_OR_EQUAL,
VK_COMPARE_OP_ALWAYS,
};
static const VkCompareOp pgraph_stencil_func_vk_map[] = {
VK_COMPARE_OP_NEVER,
VK_COMPARE_OP_LESS,
VK_COMPARE_OP_EQUAL,
VK_COMPARE_OP_LESS_OR_EQUAL,
VK_COMPARE_OP_GREATER,
VK_COMPARE_OP_NOT_EQUAL,
VK_COMPARE_OP_GREATER_OR_EQUAL,
VK_COMPARE_OP_ALWAYS,
};
static const VkStencilOp pgraph_stencil_op_vk_map[] = {
0,
VK_STENCIL_OP_KEEP,
VK_STENCIL_OP_ZERO,
VK_STENCIL_OP_REPLACE,
VK_STENCIL_OP_INCREMENT_AND_CLAMP,
VK_STENCIL_OP_DECREMENT_AND_CLAMP,
VK_STENCIL_OP_INVERT,
VK_STENCIL_OP_INCREMENT_AND_WRAP,
VK_STENCIL_OP_DECREMENT_AND_WRAP,
};
static const VkPolygonMode pgraph_polygon_mode_vk_map[] = {
[POLY_MODE_FILL] = VK_POLYGON_MODE_FILL,
[POLY_MODE_POINT] = VK_POLYGON_MODE_POINT,
[POLY_MODE_LINE] = VK_POLYGON_MODE_LINE,
};
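/* These tables are indexed directly with the decoded register field, e.g.
 * (register/mask names assumed from the GL renderer):
 *
 *   VkCompareOp op = pgraph_depth_func_vk_map[
 *       GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
 *                NV_PGRAPH_CONTROL_0_ZFUNC)];
 */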
typedef struct VkColorFormatInfo {
VkFormat vk_format;
VkComponentMapping component_map;
} VkColorFormatInfo;
static const VkColorFormatInfo kelvin_color_format_vk_map[66] = {
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = {
VK_FORMAT_R8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = {
VK_FORMAT_R8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = {
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = {
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = {
VK_FORMAT_A4R4G4B4_UNORM_PACK16,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = {
VK_FORMAT_R5G6B5_UNORM_PACK16,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = {
VK_FORMAT_B8G8R8A8_UNORM,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = {
VK_FORMAT_B8G8R8A8_UNORM,
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = {
VK_FORMAT_B8G8R8A8_UNORM, // Converted
},
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = {
VK_FORMAT_R8G8B8A8_UNORM, // Converted
},
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = {
VK_FORMAT_R8G8B8A8_UNORM, // Converted
},
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = {
VK_FORMAT_R8G8B8A8_UNORM, // Converted
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = {
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = {
VK_FORMAT_R5G6B5_UNORM_PACK16,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = {
VK_FORMAT_B8G8R8A8_UNORM,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = {
VK_FORMAT_R8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = {
VK_FORMAT_R8G8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = {
VK_FORMAT_R8_UNORM,
{ VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = {
VK_FORMAT_R8G8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = {
VK_FORMAT_R8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = {
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = {
VK_FORMAT_A4R4G4B4_UNORM_PACK16,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = {
VK_FORMAT_B8G8R8A8_UNORM,
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = {
VK_FORMAT_R8_UNORM,
{ VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = {
VK_FORMAT_R8G8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = {
VK_FORMAT_R8G8B8_SNORM, // Converted
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = {
VK_FORMAT_R8G8_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = {
VK_FORMAT_R8G8_UNORM,
{ VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = {
VK_FORMAT_R8G8B8A8_UNORM, // Converted
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = {
VK_FORMAT_R8G8B8A8_UNORM, // Converted
},
/* Additional information is passed to the pixel shader via the swizzle:
* RED: The depth value.
* GREEN: 0 for 16-bit, 1 for 24 bit
* BLUE: 0 for fixed, 1 for float
*/
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = {
VK_FORMAT_R16_UNORM, // FIXME
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = {
// FIXME
// {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}},
VK_FORMAT_R32_UINT,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = {
// FIXME
// {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}},
VK_FORMAT_R32_UINT,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = {
VK_FORMAT_R16_UNORM, // FIXME
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = {
VK_FORMAT_R16_SFLOAT,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO },
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = {
VK_FORMAT_R16_UNORM,
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = {
VK_FORMAT_R8G8B8A8_UNORM,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = {
VK_FORMAT_R8G8B8A8_UNORM,
{ VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = {
VK_FORMAT_R8G8B8A8_UNORM,
{ VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = {
VK_FORMAT_R8G8B8A8_UNORM,
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = {
VK_FORMAT_R8G8B8A8_UNORM,
{ VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R }
},
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = {
VK_FORMAT_R8G8B8A8_UNORM,
{ VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }
},
};
typedef struct BasicSurfaceFormatInfo {
unsigned int bytes_per_pixel;
} BasicSurfaceFormatInfo;
typedef struct SurfaceFormatInfo {
unsigned int host_bytes_per_pixel;
VkFormat vk_format;
VkImageUsageFlags usage;
VkImageAspectFlags aspect;
} SurfaceFormatInfo;
static const BasicSurfaceFormatInfo kelvin_surface_color_format_map[] = {
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = { 2 },
[NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = { 2 },
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = { 4 },
[NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = { 4 },
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = { 1 },
[NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = { 2 },
};
static const SurfaceFormatInfo kelvin_surface_color_format_vk_map[] = {
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
{
// FIXME: Force alpha to zero
2,
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
{
2,
VK_FORMAT_R5G6B5_UNORM_PACK16,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
{
// FIXME: Force alpha to zero
4,
VK_FORMAT_B8G8R8A8_UNORM,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
{
4,
VK_FORMAT_B8G8R8A8_UNORM,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
{
// FIXME: Map channel color
1,
VK_FORMAT_R8_UNORM,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
{
// FIXME: Map channel color
2,
VK_FORMAT_R8G8_UNORM,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
},
};
static const BasicSurfaceFormatInfo kelvin_surface_zeta_format_map[] = {
[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = { 2 },
[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = { 4 },
};
// FIXME: Actually support stored float format
static const SurfaceFormatInfo zeta_d16 = {
2,
VK_FORMAT_D16_UNORM,
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_DEPTH_BIT,
};
static const SurfaceFormatInfo zeta_d32_sfloat_s8_uint = {
8,
VK_FORMAT_D32_SFLOAT_S8_UINT,
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
};
static const SurfaceFormatInfo zeta_d24_unorm_s8_uint = {
4,
VK_FORMAT_D24_UNORM_S8_UINT,
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
};
#endif

View File

@ -0,0 +1,59 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
#include "debug.h"
#ifndef _WIN32
#include <dlfcn.h>
#endif
#ifdef CONFIG_RENDERDOC
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"
#endif
int nv2a_vk_dgroup_indent = 0;
void pgraph_vk_debug_init(void)
{
#ifdef CONFIG_RENDERDOC
nv2a_dbg_renderdoc_init();
#endif
}
void pgraph_vk_debug_frame_terminator(void)
{
#ifdef CONFIG_RENDERDOC
if (nv2a_dbg_renderdoc_available()) {
RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api();
PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state;
if (rdoc_api->IsTargetControlConnected()) {
if (rdoc_api->IsFrameCapturing()) {
rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
}
if (renderdoc_capture_frames > 0) {
rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
--renderdoc_capture_frames;
}
}
}
#endif
}

View File

@ -0,0 +1,61 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H
#define HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H
#define DEBUG_VK 0
extern int nv2a_vk_dgroup_indent;
#define NV2A_VK_XDPRINTF(x, fmt, ...) \
do { \
if (x) { \
for (int i = 0; i < nv2a_vk_dgroup_indent; i++) \
fprintf(stderr, " "); \
fprintf(stderr, fmt "\n", ##__VA_ARGS__); \
} \
} while (0)
#define NV2A_VK_DPRINTF(fmt, ...) NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__)
#define NV2A_VK_DGROUP_BEGIN(fmt, ...) \
do { \
NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__); \
nv2a_vk_dgroup_indent++; \
} while (0)
#define NV2A_VK_DGROUP_END(...) \
do { \
nv2a_vk_dgroup_indent--; \
assert(nv2a_vk_dgroup_indent >= 0); \
} while (0)
#define VK_CHECK(x) \
do { \
VkResult vk_result = (x); \
if (vk_result != VK_SUCCESS) { \
fprintf(stderr, "vk_result = %d\n", vk_result); \
} \
assert(vk_result == VK_SUCCESS && "vk check failed"); \
} while (0)
void pgraph_vk_debug_frame_terminator(void);
#endif

View File

@ -0,0 +1,896 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
static const char *display_frag_glsl =
"#version 450\n"
"layout(binding = 0) uniform sampler2D tex;\n"
"layout(binding = 1) uniform sampler2D pvideo_tex;\n"
"layout(push_constant, std430) uniform PushConstants {\n"
" bool pvideo_enable;\n"
" vec2 pvideo_in_pos;\n"
" vec4 pvideo_pos;\n"
" vec3 pvideo_scale;\n"
" bool pvideo_color_key_enable;\n"
" vec2 display_size;\n"
" float line_offset;\n"
" vec4 pvideo_color_key;\n"
"};\n"
"layout(location = 0) out vec4 out_Color;\n"
"void main()\n"
"{\n"
" vec2 texCoord = gl_FragCoord.xy/display_size;\n"
" texCoord.y = 1 - texCoord.y;\n" // GL compat
" float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
" texCoord.y = 1 + rel*(texCoord.y - 1);"
" out_Color.rgba = texture(tex, texCoord);\n"
// " if (pvideo_enable) {\n"
// " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
// " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
// " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
// " greaterThan(screenCoord, output_region.zw));\n"
// " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
// " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
// " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
// " in_st.y *= -1.0;\n"
// " out_Color.rgba = texture(pvideo_tex, in_st);\n"
// " }\n"
// " }\n"
"}\n";
static void create_descriptor_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorPoolSize pool_sizes = {
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 2,
};
VkDescriptorPoolCreateInfo pool_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.poolSizeCount = 1,
.pPoolSizes = &pool_sizes,
.maxSets = 1,
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
};
VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
&r->display.descriptor_pool));
}
static void destroy_descriptor_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyDescriptorPool(r->device, r->display.descriptor_pool, NULL);
r->display.descriptor_pool = VK_NULL_HANDLE;
}
static void create_descriptor_set_layout(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorSetLayoutBinding bindings[2];
for (int i = 0; i < ARRAY_SIZE(bindings); i++) {
bindings[i] = (VkDescriptorSetLayoutBinding){
.binding = i,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
}
VkDescriptorSetLayoutCreateInfo layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = ARRAY_SIZE(bindings),
.pBindings = bindings,
};
VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
&r->display.descriptor_set_layout));
}
static void destroy_descriptor_set_layout(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyDescriptorSetLayout(r->device, r->display.descriptor_set_layout,
NULL);
r->display.descriptor_set_layout = VK_NULL_HANDLE;
}
static void create_descriptor_sets(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorSetLayout layout = r->display.descriptor_set_layout;
VkDescriptorSetAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = r->display.descriptor_pool,
.descriptorSetCount = 1,
.pSetLayouts = &layout,
};
VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
&r->display.descriptor_set));
}
static void create_render_pass(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkAttachmentDescription attachment;
VkAttachmentReference color_reference;
attachment = (VkAttachmentDescription){
.format = VK_FORMAT_R8G8B8A8_UNORM,
.samples = VK_SAMPLE_COUNT_1_BIT,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
};
color_reference = (VkAttachmentReference){
0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
};
VkSubpassDependency dependency = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
};
dependency.srcStageMask |=
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.dstStageMask |=
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
VkSubpassDescription subpass = {
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.colorAttachmentCount = 1,
.pColorAttachments = &color_reference,
};
VkRenderPassCreateInfo renderpass_create_info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &attachment,
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 1,
.pDependencies = &dependency,
};
VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL,
&r->display.render_pass));
}
static void destroy_render_pass(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyRenderPass(r->device, r->display.render_pass, NULL);
r->display.render_pass = VK_NULL_HANDLE;
}
static void create_display_pipeline(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
r->display.display_frag =
pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_FRAGMENT_BIT, display_frag_glsl);
VkPipelineShaderStageCreateInfo shader_stages[] = {
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = r->quad_vert_module->module,
.pName = "main",
},
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = r->display.display_frag->module,
.pName = "main",
},
};
VkPipelineVertexInputStateCreateInfo vertex_input = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
};
VkPipelineInputAssemblyStateCreateInfo input_assembly = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
.primitiveRestartEnable = VK_FALSE,
};
VkPipelineViewportStateCreateInfo viewport_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
};
VkPipelineRasterizationStateCreateInfo rasterizer = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = VK_POLYGON_MODE_FILL,
.lineWidth = 1.0f,
.cullMode = VK_CULL_MODE_BACK_BIT,
.frontFace = VK_FRONT_FACE_CLOCKWISE,
.depthBiasEnable = VK_FALSE,
};
VkPipelineMultisampleStateCreateInfo multisampling = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.sampleShadingEnable = VK_FALSE,
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
};
VkPipelineDepthStencilStateCreateInfo depth_stencil = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
.depthTestEnable = VK_FALSE,
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
.depthBoundsTestEnable = VK_FALSE,
};
VkPipelineColorBlendAttachmentState color_blend_attachment = {
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
.blendEnable = VK_FALSE,
};
VkPipelineColorBlendStateCreateInfo color_blending = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.logicOpEnable = VK_FALSE,
.logicOp = VK_LOGIC_OP_COPY,
.attachmentCount = 1,
.pAttachments = &color_blend_attachment,
};
VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR };
VkPipelineDynamicStateCreateInfo dynamic_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = 2,
.pDynamicStates = dynamic_states,
};
VkPushConstantRange push_constant_range = {
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.offset = 0,
.size = r->display.display_frag->push_constants.total_size,
};
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &r->display.descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constant_range,
};
VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
&r->display.pipeline_layout));
VkGraphicsPipelineCreateInfo pipeline_info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = ARRAY_SIZE(shader_stages),
.pStages = shader_stages,
.pVertexInputState = &vertex_input,
.pInputAssemblyState = &input_assembly,
.pViewportState = &viewport_state,
.pRasterizationState = &rasterizer,
.pMultisampleState = &multisampling,
.pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_state,
.layout = r->display.pipeline_layout,
.renderPass = r->display.render_pass,
.subpass = 0,
.basePipelineHandle = VK_NULL_HANDLE,
};
VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
&pipeline_info, NULL,
&r->display.pipeline));
}
static void destroy_display_pipeline(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyPipeline(r->device, r->display.pipeline, NULL);
r->display.pipeline = VK_NULL_HANDLE;
}
static void create_frame_buffer(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkFramebufferCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.renderPass = r->display.render_pass,
.attachmentCount = 1,
.pAttachments = &r->display.image_view,
.width = r->display.width,
.height = r->display.height,
.layers = 1,
};
VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL,
&r->display.framebuffer));
}
static void destroy_frame_buffer(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyFramebuffer(r->device, r->display.framebuffer, NULL);
    r->display.framebuffer = VK_NULL_HANDLE;
}
static void destroy_current_display_image(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
PGRAPHVkDisplayState *d = &r->display;
if (d->image == VK_NULL_HANDLE) {
return;
}
destroy_frame_buffer(pg);
#if HAVE_EXTERNAL_MEMORY
glDeleteTextures(1, &d->gl_texture_id);
d->gl_texture_id = 0;
glDeleteMemoryObjectsEXT(1, &d->gl_memory_obj);
d->gl_memory_obj = 0;
#ifdef WIN32
CloseHandle(d->handle);
d->handle = 0;
#endif
#endif
vkDestroyImageView(r->device, d->image_view, NULL);
d->image_view = VK_NULL_HANDLE;
vkDestroyImage(r->device, d->image, NULL);
d->image = VK_NULL_HANDLE;
vkFreeMemory(r->device, d->memory, NULL);
d->memory = VK_NULL_HANDLE;
d->draw_time = 0;
}
// FIXME: We may need to use two images. One for actually rendering display,
// and another for GL in the correct tiling mode
static void create_display_image_from_surface(PGRAPHState *pg,
SurfaceBinding *surface)
{
PGRAPHVkState *r = pg->vk_renderer_state;
PGRAPHVkDisplayState *d = &r->display;
if (r->display.image != VK_NULL_HANDLE) {
destroy_current_display_image(pg);
}
const GLint gl_internal_format = GL_RGBA8;
bool use_optimal_tiling = true;
#if HAVE_EXTERNAL_MEMORY
GLint num_tiling_types;
glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
GL_NUM_TILING_TYPES_EXT, 1, &num_tiling_types);
// XXX: Apparently on AMD GL_OPTIMAL_TILING_EXT is reported to be
// supported, but doesn't work? On nVidia, GL_LINEAR_TILING_EXT may not
// be supported so we must use optimal. Default to optimal unless
// linear is explicitly specified...
GLint tiling_types[num_tiling_types];
glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
GL_TILING_TYPES_EXT, num_tiling_types, tiling_types);
for (int i = 0; i < num_tiling_types; i++) {
if (tiling_types[i] == GL_LINEAR_TILING_EXT) {
use_optimal_tiling = false;
break;
}
}
#endif
// Create image
VkImageCreateInfo image_create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.imageType = VK_IMAGE_TYPE_2D,
.extent.width = surface->width,
.extent.height = surface->height,
.extent.depth = 1,
.mipLevels = 1,
.arrayLayers = 1,
.format = VK_FORMAT_R8G8B8A8_UNORM,
.tiling = use_optimal_tiling ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.samples = VK_SAMPLE_COUNT_1_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
};
pgraph_apply_scaling_factor(pg, &image_create_info.extent.width,
&image_create_info.extent.height);
VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
.handleTypes =
#ifdef WIN32
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
#else
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
#endif
};
image_create_info.pNext = &external_memory_image_create_info;
VK_CHECK(vkCreateImage(r->device, &image_create_info, NULL, &d->image));
// Allocate and bind image memory
VkMemoryRequirements memory_requirements;
vkGetImageMemoryRequirements(r->device, d->image, &memory_requirements);
VkMemoryAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.allocationSize = memory_requirements.size,
.memoryTypeIndex =
pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
};
VkExportMemoryAllocateInfo export_memory_alloc_info = {
.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
.handleTypes =
#ifdef WIN32
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
#else
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT
#endif
,
};
alloc_info.pNext = &export_memory_alloc_info;
VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &d->memory));
vkBindImageMemory(r->device, d->image, d->memory, 0);
// Create Image View
VkImageViewCreateInfo image_view_create_info = {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = d->image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = image_create_info.format,
.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.subresourceRange.levelCount = 1,
.subresourceRange.layerCount = 1,
};
VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
&d->image_view));
#if HAVE_EXTERNAL_MEMORY
#ifdef WIN32
VkMemoryGetWin32HandleInfoKHR handle_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
.memory = d->memory,
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
};
VK_CHECK(vkGetMemoryWin32HandleKHR(r->device, &handle_info, &d->handle));
glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
glImportMemoryWin32HandleEXT(d->gl_memory_obj, memory_requirements.size, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, d->handle);
assert(glGetError() == GL_NO_ERROR);
#else
VkMemoryGetFdInfoKHR fd_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
.memory = d->memory,
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
};
VK_CHECK(vkGetMemoryFdKHR(r->device, &fd_info, &d->fd));
glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
glImportMemoryFdEXT(d->gl_memory_obj, memory_requirements.size,
GL_HANDLE_TYPE_OPAQUE_FD_EXT, d->fd);
assert(glIsMemoryObjectEXT(d->gl_memory_obj));
assert(glGetError() == GL_NO_ERROR);
#endif // WIN32
glGenTextures(1, &d->gl_texture_id);
glBindTexture(GL_TEXTURE_2D, d->gl_texture_id);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT,
use_optimal_tiling ? GL_OPTIMAL_TILING_EXT :
GL_LINEAR_TILING_EXT);
glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, gl_internal_format,
image_create_info.extent.width,
image_create_info.extent.height, d->gl_memory_obj, 0);
assert(glGetError() == GL_NO_ERROR);
#endif // HAVE_EXTERNAL_MEMORY
d->width = image_create_info.extent.width;
d->height = image_create_info.extent.height;
create_frame_buffer(pg);
}
static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorImageInfo image_infos[2];
VkWriteDescriptorSet descriptor_writes[2];
// Display surface
image_infos[0] = (VkDescriptorImageInfo){
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
.imageView = surface->image_view,
.sampler = r->display.sampler,
};
descriptor_writes[0] = (VkWriteDescriptorSet){
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = r->display.descriptor_set,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.pImageInfo = &image_infos[0],
};
// FIXME: PVIDEO Overlay
image_infos[1] = (VkDescriptorImageInfo){
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
.imageView = r->dummy_texture.image_view,
.sampler = r->dummy_texture.sampler,
};
descriptor_writes[1] = (VkWriteDescriptorSet){
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = r->display.descriptor_set,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.pImageInfo = &image_infos[1],
};
vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
descriptor_writes, 0, NULL);
}
static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface)
{
NV2AState *d = container_of(pg, NV2AState, pgraph);
PGRAPHVkState *r = pg->vk_renderer_state;
unsigned int width, height;
uint32_t pline_offset, pstart_addr, pline_compare;
d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
int line_offset = surface->pitch / pline_offset;
/* Adjust viewport height for interlaced mode, used only in 1080i */
if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
height *= 2;
}
pgraph_apply_scaling_factor(pg, &width, &height);
ShaderUniformLayout *l = &r->display.display_frag->push_constants;
int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache
int line_offset_loc = uniform_index(l, "line_offset");
uniform2f(l, display_size_loc, width, height);
uniform1f(l, line_offset_loc, line_offset);
#if 0 // FIXME: PVIDEO overlay
// FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
// Many games seem to pass this value when initializing or tearing down
// PVIDEO. On its own, this generally does not result in the overlay being
// hidden, however there are certain games (e.g., Ultimate Beach Soccer)
// that use an unknown mechanism to hide the overlay without explicitly
// stopping it.
// Since the value seems to be set to 0xFFFFFFFF only in cases where the
// content is not valid, it is probably good enough to treat it as an
// implicit stop.
bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
&& d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_enable_loc, enabled);
if (!enabled) {
return;
}
hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
int in_width =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
int in_height =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
NV_PVIDEO_POINT_IN_S);
int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
NV_PVIDEO_POINT_IN_T);
int in_pitch =
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
int in_color =
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
unsigned int out_width =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
unsigned int out_height =
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
float scale_x = 1.0f;
float scale_y = 1.0f;
unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
scale_x = pvideo_calculate_scale(ds_dx, out_width);
}
if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
scale_y = pvideo_calculate_scale(dt_dy, out_height);
}
// On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
// in them being capped to the output size, content is not scaled. This is
// particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
// during initialization or teardown.
if (in_width > out_width) {
in_width = floorf((float)out_width * scale_x + 0.5f);
}
if (in_height > out_height) {
in_height = floorf((float)out_height * scale_y + 0.5f);
}
/* TODO: support other color formats */
assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
unsigned int out_x =
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
unsigned int out_y =
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
unsigned int color_key_enabled =
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_enable_loc,
color_key_enabled);
// TODO: Verify that masking off the top byte is correct.
// SeaBlade sets a color key of 0x80000000 but the texture passed into the
// shader is cleared to 0 alpha.
unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_loc,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
assert(offset + in_pitch * in_height <= limit);
hwaddr end = base + offset + in_pitch * in_height;
assert(end <= memory_region_size(d->vram));
pgraph_apply_scaling_factor(pg, &out_x, &out_y);
pgraph_apply_scaling_factor(pg, &out_width, &out_height);
// Translate for the GL viewport origin.
out_y = MAX(pg->renderer_state->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
glActiveTexture(GL_TEXTURE0 + 1);
glBindTexture(GL_TEXTURE_2D, d->pgraph.renderer_state->disp_rndr.pvideo_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
d->vram_ptr + base + offset, in_width, in_height, in_pitch);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
GL_UNSIGNED_BYTE, tex_rgba);
g_free(tex_rgba);
glUniform1i(d->pgraph.renderer_state->disp_rndr.pvideo_tex_loc, 1);
glUniform2f(d->pgraph.renderer_state->disp_rndr.pvideo_in_pos_loc, in_s, in_t);
glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_pos_loc,
out_x, out_y, out_width, out_height);
glUniform3f(d->pgraph.renderer_state->disp_rndr.pvideo_scale_loc,
scale_x, scale_y, 1.0f / pg->surface_scale_factor);
#endif
}
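/* Draw the given surface into the display image if it has been rendered to
 * since the last present, first flushing any in-flight command buffer that
 * the surface was drawn in. */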
static void render_display(PGRAPHState *pg, SurfaceBinding *surface)
{
PGRAPHVkState *r = pg->vk_renderer_state;
PGRAPHVkDisplayState *disp = &r->display;
if (disp->draw_time >= surface->draw_time) {
return;
}
if (r->in_command_buffer &&
surface->draw_time >= r->command_buffer_start_time) {
pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING);
}
update_uniforms(pg, surface);
update_descriptor_set(pg, surface);
VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
pgraph_vk_transition_image_layout(pg, cmd, surface->image,
surface->host_fmt.vk_format,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
pgraph_vk_transition_image_layout(
pg, cmd, disp->image, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
VkRenderPassBeginInfo render_pass_begin_info = {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = disp->render_pass,
.framebuffer = disp->framebuffer,
.renderArea.extent.width = disp->width,
.renderArea.extent.height = disp->height,
};
vkCmdBeginRenderPass(cmd, &render_pass_begin_info,
VK_SUBPASS_CONTENTS_INLINE);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
disp->pipeline);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
disp->pipeline_layout, 0, 1, &disp->descriptor_set,
0, NULL);
VkViewport viewport = {
.width = disp->width,
.height = disp->height,
.minDepth = 0.0,
.maxDepth = 1.0,
};
vkCmdSetViewport(cmd, 0, 1, &viewport);
VkRect2D scissor = {
.extent.width = disp->width,
.extent.height = disp->height,
};
vkCmdSetScissor(cmd, 0, 1, &scissor);
vkCmdPushConstants(cmd, disp->pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
0, disp->display_frag->push_constants.total_size,
disp->display_frag->push_constants.allocation);
vkCmdDraw(cmd, 3, 1, 0, 0);
vkCmdEndRenderPass(cmd);
#if 0
VkImageCopy region = {
.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.srcSubresource.layerCount = 1,
.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.dstSubresource.layerCount = 1,
.extent.width = surface->width,
.extent.height = surface->height,
.extent.depth = 1,
};
pgraph_apply_scaling_factor(pg, &region.extent.width,
&region.extent.height);
vkCmdCopyImage(cmd, surface->image,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, disp->image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
#endif
pgraph_vk_transition_image_layout(pg, cmd, surface->image,
surface->host_fmt.vk_format,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
pgraph_vk_transition_image_layout(pg, cmd, disp->image,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
pgraph_vk_end_single_time_commands(pg, cmd);
nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_5);
disp->draw_time = surface->draw_time;
}
static void create_surface_sampler(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkSamplerCreateInfo sampler_create_info = {
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.magFilter = VK_FILTER_NEAREST,
.minFilter = VK_FILTER_NEAREST,
.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
.anisotropyEnable = VK_FALSE,
.borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE,
.unnormalizedCoordinates = VK_FALSE,
.compareEnable = VK_FALSE,
.compareOp = VK_COMPARE_OP_ALWAYS,
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
};
VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL,
&r->display.sampler));
}
static void destroy_surface_sampler(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroySampler(r->device, r->display.sampler, NULL);
r->display.sampler = VK_NULL_HANDLE;
}
void pgraph_vk_init_display(PGRAPHState *pg)
{
create_descriptor_pool(pg);
create_descriptor_set_layout(pg);
create_descriptor_sets(pg);
create_render_pass(pg);
create_display_pipeline(pg);
create_surface_sampler(pg);
}
void pgraph_vk_finalize_display(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
if (r->display.image != VK_NULL_HANDLE) {
destroy_current_display_image(pg);
}
destroy_surface_sampler(pg);
destroy_display_pipeline(pg);
destroy_render_pass(pg);
destroy_descriptor_set_layout(pg);
destroy_descriptor_pool(pg);
}
void pgraph_vk_render_display(PGRAPHState *pg)
{
NV2AState *d = container_of(pg, NV2AState, pgraph);
PGRAPHVkState *r = pg->vk_renderer_state;
uint32_t pline_offset, pstart_addr, pline_compare;
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
SurfaceBinding *surface =
pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
if (surface == NULL || !surface->color) {
return;
}
unsigned int width = surface->width, height = surface->height;
pgraph_apply_scaling_factor(pg, &width, &height);
PGRAPHVkDisplayState *disp = &r->display;
if (!disp->image || disp->width != width || disp->height != height) {
create_display_image_from_surface(pg, surface);
}
render_display(pg, surface);
}

File diff suppressed because it is too large

View File

@ -0,0 +1,380 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
#include <assert.h>
#include <glslang/Include/glslang_c_interface.h>
#include <stdio.h>
static const glslang_resource_t
resource_limits = { .max_lights = 32,
.max_clip_planes = 6,
.max_texture_units = 32,
.max_texture_coords = 32,
.max_vertex_attribs = 64,
.max_vertex_uniform_components = 4096,
.max_varying_floats = 64,
.max_vertex_texture_image_units = 32,
.max_combined_texture_image_units = 80,
.max_texture_image_units = 32,
.max_fragment_uniform_components = 4096,
.max_draw_buffers = 32,
.max_vertex_uniform_vectors = 128,
.max_varying_vectors = 8,
.max_fragment_uniform_vectors = 16,
.max_vertex_output_vectors = 16,
.max_fragment_input_vectors = 15,
.min_program_texel_offset = -8,
.max_program_texel_offset = 7,
.max_clip_distances = 8,
.max_compute_work_group_count_x = 65535,
.max_compute_work_group_count_y = 65535,
.max_compute_work_group_count_z = 65535,
.max_compute_work_group_size_x = 1024,
.max_compute_work_group_size_y = 1024,
.max_compute_work_group_size_z = 64,
.max_compute_uniform_components = 1024,
.max_compute_texture_image_units = 16,
.max_compute_image_uniforms = 8,
.max_compute_atomic_counters = 8,
.max_compute_atomic_counter_buffers = 1,
.max_varying_components = 60,
.max_vertex_output_components = 64,
.max_geometry_input_components = 64,
.max_geometry_output_components = 128,
.max_fragment_input_components = 128,
.max_image_units = 8,
.max_combined_image_units_and_fragment_outputs = 8,
.max_combined_shader_output_resources = 8,
.max_image_samples = 0,
.max_vertex_image_uniforms = 0,
.max_tess_control_image_uniforms = 0,
.max_tess_evaluation_image_uniforms = 0,
.max_geometry_image_uniforms = 0,
.max_fragment_image_uniforms = 8,
.max_combined_image_uniforms = 8,
.max_geometry_texture_image_units = 16,
.max_geometry_output_vertices = 256,
.max_geometry_total_output_components = 1024,
.max_geometry_uniform_components = 1024,
.max_geometry_varying_components = 64,
.max_tess_control_input_components = 128,
.max_tess_control_output_components = 128,
.max_tess_control_texture_image_units = 16,
.max_tess_control_uniform_components = 1024,
.max_tess_control_total_output_components = 4096,
.max_tess_evaluation_input_components = 128,
.max_tess_evaluation_output_components = 128,
.max_tess_evaluation_texture_image_units = 16,
.max_tess_evaluation_uniform_components = 1024,
.max_tess_patch_components = 120,
.max_patch_vertices = 32,
.max_tess_gen_level = 64,
.max_viewports = 16,
.max_vertex_atomic_counters = 0,
.max_tess_control_atomic_counters = 0,
.max_tess_evaluation_atomic_counters = 0,
.max_geometry_atomic_counters = 0,
.max_fragment_atomic_counters = 8,
.max_combined_atomic_counters = 8,
.max_atomic_counter_bindings = 1,
.max_vertex_atomic_counter_buffers = 0,
.max_tess_control_atomic_counter_buffers = 0,
.max_tess_evaluation_atomic_counter_buffers = 0,
.max_geometry_atomic_counter_buffers = 0,
.max_fragment_atomic_counter_buffers = 1,
.max_combined_atomic_counter_buffers = 1,
.max_atomic_counter_buffer_size = 16384,
.max_transform_feedback_buffers = 4,
.max_transform_feedback_interleaved_components = 64,
.max_cull_distances = 8,
.max_combined_clip_and_cull_distances = 8,
.max_samples = 4,
.max_mesh_output_vertices_nv = 256,
.max_mesh_output_primitives_nv = 512,
.max_mesh_work_group_size_x_nv = 32,
.max_mesh_work_group_size_y_nv = 1,
.max_mesh_work_group_size_z_nv = 1,
.max_task_work_group_size_x_nv = 32,
.max_task_work_group_size_y_nv = 1,
.max_task_work_group_size_z_nv = 1,
.max_mesh_view_count_nv = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits = {
.non_inductive_for_loops = 1,
.while_loops = 1,
.do_while_loops = 1,
.general_uniform_indexing = 1,
.general_attribute_matrix_vector_indexing = 1,
.general_varying_indexing = 1,
.general_sampler_indexing = 1,
.general_variable_indexing = 1,
.general_constant_matrix_vector_indexing = 1,
} };
void pgraph_vk_init_glsl_compiler(void)
{
glslang_initialize_process();
}
void pgraph_vk_finalize_glsl_compiler(void)
{
glslang_finalize_process();
}
GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage,
const char *glsl_source)
{
const glslang_input_t input = {
.language = GLSLANG_SOURCE_GLSL,
.stage = stage,
.client = GLSLANG_CLIENT_VULKAN,
.client_version = GLSLANG_TARGET_VULKAN_1_3,
.target_language = GLSLANG_TARGET_SPV,
.target_language_version = GLSLANG_TARGET_SPV_1_5,
.code = glsl_source,
.default_version = 460,
.default_profile = GLSLANG_NO_PROFILE,
.force_default_version_and_profile = false,
.forward_compatible = false,
.messages = GLSLANG_MSG_DEFAULT_BIT,
.resource = &resource_limits,
};
glslang_shader_t *shader = glslang_shader_create(&input);
if (!glslang_shader_preprocess(shader, &input)) {
fprintf(stderr,
"GLSL preprocessing failed\n"
"[INFO]: %s\n"
"[DEBUG]: %s\n"
"%s\n",
glslang_shader_get_info_log(shader),
glslang_shader_get_info_debug_log(shader), input.code);
assert(!"glslang preprocess failed");
glslang_shader_delete(shader);
return NULL;
}
if (!glslang_shader_parse(shader, &input)) {
fprintf(stderr,
"GLSL parsing failed\n"
"[INFO]: %s\n"
"[DEBUG]: %s\n"
"%s\n",
glslang_shader_get_info_log(shader),
glslang_shader_get_info_debug_log(shader),
glslang_shader_get_preprocessed_code(shader));
assert(!"glslang parse failed");
glslang_shader_delete(shader);
return NULL;
}
glslang_program_t *program = glslang_program_create();
glslang_program_add_shader(program, shader);
if (!glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT |
GLSLANG_MSG_VULKAN_RULES_BIT)) {
fprintf(stderr,
"GLSL linking failed\n"
"[INFO]: %s\n"
"[DEBUG]: %s\n",
glslang_program_get_info_log(program),
glslang_program_get_info_debug_log(program));
assert(!"glslang link failed");
glslang_program_delete(program);
glslang_shader_delete(shader);
return NULL;
}
glslang_spv_options_t spv_options = {
.validate = true,
#if defined(CONFIG_RENDERDOC)
.disable_optimizer = true,
.generate_debug_info = true,
.emit_nonsemantic_shader_debug_info = true,
.emit_nonsemantic_shader_debug_source = true,
#endif
};
glslang_program_SPIRV_generate_with_options(program, stage, &spv_options);
const char *spirv_messages = glslang_program_SPIRV_get_messages(program);
if (spirv_messages) {
printf("%s\b", spirv_messages);
}
size_t num_program_bytes =
glslang_program_SPIRV_get_size(program) * sizeof(uint32_t);
guint8 *data = g_malloc(num_program_bytes);
glslang_program_SPIRV_get(program, (unsigned int *)data);
glslang_program_delete(program);
glslang_shader_delete(shader);
return g_byte_array_new_take(data, num_program_bytes);
}
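/* Illustration only (not part of the renderer): compiling an inline fragment
 * shader to SPIR-V and wrapping it in a VkShaderModule with the helpers in
 * this file, assuming a valid PGRAPHVkState *r. */
#if 0
GByteArray *spv = pgraph_vk_compile_glsl_to_spv(
    GLSLANG_STAGE_FRAGMENT,
    "#version 450\n"
    "layout(location = 0) out vec4 out_color;\n"
    "void main() { out_color = vec4(1.0); }\n");
VkShaderModule module = pgraph_vk_create_shader_module_from_spv(r, spv);
g_byte_array_unref(spv); /* The module keeps its own copy of the code. */
#endif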
VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, GByteArray *spv)
{
VkShaderModuleCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.codeSize = spv->len,
.pCode = (uint32_t *)spv->data,
};
VkShaderModule module;
VK_CHECK(
vkCreateShaderModule(r->device, &create_info, NULL, &module));
return module;
}
static void block_to_uniforms(const SpvReflectBlockVariable *block, ShaderUniformLayout *layout)
{
assert(!layout->uniforms);
layout->num_uniforms = block->member_count;
layout->uniforms = g_malloc0_n(block->member_count, sizeof(ShaderUniform));
layout->total_size = block->size;
layout->allocation = g_malloc0(block->size);
for (uint32_t k = 0; k < block->member_count; ++k) {
const SpvReflectBlockVariable *member = &block->members[k];
assert(member->array.dims_count < 2);
layout->uniforms[k] = (ShaderUniform){
.name = strdup(member->name),
.offset = member->offset,
.dim_v = MAX(1, member->numeric.vector.component_count),
.dim_a = MAX(member->array.dims_count ? member->array.dims[0] : 1, member->numeric.matrix.column_count),
.stride = MAX(member->array.stride, member->numeric.matrix.stride),
};
// fprintf(stderr, "<%s offset=%zd dim_v=%zd dim_a=%zd stride=%zd>\n",
// layout->uniforms[k].name,
// layout->uniforms[k].offset,
// layout->uniforms[k].dim_v,
// layout->uniforms[k].dim_a,
// layout->uniforms[k].stride
// );
}
// fprintf(stderr, "--\n");
}
static void init_layout_from_spv(ShaderModuleInfo *info)
{
SpvReflectResult result = spvReflectCreateShaderModule(
info->spirv->len, info->spirv->data, &info->reflect_module);
assert(result == SPV_REFLECT_RESULT_SUCCESS &&
"Failed to create SPIR-V shader module");
uint32_t descriptor_set_count = 0;
result = spvReflectEnumerateDescriptorSets(&info->reflect_module,
&descriptor_set_count, NULL);
assert(result == SPV_REFLECT_RESULT_SUCCESS &&
"Failed to enumerate descriptor sets");
info->descriptor_sets =
g_malloc_n(descriptor_set_count, sizeof(SpvReflectDescriptorSet *));
result = spvReflectEnumerateDescriptorSets(
&info->reflect_module, &descriptor_set_count, info->descriptor_sets);
assert(result == SPV_REFLECT_RESULT_SUCCESS &&
"Failed to enumerate descriptor sets");
info->uniforms.num_uniforms = 0;
info->uniforms.uniforms = NULL;
for (uint32_t i = 0; i < descriptor_set_count; ++i) {
const SpvReflectDescriptorSet *descriptor_set =
info->descriptor_sets[i];
for (uint32_t j = 0; j < descriptor_set->binding_count; ++j) {
const SpvReflectDescriptorBinding *binding =
descriptor_set->bindings[j];
if (binding->descriptor_type !=
SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
continue;
}
const SpvReflectBlockVariable *block = &binding->block;
block_to_uniforms(block, &info->uniforms);
}
}
info->push_constants.num_uniforms = 0;
info->push_constants.uniforms = NULL;
assert(info->reflect_module.push_constant_block_count < 2);
if (info->reflect_module.push_constant_block_count) {
block_to_uniforms(&info->reflect_module.push_constant_blocks[0],
&info->push_constants);
}
}
static glslang_stage_t vk_shader_stage_to_glslang_stage(VkShaderStageFlagBits stage)
{
switch (stage) {
case VK_SHADER_STAGE_GEOMETRY_BIT:
return GLSLANG_STAGE_GEOMETRY;
case VK_SHADER_STAGE_VERTEX_BIT:
return GLSLANG_STAGE_VERTEX;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return GLSLANG_STAGE_FRAGMENT;
case VK_SHADER_STAGE_COMPUTE_BIT:
return GLSLANG_STAGE_COMPUTE;
default:
assert(0);
}
}
ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl)
{
ShaderModuleInfo *info = g_malloc0(sizeof(*info));
info->glsl = strdup(glsl);
info->spirv = pgraph_vk_compile_glsl_to_spv(
vk_shader_stage_to_glslang_stage(stage), glsl);
info->module = pgraph_vk_create_shader_module_from_spv(r, info->spirv);
init_layout_from_spv(info);
return info;
}
static void finalize_uniform_layout(ShaderUniformLayout *layout)
{
for (int i = 0; i < layout->num_uniforms; i++) {
free((void *)layout->uniforms[i].name);
}
if (layout->uniforms) {
g_free(layout->uniforms);
}
g_free(layout->allocation); // Also release the CPU-side staging buffer
}
void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info)
{
if (info->glsl) {
free(info->glsl);
}
finalize_uniform_layout(&info->uniforms);
finalize_uniform_layout(&info->push_constants);
g_free(info->descriptor_sets);
spvReflectDestroyShaderModule(&info->reflect_module);
vkDestroyShaderModule(r->device, info->module, NULL);
g_byte_array_unref(info->spirv);
g_free(info);
}

View File

@ -0,0 +1,205 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_VK_GLSL_H
#define HW_XBOX_NV2A_PGRAPH_VK_GLSL_H
#include "qemu/osdep.h"
#include <stdint.h>
#include <assert.h>
#include <string.h>
typedef struct ShaderUniform {
const char *name;
size_t dim_v;
size_t dim_a;
size_t align;
size_t stride;
size_t offset;
} ShaderUniform;
typedef struct ShaderUniformLayout {
ShaderUniform *uniforms;
size_t num_uniforms;
size_t total_size;
void *allocation;
} ShaderUniformLayout;
static inline void uniform_std140(ShaderUniformLayout *layout)
{
size_t offset = 0;
for (int i = 0; i < layout->num_uniforms; i++) {
ShaderUniform *u = &layout->uniforms[i];
size_t size = sizeof(float); // float or int
size_t align = size;
size_t stride = 0;
size *= u->dim_v;
align *= u->dim_v == 3 ? 4 : u->dim_v;
// If an array, each element is padded to vec4.
if (u->dim_a > 1) {
align = 4 * sizeof(float);
stride = align;
size = u->dim_a * align;
} else {
align = size;
stride = 0;
}
offset = ROUND_UP(offset, align);
u->align = align;
u->offset = offset;
u->stride = stride;
offset += size;
}
layout->total_size = offset;
assert(layout->total_size);
}
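/* Worked example (hypothetical block, not part of the renderer): for
 * `vec2 a; float b[4];`, uniform_std140() above yields a at offset 0
 * (align 8), b at offset 16 with stride 16 (array elements padded to vec4),
 * and total_size 80. */
#if 0
ShaderUniform example_uniforms[] = {
    { .name = "a", .dim_v = 2, .dim_a = 1 },
    { .name = "b", .dim_v = 1, .dim_a = 4 },
};
ShaderUniformLayout example_layout = {
    .uniforms = example_uniforms,
    .num_uniforms = 2,
};
uniform_std140(&example_layout);
#endif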
static inline void uniform_std430(ShaderUniformLayout *layout)
{
size_t offset = 0;
for (int i = 0; i < layout->num_uniforms; i++) {
ShaderUniform *u = &layout->uniforms[i];
size_t size = sizeof(float); // float or int
size *= u->dim_v;
size_t align = size;
size *= u->dim_a;
offset = ROUND_UP(offset, align);
u->align = align;
u->offset = offset;
u->stride = u->dim_a > 1 ? (size * u->dim_v) : 0;
offset += size;
}
layout->total_size = offset;
assert(layout->total_size);
}
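/* Returns a 1-based handle suitable for uniform_ptr()/uniform_copy(), or -1
 * if the name is not present in the layout. */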
static inline int uniform_index(ShaderUniformLayout *layout, const char *name)
{
for (int i = 0; i < layout->num_uniforms; i++) {
if (!strcmp(layout->uniforms[i].name, name)) {
return i + 1;
}
}
return -1;
}
static inline
void *uniform_ptr(ShaderUniformLayout *layout, int idx)
{
assert(idx > 0 && "invalid uniform index");
return (char *)layout->allocation + layout->uniforms[idx - 1].offset;
}
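/* Copy `count` scalars of `value_size` bytes into the uniform at `idx`, one
 * dim_v-sized element per iteration, advancing the destination by the
 * layout-computed stride so padded array elements land at the right offsets.
 */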
static inline
void uniform_copy(ShaderUniformLayout *layout, int idx, void *values, size_t value_size, size_t count)
{
assert(idx > 0 && "invalid uniform index");
ShaderUniform *u = &layout->uniforms[idx - 1];
const size_t element_size = value_size * u->dim_v;
size_t bytes_remaining = value_size * count;
char *p_out = uniform_ptr(layout, idx);
char *p_max = (char *)layout->allocation + layout->total_size;
char *p_in = (char *)values;
int index = 0;
while (bytes_remaining) {
assert(p_out < p_max);
assert(index < u->dim_a);
memcpy(p_out, p_in, element_size);
bytes_remaining -= element_size;
p_out += u->stride;
p_in += element_size;
index += 1;
}
}
static inline
void uniform1fv(ShaderUniformLayout *layout, int idx, size_t count, float *values)
{
uniform_copy(layout, idx, values, sizeof(float), count);
}
static inline
void uniform1f(ShaderUniformLayout *layout, int idx, float value)
{
uniform1fv(layout, idx, 1, &value);
}
static inline
void uniform2f(ShaderUniformLayout *layout, int idx, float v0, float v1)
{
float values[] = { v0, v1 };
uniform1fv(layout, idx, 2, values);
}
static inline
void uniform4f(ShaderUniformLayout *layout, int idx, float v0, float v1, float v2, float v3)
{
float values[] = { v0, v1, v2, v3 };
uniform1fv(layout, idx, 4, values);
}
static inline
void uniformMatrix2fv(ShaderUniformLayout *layout, int idx, float *values)
{
uniform1fv(layout, idx, 4, values);
}
static inline
void uniformMatrix4fv(ShaderUniformLayout *layout, int idx, float *values)
{
uniform1fv(layout, idx, 4 * 4, values);
}
static inline
void uniform1iv(ShaderUniformLayout *layout, int idx, size_t count, int32_t *values)
{
uniform_copy(layout, idx, values, sizeof(int32_t), count);
}
static inline
void uniform1i(ShaderUniformLayout *layout, int idx, int32_t value)
{
uniform1iv(layout, idx, 1, &value);
}
static inline
void uniform4i(ShaderUniformLayout *layout, int idx, int v0, int v1, int v2, int v3)
{
int values[] = { v0, v1, v2, v3 };
uniform1iv(layout, idx, 4, values);
}
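/* Typical use (sketch; `layout`, `width`, and `height` are assumed context):
 * resolve the 1-based index once, then write values into the CPU-side
 * allocation before pushing it to the GPU. */
#if 0
int loc = uniform_index(layout, "display_size");
uniform2f(layout, loc, width, height);
#endif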
#endif

View File

@ -0,0 +1,209 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
static bool check_format_has_depth_component(VkFormat format)
{
return format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
format == VK_FORMAT_D24_UNORM_S8_UINT ||
format == VK_FORMAT_D16_UNORM;
}
static bool check_format_has_stencil_component(VkFormat format)
{
return format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
format == VK_FORMAT_D24_UNORM_S8_UINT;
}
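/* Emit a pipeline barrier transitioning `image` between the given layouts,
 * choosing source/destination access masks and pipeline stages appropriate
 * for each supported (oldLayout, newLayout) pair; unsupported pairs hit the
 * assert at the end. */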
void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd,
VkImage image, VkFormat format,
VkImageLayout oldLayout,
VkImageLayout newLayout)
{
VkImageMemoryBarrier barrier = {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.oldLayout = oldLayout,
.newLayout = newLayout,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange.baseMipLevel = 0,
.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
.subresourceRange.baseArrayLayer = 0,
.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS,
};
if (check_format_has_depth_component(format)) {
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
if (check_format_has_stencil_component(format)) {
barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
} else {
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
}
VkPipelineStageFlags sourceStage;
VkPipelineStageFlags destinationStage;
// Undefined -> Dst
if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
// Undefined -> Color
} else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
// Undefined -> Depth
} else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
destinationStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
// Dst -> Shader Read
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
// Dst -> Color
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
// Dst -> Depth
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
// Dst -> Src
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
// Shader Read -> Dst
} else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
// Shader Read -> Color
} else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
// Color -> Src
} else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
// Color -> Dst
} else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
// Color -> Shader Read
} else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
// Depth -> Src
} else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
sourceStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
// Depth -> Dst
} else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
// Src -> Color
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
// Src -> Depth
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
barrier.srcAccessMask = 0;
barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;
// Src -> Dst
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
} else {
assert(!"unsupported layout transition!");
}
vkCmdPipelineBarrier(cmd, sourceStage, destinationStage, 0, 0,
NULL, 0, NULL, 1, &barrier);
}

View File

@ -0,0 +1,662 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "ui/xemu-settings.h"
#include "renderer.h"
#include "xemu-version.h"
#include <SDL.h>
#include <SDL_syswm.h>
#include <SDL_vulkan.h>
#include <volk.h>
typedef GArray VkExtensionPropertiesArray;
typedef GArray StringArray;
static bool enable_validation = false;
static char const *const validation_layers[] = {
"VK_LAYER_KHRONOS_validation",
};
static char const *const required_instance_extensions[] = {
VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
};
static char const *const required_device_extensions[] = {
VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
#ifdef WIN32
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,
#else
VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
#endif
};
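/* Debug messenger callback: log every message, and treat validation warnings
 * and errors as fatal so problems surface immediately when validation layers
 * are enabled. */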
static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
VkDebugUtilsMessageTypeFlagsEXT messageType,
const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData)
{
NV2A_VK_DPRINTF("[vk] %s", pCallbackData->pMessage);
fprintf(stderr, "[vk] %s\n", pCallbackData->pMessage);
if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) &&
(messageSeverity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) {
exit(1);
}
return VK_FALSE;
}
static bool check_validation_layer_support(void)
{
uint32_t num_available_layers;
vkEnumerateInstanceLayerProperties(&num_available_layers, NULL);
g_autofree VkLayerProperties *available_layers =
g_malloc_n(num_available_layers, sizeof(VkLayerProperties));
vkEnumerateInstanceLayerProperties(&num_available_layers, available_layers);
for (int i = 0; i < ARRAY_SIZE(validation_layers); i++) {
bool found = false;
for (int j = 0; j < num_available_layers; j++) {
if (!strcmp(validation_layers[i], available_layers[j].layerName)) {
found = true;
break;
}
}
if (!found) {
fprintf(stderr, "desired validation layer not found: %s\n",
validation_layers[i]);
return false;
}
}
return true;
}
static SDL_Window *create_window(void)
{
SDL_Window *window = SDL_CreateWindow(
"SDL Offscreen Window", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
640, 480, SDL_WINDOW_VULKAN | SDL_WINDOW_HIDDEN);
if (window == NULL) {
fprintf(stderr, "%s: Failed to create window\n", __func__);
SDL_Quit();
exit(1);
}
return window;
}
static VkExtensionPropertiesArray *
get_available_instance_extensions(PGRAPHState *pg)
{
uint32_t num_extensions = 0;
VK_CHECK(
vkEnumerateInstanceExtensionProperties(NULL, &num_extensions, NULL));
VkExtensionPropertiesArray *extensions = g_array_sized_new(
FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions);
g_array_set_size(extensions, num_extensions);
VK_CHECK(vkEnumerateInstanceExtensionProperties(
NULL, &num_extensions, (VkExtensionProperties *)extensions->data));
return extensions;
}
static bool
is_extension_available(VkExtensionPropertiesArray *available_extensions,
const char *extension_name)
{
for (int i = 0; i < available_extensions->len; i++) {
VkExtensionProperties *e =
&g_array_index(available_extensions, VkExtensionProperties, i);
if (!strcmp(e->extensionName, extension_name)) {
return true;
}
}
return false;
}
static StringArray *get_required_instance_extension_names(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
// Add instance extensions SDL lists as required
unsigned int sdl_count = 0;
SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, NULL);
StringArray *extensions =
g_array_sized_new(FALSE, FALSE, sizeof(char *),
sdl_count + ARRAY_SIZE(required_instance_extensions));
if (sdl_count) {
g_array_set_size(extensions, sdl_count);
SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count,
(const char **)extensions->data);
}
// Add additional required extensions
g_array_append_vals(extensions, required_instance_extensions,
ARRAY_SIZE(required_instance_extensions));
return extensions;
}
static bool
add_extension_if_available(VkExtensionPropertiesArray *available_extensions,
StringArray *enabled_extension_names,
const char *desired_extension_name)
{
if (is_extension_available(available_extensions, desired_extension_name)) {
g_array_append_val(enabled_extension_names, desired_extension_name);
return true;
}
fprintf(stderr, "Warning: extension not available: %s\n",
desired_extension_name);
return false;
}
static void
add_optional_instance_extension_names(PGRAPHState *pg,
VkExtensionPropertiesArray *available_extensions,
StringArray *enabled_extension_names)
{
PGRAPHVkState *r = pg->vk_renderer_state;
r->debug_utils_extension_enabled =
g_config.display.vulkan.validation_layers &&
add_extension_if_available(available_extensions, enabled_extension_names,
VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
static void create_instance(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
r->window = create_window();
VK_CHECK(volkInitialize());
VkApplicationInfo app_info = {
.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
.pApplicationName = "xemu",
.applicationVersion = VK_MAKE_VERSION(
xemu_version_major, xemu_version_minor, xemu_version_patch),
.pEngineName = "No Engine",
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = VK_API_VERSION_1_3,
};
g_autofree VkExtensionPropertiesArray *available_extensions =
get_available_instance_extensions(pg);
g_autofree StringArray *enabled_extension_names =
get_required_instance_extension_names(pg);
bool all_required_extensions_available = true;
for (int i = 0; i < enabled_extension_names->len; i++) {
const char *required_extension =
g_array_index(enabled_extension_names, const char *, i);
if (!is_extension_available(available_extensions, required_extension)) {
fprintf(stderr,
"Error: Required instance extension not available: %s\n",
required_extension);
all_required_extensions_available = false;
}
}
assert(all_required_extensions_available);
add_optional_instance_extension_names(pg, available_extensions,
enabled_extension_names);
fprintf(stderr, "Enabled instance extensions:\n");
for (int i = 0; i < enabled_extension_names->len; i++) {
fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i));
}
VkInstanceCreateInfo create_info = {
.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
.pApplicationInfo = &app_info,
.enabledExtensionCount = enabled_extension_names->len,
.ppEnabledExtensionNames =
&g_array_index(enabled_extension_names, const char *, 0),
};
VkDebugUtilsMessengerCreateInfoEXT dbg_create_info;
if (r->debug_utils_extension_enabled) {
dbg_create_info = (VkDebugUtilsMessengerCreateInfoEXT){
.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
.pfnUserCallback = debugCallback,
};
}
enable_validation = g_config.display.vulkan.validation_layers;
if (enable_validation) {
if (check_validation_layer_support()) {
fprintf(stderr, "Warning: Validation layers enabled. Expect performance impact.\n");
create_info.enabledLayerCount = ARRAY_SIZE(validation_layers);
create_info.ppEnabledLayerNames = validation_layers;
if (r->debug_utils_extension_enabled) {
create_info.pNext =
(VkDebugUtilsMessengerCreateInfoEXT *)&dbg_create_info;
}
} else {
fprintf(stderr, "Warning: validation layers not available\n");
enable_validation = false;
}
}
VK_CHECK(vkCreateInstance(&create_info, NULL, &r->instance));
volkLoadInstance(r->instance);
}
static bool is_queue_family_indices_complete(QueueFamilyIndices indices)
{
return indices.queue_family >= 0;
}
QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device)
{
QueueFamilyIndices indices = {
.queue_family = -1,
};
uint32_t num_queue_families = 0;
vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, NULL);
g_autofree VkQueueFamilyProperties *queue_families =
g_malloc_n(num_queue_families, sizeof(VkQueueFamilyProperties));
vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families,
queue_families);
for (int i = 0; i < num_queue_families; i++) {
VkQueueFamilyProperties queueFamily = queue_families[i];
// FIXME: Support independent graphics, compute queues
int required_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
if ((queueFamily.queueFlags & required_flags) == required_flags) {
indices.queue_family = i;
}
if (is_queue_family_indices_complete(indices)) {
break;
}
}
return indices;
}
static VkExtensionPropertiesArray *
get_available_device_extensions(VkPhysicalDevice device)
{
uint32_t num_extensions = 0;
VK_CHECK(vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions,
NULL));
VkExtensionPropertiesArray *extensions = g_array_sized_new(
FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions);
g_array_set_size(extensions, num_extensions);
VK_CHECK(vkEnumerateDeviceExtensionProperties(
device, NULL, &num_extensions,
(VkExtensionProperties *)extensions->data));
return extensions;
}
static StringArray *get_required_device_extension_names(void)
{
StringArray *extensions =
g_array_sized_new(FALSE, FALSE, sizeof(char *),
ARRAY_SIZE(required_device_extensions));
g_array_append_vals(extensions, required_device_extensions,
ARRAY_SIZE(required_device_extensions));
return extensions;
}
static void add_optional_device_extension_names(
PGRAPHState *pg, VkExtensionPropertiesArray *available_extensions,
StringArray *enabled_extension_names)
{
PGRAPHVkState *r = pg->vk_renderer_state;
r->custom_border_color_extension_enabled =
add_extension_if_available(available_extensions, enabled_extension_names,
VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
r->provoking_vertex_extension_enabled =
add_extension_if_available(available_extensions, enabled_extension_names,
VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);
r->memory_budget_extension_enabled = add_extension_if_available(
available_extensions, enabled_extension_names,
VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
}
static bool check_device_support_required_extensions(VkPhysicalDevice device)
{
g_autofree VkExtensionPropertiesArray *available_extensions =
get_available_device_extensions(device);
for (int i = 0; i < ARRAY_SIZE(required_device_extensions); i++) {
if (!is_extension_available(available_extensions,
required_device_extensions[i])) {
fprintf(stderr, "required device extension not found: %s\n",
required_device_extensions[i]);
return false;
}
}
return true;
}
static bool is_device_compatible(VkPhysicalDevice device)
{
QueueFamilyIndices indices = pgraph_vk_find_queue_families(device);
return is_queue_family_indices_complete(indices) &&
check_device_support_required_extensions(device);
// FIXME: Check formats
// FIXME: Check vram
}
static void select_physical_device(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
uint32_t num_physical_devices = 0;
vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL);
if (num_physical_devices == 0) {
assert(!"failed to find GPUs with Vulkan support");
}
g_autofree VkPhysicalDevice *devices =
g_malloc_n(num_physical_devices, sizeof(VkPhysicalDevice));
vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, devices);
fprintf(stderr, "Available physical devices:\n");
for (int i = 0; i < num_physical_devices; i++) {
vkGetPhysicalDeviceProperties(devices[i], &r->device_props);
fprintf(stderr, "- %s\n", r->device_props.deviceName);
}
// FIXME: Store preferred device
r->physical_device = VK_NULL_HANDLE;
for (int i = 0; i < num_physical_devices; i++) {
if (is_device_compatible(devices[i])) {
r->physical_device = devices[i];
break;
}
}
if (r->physical_device == VK_NULL_HANDLE) {
assert(!"failed to find a suitable GPU");
}
vkGetPhysicalDeviceProperties(r->physical_device, &r->device_props);
fprintf(stderr,
"Selected physical device: %s\n"
"- Vendor: %x, Device: %x\n"
"- Driver Version: %d.%d.%d\n",
r->device_props.deviceName,
r->device_props.vendorID,
r->device_props.deviceID,
VK_VERSION_MAJOR(r->device_props.driverVersion),
VK_VERSION_MINOR(r->device_props.driverVersion),
VK_VERSION_PATCH(r->device_props.driverVersion));
size_t vsh_attr_values_size =
NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size);
}
static void create_logical_device(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
QueueFamilyIndices indices =
pgraph_vk_find_queue_families(r->physical_device);
g_autofree VkExtensionPropertiesArray *available_extensions =
get_available_device_extensions(r->physical_device);
g_autofree StringArray *enabled_extension_names =
get_required_device_extension_names();
add_optional_device_extension_names(pg, available_extensions,
enabled_extension_names);
fprintf(stderr, "Enabled device extensions:\n");
for (int i = 0; i < enabled_extension_names->len; i++) {
fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i));
}
float queuePriority = 1.0f;
VkDeviceQueueCreateInfo queue_create_info = {
.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
.queueFamilyIndex = indices.queue_family,
.queueCount = 1,
.pQueuePriorities = &queuePriority,
};
// Ensure device supports required features
VkPhysicalDeviceFeatures available_features, enabled_features;
vkGetPhysicalDeviceFeatures(r->physical_device, &available_features);
memset(&enabled_features, 0, sizeof(enabled_features));
struct {
const char *name;
VkBool32 available, *enabled;
} required_features[] = {
#define F(n) { #n, available_features.n, &enabled_features.n }
F(shaderClipDistance),
F(geometryShader),
F(shaderTessellationAndGeometryPointSize),
F(depthClamp),
F(occlusionQueryPrecise),
#undef F
};
bool all_features_available = true;
for (int i = 0; i < ARRAY_SIZE(required_features); i++) {
if (required_features[i].available != VK_TRUE) {
fprintf(stderr, "Error: Device does not support required feature %s\n", required_features[i].name);
all_features_available = false;
}
*required_features[i].enabled = VK_TRUE;
}
assert(all_features_available);
void *next_struct = NULL;
VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features;
if (r->provoking_vertex_extension_enabled) {
provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){
.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
.provokingVertexLast = VK_TRUE,
.pNext = next_struct,
};
next_struct = &provoking_vertex_features;
}
VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features;
if (r->custom_border_color_extension_enabled) {
custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){
.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
.customBorderColors = VK_TRUE,
.pNext = next_struct,
};
next_struct = &custom_border_features;
}
VkDeviceCreateInfo device_create_info = {
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.queueCreateInfoCount = 1,
.pQueueCreateInfos = &queue_create_info,
.pEnabledFeatures = &enabled_features,
.enabledExtensionCount = enabled_extension_names->len,
.ppEnabledExtensionNames =
&g_array_index(enabled_extension_names, const char *, 0),
.pNext = next_struct,
};
if (enable_validation) {
device_create_info.enabledLayerCount = ARRAY_SIZE(validation_layers);
device_create_info.ppEnabledLayerNames = validation_layers;
}
VK_CHECK(vkCreateDevice(r->physical_device, &device_create_info, NULL,
&r->device));
vkGetDeviceQueue(r->device, indices.queue_family, 0, &r->queue);
}
uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits,
VkMemoryPropertyFlags properties)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkPhysicalDeviceMemoryProperties prop;
vkGetPhysicalDeviceMemoryProperties(r->physical_device, &prop);
for (uint32_t i = 0; i < prop.memoryTypeCount; i++) {
if ((prop.memoryTypes[i].propertyFlags & properties) == properties &&
type_bits & (1 << i)) {
return i;
}
}
return 0xFFFFFFFF; // Unable to find memoryType
}
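/*
 * Usage sketch (illustrative, not part of this file): callers pass the
 * memoryTypeBits reported for a specific resource, e.g.:
 *
 *   VkMemoryRequirements reqs;
 *   vkGetBufferMemoryRequirements(r->device, buffer, &reqs);
 *   uint32_t type = pgraph_vk_get_memory_type(
 *       pg, reqs.memoryTypeBits,
 *       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
 *           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
 *   assert(type != 0xFFFFFFFF);
 */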
static void init_allocator(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
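/* Vulkan entry points are loaded at runtime by volk (renderer.h defines
 * VK_NO_PROTOTYPES), so the resolved function pointers are handed to VMA
 * explicitly here. */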
VmaVulkanFunctions vulkanFunctions = {
/// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
.vkGetInstanceProcAddr = vkGetInstanceProcAddr,
/// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
.vkGetDeviceProcAddr = vkGetDeviceProcAddr,
.vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties,
.vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties,
.vkAllocateMemory = vkAllocateMemory,
.vkFreeMemory = vkFreeMemory,
.vkMapMemory = vkMapMemory,
.vkUnmapMemory = vkUnmapMemory,
.vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges,
.vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges,
.vkBindBufferMemory = vkBindBufferMemory,
.vkBindImageMemory = vkBindImageMemory,
.vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements,
.vkGetImageMemoryRequirements = vkGetImageMemoryRequirements,
.vkCreateBuffer = vkCreateBuffer,
.vkDestroyBuffer = vkDestroyBuffer,
.vkCreateImage = vkCreateImage,
.vkDestroyImage = vkDestroyImage,
.vkCmdCopyBuffer = vkCmdCopyBuffer,
#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
/// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
.vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2,
/// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
.vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2,
#endif
#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
/// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension.
.vkBindBufferMemory2KHR = vkBindBufferMemory2,
/// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension.
.vkBindImageMemory2KHR = vkBindImageMemory2,
#endif
#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
/// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2.
.vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2KHR,
#endif
#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
/// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
.vkGetDeviceBufferMemoryRequirements = vkGetDeviceBufferMemoryRequirements,
/// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
.vkGetDeviceImageMemoryRequirements = vkGetDeviceImageMemoryRequirements,
#endif
};
VmaAllocatorCreateInfo create_info = {
.flags = (r->memory_budget_extension_enabled ?
VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT :
0),
.vulkanApiVersion = VK_API_VERSION_1_3,
.instance = r->instance,
.physicalDevice = r->physical_device,
.device = r->device,
.pVulkanFunctions = &vulkanFunctions,
};
VK_CHECK(vmaCreateAllocator(&create_info, &r->allocator));
}
static void finalize_allocator(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vmaDestroyAllocator(r->allocator);
}
void pgraph_vk_init_instance(PGRAPHState *pg)
{
create_instance(pg);
select_physical_device(pg);
create_logical_device(pg);
init_allocator(pg);
}
void pgraph_vk_finalize_instance(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
finalize_allocator(pg);
vkDestroyDevice(r->device, NULL);
r->device = VK_NULL_HANDLE;
vkDestroyInstance(r->instance, NULL);
r->instance = VK_NULL_HANDLE;
}

View File

@ -0,0 +1,24 @@
if vulkan.found()
specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen,
files(
'blit.c',
'buffer.c',
'command.c',
'debug.c',
'display.c',
'draw.c',
'glsl.c',
'image.c',
'instance.c',
'renderer.c',
'reports.c',
'shaders.c',
'surface-compute.c',
'surface.c',
'texture.c',
'vertex.c',
)
])
endif

View File

@ -0,0 +1,266 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/nv2a_int.h"
#include "renderer.h"
#include "gloffscreen.h"
#if HAVE_EXTERNAL_MEMORY
static GloContext *g_gl_context;
static void gl_context_init(void)
{
g_gl_context = glo_context_create();
}
#endif
static void pgraph_vk_init_thread(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
#if HAVE_EXTERNAL_MEMORY
glo_set_current(g_gl_context);
#endif
pgraph_vk_init_instance(pg);
pgraph_vk_init_command_buffers(pg);
pgraph_vk_init_buffers(d);
pgraph_vk_init_surfaces(pg);
pgraph_vk_init_shaders(pg);
pgraph_vk_init_pipelines(pg);
pgraph_vk_init_textures(pg);
pgraph_vk_init_reports(pg);
pgraph_vk_init_compute(pg);
pgraph_vk_init_display(pg);
}
static void pgraph_vk_finalize(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
pgraph_vk_finalize_display(pg);
pgraph_vk_finalize_compute(pg);
pgraph_vk_finalize_reports(pg);
pgraph_vk_finalize_textures(pg);
pgraph_vk_finalize_pipelines(pg);
pgraph_vk_finalize_shaders(pg);
pgraph_vk_finalize_surfaces(pg);
pgraph_vk_finalize_buffers(d);
pgraph_vk_finalize_command_buffers(pg);
pgraph_vk_finalize_instance(pg);
}
static void pgraph_vk_flush(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
pgraph_vk_finish(pg, VK_FINISH_REASON_FLUSH);
pgraph_vk_surface_flush(d);
pgraph_vk_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr,
memory_region_size(d->vram));
for (int i = 0; i < 4; i++) {
pg->texture_dirty[i] = true;
}
/* FIXME: Flush more? */
qatomic_set(&d->pgraph.flush_pending, false);
qemu_event_set(&d->pgraph.flush_complete);
}
static void pgraph_vk_sync(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
pgraph_vk_render_display(pg);
qatomic_set(&d->pgraph.sync_pending, false);
qemu_event_set(&d->pgraph.sync_complete);
}
static void pgraph_vk_process_pending(NV2AState *d)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
if (qatomic_read(&r->downloads_pending) ||
qatomic_read(&r->download_dirty_surfaces_pending) ||
qatomic_read(&d->pgraph.sync_pending) ||
qatomic_read(&d->pgraph.flush_pending)
) {
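/* Drop pfifo.lock before taking pgraph.lock so the two are never held in
 * the opposite order of other code paths; pfifo.lock is re-acquired below
 * once the pending work has been handled. */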
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
if (qatomic_read(&r->downloads_pending)) {
pgraph_vk_process_pending_downloads(d);
}
if (qatomic_read(&r->download_dirty_surfaces_pending)) {
pgraph_vk_download_dirty_surfaces(d);
}
if (qatomic_read(&d->pgraph.sync_pending)) {
pgraph_vk_sync(d);
}
if (qatomic_read(&d->pgraph.flush_pending)) {
pgraph_vk_flush(d);
}
qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock(&d->pfifo.lock);
}
}
static void pgraph_vk_flip_stall(NV2AState *d)
{
pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_FLIP_STALL);
pgraph_vk_debug_frame_terminator();
}
static void pgraph_vk_pre_savevm_trigger(NV2AState *d)
{
qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true);
qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
}
static void pgraph_vk_pre_savevm_wait(NV2AState *d)
{
qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
}
static void pgraph_vk_pre_shutdown_trigger(NV2AState *d)
{
// qatomic_set(&d->pgraph.vk_renderer_state->shader_cache_writeback_pending, true);
// qemu_event_reset(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete);
}
static void pgraph_vk_pre_shutdown_wait(NV2AState *d)
{
// qemu_event_wait(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete);
}
static int pgraph_vk_get_framebuffer_surface(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
qemu_mutex_lock(&d->pfifo.lock);
// FIXME: Possible race condition with pgraph, consider lock
uint32_t pline_offset, pstart_addr, pline_compare;
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
SurfaceBinding *surface = pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
if (surface == NULL || !surface->color) {
qemu_mutex_unlock(&d->pfifo.lock);
return 0;
}
assert(surface->color);
surface->frame_time = pg->frame_time;
#if HAVE_EXTERNAL_MEMORY
qemu_event_reset(&d->pgraph.sync_complete);
qatomic_set(&pg->sync_pending, true);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
qemu_event_wait(&d->pgraph.sync_complete);
return r->display.gl_texture_id;
#else
qemu_mutex_unlock(&d->pfifo.lock);
pgraph_vk_wait_for_surface_download(surface);
return 0;
#endif
}
static void pgraph_vk_init(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
pg->vk_renderer_state = g_malloc0(sizeof(PGRAPHVkState));
pgraph_vk_debug_init();
}
static PGRAPHRenderer pgraph_vk_renderer = {
.type = CONFIG_DISPLAY_RENDERER_VULKAN,
.name = "Vulkan",
.ops = {
.init = pgraph_vk_init,
#if HAVE_EXTERNAL_MEMORY
.early_context_init = gl_context_init,
#endif
.init_thread = pgraph_vk_init_thread,
.finalize = pgraph_vk_finalize,
.clear_report_value = pgraph_vk_clear_report_value,
.clear_surface = pgraph_vk_clear_surface,
.draw_begin = pgraph_vk_draw_begin,
.draw_end = pgraph_vk_draw_end,
.flip_stall = pgraph_vk_flip_stall,
.flush_draw = pgraph_vk_flush_draw,
.get_report = pgraph_vk_get_report,
.image_blit = pgraph_vk_image_blit,
.pre_savevm_trigger = pgraph_vk_pre_savevm_trigger,
.pre_savevm_wait = pgraph_vk_pre_savevm_wait,
.pre_shutdown_trigger = pgraph_vk_pre_shutdown_trigger,
.pre_shutdown_wait = pgraph_vk_pre_shutdown_wait,
.process_pending = pgraph_vk_process_pending,
.process_pending_reports = pgraph_vk_process_pending_reports,
.surface_update = pgraph_vk_surface_update,
.set_surface_scale_factor = pgraph_vk_set_surface_scale_factor,
.get_surface_scale_factor = pgraph_vk_get_surface_scale_factor,
.get_framebuffer_surface = pgraph_vk_get_framebuffer_surface,
}
};
static void __attribute__((constructor)) register_renderer(void)
{
pgraph_renderer_register(&pgraph_vk_renderer);
}
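/* The constructor attribute makes registration run at load time, before
 * nv2a initialization picks a renderer based on the configured
 * display.renderer value. */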
void pgraph_vk_check_memory_budget(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkPhysicalDeviceMemoryProperties const *props;
vmaGetMemoryProperties(r->allocator, &props);
g_autofree VmaBudget *budgets = g_malloc_n(props->memoryHeapCount, sizeof(VmaBudget));
vmaGetHeapBudgets(r->allocator, budgets);
const float budget_threshold = 0.8;
bool near_budget = false;
for (uint32_t i = 0; i < props->memoryHeapCount; i++) {
VmaBudget *b = &budgets[i];
float use_to_budget_ratio =
(double)b->statistics.allocationBytes / (double)b->budget;
NV2A_VK_DPRINTF("Heap %u: used %" PRIu64 "/%" PRIu64 " MiB (%.2f%%)", i,
b->statistics.allocationBytes / (1024 * 1024),
b->budget / (1024 * 1024), use_to_budget_ratio * 100);
near_budget |= use_to_budget_ratio > budget_threshold;
}
// If any heaps are near budget, free up some resources
if (near_budget) {
pgraph_vk_trim_texture_cache(pg);
}
#if 0
char *s;
vmaBuildStatsString(r->allocator, &s, VK_TRUE);
puts(s);
vmaFreeStatsString(r->allocator, s);
#endif
}

View File

@ -0,0 +1,526 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H
#define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H
#define VK_NO_PROTOTYPES 1
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/queue.h"
#include "qemu/lru.h"
#include "hw/hw.h"
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "hw/xbox/nv2a/pgraph/surface.h"
#include "hw/xbox/nv2a/pgraph/texture.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include <vulkan/vulkan.h>
#include <glslang/Include/glslang_c_interface.h>
#include <volk.h>
#include <spirv_reflect.h>
#define VMA_STATIC_VULKAN_FUNCTIONS 1
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#include <vk_mem_alloc.h>
#include "debug.h"
#include "constants.h"
#include "glsl.h"
#define HAVE_EXTERNAL_MEMORY 1
typedef struct QueueFamilyIndices {
int queue_family;
} QueueFamilyIndices;
typedef struct MemorySyncRequirement {
hwaddr addr, size;
} MemorySyncRequirement;
typedef struct RenderPassState {
VkFormat color_format;
VkFormat zeta_format;
} RenderPassState;
typedef struct RenderPass {
RenderPassState state;
VkRenderPass render_pass;
} RenderPass;
typedef struct PipelineKey {
bool clear;
RenderPassState render_pass_state;
ShaderState shader_state;
uint32_t regs[10];
VkVertexInputBindingDescription binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
VkVertexInputAttributeDescription attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
} PipelineKey;
typedef struct PipelineBinding {
LruNode node;
PipelineKey key;
VkPipelineLayout layout;
VkPipeline pipeline;
VkRenderPass render_pass;
unsigned int draw_time;
} PipelineBinding;
enum Buffer {
BUFFER_STAGING_DST,
BUFFER_STAGING_SRC,
BUFFER_COMPUTE_DST,
BUFFER_COMPUTE_SRC,
BUFFER_INDEX,
BUFFER_INDEX_STAGING,
BUFFER_VERTEX_RAM,
BUFFER_VERTEX_INLINE,
BUFFER_VERTEX_INLINE_STAGING,
BUFFER_UNIFORM,
BUFFER_UNIFORM_STAGING,
BUFFER_COUNT
};
typedef struct StorageBuffer {
VkBuffer buffer;
VkBufferUsageFlags usage;
VmaAllocationCreateInfo alloc_info;
VmaAllocation allocation;
VkMemoryPropertyFlags properties;
size_t buffer_offset;
size_t buffer_size;
uint8_t *mapped;
} StorageBuffer;
typedef struct SurfaceBinding {
QTAILQ_ENTRY(SurfaceBinding) entry;
MemAccessCallback *access_cb;
hwaddr vram_addr;
SurfaceShape shape;
uintptr_t dma_addr;
uintptr_t dma_len;
bool color;
bool swizzle;
unsigned int width;
unsigned int height;
unsigned int pitch;
size_t size;
bool cleared;
int frame_time;
int draw_time;
bool draw_dirty;
bool download_pending;
bool upload_pending;
BasicSurfaceFormatInfo fmt;
SurfaceFormatInfo host_fmt;
VkImage image;
VkImageView image_view;
VmaAllocation allocation;
// Used for scaling
VkImage image_scratch;
VkImageLayout image_scratch_current_layout;
VmaAllocation allocation_scratch;
bool initialized;
} SurfaceBinding;
typedef struct ShaderModuleInfo {
char *glsl;
GByteArray *spirv;
VkShaderModule module;
SpvReflectShaderModule reflect_module;
SpvReflectDescriptorSet **descriptor_sets;
ShaderUniformLayout uniforms;
ShaderUniformLayout push_constants;
} ShaderModuleInfo;
typedef struct ShaderBinding {
LruNode node;
ShaderState state;
ShaderModuleInfo *geometry;
ShaderModuleInfo *vertex;
ShaderModuleInfo *fragment;
int psh_constant_loc[9][2];
int alpha_ref_loc;
int bump_mat_loc[NV2A_MAX_TEXTURES];
int bump_scale_loc[NV2A_MAX_TEXTURES];
int bump_offset_loc[NV2A_MAX_TEXTURES];
int tex_scale_loc[NV2A_MAX_TEXTURES];
int surface_size_loc;
int clip_range_loc;
int vsh_constant_loc;
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
int inv_viewport_loc;
int ltctxa_loc;
int ltctxb_loc;
int ltc1_loc;
int fog_color_loc;
int fog_param_loc;
int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
int light_infinite_direction_loc[NV2A_MAX_LIGHTS];
int light_local_position_loc[NV2A_MAX_LIGHTS];
int light_local_attenuation_loc[NV2A_MAX_LIGHTS];
int clip_region_loc;
int material_alpha_loc;
} ShaderBinding;
typedef struct TextureKey {
TextureShape state;
hwaddr texture_vram_offset;
hwaddr texture_length;
hwaddr palette_vram_offset;
hwaddr palette_length;
float scale;
} TextureKey;
typedef struct TextureBinding {
LruNode node;
TextureKey key;
VkImage image;
VkImageLayout current_layout;
VkImageView image_view;
VmaAllocation allocation;
VkSampler sampler;
bool possibly_dirty;
uint64_t hash;
unsigned int draw_time;
uint32_t submit_time;
} TextureBinding;
typedef struct QueryReport {
QSIMPLEQ_ENTRY(QueryReport) entry;
bool clear;
uint32_t parameter;
unsigned int query_count;
} QueryReport;
typedef struct PGRAPHVkDisplayState {
ShaderModuleInfo *display_frag;
VkDescriptorPool descriptor_pool;
VkDescriptorSetLayout descriptor_set_layout;
VkDescriptorSet descriptor_set;
VkPipelineLayout pipeline_layout;
VkPipeline pipeline;
VkRenderPass render_pass;
VkFramebuffer framebuffer;
VkImage image;
VkImageView image_view;
VkDeviceMemory memory;
VkSampler sampler;
int width, height;
int draw_time;
// OpenGL Interop
#ifdef WIN32
HANDLE handle;
#else
int fd;
#endif
GLuint gl_memory_obj;
GLuint gl_texture_id;
} PGRAPHVkDisplayState;
typedef struct PGRAPHVkComputeState {
VkDescriptorPool descriptor_pool;
VkDescriptorSetLayout descriptor_set_layout;
VkDescriptorSet descriptor_sets[1];
VkPipelineLayout pipeline_layout;
VkPipeline pipeline_pack_d24s8;
VkPipeline pipeline_unpack_d24s8;
VkPipeline pipeline_pack_f32s8;
VkPipeline pipeline_unpack_f32s8;
} PGRAPHVkComputeState;
typedef struct PGRAPHVkState {
void *window;
VkInstance instance;
bool debug_utils_extension_enabled;
bool custom_border_color_extension_enabled;
bool provoking_vertex_extension_enabled;
bool memory_budget_extension_enabled;
VkPhysicalDevice physical_device;
VkPhysicalDeviceProperties device_props;
VkDevice device;
VmaAllocator allocator;
uint32_t allocator_last_submit_index;
VkQueue queue;
VkCommandPool command_pool;
VkCommandBuffer command_buffers[2];
VkCommandBuffer command_buffer;
VkSemaphore command_buffer_semaphore;
VkFence command_buffer_fence;
unsigned int command_buffer_start_time;
bool in_command_buffer;
uint32_t submit_count;
VkCommandBuffer aux_command_buffer;
bool in_aux_command_buffer;
VkFramebuffer framebuffers[50];
int framebuffer_index;
bool framebuffer_dirty;
VkRenderPass render_pass;
RenderPass *render_passes;
int render_passes_index;
int render_passes_capacity;
bool in_render_pass;
bool in_draw;
Lru pipeline_cache;
VkPipelineCache vk_pipeline_cache;
PipelineBinding *pipeline_cache_entries;
PipelineBinding *pipeline_binding;
bool pipeline_binding_changed;
VkDescriptorPool descriptor_pool;
VkDescriptorSetLayout descriptor_set_layout;
VkDescriptorSet descriptor_sets[1024];
int descriptor_set_index;
StorageBuffer storage_buffers[BUFFER_COUNT];
MemorySyncRequirement vertex_ram_buffer_syncs[NV2A_VERTEXSHADER_ATTRIBUTES];
size_t num_vertex_ram_buffer_syncs;
unsigned long *uploaded_bitmap;
size_t bitmap_size;
VkVertexInputAttributeDescription vertex_attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
int vertex_attribute_to_description_location[NV2A_VERTEXSHADER_ATTRIBUTES];
int num_active_vertex_attribute_descriptions;
VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
int num_active_vertex_binding_descriptions;
hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES];
QTAILQ_HEAD(, SurfaceBinding) surfaces;
QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces;
SurfaceBinding *color_binding, *zeta_binding;
bool downloads_pending;
QemuEvent downloads_complete;
bool download_dirty_surfaces_pending;
QemuEvent dirty_surfaces_download_complete; // common
Lru texture_cache;
TextureBinding *texture_cache_entries;
TextureBinding *texture_bindings[NV2A_MAX_TEXTURES];
TextureBinding dummy_texture;
bool texture_bindings_changed;
Lru shader_cache;
ShaderBinding *shader_cache_entries;
ShaderBinding *shader_binding;
ShaderModuleInfo *quad_vert_module, *solid_frag_module;
bool shader_bindings_changed;
// FIXME: Merge these into a structure
uint64_t uniform_buffer_hashes[2];
size_t uniform_buffer_offsets[2];
bool uniforms_changed;
VkQueryPool query_pool;
int max_queries_in_flight; // FIXME: Move out to constant
int num_queries_in_flight;
bool new_query_needed;
bool query_in_flight;
uint32_t zpass_pixel_count_result;
QSIMPLEQ_HEAD(, QueryReport) report_queue; // FIXME: Statically allocate
SurfaceFormatInfo kelvin_surface_zeta_vk_map[3];
uint32_t clear_parameter;
PGRAPHVkDisplayState display;
PGRAPHVkComputeState compute;
} PGRAPHVkState;
// renderer.c
void pgraph_vk_check_memory_budget(PGRAPHState *pg);
// debug.c
void pgraph_vk_debug_init(void);
// instance.c
void pgraph_vk_init_instance(PGRAPHState *pg);
void pgraph_vk_finalize_instance(PGRAPHState *pg);
QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device);
uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits,
VkMemoryPropertyFlags properties);
// glsl.c
void pgraph_vk_init_glsl_compiler(void);
void pgraph_vk_finalize_glsl_compiler(void);
GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage,
const char *glsl_source);
VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r,
GByteArray *spv);
ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl);
void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info);
// buffer.c
void pgraph_vk_init_buffers(NV2AState *d);
void pgraph_vk_finalize_buffers(NV2AState *d);
bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index,
VkDeviceSize size,
VkDeviceAddress alignment);
VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data,
VkDeviceSize *sizes, size_t count,
VkDeviceAddress alignment);
// command.c
void pgraph_vk_init_command_buffers(PGRAPHState *pg);
void pgraph_vk_finalize_command_buffers(PGRAPHState *pg);
VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg);
void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd);
// image.c
void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd,
VkImage image, VkFormat format,
VkImageLayout oldLayout,
VkImageLayout newLayout);
// vertex.c
void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
unsigned int max_element,
bool inline_data,
unsigned int inline_stride,
unsigned int provoking_element);
void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d);
void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, void *data,
VkDeviceSize size);
VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data,
VkDeviceSize size);
VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data,
VkDeviceSize *sizes,
size_t count);
// surface.c
void pgraph_vk_init_surfaces(PGRAPHState *pg);
void pgraph_vk_finalize_surfaces(PGRAPHState *pg);
void pgraph_vk_surface_flush(NV2AState *d);
void pgraph_vk_process_pending_downloads(NV2AState *d);
void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface);
SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr);
void pgraph_vk_wait_for_surface_download(SurfaceBinding *e);
void pgraph_vk_download_dirty_surfaces(NV2AState *d);
void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
bool force);
void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write,
bool zeta_write);
SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr);
void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta);
void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale);
unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d);
void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg);
// surface-compute.c
void pgraph_vk_init_compute(PGRAPHState *pg);
void pgraph_vk_finalize_compute(PGRAPHState *pg);
void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
VkCommandBuffer cmd, VkBuffer src,
VkBuffer dst, bool downscale);
void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
VkCommandBuffer cmd, VkBuffer src,
VkBuffer dst);
// display.c
void pgraph_vk_init_display(PGRAPHState *pg);
void pgraph_vk_finalize_display(PGRAPHState *pg);
void pgraph_vk_render_display(PGRAPHState *pg);
// texture.c
void pgraph_vk_init_textures(PGRAPHState *pg);
void pgraph_vk_finalize_textures(PGRAPHState *pg);
void pgraph_vk_bind_textures(NV2AState *d);
void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr,
hwaddr size);
void pgraph_vk_trim_texture_cache(PGRAPHState *pg);
// shaders.c
void pgraph_vk_init_shaders(PGRAPHState *pg);
void pgraph_vk_finalize_shaders(PGRAPHState *pg);
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg);
void pgraph_vk_bind_shaders(PGRAPHState *pg);
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg);
// reports.c
void pgraph_vk_init_reports(PGRAPHState *pg);
void pgraph_vk_finalize_reports(PGRAPHState *pg);
void pgraph_vk_clear_report_value(NV2AState *d);
void pgraph_vk_get_report(NV2AState *d, uint32_t parameter);
void pgraph_vk_process_pending_reports(NV2AState *d);
void pgraph_vk_process_pending_reports_internal(NV2AState *d);
typedef enum FinishReason {
VK_FINISH_REASON_VERTEX_BUFFER_DIRTY,
VK_FINISH_REASON_SURFACE_CREATE,
VK_FINISH_REASON_SURFACE_DOWN,
VK_FINISH_REASON_NEED_BUFFER_SPACE,
VK_FINISH_REASON_FRAMEBUFFER_DIRTY,
VK_FINISH_REASON_PRESENTING,
VK_FINISH_REASON_FLIP_STALL,
VK_FINISH_REASON_FLUSH,
} FinishReason;
// draw.c
void pgraph_vk_init_pipelines(PGRAPHState *pg);
void pgraph_vk_finalize_pipelines(PGRAPHState *pg);
void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter);
void pgraph_vk_draw_begin(NV2AState *d);
void pgraph_vk_draw_end(NV2AState *d);
void pgraph_vk_finish(PGRAPHState *pg, FinishReason why);
void pgraph_vk_flush_draw(NV2AState *d);
void pgraph_vk_begin_command_buffer(PGRAPHState *pg);
void pgraph_vk_ensure_command_buffer(PGRAPHState *pg);
void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg);
VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg);
void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd);
// blit.c
void pgraph_vk_image_blit(NV2AState *d);
#endif

View File

@ -0,0 +1,134 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
void pgraph_vk_init_reports(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
QSIMPLEQ_INIT(&r->report_queue);
r->num_queries_in_flight = 0;
r->max_queries_in_flight = 1024;
r->new_query_needed = true;
r->query_in_flight = false;
r->zpass_pixel_count_result = 0;
VkQueryPoolCreateInfo pool_create_info = (VkQueryPoolCreateInfo){
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.queryType = VK_QUERY_TYPE_OCCLUSION,
.queryCount = r->max_queries_in_flight,
};
VK_CHECK(
vkCreateQueryPool(r->device, &pool_create_info, NULL, &r->query_pool));
}
void pgraph_vk_finalize_reports(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyQueryPool(r->device, r->query_pool, NULL);
}
void pgraph_vk_clear_report_value(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
q->clear = true;
QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);
}
void pgraph_vk_get_report(NV2AState *d, uint32_t parameter)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE);
assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
q->clear = false;
q->parameter = parameter;
q->query_count = r->num_queries_in_flight;
QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);
r->new_query_needed = true;
}
void pgraph_vk_process_pending_reports_internal(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
NV2A_VK_DGROUP_BEGIN("Processing queries");
assert(!r->in_command_buffer);
// Fetch all query results
g_autofree uint64_t *query_results = NULL;
if (r->num_queries_in_flight > 0) {
size_t size_of_results = r->num_queries_in_flight * sizeof(uint64_t);
query_results = g_malloc_n(r->num_queries_in_flight,
sizeof(uint64_t)); // FIXME: Pre-allocate
VkResult result;
do {
result = vkGetQueryPoolResults(
r->device, r->query_pool, 0, r->num_queries_in_flight,
size_of_results, query_results, sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
} while (result == VK_NOT_READY);
}
// Write out queries
QueryReport *q, *next;
int num_results_counted = 0;
int result_divisor = pg->surface_scale_factor * pg->surface_scale_factor;
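/* Surfaces are rendered at surface_scale_factor times the guest
 * resolution in each dimension, so zpass counts come back scaled by the
 * factor squared; divide to report native-resolution pixel counts. */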
QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) {
if (q->clear) {
NV2A_VK_DPRINTF("Cleared");
r->zpass_pixel_count_result = 0;
} else {
assert(q->query_count >= num_results_counted);
assert(q->query_count <= r->num_queries_in_flight);
while (num_results_counted < q->query_count) {
r->zpass_pixel_count_result +=
query_results[num_results_counted++];
}
pgraph_write_zpass_pixel_cnt_report(
d, q->parameter,
r->zpass_pixel_count_result / result_divisor);
}
QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry);
g_free(q);
}
r->num_queries_in_flight = 0;
NV2A_VK_DGROUP_END();
}
void pgraph_vk_process_pending_reports(NV2AState *d)
{
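/* Intentionally empty for this renderer: results are instead drained by
 * pgraph_vk_process_pending_reports_internal() once the command buffer
 * that issued the queries has completed. */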
}

View File

@ -0,0 +1,797 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* Based on GL implementation:
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "hw/xbox/nv2a/pgraph/util.h"
#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
#include "qemu/fast-hash.h"
#include "qemu/mstring.h"
#include "renderer.h"
#include <locale.h>
static void create_descriptor_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
size_t num_sets = ARRAY_SIZE(r->descriptor_sets);
VkDescriptorPoolSize pool_sizes[] = {
{
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = 2 * num_sets,
},
{
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = NV2A_MAX_TEXTURES * num_sets,
}
};
VkDescriptorPoolCreateInfo pool_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.poolSizeCount = ARRAY_SIZE(pool_sizes),
.pPoolSizes = pool_sizes,
.maxSets = ARRAY_SIZE(r->descriptor_sets),
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
};
VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
&r->descriptor_pool));
}
static void destroy_descriptor_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyDescriptorPool(r->device, r->descriptor_pool, NULL);
r->descriptor_pool = VK_NULL_HANDLE;
}
static void create_descriptor_set_layout(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES];
bindings[0] = (VkDescriptorSetLayoutBinding){
.binding = VSH_UBO_BINDING,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
};
bindings[1] = (VkDescriptorSetLayoutBinding){
.binding = PSH_UBO_BINDING,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
bindings[2 + i] = (VkDescriptorSetLayoutBinding){
.binding = PSH_TEX_BINDING + i,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
};
}
VkDescriptorSetLayoutCreateInfo layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = ARRAY_SIZE(bindings),
.pBindings = bindings,
};
VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
&r->descriptor_set_layout));
}
static void destroy_descriptor_set_layout(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyDescriptorSetLayout(r->device, r->descriptor_set_layout, NULL);
r->descriptor_set_layout = VK_NULL_HANDLE;
}
static void create_descriptor_sets(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)];
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
layouts[i] = r->descriptor_set_layout;
}
VkDescriptorSetAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = r->descriptor_pool,
.descriptorSetCount = ARRAY_SIZE(r->descriptor_sets),
.pSetLayouts = layouts,
};
VK_CHECK(
vkAllocateDescriptorSets(r->device, &alloc_info, r->descriptor_sets));
}
static void destroy_descriptor_sets(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkFreeDescriptorSets(r->device, r->descriptor_pool,
ARRAY_SIZE(r->descriptor_sets), r->descriptor_sets);
for (int i = 0; i < ARRAY_SIZE(r->descriptor_sets); i++) {
r->descriptor_sets[i] = VK_NULL_HANDLE;
}
}
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
bool need_uniform_write =
r->uniforms_changed ||
!r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset;
if (!(r->shader_bindings_changed || r->texture_bindings_changed ||
(r->descriptor_set_index == 0) || need_uniform_write)) {
return; // Nothing changed
}
ShaderBinding *binding = r->shader_binding;
ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
&binding->fragment->uniforms };
VkDeviceSize ubo_buffer_total_size = 0;
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
ubo_buffer_total_size += layouts[i]->total_size;
}
bool need_ubo_staging_buffer_reset =
r->uniforms_changed &&
!pgraph_vk_buffer_has_space_for(pg, BUFFER_UNIFORM_STAGING,
ubo_buffer_total_size,
r->device_props.limits.minUniformBufferOffsetAlignment);
bool need_descriptor_write_reset =
(r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets));
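/* Out of descriptor sets or uniform staging space: finish all in-flight
 * work so both can be recycled, then force a fresh uniform write. */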
if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) {
pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
need_uniform_write = true;
}
VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES];
assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets));
if (need_uniform_write) {
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
void *data = layouts[i]->allocation;
VkDeviceSize size = layouts[i]->total_size;
r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer(
pg, BUFFER_UNIFORM_STAGING, &data, &size, 1,
r->device_props.limits.minUniformBufferOffsetAlignment);
}
r->uniforms_changed = false;
}
VkDescriptorBufferInfo ubo_buffer_infos[2];
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
ubo_buffer_infos[i] = (VkDescriptorBufferInfo){
.buffer = r->storage_buffers[BUFFER_UNIFORM].buffer,
.offset = r->uniform_buffer_offsets[i],
.range = layouts[i]->total_size,
};
descriptor_writes[i] = (VkWriteDescriptorSet){
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = r->descriptor_sets[r->descriptor_set_index],
.dstBinding = i == 0 ? VSH_UBO_BINDING : PSH_UBO_BINDING,
.dstArrayElement = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.descriptorCount = 1,
.pBufferInfo = &ubo_buffer_infos[i],
};
}
VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES];
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
image_infos[i] = (VkDescriptorImageInfo){
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
.imageView = r->texture_bindings[i]->image_view,
.sampler = r->texture_bindings[i]->sampler,
};
descriptor_writes[2 + i] = (VkWriteDescriptorSet){
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = r->descriptor_sets[r->descriptor_set_index],
.dstBinding = PSH_TEX_BINDING + i,
.dstArrayElement = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.descriptorCount = 1,
.pImageInfo = &image_infos[i],
};
}
vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
descriptor_writes, 0, NULL);
r->descriptor_set_index++;
}
static void update_shader_constant_locations(ShaderBinding *binding)
{
int i, j;
char tmp[64];
/* lookup fragment shader uniforms */
for (i = 0; i < 9; i++) {
for (j = 0; j < 2; j++) {
snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
binding->psh_constant_loc[i][j] =
uniform_index(&binding->fragment->uniforms, tmp);
}
}
binding->alpha_ref_loc =
uniform_index(&binding->fragment->uniforms, "alphaRef");
binding->fog_color_loc =
uniform_index(&binding->fragment->uniforms, "fogColor");
for (i = 1; i < NV2A_MAX_TEXTURES; i++) {
snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
binding->bump_mat_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
binding->bump_scale_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
binding->bump_offset_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
}
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
snprintf(tmp, sizeof(tmp), "texScale%d", i);
binding->tex_scale_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
}
/* lookup vertex shader uniforms */
binding->vsh_constant_loc = uniform_index(&binding->vertex->uniforms, "c");
binding->surface_size_loc =
uniform_index(&binding->vertex->uniforms, "surfaceSize");
binding->clip_range_loc =
uniform_index(&binding->vertex->uniforms, "clipRange");
binding->fog_param_loc =
uniform_index(&binding->vertex->uniforms, "fogParam");
binding->inv_viewport_loc =
uniform_index(&binding->vertex->uniforms, "invViewport");
binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa");
binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb");
binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1");
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
binding->light_infinite_half_vector_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
binding->light_infinite_direction_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
binding->light_local_position_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
binding->light_local_attenuation_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
}
binding->clip_region_loc =
uniform_index(&binding->fragment->uniforms, "clipRegion");
binding->material_alpha_loc =
uniform_index(&binding->vertex->uniforms, "material_alpha");
}
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
ShaderBinding *snode = container_of(node, ShaderBinding, node);
memcpy(&snode->state, state, sizeof(ShaderState));
}
static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
{
PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache);
ShaderBinding *snode = container_of(node, ShaderBinding, node);
ShaderModuleInfo *modules[] = {
snode->geometry,
snode->vertex,
snode->fragment,
};
for (int i = 0; i < ARRAY_SIZE(modules); i++) {
if (modules[i]) {
pgraph_vk_destroy_shader_module(r, modules[i]);
}
}
memset(&snode->state, 0, sizeof(ShaderState));
}
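/* LRU comparison callback: memcmp semantics, i.e. returns nonzero when the
 * cached state differs from the lookup key. */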
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
ShaderBinding *snode = container_of(node, ShaderBinding, node);
return memcmp(&snode->state, key, sizeof(ShaderState));
}
static void shader_cache_init(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
const size_t shader_cache_size = 1024;
lru_init(&r->shader_cache);
r->shader_cache_entries = g_malloc_n(shader_cache_size, sizeof(ShaderBinding));
assert(r->shader_cache_entries != NULL);
for (int i = 0; i < shader_cache_size; i++) {
lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node);
}
r->shader_cache.init_node = shader_cache_entry_init;
r->shader_cache.compare_nodes = shader_cache_entry_compare;
r->shader_cache.post_node_evict = shader_cache_entry_post_evict;
}
static void shader_cache_finalize(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
lru_flush(&r->shader_cache);
g_free(r->shader_cache_entries);
r->shader_cache_entries = NULL;
}
static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state)
{
PGRAPHVkState *r = pg->vk_renderer_state;
uint64_t hash = fast_hash((void *)state, sizeof(*state));
LruNode *node = lru_lookup(&r->shader_cache, hash, state);
ShaderBinding *snode = container_of(node, ShaderBinding, node);
NV2A_VK_DPRINTF("shader state hash: %016lx, %p", hash, snode);
if (!snode->fragment) {
NV2A_VK_DPRINTF("cache miss");
nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
if (previous_numeric_locale) {
previous_numeric_locale = g_strdup(previous_numeric_locale);
}
/* Ensure numeric values are printed with '.' radix, no grouping */
setlocale(LC_NUMERIC, "C");
MString *geometry_shader_code = pgraph_gen_geom_glsl(
state->polygon_front_mode, state->polygon_back_mode,
state->primitive_mode, state->smooth_shading, true);
if (geometry_shader_code) {
NV2A_VK_DPRINTF("geometry shader: \n%s",
mstring_get_str(geometry_shader_code));
snode->geometry = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_GEOMETRY_BIT,
mstring_get_str(geometry_shader_code));
mstring_unref(geometry_shader_code);
} else {
snode->geometry = NULL;
}
MString *vertex_shader_code =
pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL);
NV2A_VK_DPRINTF("vertex shader: \n%s",
mstring_get_str(vertex_shader_code));
snode->vertex = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_VERTEX_BIT,
mstring_get_str(vertex_shader_code));
mstring_unref(vertex_shader_code);
MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh);
NV2A_VK_DPRINTF("fragment shader: \n%s",
mstring_get_str(fragment_shader_code));
snode->fragment = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_FRAGMENT_BIT,
mstring_get_str(fragment_shader_code));
mstring_unref(fragment_shader_code);
if (previous_numeric_locale) {
setlocale(LC_NUMERIC, previous_numeric_locale);
g_free(previous_numeric_locale);
}
update_shader_constant_locations(snode);
}
return snode;
}
// FIXME: Move to common
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
bool binding_changed, bool vertex_program,
bool fixed_function)
{
int i, j;
/* update combiner constants */
for (i = 0; i < 9; i++) {
uint32_t constant[2];
if (i == 8) {
/* final combiner */
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
} else {
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
}
for (j = 0; j < 2; j++) {
GLint loc = binding->psh_constant_loc[i][j];
if (loc != -1) {
float value[4];
pgraph_argb_pack32_to_rgba_float(constant[j], value);
uniform1fv(&binding->fragment->uniforms, loc, 4, value);
}
}
}
if (binding->alpha_ref_loc != -1) {
float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
NV_PGRAPH_CONTROL_0_ALPHAREF) /
255.0;
uniform1f(&binding->fragment->uniforms, binding->alpha_ref_loc,
alpha_ref);
}
/* For each texture stage */
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
int loc;
/* Bump luminance only during stages 1 - 3 */
if (i > 0) {
loc = binding->bump_mat_loc[i];
if (loc != -1) {
uint32_t m_u32[4];
m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
float m[4];
m[0] = *(float*)&m_u32[0];
m[1] = *(float*)&m_u32[1];
m[2] = *(float*)&m_u32[2];
m[3] = *(float*)&m_u32[3];
uniformMatrix2fv(&binding->fragment->uniforms, loc, m);
}
loc = binding->bump_scale_loc[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
uniform1f(&binding->fragment->uniforms, loc,
*(float *)&v);
}
loc = binding->bump_offset_loc[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
uniform1f(&binding->fragment->uniforms, loc,
*(float *)&v);
}
}
loc = binding->tex_scale_loc[i];
if (loc != -1) {
TextureBinding *tex = pg->vk_renderer_state->texture_bindings[i];
assert(tex != NULL);
float scale = tex->key.scale;
BasicColorFormatInfo f_basic =
kelvin_color_format_info_map[tex->key.state.color_format];
if (!f_basic.linear) {
scale = 1.0;
}
uniform1f(&binding->fragment->uniforms, loc, scale);
}
}
if (binding->fog_color_loc != -1) {
uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
uniform4f(&binding->fragment->uniforms, binding->fog_color_loc,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
}
if (binding->fog_param_loc != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
uniform2f(&binding->vertex->uniforms,
binding->fog_param_loc, *(float *)&v[0],
*(float *)&v[1]);
}
float zmax;
switch (pg->surface_shape.zeta_format) {
case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
break;
case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
break;
default:
assert(0);
}
if (fixed_function) {
/* update lighting constants */
struct {
uint32_t *v;
int locs;
size_t len;
} lighting_arrays[] = {
{ &pg->ltctxa[0][0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT },
{ &pg->ltctxb[0][0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT },
{ &pg->ltc1[0][0], binding->ltc1_loc, NV2A_LTC1_COUNT },
};
for (i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
uniform1iv(
&binding->vertex->uniforms, lighting_arrays[i].locs,
lighting_arrays[i].len * 4, (void *)lighting_arrays[i].v);
}
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
int loc = binding->light_infinite_half_vector_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_infinite_half_vector[i]);
}
loc = binding->light_infinite_direction_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_infinite_direction[i]);
}
loc = binding->light_local_position_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_local_position[i]);
}
loc = binding->light_local_attenuation_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_local_attenuation[i]);
}
}
/* estimate the viewport by assuming it matches the surface ... */
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
float m33 = zmax;
float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
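/* Inverse of the estimated viewport transform: undoes the x/y scale
 * (m11, m22), depth scale (m33) and translation (m41, m42); stored
 * column-major with the translation in the fourth column. */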
float invViewport[16] = {
1.0 / m11, 0, 0, 0,
0, 1.0 / m22, 0, 0,
0, 0, 1.0 / m33, 0,
-1.0 + m41 / m11, 1.0 + m42 / m22, 0, 1.0,
};
if (binding->inv_viewport_loc != -1) {
uniformMatrix4fv(&binding->vertex->uniforms,
binding->inv_viewport_loc, &invViewport[0]);
}
}
/* update vertex program constants */
uniform1iv(&binding->vertex->uniforms, binding->vsh_constant_loc,
NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants);
if (binding->surface_size_loc != -1) {
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
uniform2f(&binding->vertex->uniforms, binding->surface_size_loc,
pg->surface_binding_dim.width / aa_width,
pg->surface_binding_dim.height / aa_height);
}
if (binding->clip_range_loc != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0;
float zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0;
uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0,
zmax, zclip_min, zclip_max);
}
/* Clipping regions */
unsigned int max_gl_width = pg->surface_binding_dim.width;
unsigned int max_gl_height = pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
uint32_t clip_regions[8][4];
for (i = 0; i < 8; i++) {
uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
pgraph_apply_scaling_factor(pg, &x_min, &y_min);
pgraph_apply_scaling_factor(pg, &x_max, &y_max);
clip_regions[i][0] = x_min;
clip_regions[i][1] = y_min;
clip_regions[i][2] = x_max;
clip_regions[i][3] = y_max;
}
uniform1iv(&binding->fragment->uniforms, binding->clip_region_loc,
8 * 4, (void *)clip_regions);
if (binding->material_alpha_loc != -1) {
uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc,
pg->material_alpha);
}
}
// Quickly check PGRAPH state to see if any registers have changed that
// necessitate a full shader state inspection.
static bool check_shaders_dirty(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
if (!r->shader_binding) {
return true;
}
if (pg->program_data_dirty) {
return true;
}
int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
for (int i = 0; i < num_stages; i++) {
if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4) ||
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4) ||
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4) ||
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4)) {
return true;
}
}
unsigned int regs[] = {
NV_PGRAPH_COMBINECTL,
NV_PGRAPH_COMBINESPECFOG0,
NV_PGRAPH_COMBINESPECFOG1,
NV_PGRAPH_CSV0_C,
NV_PGRAPH_CSV0_D,
NV_PGRAPH_CSV1_A,
NV_PGRAPH_CSV1_B,
NV_PGRAPH_POINTSIZE,
NV_PGRAPH_SHADERCLIPMODE,
NV_PGRAPH_SHADERCTL,
NV_PGRAPH_SHADERPROG,
NV_PGRAPH_SHADOWCTL,
};
for (int i = 0; i < ARRAY_SIZE(regs); i++) {
if (pgraph_is_reg_dirty(pg, regs[i])) {
return true;
}
}
ShaderState *state = &r->shader_binding->state;
if (pg->uniform_attrs != state->uniform_attrs ||
pg->swizzle_attrs != state->swizzle_attrs ||
pg->compressed_attrs != state->compressed_attrs ||
pg->primitive_mode != state->primitive_mode ||
pg->surface_scale_factor != state->surface_scale_factor) {
return true;
}
// Textures
for (int i = 0; i < 4; i++) {
if (pg->texture_matrix_enable[i] != state->texture_matrix_enable[i] ||
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + i * 4) ||
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + i * 4) ||
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + i * 4)) {
return true;
}
}
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
return false;
}
void pgraph_vk_bind_shaders(PGRAPHState *pg)
{
NV2A_VK_DGROUP_BEGIN("%s", __func__);
PGRAPHVkState *r = pg->vk_renderer_state;
r->shader_bindings_changed = false;
if (check_shaders_dirty(pg)) {
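/* Zero the whole struct first so padding bytes are deterministic for the
 * memcmp below. */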
ShaderState new_state;
memset(&new_state, 0, sizeof(ShaderState));
new_state = pgraph_get_shader_state(pg);
if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) {
r->shader_binding = gen_shaders(pg, &new_state);
r->shader_bindings_changed = true;
}
}
// FIXME: Use dirty bits
pgraph_vk_update_shader_uniforms(pg);
NV2A_VK_DGROUP_END();
}
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
NV2A_VK_DGROUP_BEGIN("%s", __func__);
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);
assert(r->shader_binding);
ShaderBinding *binding = r->shader_binding;
ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
&binding->fragment->uniforms };
shader_update_constants(pg, r->shader_binding, true,
r->shader_binding->state.vertex_program,
r->shader_binding->state.fixed_function);
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
uint64_t hash = fast_hash(layouts[i]->allocation, layouts[i]->total_size);
r->uniforms_changed |= (hash != r->uniform_buffer_hashes[i]);
r->uniform_buffer_hashes[i] = hash;
}
nv2a_profile_inc_counter(r->uniforms_changed ?
NV2A_PROF_SHADER_UBO_DIRTY :
NV2A_PROF_SHADER_UBO_NOTDIRTY);
NV2A_VK_DGROUP_END();
}
void pgraph_vk_init_shaders(PGRAPHState *pg)
{
pgraph_vk_init_glsl_compiler();
create_descriptor_pool(pg);
create_descriptor_set_layout(pg);
create_descriptor_sets(pg);
shader_cache_init(pg);
}
void pgraph_vk_finalize_shaders(PGRAPHState *pg)
{
shader_cache_finalize(pg);
destroy_descriptor_sets(pg);
destroy_descriptor_set_layout(pg);
destroy_descriptor_pool(pg);
pgraph_vk_finalize_glsl_compiler();
}

View File

@ -0,0 +1,473 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/pgraph/pgraph.h"
#include "renderer.h"
#include <vulkan/vulkan_core.h>
// TODO: Swizzle/Unswizzle
// TODO: Float depth format (low priority, but would be better for accuracy)
// FIXME: The pipeline creation below assumes an identical three-buffer
// setup. The swizzle shader will need more flexibility.
const char *pack_d24_unorm_s8_uint_to_z24s8_glsl =
"#version 450\n"
"layout(local_size_x = 256) in;\n"
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
"layout(binding = 0) buffer DepthIn { uint depth_in[]; };\n"
"layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
"layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
"uint get_input_idx(uint idx_out) {\n"
" uint scale = width_in / width_out;"
" uint y = (idx_out / width_out) * scale;\n"
" uint x = (idx_out % width_out) * scale;\n"
" return y * width_in + x;\n"
"}\n"
"void main() {\n"
" uint idx_out = gl_GlobalInvocationID.x;\n"
" uint idx_in = get_input_idx(idx_out);\n"
" uint depth_value = depth_in[idx_in];\n"
" uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
" depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
"}\n";
const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl =
"#version 450\n"
"layout(local_size_x = 256) in;\n"
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
"layout(binding = 0) buffer DepthOut { uint depth_out[]; };\n"
"layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
"layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
"uint get_input_idx(uint idx_out) {\n"
" uint scale = width_out / width_in;"
" uint y = (idx_out / width_out) / scale;\n"
" uint x = (idx_out % width_out) / scale;\n"
" return y * width_in + x;\n"
"}\n"
"void main() {\n"
" uint idx_out = gl_GlobalInvocationID.x;\n"
" uint idx_in = get_input_idx(idx_out);\n"
" depth_out[idx_out] = depth_stencil_in[idx_in] >> 8;\n"
" if (idx_out % 4 == 0) {\n"
" uint stencil_value = 0;\n"
" for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
" uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
" stencil_value |= v << (i * 8);\n"
" }\n"
" stencil_out[idx_out / 4] = stencil_value;\n"
" }\n"
"}\n";
const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl =
"#version 450\n"
"layout(local_size_x = 256) in;\n"
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
"layout(binding = 0) buffer DepthIn { float depth_in[]; };\n"
"layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
"layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
"uint get_input_idx(uint idx_out) {\n"
" uint y = idx_out / width_out;\n"
" uint x = idx_out % width_out;\n"
" return (y * width_in + x) * (width_in / width_out);\n"
"}\n"
"void main() {\n"
" uint idx_out = gl_GlobalInvocationID.x;\n"
" uint idx_in = get_input_idx(idx_out);\n"
" uint depth_value = int(depth_in[idx_in] * float(0xffffff));\n"
" uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
" depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
"}\n";
const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl =
"#version 450\n"
"layout(local_size_x = 256) in;\n"
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
"layout(binding = 0) buffer DepthOut { float depth_out[]; };\n"
"layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
"layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
"uint get_input_idx(uint idx_out) {\n"
" uint scale = width_out / width_in;"
" uint y = (idx_out / width_out) / scale;\n"
" uint x = (idx_out % width_out) / scale;\n"
" return y * width_in + x;\n"
"}\n"
"void main() {\n"
" uint idx_out = gl_GlobalInvocationID.x;\n"
" uint idx_in = get_input_idx(idx_out);\n"
" depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n"
" if (idx_out % 4 == 0) {\n"
" uint stencil_value = 0;\n"
" for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
" uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
" stencil_value |= v << (i * 8);\n"
" }\n"
" stencil_out[idx_out / 4] = stencil_value;\n"
" }\n"
"}\n";
static void create_descriptor_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorPoolSize pool_sizes[] = {
{
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 3,
},
};
VkDescriptorPoolCreateInfo pool_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.poolSizeCount = ARRAY_SIZE(pool_sizes),
.pPoolSizes = pool_sizes,
.maxSets = ARRAY_SIZE(r->compute.descriptor_sets),
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
};
VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
&r->compute.descriptor_pool));
}
static void destroy_descriptor_pool(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyDescriptorPool(r->device, r->compute.descriptor_pool, NULL);
r->compute.descriptor_pool = VK_NULL_HANDLE;
}
static void create_descriptor_set_layout(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
const int num_buffers = 3;
VkDescriptorSetLayoutBinding bindings[num_buffers];
for (int i = 0; i < num_buffers; i++) {
bindings[i] = (VkDescriptorSetLayoutBinding){
.binding = i,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
};
}
VkDescriptorSetLayoutCreateInfo layout_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.bindingCount = ARRAY_SIZE(bindings),
.pBindings = bindings,
};
VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
&r->compute.descriptor_set_layout));
}
static void destroy_descriptor_set_layout(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkDestroyDescriptorSetLayout(r->device, r->compute.descriptor_set_layout,
NULL);
r->compute.descriptor_set_layout = VK_NULL_HANDLE;
}
static void create_descriptor_sets(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkDescriptorSetLayout layouts[ARRAY_SIZE(r->compute.descriptor_sets)];
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
layouts[i] = r->compute.descriptor_set_layout;
}
VkDescriptorSetAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = r->compute.descriptor_pool,
.descriptorSetCount = ARRAY_SIZE(r->compute.descriptor_sets),
.pSetLayouts = layouts,
};
VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
r->compute.descriptor_sets));
}
static void destroy_descriptor_sets(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
vkFreeDescriptorSets(r->device, r->compute.descriptor_pool,
ARRAY_SIZE(r->compute.descriptor_sets),
r->compute.descriptor_sets);
for (int i = 0; i < ARRAY_SIZE(r->compute.descriptor_sets); i++) {
r->compute.descriptor_sets[i] = VK_NULL_HANDLE;
}
}
static void create_compute_pipeline_layout(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkPushConstantRange push_constant_range = {
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.size = 2 * sizeof(uint32_t),
};
VkPipelineLayoutCreateInfo pipeline_layout_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &r->compute.descriptor_set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constant_range,
};
VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
&r->compute.pipeline_layout));
}
static VkPipeline create_compute_pipeline(PGRAPHState *pg, const char *glsl)
{
PGRAPHVkState *r = pg->vk_renderer_state;
ShaderModuleInfo *module = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_COMPUTE_BIT, glsl);
VkComputePipelineCreateInfo pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.layout = r->compute.pipeline_layout,
.stage =
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.pName = "main",
.module = module->module,
},
};
VkPipeline pipeline;
VK_CHECK(vkCreateComputePipelines(r->device, r->vk_pipeline_cache, 1,
&pipeline_info, NULL,
&pipeline));
pgraph_vk_destroy_shader_module(r, module);
return pipeline;
}
static void update_descriptor_sets(PGRAPHState *pg,
VkDescriptorBufferInfo *buffers, int count)
{
PGRAPHVkState *r = pg->vk_renderer_state;
assert(count == 3);
VkWriteDescriptorSet descriptor_writes[3];
const int descriptor_set_index = 0;
for (int i = 0; i < count; i++) {
descriptor_writes[i] = (VkWriteDescriptorSet){
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = r->compute.descriptor_sets[descriptor_set_index],
.dstBinding = i,
.dstArrayElement = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.pBufferInfo = &buffers[i],
};
}
vkUpdateDescriptorSets(r->device, count, descriptor_writes, 0, NULL);
}
//
// Pack depth+stencil into NV097_SET_SURFACE_FORMAT_ZETA_Z24S8
// formatted buffer with depth in bits 31-8 and stencil in bits 7-0.
//
void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
VkCommandBuffer cmd, VkBuffer src,
VkBuffer dst, bool downscale)
{
PGRAPHVkState *r = pg->vk_renderer_state;
unsigned int input_width = surface->width, input_height = surface->height;
pgraph_apply_scaling_factor(pg, &input_width, &input_height);
unsigned int output_width = surface->width, output_height = surface->height;
if (!downscale) {
pgraph_apply_scaling_factor(pg, &output_width, &output_height);
}
size_t depth_bytes_per_pixel = 4;
size_t depth_size = input_width * input_height * depth_bytes_per_pixel;
size_t stencil_bytes_per_pixel = 1;
size_t stencil_size = input_width * input_height * stencil_bytes_per_pixel;
size_t output_bytes_per_pixel = 4;
size_t output_size = output_width * output_height * output_bytes_per_pixel;
VkDescriptorBufferInfo buffers[] = {
{
.buffer = src,
.offset = 0,
.range = depth_size,
},
{
.buffer = src,
.offset = depth_size,
.range = stencil_size,
},
{
.buffer = dst,
.offset = 0,
.range = output_size,
},
};
update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers));
if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
r->compute.pipeline_pack_d24s8);
} else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
r->compute.pipeline_pack_f32s8);
} else {
assert(!"Unsupported pack format");
}
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
r->compute.pipeline_layout, 0, 1,
&r->compute.descriptor_sets[0], 0, NULL);
uint32_t push_constants[2] = { input_width, output_width };
assert(sizeof(push_constants) == 8);
vkCmdPushConstants(cmd, r->compute.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
push_constants);
size_t workgroup_size_in_units = 256;
size_t output_size_in_units = output_width * output_height;
assert(output_size_in_units % workgroup_size_in_units == 0);
size_t group_count = output_size_in_units / workgroup_size_in_units;
// FIXME: Check max group count
vkCmdDispatch(cmd, group_count, 1, 1);
}
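The dispatch geometry follows directly from the 256-lane workgroup declared in the shaders: one invocation per output pixel, so the group count is the pixel count divided by 256. A worked example with a hypothetical 640x480 surface at 2x surface scale:

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

int main(void)
{
    size_t output_width = 640 * 2, output_height = 480 * 2;
    size_t workgroup_size = 256; /* local_size_x in the compute shaders */
    size_t invocations = output_width * output_height;
    assert(invocations % workgroup_size == 0); /* same assumption as above */
    /* 1228800 / 256 = 4800 workgroups */
    printf("group_count = %zu\n", invocations / workgroup_size);
    return 0;
}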
void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
VkCommandBuffer cmd, VkBuffer src,
VkBuffer dst)
{
PGRAPHVkState *r = pg->vk_renderer_state;
unsigned int input_width = surface->width, input_height = surface->height;
unsigned int output_width = surface->width, output_height = surface->height;
pgraph_apply_scaling_factor(pg, &output_width, &output_height);
size_t depth_bytes_per_pixel = 4;
size_t depth_size = output_width * output_height * depth_bytes_per_pixel;
size_t stencil_bytes_per_pixel = 1;
size_t stencil_size = output_width * output_height * stencil_bytes_per_pixel;
size_t input_bytes_per_pixel = 4;
size_t input_size = input_width * input_height * input_bytes_per_pixel;
VkDescriptorBufferInfo buffers[] = {
{
.buffer = dst,
.offset = 0,
.range = depth_size,
},
{
.buffer = dst,
.offset = depth_size,
.range = stencil_size,
},
{
.buffer = src,
.offset = 0,
.range = input_size,
},
};
update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers));
if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
r->compute.pipeline_unpack_d24s8);
} else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
r->compute.pipeline_unpack_f32s8);
} else {
assert(!"Unsupported pack format");
}
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
r->compute.pipeline_layout, 0, 1,
&r->compute.descriptor_sets[0], 0, NULL);
assert(output_width >= input_width);
uint32_t push_constants[2] = { input_width, output_width };
assert(sizeof(push_constants) == 8);
vkCmdPushConstants(cmd, r->compute.pipeline_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
push_constants);
size_t workgroup_size_in_units = 256;
size_t output_size_in_units = output_width * output_height;
assert(output_size_in_units % workgroup_size_in_units == 0);
size_t group_count = output_size_in_units / workgroup_size_in_units;
// FIXME: Check max group count
vkCmdDispatch(cmd, group_count, 1, 1);
}
void pgraph_vk_init_compute(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
create_descriptor_pool(pg);
create_descriptor_set_layout(pg);
create_descriptor_sets(pg);
create_compute_pipeline_layout(pg);
r->compute.pipeline_pack_d24s8 =
create_compute_pipeline(pg, pack_d24_unorm_s8_uint_to_z24s8_glsl);
r->compute.pipeline_unpack_d24s8 =
create_compute_pipeline(pg, unpack_z24s8_to_d24_unorm_s8_uint_glsl);
r->compute.pipeline_pack_f32s8 =
create_compute_pipeline(pg, pack_d32_sfloat_s8_uint_to_z24s8_glsl);
r->compute.pipeline_unpack_f32s8 =
create_compute_pipeline(pg, unpack_z24s8_to_d32_sfloat_s8_uint_glsl);
}
void pgraph_vk_finalize_compute(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
VkPipeline *pipelines[] = {
&r->compute.pipeline_pack_d24s8,
&r->compute.pipeline_unpack_d24s8,
&r->compute.pipeline_pack_f32s8,
&r->compute.pipeline_unpack_f32s8,
};
for (int i = 0; i < ARRAY_SIZE(pipelines); i++) {
vkDestroyPipeline(r->device, *pipelines[i], NULL);
*pipelines[i] = VK_NULL_HANDLE;
}
vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL);
r->compute.pipeline_layout = VK_NULL_HANDLE;
destroy_descriptor_sets(pg);
destroy_descriptor_set_layout(pg);
destroy_descriptor_pool(pg);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,312 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024 Matt Borgerson
*
* Based on GL implementation:
*
* Copyright (c) 2012 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "renderer.h"
VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data,
VkDeviceSize size)
{
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
return pgraph_vk_append_to_buffer(pg, BUFFER_INDEX_STAGING, &data, &size, 1,
1);
}
VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data,
VkDeviceSize *sizes,
size_t count)
{
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
return pgraph_vk_append_to_buffer(pg, BUFFER_VERTEX_INLINE_STAGING, data,
sizes, count, 1);
}
void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset,
void *data, VkDeviceSize size)
{
PGRAPHVkState *r = pg->vk_renderer_state;
size_t offset_bit = offset / 4096;
size_t nbits = DIV_ROUND_UP(size, 4096);
if (find_next_bit(r->uploaded_bitmap, offset_bit + nbits, offset_bit) <
    offset_bit + nbits) {
// Vertex data changed while building the draw list. Finish drawing
// before updating RAM buffer.
pgraph_vk_finish(pg, VK_FINISH_REASON_VERTEX_BUFFER_DIRTY);
}
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1);
memcpy(r->storage_buffers[BUFFER_VERTEX_RAM].mapped + offset, data, size);
bitmap_set(r->uploaded_bitmap, offset_bit, nbits);
}
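The bitmap above tracks uploads at 4 KiB page granularity: pages written since the last finish are marked, and touching an already-marked page forces a finish so in-flight draws never observe the overwrite. A self-contained sketch of that bookkeeping, using a plain bit array instead of QEMU's bitmap helpers:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define PAGE_SIZE 4096
#define NUM_PAGES 1024 /* hypothetical size of the tracked region */

static uint8_t uploaded[NUM_PAGES / 8]; /* one bit per 4 KiB page */

static bool page_is_marked(size_t page)
{
    return uploaded[page / 8] & (1u << (page % 8));
}

/* True if any page overlapping [offset, offset + size) was already
 * uploaded, i.e. the draw list must be finished before overwriting. */
static bool range_needs_finish(size_t offset, size_t size)
{
    size_t last = (offset + size - 1) / PAGE_SIZE;
    for (size_t p = offset / PAGE_SIZE; p <= last; p++) {
        if (page_is_marked(p)) {
            return true;
        }
    }
    return false;
}

static void mark_range_uploaded(size_t offset, size_t size)
{
    size_t last = (offset + size - 1) / PAGE_SIZE;
    for (size_t p = offset / PAGE_SIZE; p <= last; p++) {
        uploaded[p / 8] |= 1u << (p % 8);
    }
}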
static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
assert(r->num_vertex_ram_buffer_syncs <
ARRAY_SIZE(r->vertex_ram_buffer_syncs));
r->vertex_ram_buffer_syncs[r->num_vertex_ram_buffer_syncs++] =
(MemorySyncRequirement){ .addr = addr, .size = size };
}
static const VkFormat float_to_count[] = {
VK_FORMAT_R32_SFLOAT,
VK_FORMAT_R32G32_SFLOAT,
VK_FORMAT_R32G32B32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT,
};
static const VkFormat ub_to_count[] = {
VK_FORMAT_R8_UNORM,
VK_FORMAT_R8G8_UNORM,
VK_FORMAT_R8G8B8_UNORM,
VK_FORMAT_R8G8B8A8_UNORM,
};
static const VkFormat s1_to_count[] = {
VK_FORMAT_R16_SNORM,
VK_FORMAT_R16G16_SNORM,
VK_FORMAT_R16G16B16_SNORM,
VK_FORMAT_R16G16B16A16_SNORM,
};
static const VkFormat s32k_to_count[] = {
VK_FORMAT_R16_SSCALED,
VK_FORMAT_R16G16_SSCALED,
VK_FORMAT_R16G16B16_SSCALED,
VK_FORMAT_R16G16B16A16_SSCALED,
};
static char const * const vertex_data_array_format_to_str[] = {
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D] = "UB_D3D",
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL] = "UB_OGL",
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1] = "S1",
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F] = "F",
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K] = "S32K",
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP] = "CMP",
};
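All four format tables are indexed by attr->count - 1, which is why each case below asserts the count before the lookup. A hypothetical standalone helper (not part of the renderer) showing the same indexing for the float case:

#include <assert.h>
#include <vulkan/vulkan_core.h>

/* Hypothetical helper mirroring the float_to_count table: map a float
 * attribute's 1-based component count to its VkFormat. */
static VkFormat float_attr_format(unsigned int count)
{
    static const VkFormat formats[] = {
        VK_FORMAT_R32_SFLOAT,
        VK_FORMAT_R32G32_SFLOAT,
        VK_FORMAT_R32G32B32_SFLOAT,
        VK_FORMAT_R32G32B32A32_SFLOAT,
    };
    assert(count >= 1 && count <= 4);
    return formats[count - 1];
}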
void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
unsigned int max_element,
bool inline_data,
unsigned int inline_stride,
unsigned int provoking_element)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
unsigned int num_elements = max_element - min_element + 1;
if (inline_data) {
NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
__func__, num_elements, inline_stride);
} else {
NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
}
pg->compressed_attrs = 0;
pg->uniform_attrs = 0;
pg->swizzle_attrs = 0;
r->num_active_vertex_attribute_descriptions = 0;
r->num_active_vertex_binding_descriptions = 0;
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attr = &pg->vertex_attributes[i];
NV2A_VK_DGROUP_BEGIN("[attr %02d] format=%s, count=%d, stride=%d", i,
vertex_data_array_format_to_str[attr->format],
attr->count, attr->stride);
r->vertex_attribute_to_description_location[i] = -1;
if (!attr->count) {
pg->uniform_attrs |= 1 << i;
NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}",
attr->inline_value[0], attr->inline_value[1],
attr->inline_value[2], attr->inline_value[3]);
NV2A_VK_DGROUP_END();
continue;
}
VkFormat vk_format;
bool needs_conversion = false;
bool d3d_swizzle = false;
switch (attr->format) {
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
assert(attr->count == 4);
d3d_swizzle = true;
/* fallthru */
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
assert(attr->count <= ARRAY_SIZE(ub_to_count));
vk_format = ub_to_count[attr->count - 1];
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
assert(attr->count <= ARRAY_SIZE(s1_to_count));
vk_format = s1_to_count[attr->count - 1];
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
assert(attr->count <= ARRAY_SIZE(float_to_count));
vk_format = float_to_count[attr->count - 1];
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
assert(attr->count <= ARRAY_SIZE(s32k_to_count));
vk_format = s32k_to_count[attr->count - 1];
break;
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
vk_format =
VK_FORMAT_R32_SINT; // VK_FORMAT_B10G11R11_UFLOAT_PACK32 ??
/* 3 signed, normalized components packed in 32-bits. (11,11,10) */
assert(attr->count == 1);
needs_conversion = true;
break;
default:
fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
assert(false);
break;
}
nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND);
hwaddr attrib_data_addr;
size_t stride;
if (needs_conversion) {
pg->compressed_attrs |= (1 << i);
}
if (d3d_swizzle) {
pg->swizzle_attrs |= (1 << i);
}
hwaddr start = 0;
if (inline_data) {
attrib_data_addr = attr->inline_array_offset;
stride = inline_stride;
} else {
hwaddr dma_len;
uint8_t *attr_data = (uint8_t *)nv_dma_map(
d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a,
&dma_len);
assert(attr->offset < dma_len);
attrib_data_addr = attr_data + attr->offset - d->vram_ptr;
stride = attr->stride;
start = attrib_data_addr + min_element * stride;
update_memory_buffer(d, start, num_elements * stride);
}
uint32_t provoking_element_index = provoking_element - min_element;
size_t element_size = attr->size * attr->count;
assert(element_size <= sizeof(attr->inline_value));
const uint8_t *last_entry;
if (inline_data) {
last_entry =
(uint8_t *)pg->inline_array + attr->inline_array_offset;
} else {
last_entry = d->vram_ptr + start;
}
if (!stride) {
// Stride of 0 indicates that only the first element should be
// used.
pg->uniform_attrs |= 1 << i;
pgraph_update_inline_value(attr, last_entry);
NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}",
attr->inline_value[0], attr->inline_value[1],
attr->inline_value[2], attr->inline_value[3]);
NV2A_VK_DGROUP_END();
continue;
}
NV2A_VK_DPRINTF("offset = %08" HWADDR_PRIx, attrib_data_addr);
last_entry += stride * provoking_element_index;
pgraph_update_inline_value(attr, last_entry);
r->vertex_attribute_to_description_location[i] =
r->num_active_vertex_binding_descriptions;
r->vertex_binding_descriptions
[r->num_active_vertex_binding_descriptions++] =
(VkVertexInputBindingDescription){
.binding = r->vertex_attribute_to_description_location[i],
.stride = stride,
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
};
r->vertex_attribute_descriptions
[r->num_active_vertex_attribute_descriptions++] =
(VkVertexInputAttributeDescription){
.binding = r->vertex_attribute_to_description_location[i],
.location = i,
.format = vk_format,
};
r->vertex_attribute_offsets[i] = attrib_data_addr;
NV2A_VK_DGROUP_END();
}
NV2A_VK_DGROUP_END();
}
void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHVkState *r = pg->vk_renderer_state;
pg->compressed_attrs = 0;
pg->uniform_attrs = 0;
pg->swizzle_attrs = 0;
r->num_active_vertex_attribute_descriptions = 0;
r->num_active_vertex_binding_descriptions = 0;
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
VertexAttribute *attr = &pg->vertex_attributes[i];
if (attr->inline_buffer_populated) {
r->vertex_attribute_to_description_location[i] =
r->num_active_vertex_binding_descriptions;
r->vertex_binding_descriptions
[r->num_active_vertex_binding_descriptions++] =
(VkVertexInputBindingDescription){
.binding =
r->vertex_attribute_to_description_location[i],
.stride = 4 * sizeof(float),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
};
r->vertex_attribute_descriptions
[r->num_active_vertex_attribute_descriptions++] =
(VkVertexInputAttributeDescription){
.binding =
r->vertex_attribute_to_description_location[i],
.location = i,
.format = VK_FORMAT_R32G32B32A32_SFLOAT,
};
memcpy(attr->inline_value,
attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
sizeof(attr->inline_value));
} else {
r->vertex_attribute_to_description_location[i] = -1;
pg->uniform_attrs |= 1 << i;
}
}
}

View File

@ -21,7 +21,7 @@
#define HW_NV2A_VSH_H
#include <stdbool.h>
#include "shaders_common.h"
#include "qemu/mstring.h"
enum VshLight {
LIGHT_OFF,
@ -130,11 +130,4 @@ typedef enum {
uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name);
void vsh_translate(uint16_t version,
const uint32_t *tokens,
unsigned int length,
bool z_perspective,
MString *header, MString *body);
#endif

File diff suppressed because it is too large

View File

@ -1,125 +0,0 @@
/*
* QEMU Geforce NV2A shader common definitions
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_NV2A_SHADERS_COMMON_H
#define HW_NV2A_SHADERS_COMMON_H
#include "debug.h"
#define DEF_VERTEX_DATA(qualifier, in_out, prefix, suffix) \
"noperspective " in_out " float " prefix "vtx_inv_w" suffix ";\n" \
"flat " in_out " float " prefix "vtx_inv_w_flat" suffix ";\n" \
qualifier " " in_out " vec4 " prefix "vtxD0" suffix ";\n" \
qualifier " " in_out " vec4 " prefix "vtxD1" suffix ";\n" \
qualifier " " in_out " vec4 " prefix "vtxB0" suffix ";\n" \
qualifier " " in_out " vec4 " prefix "vtxB1" suffix ";\n" \
"noperspective " in_out " float " prefix "vtxFog" suffix ";\n" \
"noperspective " in_out " vec4 " prefix "vtxT0" suffix ";\n" \
"noperspective " in_out " vec4 " prefix "vtxT1" suffix ";\n" \
"noperspective " in_out " vec4 " prefix "vtxT2" suffix ";\n" \
"noperspective " in_out " vec4 " prefix "vtxT3" suffix ";\n"
#define STRUCT_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "", "")
#define STRUCT_VERTEX_DATA_IN_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "", "")
#define STRUCT_V_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "v_", "")
#define STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "v_", "[]")
#define STRUCT_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "", "")
#define STRUCT_VERTEX_DATA_IN_FLAT DEF_VERTEX_DATA("flat", "in", "", "")
#define STRUCT_V_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "v_", "")
#define STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT DEF_VERTEX_DATA("flat", "in", "v_", "[]")
typedef struct {
int ref;
gchar *string;
} MString;
void mstring_append_fmt(MString *mstring, const char *fmt, ...);
MString *mstring_from_fmt(const char *fmt, ...);
void mstring_append_va(MString *mstring, const char *fmt, va_list va);
static inline
void mstring_ref(MString *mstr)
{
mstr->ref++;
}
static inline
void mstring_unref(MString *mstr)
{
mstr->ref--;
if (!mstr->ref) {
g_free(mstr->string);
g_free(mstr);
}
}
static inline
void mstring_append(MString *mstr, const char *str)
{
gchar *n = g_strconcat(mstr->string, str, NULL);
g_free(mstr->string);
mstr->string = n;
}
static inline
void mstring_append_chr(MString *mstr, char chr)
{
mstring_append_fmt(mstr, "%c", chr);
}
static inline
void mstring_append_int(MString *mstr, int val)
{
mstring_append_fmt(mstr, "%" PRId64, val);
}
static inline
MString *mstring_new(void)
{
MString *mstr = g_malloc(sizeof(MString));
mstr->ref = 1;
mstr->string = g_strdup("");
return mstr;
}
static inline
MString *mstring_from_str(const char *str)
{
MString *mstr = g_malloc(sizeof(MString));
mstr->ref = 1;
mstr->string = g_strdup(str);
return mstr;
}
static inline
const gchar *mstring_get_str(MString *mstr)
{
return mstr->string;
}
static inline
size_t mstring_get_length(MString *mstr)
{
return strlen(mstr->string);
}
#endif

View File

@ -1,7 +1,7 @@
/*
* LRU object list
*
* Copyright (c) 2021 Matt Borgerson
* Copyright (c) 2021-2024 Matt Borgerson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@ -42,6 +42,8 @@ typedef struct Lru Lru;
struct Lru {
QTAILQ_HEAD(, LruNode) global;
QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS];
int num_used;
int num_free;
/* Initialize a node. */
void (*init_node)(Lru *lru, LruNode *node, void *key);
@ -67,6 +69,8 @@ void lru_init(Lru *lru)
lru->compare_nodes = NULL;
lru->pre_node_evict = NULL;
lru->post_node_evict = NULL;
lru->num_free = 0;
lru->num_used = 0;
}
static inline
@ -74,6 +78,7 @@ void lru_add_free(Lru *lru, LruNode *node)
{
node->next_bin.tqe_circ.tql_prev = NULL;
QTAILQ_INSERT_TAIL(&lru->global, node, next_global);
lru->num_free += 1;
}
static inline
@ -106,29 +111,51 @@ void lru_evict_node(Lru *lru, LruNode *node)
if (lru->post_node_evict) {
lru->post_node_evict(lru, node);
}
lru->num_used -= 1;
lru->num_free += 1;
}
static inline
LruNode *lru_try_evict_one(Lru *lru)
{
LruNode *found;
QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
if (lru_is_node_in_use(lru, found)
&& (!lru->pre_node_evict || lru->pre_node_evict(lru, found))) {
lru_evict_node(lru, found);
return found;
}
}
return NULL;
}
static inline
LruNode *lru_evict_one(Lru *lru)
{
LruNode *found;
QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
bool can_evict = true;
if (lru_is_node_in_use(lru, found) && lru->pre_node_evict) {
can_evict = lru->pre_node_evict(lru, found);
}
if (can_evict) {
break;
}
}
LruNode *found = lru_try_evict_one(lru);
assert(found != NULL); /* No evictable node! */
lru_evict_node(lru, found);
return found;
}
static inline
LruNode *lru_get_one_free(Lru *lru)
{
LruNode *found;
QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
if (!lru_is_node_in_use(lru, found)) {
return found;
}
}
return lru_evict_one(lru);
}
static inline
bool lru_contains_hash(Lru *lru, uint64_t hash)
{
@ -160,12 +187,15 @@ LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key)
if (found) {
QTAILQ_REMOVE(&lru->bins[bin], found, next_bin);
} else {
found = lru_evict_one(lru);
found = lru_get_one_free(lru);
found->hash = hash;
if (lru->init_node) {
lru->init_node(lru, found, key);
}
assert(found->hash == hash);
lru->num_used += 1;
lru->num_free -= 1;
}
QTAILQ_REMOVE(&lru->global, found, next_global);
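For context on how the new counters and lru_get_one_free() fit in: callers embed LruNode in their cache entry, seed the free list with lru_add_free(), and lru_lookup() either finds an entry by hash or recycles one. A hedged sketch under those assumptions (the entry layout, header path, and pool size are invented):

#include "qemu/osdep.h" /* for container_of */
#include "qemu/lru.h"   /* assumed include path for the header above */

typedef struct {
    LruNode node; /* embedded so the intrusive lists can link it */
    uint64_t key;
    /* ... cached payload ... */
} CacheEntry;

static void cache_init_node(Lru *lru, LruNode *node, void *key)
{
    CacheEntry *e = container_of(node, CacheEntry, node);
    e->key = *(uint64_t *)key;
}

static void cache_setup(Lru *lru, CacheEntry *pool, size_t n)
{
    lru_init(lru);
    lru->init_node = cache_init_node;
    for (size_t i = 0; i < n; i++) {
        lru_add_free(lru, &pool[i].node); /* num_free grows to n */
    }
}

/* Returns the cached entry for hash, recycling the least recently
 * used node via lru_get_one_free() on a miss. */
static CacheEntry *cache_get(Lru *lru, uint64_t hash)
{
    return container_of(lru_lookup(lru, hash, &hash), CacheEntry, node);
}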

82
include/qemu/mstring.h Normal file
View File

@ -0,0 +1,82 @@
#ifndef MSTRING_H
#define MSTRING_H
#include "qemu/osdep.h"
#include <string.h>
typedef struct {
int ref;
gchar *string;
} MString;
void mstring_append_fmt(MString *mstring, const char *fmt, ...);
MString *mstring_from_fmt(const char *fmt, ...);
void mstring_append_va(MString *mstring, const char *fmt, va_list va);
static inline
void mstring_ref(MString *mstr)
{
mstr->ref++;
}
static inline
void mstring_unref(MString *mstr)
{
mstr->ref--;
if (!mstr->ref) {
g_free(mstr->string);
g_free(mstr);
}
}
static inline
void mstring_append(MString *mstr, const char *str)
{
gchar *n = g_strconcat(mstr->string, str, NULL);
g_free(mstr->string);
mstr->string = n;
}
static inline
void mstring_append_chr(MString *mstr, char chr)
{
mstring_append_fmt(mstr, "%c", chr);
}
static inline
void mstring_append_int(MString *mstr, int val)
{
mstring_append_fmt(mstr, "%" PRId64, val);
}
static inline
MString *mstring_new(void)
{
MString *mstr = g_malloc(sizeof(MString));
mstr->ref = 1;
mstr->string = g_strdup("");
return mstr;
}
static inline
MString *mstring_from_str(const char *str)
{
MString *mstr = g_malloc(sizeof(MString));
mstr->ref = 1;
mstr->string = g_strdup(str);
return mstr;
}
static inline
const gchar *mstring_get_str(MString *mstr)
{
return mstr->string;
}
static inline
size_t mstring_get_length(MString *mstr)
{
return strlen(mstr->string);
}
#endif
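Usage of this refcounted string builder follows the usual create/append/read/unref shape; a minimal sketch:

#include <stdio.h>
#include "qemu/mstring.h"

static void emit_prologue(void)
{
    MString *s = mstring_new();
    mstring_append(s, "#version 450\n");
    mstring_append_fmt(s, "layout(local_size_x = %d) in;\n", 256);
    /* mstring_get_str() borrows the buffer; it stays valid until the
     * final mstring_unref() drops the refcount to zero. */
    puts(mstring_get_str(s));
    mstring_unref(s);
}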

View File

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -0,0 +1,19 @@
Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

19
licenses/volk.license.txt Normal file
View File

@ -0,0 +1,19 @@
Copyright (c) 2018-2024 Arseny Kapoulkine
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1180,6 +1180,34 @@ if not get_option('opengl').auto() or have_system or have_vhost_user_gpu
link_args: config_host['EPOXY_LIBS'].split() + opengl_libs)
endif
vulkan = not_found
if targetos == 'windows'
vulkan = declare_dependency(
compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR', '-DVK_NO_PROTOTYPES'],
)
libglslang = declare_dependency(link_args: [
'-lglslang',
'-lMachineIndependent',
'-lGenericCodeGen',
'-lSPIRV',
'-lSPIRV-Tools',
'-lSPIRV-Tools-opt'
])
elif targetos == 'linux'
vulkan = dependency('vulkan')
libglslang = declare_dependency(link_args: [
'-lglslang',
'-lMachineIndependent',
'-lGenericCodeGen',
'-lSPIRV',
'-lSPIRV-Tools',
'-lSPIRV-Tools-opt'
])
endif
subdir('thirdparty')
gbm = not_found
if (have_system or have_tools) and (virgl.found() or opengl.found())
gbm = dependency('gbm', method: 'pkg-config', required: false,
@ -1931,6 +1959,7 @@ config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_VULKAN', vulkan.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
config_host_data.set('CONFIG_RDMA', rdma.found())
@ -4054,6 +4083,7 @@ summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
summary_info += {'usb net redir': usbredir}
summary_info += {'OpenGL support (epoxy)': opengl}
summary_info += {'Vulkan support': vulkan}
summary_info += {'GBM': gbm}
summary_info += {'libiscsi support': libiscsi}
summary_info += {'libnfs support': libnfs}
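Since meson records the probe result via config_host_data.set('CONFIG_VULKAN', vulkan.found()), C code can gate the Vulkan path at compile time. A small sketch of the pattern (the helper is invented for illustration):

#include "qemu/osdep.h" /* pulls in the generated config-host defines */

/* Invented helper: pick a renderer name based on build-time support. */
const char *default_renderer_name(void)
{
#ifdef CONFIG_VULKAN
    return "VULKAN";
#else
    return "OPENGL";
#endif
}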

View File

@ -28,8 +28,12 @@ sub_file="${sub_tdir}/submodule.tar"
# different to the host OS.
submodules="dtc meson ui/keycodemapdb"
submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3"
submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" # xemu extras
# xemu extras
submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig"
submodules="$submodules hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"
submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect"
sub_deinit=""
function cleanup() {

View File

@ -228,7 +228,25 @@ Lib('fpng', 'https://github.com/richgel999/fpng',
Lib('nv2a_vsh_cpu', 'https://github.com/abaire/nv2a_vsh_cpu',
unlicense, 'https://raw.githubusercontent.com/abaire/nv2a_vsh_cpu/main/LICENSE',
ships_static=all_platforms,
submodule=Submodule('hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu')
submodule=Submodule('hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu')
),
Lib('volk', 'https://github.com/zeux/volk',
mit, 'https://raw.githubusercontent.com/zeux/volk/master/LICENSE.md',
ships_static=all_platforms,
submodule=Submodule('thirdparty/volk')
),
Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator',
mit, 'https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/master/LICENSE.txt',
ships_static=all_platforms,
submodule=Submodule('thirdparty/VulkanMemoryAllocator')
),
Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect',
apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Reflect/main/LICENSE',
ships_static=all_platforms,
submodule=Submodule('thirdparty/SPIRV-Reflect')
),
#
@ -344,6 +362,17 @@ Lib('miniz', 'https://github.com/richgel999/miniz',
ships_static={windows}, platform={windows},
version='2.1.0'
),
Lib('glslang', 'https://github.com/KhronosGroup/glslang',
bsd_3clause, 'https://raw.githubusercontent.com/KhronosGroup/glslang/main/LICENSE.txt',
ships_static={windows}, platform={windows},
),
Lib('SPIRV-Tools', 'https://github.com/KhronosGroup/SPIRV-Tools',
apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Tools/main/LICENSE',
ships_static={windows}, platform={windows},
),
]
def gen_license():

1
thirdparty/SPIRV-Reflect vendored Submodule

@ -0,0 +1 @@
Subproject commit 1d674a82d7e102ed0c02e64e036827db9e8b1a71

Some files were not shown because too many files have changed in this diff