mirror of https://github.com/xemu-project/xemu.git

nv2a: Add Vulkan renderer

commit a5385803db
parent e639e0cdb7
@@ -71,8 +71,8 @@ IndentWidth: 4
AccessModifierOffset: -4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ?
MacroBlockEnd: '.*_END$'
#MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ?
#MacroBlockEnd: '.*_END$'
MaxEmptyLinesToKeep: 2
#PenaltyBreakBeforeFirstCallParameter: 19
#PenaltyBreakComment: 300
@@ -82,9 +82,18 @@
[submodule "tomlplusplus"]
path = tomlplusplus
url = https://github.com/marzer/tomlplusplus
[submodule "hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"]
path = hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu
[submodule "hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu"]
path = hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu
url = https://github.com/abaire/nv2a_vsh_cpu.git
[submodule "ui/thirdparty/httplib"]
path = ui/thirdparty/httplib
url = https://github.com/yhirose/cpp-httplib
[submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"]
path = thirdparty/VulkanMemoryAllocator
url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator
[submodule "thirdparty/volk"]
path = thirdparty/volk
url = https://github.com/zeux/volk
[submodule "thirdparty/SPIRV-Reflect"]
path = thirdparty/SPIRV-Reflect
url = https://github.com/KhronosGroup/SPIRV-Reflect
@@ -130,6 +130,12 @@ input:
      default: 18 # w

display:
  renderer:
    type: enum
    values: ["NULL", OPENGL, VULKAN]
    default: OPENGL
  vulkan:
    validation_layers: bool
  quality:
    surface_scale:
      type: integer
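The spec above introduces a display.renderer choice (NULL, OPENGL, VULKAN) and a display.vulkan.validation_layers switch. For illustration only, here is a stand-in C view of the resulting settings; the actual types and names generated by genconfig from this spec are not shown in this diff, so everything below is an assumption.

#include <stdbool.h>

/* Stand-in names only -- the real generated config symbols are not part of
 * the hunks on this page. */
enum ExampleRenderer {
    EXAMPLE_RENDERER_NULL,    /* no rendering backend */
    EXAMPLE_RENDERER_OPENGL,  /* existing OpenGL backend (default) */
    EXAMPLE_RENDERER_VULKAN,  /* new Vulkan backend added by this commit */
};

struct ExampleDisplayConfig {
    enum ExampleRenderer renderer;    /* maps to the renderer entry above */
    bool vulkan_validation_layers;    /* maps to vulkan.validation_layers */
    int surface_scale;                /* maps to quality.surface_scale */
};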
@@ -237,7 +237,7 @@ else
  git_submodules_action="ignore"
fi

git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"
git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect"
git="git"

# Don't accept a target_list environment variable.
@@ -16,6 +16,9 @@ Build-Depends: debhelper (>= 11),
 libssl-dev,
 libpcap-dev,
 libslirp-dev,
 glslang-dev,
 libvulkan-dev,

Standards-Version: 3.9.8
Homepage: https://xemu.app
XS-Debian-Vcs-Browser: https://github.com/mborgerson/xemu
@@ -1,8 +1,9 @@
/*
 * QEMU Geforce NV2A debug helpers
 * QEMU Geforce NV2A profiling and debug helpers
 *
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2012 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2018-2023 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
@@ -18,8 +19,8 @@
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_NV2A_DEBUG_H
#define HW_NV2A_DEBUG_H
#ifndef HW_XBOX_NV2A_DEBUG_H
#define HW_XBOX_NV2A_DEBUG_H

#include <stdint.h>
@@ -36,54 +37,6 @@
# define NV2A_DPRINTF(format, ...) do { } while (0)
#endif

// #define DEBUG_NV2A_GL
#ifdef DEBUG_NV2A_GL

#include <stdbool.h>
#include "gl/gloffscreen.h"
#include "config-host.h"

void gl_debug_initialize(void);
void gl_debug_message(bool cc, const char *fmt, ...);
void gl_debug_group_begin(const char *fmt, ...);
void gl_debug_group_end(void);
void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
void gl_debug_frame_terminator(void);

# define NV2A_GL_DPRINTF(cc, format, ...) \
    gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__)
# define NV2A_GL_DGROUP_BEGIN(format, ...) \
    gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__)
# define NV2A_GL_DGROUP_END() \
    gl_debug_group_end()
# define NV2A_GL_DLABEL(target, name, format, ...) \
    gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__)
#define NV2A_GL_DFRAME_TERMINATOR() \
    gl_debug_frame_terminator()

#ifdef __cplusplus
extern "C" {
#endif

#ifdef CONFIG_RENDERDOC
bool nv2a_dbg_renderdoc_available(void);
void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
#endif

#ifdef __cplusplus
}
#endif

#else
# define NV2A_GL_DPRINTF(cc, format, ...) do { \
    if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \
    } while (0)
# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0)
# define NV2A_GL_DGROUP_END() do { } while (0)
# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0)
# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0)
#endif

/* Debug prints to identify when unimplemented or unconfirmed features
 * are being exercised. These cases likely result in graphical problems of
 * varying degree, but should otherwise not crash the system. Enable this
@@ -111,6 +64,22 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
#endif

#define NV2A_PROF_COUNTERS_XMAC \
    _X(NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY) \
    _X(NV2A_PROF_FINISH_SURFACE_CREATE) \
    _X(NV2A_PROF_FINISH_SURFACE_DOWN) \
    _X(NV2A_PROF_FINISH_NEED_BUFFER_SPACE) \
    _X(NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY) \
    _X(NV2A_PROF_FINISH_PRESENTING) \
    _X(NV2A_PROF_FINISH_FLIP_STALL) \
    _X(NV2A_PROF_FINISH_FLUSH) \
    _X(NV2A_PROF_CLEAR) \
    _X(NV2A_PROF_QUEUE_SUBMIT) \
    _X(NV2A_PROF_QUEUE_SUBMIT_AUX) \
    _X(NV2A_PROF_PIPELINE_NOTDIRTY) \
    _X(NV2A_PROF_PIPELINE_GEN) \
    _X(NV2A_PROF_PIPELINE_BIND) \
    _X(NV2A_PROF_PIPELINE_MERGE) \
    _X(NV2A_PROF_PIPELINE_RENDERPASSES) \
    _X(NV2A_PROF_BEGIN_ENDS) \
    _X(NV2A_PROF_DRAW_ARRAYS) \
    _X(NV2A_PROF_INLINE_BUFFERS) \
@@ -120,18 +89,26 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames);
    _X(NV2A_PROF_SHADER_GEN) \
    _X(NV2A_PROF_SHADER_BIND) \
    _X(NV2A_PROF_SHADER_BIND_NOTDIRTY) \
    _X(NV2A_PROF_SHADER_UBO_DIRTY) \
    _X(NV2A_PROF_SHADER_UBO_NOTDIRTY) \
    _X(NV2A_PROF_ATTR_BIND) \
    _X(NV2A_PROF_TEX_UPLOAD) \
    _X(NV2A_PROF_TEX_BIND) \
    _X(NV2A_PROF_GEOM_BUFFER_UPDATE_1) \
    _X(NV2A_PROF_GEOM_BUFFER_UPDATE_2) \
    _X(NV2A_PROF_GEOM_BUFFER_UPDATE_3) \
    _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4) \
    _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY) \
    _X(NV2A_PROF_SURF_SWIZZLE) \
    _X(NV2A_PROF_SURF_CREATE) \
    _X(NV2A_PROF_SURF_DOWNLOAD) \
    _X(NV2A_PROF_SURF_UPLOAD) \
    _X(NV2A_PROF_SURF_TO_TEX) \
    _X(NV2A_PROF_SURF_TO_TEX_FALLBACK) \
    _X(NV2A_PROF_QUEUE_SUBMIT_1) \
    _X(NV2A_PROF_QUEUE_SUBMIT_2) \
    _X(NV2A_PROF_QUEUE_SUBMIT_3) \
    _X(NV2A_PROF_QUEUE_SUBMIT_4) \
    _X(NV2A_PROF_QUEUE_SUBMIT_5) \

enum NV2A_PROF_COUNTERS_ENUM {
#define _X(x) x,
|
@@ -161,6 +138,21 @@ extern NV2AStats g_nv2a_stats;

const char *nv2a_profile_get_counter_name(unsigned int cnt);
int nv2a_profile_get_counter_value(unsigned int cnt);
void nv2a_profile_increment(void);
void nv2a_profile_flip_stall(void);

static inline void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt)
{
    g_nv2a_stats.frame_working.counters[cnt] += 1;
}

#ifdef CONFIG_RENDERDOC
void nv2a_dbg_renderdoc_init(void);
void *nv2a_dbg_renderdoc_get_api(void);
bool nv2a_dbg_renderdoc_available(void);
void nv2a_dbg_renderdoc_capture_frames(int num_frames);
extern int renderdoc_capture_frames;
#endif

#ifdef __cplusplus
}
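NV2A_PROF_COUNTERS_XMAC above is an X-macro list: it is expanded once into the counter enum and once into a string table used by nv2a_profile_get_counter_name(). The expansion sites themselves are not all visible in these hunks, so here is a minimal, self-contained illustration of the same pattern with made-up counter names:

/* Illustration of the X-macro technique used by NV2A_PROF_COUNTERS_XMAC;
 * the counter names here are invented for the example. */
#define EXAMPLE_COUNTERS_XMAC \
    _X(EXAMPLE_PROF_DRAWS)    \
    _X(EXAMPLE_PROF_CLEARS)

enum ExampleCounters {
#define _X(x) x,
    EXAMPLE_COUNTERS_XMAC
#undef _X
    EXAMPLE_PROF__COUNT,
};

/* Same list expanded into human-readable names, one per enum value. */
static const char *example_counter_names[] = {
#define _X(x) #x,
    EXAMPLE_COUNTERS_XMAC
#undef _X
};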
@@ -1,6 +0,0 @@
softmmu_ss.add([sdl, files(
    'gloffscreen_common.c',
    'gloffscreen_sdl.c',
)])

# gloffscreen_sdl.o-cflags := $(SDL_CFLAGS)
@@ -1,27 +1,17 @@
specific_ss.add(files(
    'nv2a.c',
    'debug.c',
    'pbus.c',
    'pcrtc.c',
    'pfb.c',
    'pfifo.c',
    'pgraph.c',
    'pmc.c',
    'pramdac.c',
    'prmcio.c',
    'prmdio.c',
    'prmvio.c',
    'psh.c',
    'ptimer.c',
    'pvideo.c',
    'shaders.c',
    'stubs.c',
    'user.c',
    'vsh.c',
    'swizzle.c',
    's3tc.c',
))
subdir('gl')

subdir('thirdparty')
specific_ss.add(nv2a_vsh_cpu)
subdir('pgraph')
@@ -172,6 +172,16 @@ static void nv2a_get_offsets(VGACommonState *s,
    *pline_compare = line_compare;
}

const uint8_t *nv2a_get_dac_palette(void)
{
    return g_nv2a->puserdac.palette;
}

int nv2a_get_screen_off(void)
{
    return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF;
}

static void nv2a_vga_gfx_update(void *opaque)
{
    VGACommonState *vga = opaque;
@@ -277,7 +287,7 @@ static void nv2a_reset(NV2AState *d)
    }

    memset(d->pfifo.regs, 0, sizeof(d->pfifo.regs));
    memset(d->pgraph.regs, 0, sizeof(d->pgraph.regs));
    memset(d->pgraph.regs_, 0, sizeof(d->pgraph.regs_));
    memset(d->pvideo.regs, 0, sizeof(d->pvideo.regs));

    d->pcrtc.start = 0;
@@ -365,11 +375,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state)
    if (state == RUN_STATE_SAVE_VM) {
        nv2a_lock_fifo(d);
        qatomic_set(&d->pfifo.halt, true);
        qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true);
        qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete);
        d->pgraph.renderer->ops.pre_savevm_trigger(d);
        nv2a_unlock_fifo(d);
        qemu_mutex_unlock_iothread();
        qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete);
        d->pgraph.renderer->ops.pre_savevm_wait(d);
        qemu_mutex_lock_iothread();
        nv2a_lock_fifo(d);
    } else if (state == RUN_STATE_RESTORE_VM) {
@@ -382,11 +391,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state)
        nv2a_unlock_fifo(d);
    } else if (state == RUN_STATE_SHUTDOWN) {
        nv2a_lock_fifo(d);
        qatomic_set(&d->pgraph.shader_cache_writeback_pending, true);
        qemu_event_reset(&d->pgraph.shader_cache_writeback_complete);
        d->pgraph.renderer->ops.pre_shutdown_trigger(d);
        nv2a_unlock_fifo(d);
        qemu_mutex_unlock_iothread();
        qemu_event_wait(&d->pgraph.shader_cache_writeback_complete);
        d->pgraph.renderer->ops.pre_shutdown_wait(d);
        qemu_mutex_lock_iothread();
    }
}
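The two hunks above replace direct pgraph_* calls with callbacks on d->pgraph.renderer->ops, which is what lets the OpenGL and Vulkan backends coexist behind one interface. The PGRAPHRenderer type itself is defined in a pgraph header that is not part of the hunks shown on this page, so the following is only a sketch of its likely shape; everything prefixed with Example is an assumption, not the actual definition.

/* Hypothetical sketch of the renderer-ops indirection used above. */
typedef struct ExamplePGRAPHRendererOps {
    void (*pre_savevm_trigger)(struct NV2AState *d);
    void (*pre_savevm_wait)(struct NV2AState *d);
    void (*pre_shutdown_trigger)(struct NV2AState *d);
    void (*pre_shutdown_wait)(struct NV2AState *d);
    void (*process_pending)(struct NV2AState *d);
    void (*process_pending_reports)(struct NV2AState *d);
} ExamplePGRAPHRendererOps;

typedef struct ExamplePGRAPHRenderer {
    const char *name;              /* e.g. "OpenGL" or "Vulkan" */
    ExamplePGRAPHRendererOps ops;  /* backend-specific callbacks */
} ExamplePGRAPHRenderer;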
@@ -515,9 +523,9 @@ static const VMStateDescription vmstate_nv2a = {
        VMSTATE_UINT32(pgraph.inline_buffer_length, NV2AState), // fixme
        VMSTATE_UINT32(pgraph.draw_arrays_length, NV2AState),
        VMSTATE_UINT32(pgraph.draw_arrays_max_count, NV2AState),
        VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_start, NV2AState, 1250),
        VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_count, NV2AState, 1250),
        VMSTATE_UINT32_ARRAY(pgraph.regs, NV2AState, 0x2000),
        VMSTATE_INT32_ARRAY(pgraph.draw_arrays_start, NV2AState, 1250),
        VMSTATE_INT32_ARRAY(pgraph.draw_arrays_count, NV2AState, 1250),
        VMSTATE_UINT32_ARRAY(pgraph.regs_, NV2AState, 0x2000),
        VMSTATE_UINT32(pmc.pending_interrupts, NV2AState),
        VMSTATE_UINT32(pmc.enabled_interrupts, NV2AState),
        VMSTATE_UINT32(pfifo.pending_interrupts, NV2AState),
@@ -22,7 +22,7 @@
#define HW_NV2A_H

void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram);
void nv2a_gl_context_init(void);
void nv2a_context_init(void);
int nv2a_get_framebuffer_surface(void);
void nv2a_set_surface_scale_factor(unsigned int scale);
unsigned int nv2a_get_surface_scale_factor(void);
@@ -44,25 +44,12 @@
#include "cpu.h"

#include "trace.h"
#include "swizzle.h"
#include "lru.h"
#include "gl/gloffscreen.h"

#include "nv2a.h"
#include "pgraph/pgraph.h"
#include "debug.h"
#include "shaders.h"
#include "nv2a_regs.h"

#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask))

#define SET_MASK(v, mask, val) \
    ({ \
        const unsigned int __val = (val); \
        const unsigned int __mask = (mask); \
        (v) &= ~(__mask); \
        (v) |= ((__val) << ctz32(__mask)) & (__mask); \
    })

#define NV2A_DEVICE(obj) OBJECT_CHECK(NV2AState, (obj), "nv2a")

enum FIFOEngine {
@ -78,347 +65,6 @@ typedef struct DMAObject {
|
|||
hwaddr limit;
|
||||
} DMAObject;
|
||||
|
||||
typedef struct VertexAttribute {
|
||||
bool dma_select;
|
||||
hwaddr offset;
|
||||
|
||||
/* inline arrays are packed in order?
|
||||
* Need to pass the offset to converted attributes */
|
||||
unsigned int inline_array_offset;
|
||||
|
||||
float inline_value[4];
|
||||
|
||||
unsigned int format;
|
||||
unsigned int size; /* size of the data type */
|
||||
unsigned int count; /* number of components */
|
||||
uint32_t stride;
|
||||
|
||||
bool needs_conversion;
|
||||
|
||||
float *inline_buffer;
|
||||
bool inline_buffer_populated;
|
||||
|
||||
GLint gl_count;
|
||||
GLenum gl_type;
|
||||
GLboolean gl_normalize;
|
||||
|
||||
GLuint gl_inline_buffer;
|
||||
} VertexAttribute;
|
||||
|
||||
typedef struct SurfaceFormatInfo {
|
||||
unsigned int bytes_per_pixel;
|
||||
GLint gl_internal_format;
|
||||
GLenum gl_format;
|
||||
GLenum gl_type;
|
||||
GLenum gl_attachment;
|
||||
} SurfaceFormatInfo;
|
||||
|
||||
typedef struct Surface {
|
||||
bool draw_dirty;
|
||||
bool buffer_dirty;
|
||||
bool write_enabled_cache;
|
||||
unsigned int pitch;
|
||||
|
||||
hwaddr offset;
|
||||
} Surface;
|
||||
|
||||
typedef struct SurfaceShape {
|
||||
unsigned int z_format;
|
||||
unsigned int color_format;
|
||||
unsigned int zeta_format;
|
||||
unsigned int log_width, log_height;
|
||||
unsigned int clip_x, clip_y;
|
||||
unsigned int clip_width, clip_height;
|
||||
unsigned int anti_aliasing;
|
||||
} SurfaceShape;
|
||||
|
||||
typedef struct SurfaceBinding {
|
||||
QTAILQ_ENTRY(SurfaceBinding) entry;
|
||||
MemAccessCallback *access_cb;
|
||||
|
||||
hwaddr vram_addr;
|
||||
|
||||
SurfaceFormatInfo fmt;
|
||||
SurfaceShape shape;
|
||||
uintptr_t dma_addr;
|
||||
uintptr_t dma_len;
|
||||
bool color;
|
||||
bool swizzle;
|
||||
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
unsigned int pitch;
|
||||
size_t size;
|
||||
|
||||
GLuint gl_buffer;
|
||||
|
||||
bool cleared;
|
||||
int frame_time;
|
||||
int draw_time;
|
||||
bool draw_dirty;
|
||||
bool download_pending;
|
||||
bool upload_pending;
|
||||
} SurfaceBinding;
|
||||
|
||||
typedef struct TextureShape {
|
||||
bool cubemap;
|
||||
unsigned int dimensionality;
|
||||
unsigned int color_format;
|
||||
unsigned int levels;
|
||||
unsigned int width, height, depth;
|
||||
bool border;
|
||||
|
||||
unsigned int min_mipmap_level, max_mipmap_level;
|
||||
unsigned int pitch;
|
||||
} TextureShape;
|
||||
|
||||
typedef struct TextureBinding {
|
||||
GLenum gl_target;
|
||||
GLuint gl_texture;
|
||||
unsigned int refcnt;
|
||||
int draw_time;
|
||||
uint64_t data_hash;
|
||||
unsigned int scale;
|
||||
unsigned int min_filter;
|
||||
unsigned int mag_filter;
|
||||
unsigned int addru;
|
||||
unsigned int addrv;
|
||||
unsigned int addrp;
|
||||
uint32_t border_color;
|
||||
bool border_color_set;
|
||||
} TextureBinding;
|
||||
|
||||
typedef struct TextureKey {
|
||||
TextureShape state;
|
||||
hwaddr texture_vram_offset;
|
||||
hwaddr texture_length;
|
||||
hwaddr palette_vram_offset;
|
||||
hwaddr palette_length;
|
||||
} TextureKey;
|
||||
|
||||
typedef struct TextureLruNode {
|
||||
LruNode node;
|
||||
TextureKey key;
|
||||
TextureBinding *binding;
|
||||
bool possibly_dirty;
|
||||
} TextureLruNode;
|
||||
|
||||
typedef struct VertexKey {
|
||||
size_t count;
|
||||
GLuint gl_type;
|
||||
GLboolean gl_normalize;
|
||||
size_t stride;
|
||||
hwaddr addr;
|
||||
} VertexKey;
|
||||
|
||||
typedef struct VertexLruNode {
|
||||
LruNode node;
|
||||
VertexKey key;
|
||||
GLuint gl_buffer;
|
||||
bool initialized;
|
||||
} VertexLruNode;
|
||||
|
||||
typedef struct KelvinState {
|
||||
hwaddr object_instance;
|
||||
} KelvinState;
|
||||
|
||||
typedef struct ContextSurfaces2DState {
|
||||
hwaddr object_instance;
|
||||
hwaddr dma_image_source;
|
||||
hwaddr dma_image_dest;
|
||||
unsigned int color_format;
|
||||
unsigned int source_pitch, dest_pitch;
|
||||
hwaddr source_offset, dest_offset;
|
||||
} ContextSurfaces2DState;
|
||||
|
||||
typedef struct ImageBlitState {
|
||||
hwaddr object_instance;
|
||||
hwaddr context_surfaces;
|
||||
unsigned int operation;
|
||||
unsigned int in_x, in_y;
|
||||
unsigned int out_x, out_y;
|
||||
unsigned int width, height;
|
||||
} ImageBlitState;
|
||||
|
||||
typedef struct BetaState {
|
||||
hwaddr object_instance;
|
||||
uint32_t beta;
|
||||
} BetaState;
|
||||
|
||||
typedef struct QueryReport {
|
||||
QSIMPLEQ_ENTRY(QueryReport) entry;
|
||||
bool clear;
|
||||
uint32_t parameter;
|
||||
unsigned int query_count;
|
||||
GLuint *queries;
|
||||
} QueryReport;
|
||||
|
||||
typedef struct PGRAPHState {
|
||||
QemuMutex lock;
|
||||
|
||||
uint32_t pending_interrupts;
|
||||
uint32_t enabled_interrupts;
|
||||
|
||||
int frame_time;
|
||||
int draw_time;
|
||||
|
||||
struct s2t_rndr {
|
||||
GLuint fbo, vao, vbo, prog;
|
||||
GLuint tex_loc, surface_size_loc;
|
||||
} s2t_rndr;
|
||||
|
||||
struct disp_rndr {
|
||||
GLuint fbo, vao, vbo, prog;
|
||||
GLuint display_size_loc;
|
||||
GLuint line_offset_loc;
|
||||
GLuint tex_loc;
|
||||
GLuint pvideo_tex;
|
||||
GLint pvideo_enable_loc;
|
||||
GLint pvideo_tex_loc;
|
||||
GLint pvideo_in_pos_loc;
|
||||
GLint pvideo_pos_loc;
|
||||
GLint pvideo_scale_loc;
|
||||
GLint pvideo_color_key_enable_loc;
|
||||
GLint pvideo_color_key_loc;
|
||||
GLint palette_loc[256];
|
||||
} disp_rndr;
|
||||
|
||||
/* subchannels state we're not sure the location of... */
|
||||
ContextSurfaces2DState context_surfaces_2d;
|
||||
ImageBlitState image_blit;
|
||||
KelvinState kelvin;
|
||||
BetaState beta;
|
||||
|
||||
hwaddr dma_color, dma_zeta;
|
||||
Surface surface_color, surface_zeta;
|
||||
unsigned int surface_type;
|
||||
SurfaceShape surface_shape;
|
||||
SurfaceShape last_surface_shape;
|
||||
QTAILQ_HEAD(, SurfaceBinding) surfaces;
|
||||
SurfaceBinding *color_binding, *zeta_binding;
|
||||
struct {
|
||||
int clip_x;
|
||||
int clip_width;
|
||||
int clip_y;
|
||||
int clip_height;
|
||||
int width;
|
||||
int height;
|
||||
} surface_binding_dim; // FIXME: Refactor
|
||||
|
||||
hwaddr dma_a, dma_b;
|
||||
Lru texture_cache;
|
||||
TextureLruNode *texture_cache_entries;
|
||||
bool texture_dirty[NV2A_MAX_TEXTURES];
|
||||
TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
|
||||
|
||||
Lru shader_cache;
|
||||
ShaderLruNode *shader_cache_entries;
|
||||
ShaderBinding *shader_binding;
|
||||
QemuMutex shader_cache_lock;
|
||||
QemuThread shader_disk_thread;
|
||||
|
||||
bool texture_matrix_enable[NV2A_MAX_TEXTURES];
|
||||
|
||||
GLuint gl_framebuffer;
|
||||
|
||||
GLuint gl_display_buffer;
|
||||
GLint gl_display_buffer_internal_format;
|
||||
GLsizei gl_display_buffer_width;
|
||||
GLsizei gl_display_buffer_height;
|
||||
GLenum gl_display_buffer_format;
|
||||
GLenum gl_display_buffer_type;
|
||||
|
||||
hwaddr dma_state;
|
||||
hwaddr dma_notifies;
|
||||
hwaddr dma_semaphore;
|
||||
|
||||
hwaddr dma_report;
|
||||
hwaddr report_offset;
|
||||
bool zpass_pixel_count_enable;
|
||||
unsigned int zpass_pixel_count_result;
|
||||
unsigned int gl_zpass_pixel_count_query_count;
|
||||
GLuint *gl_zpass_pixel_count_queries;
|
||||
QSIMPLEQ_HEAD(, QueryReport) report_queue;
|
||||
|
||||
hwaddr dma_vertex_a, dma_vertex_b;
|
||||
|
||||
uint32_t primitive_mode;
|
||||
|
||||
bool enable_vertex_program_write;
|
||||
|
||||
uint32_t vertex_state_shader_v0[4];
|
||||
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
|
||||
bool program_data_dirty;
|
||||
|
||||
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
|
||||
bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
|
||||
|
||||
/* lighting constant arrays */
|
||||
uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
|
||||
bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
|
||||
uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
|
||||
bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
|
||||
uint32_t ltc1[NV2A_LTC1_COUNT][4];
|
||||
bool ltc1_dirty[NV2A_LTC1_COUNT];
|
||||
|
||||
float material_alpha;
|
||||
|
||||
// should figure out where these are in lighting context
|
||||
float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
|
||||
float light_infinite_direction[NV2A_MAX_LIGHTS][3];
|
||||
float light_local_position[NV2A_MAX_LIGHTS][3];
|
||||
float light_local_attenuation[NV2A_MAX_LIGHTS][3];
|
||||
|
||||
float point_params[8];
|
||||
|
||||
VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
|
||||
uint16_t compressed_attrs;
|
||||
|
||||
Lru element_cache;
|
||||
VertexLruNode *element_cache_entries;
|
||||
|
||||
unsigned int inline_array_length;
|
||||
uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
|
||||
GLuint gl_inline_array_buffer;
|
||||
|
||||
unsigned int inline_elements_length;
|
||||
uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
|
||||
|
||||
unsigned int inline_buffer_length;
|
||||
|
||||
unsigned int draw_arrays_length;
|
||||
unsigned int draw_arrays_min_start;
|
||||
unsigned int draw_arrays_max_count;
|
||||
/* FIXME: Unknown size, possibly endless, 1250 will do for now */
|
||||
/* Keep in sync with size used in nv2a.c */
|
||||
GLint gl_draw_arrays_start[1250];
|
||||
GLsizei gl_draw_arrays_count[1250];
|
||||
bool draw_arrays_prevent_connect;
|
||||
|
||||
GLuint gl_memory_buffer;
|
||||
GLuint gl_vertex_array;
|
||||
|
||||
uint32_t regs[0x2000];
|
||||
|
||||
bool clearing;
|
||||
bool waiting_for_nop;
|
||||
bool waiting_for_flip;
|
||||
bool waiting_for_context_switch;
|
||||
bool downloads_pending;
|
||||
bool download_dirty_surfaces_pending;
|
||||
bool flush_pending;
|
||||
bool gl_sync_pending;
|
||||
bool shader_cache_writeback_pending;
|
||||
QemuEvent downloads_complete;
|
||||
QemuEvent dirty_surfaces_download_complete;
|
||||
QemuEvent flush_complete;
|
||||
QemuEvent gl_sync_complete;
|
||||
QemuEvent shader_cache_writeback_complete;
|
||||
|
||||
unsigned int surface_scale_factor;
|
||||
uint8_t *scale_buf;
|
||||
} PGRAPHState;
|
||||
|
||||
typedef struct NV2AState {
|
||||
/*< private >*/
|
||||
PCIDevice parent_obj;
|
||||
|
@ -512,9 +158,6 @@ typedef struct NV2ABlockInfo {
|
|||
} NV2ABlockInfo;
|
||||
extern const NV2ABlockInfo blocktable[NV_NUM_BLOCKS];
|
||||
|
||||
extern GloContext *g_nv2a_context_render;
|
||||
extern GloContext *g_nv2a_context_display;
|
||||
|
||||
void nv2a_update_irq(NV2AState *d);
|
||||
|
||||
static inline
|
||||
|
@ -566,20 +209,5 @@ DEFINE_PROTO(user)
|
|||
DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address);
|
||||
void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len);
|
||||
|
||||
void pgraph_init(NV2AState *d);
|
||||
void pgraph_destroy(PGRAPHState *pg);
|
||||
void pgraph_context_switch(NV2AState *d, unsigned int channel_id);
|
||||
int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method,
|
||||
uint32_t parameter, uint32_t *parameters,
|
||||
size_t num_words_available, size_t max_lookahead_words,
|
||||
bool inc);
|
||||
void pgraph_gl_sync(NV2AState *d);
|
||||
void pgraph_process_pending_reports(NV2AState *d);
|
||||
void pgraph_process_pending_downloads(NV2AState *d);
|
||||
void pgraph_download_dirty_surfaces(NV2AState *d);
|
||||
void pgraph_flush(NV2AState *d);
|
||||
|
||||
void *pfifo_thread(void *arg);
|
||||
void pfifo_kick(NV2AState *d);
|
||||
|
||||
#endif
|
||||
|
|
|
@@ -21,6 +21,17 @@
#ifndef HW_NV2A_REGS_H
#define HW_NV2A_REGS_H

#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask))

#define SET_MASK(v, mask, val) \
    ({ \
        const unsigned int __val = (val); \
        const unsigned int __mask = (mask); \
        (v) &= ~(__mask); \
        (v) |= ((__val) << ctz32(__mask)) & (__mask); \
    })

#define NV_NUM_BLOCKS 21
#define NV_PMC 0 /* card master control */
#define NV_PBUS 1 /* bus control */
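For reference, a short usage sketch of the GET_MASK/SET_MASK helpers moved into nv2a_regs.h above; the mask value here is illustrative, not a real NV2A register field:

/* Usage sketch only: 0x0000FF00 is an arbitrary example mask. */
static inline uint32_t example_mask_usage(void)
{
    uint32_t reg = 0;
    SET_MASK(reg, 0x0000FF00, 0x42); /* writes 0x42 into bits 15:8 of reg */
    return GET_MASK(reg, 0x0000FF00); /* reads the field back, yielding 0x42 */
}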
@@ -95,23 +95,25 @@ void pfifo_kick(NV2AState *d)
    qemu_cond_broadcast(&d->pfifo.fifo_cond);
}

static bool pgraph_can_fifo_access(NV2AState *d) {
    return qatomic_read(&d->pgraph.regs[NV_PGRAPH_FIFO]) & NV_PGRAPH_FIFO_ACCESS;
static bool can_fifo_access(NV2AState *d) {
    return qatomic_read(&d->pgraph.regs_[NV_PGRAPH_FIFO]) &
           NV_PGRAPH_FIFO_ACCESS;
}

/* If NV097_FLIP_STALL was executed, check if the flip has completed.
 * This will usually happen in the VSYNC interrupt handler.
 */
static bool pgraph_is_flip_stall_complete(NV2AState *d)
static bool is_flip_stall_complete(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;

    NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n",
        GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D),
        GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D),
        GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D));
    uint32_t s = pgraph_reg_r(pg, NV_PGRAPH_SURFACE);

    NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n",
        GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D),
        GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D),
        GET_MASK(s, NV_PGRAPH_SURFACE_MODULO_3D));

    uint32_t s = pg->regs[NV_PGRAPH_SURFACE];
    if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D)
        != GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) {
        return true;
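The hunk above renames the pgraph.regs array to regs_ and routes reads through pgraph_reg_r(). That accessor is defined in a part of the diff not shown on this page, so the sketch below is only a guess at its shape; it simply mirrors the indexing that the old pg->regs[...] accesses used.

/* Plausible (assumed) shape of the new register accessors. */
static inline uint32_t example_pgraph_reg_r(PGRAPHState *pg, unsigned int reg)
{
    return qatomic_read(&pg->regs_[reg]); /* same index the old pg->regs[...] used */
}

static inline void example_pgraph_reg_w(PGRAPHState *pg, unsigned int reg,
                                        uint32_t val)
{
    qatomic_set(&pg->regs_[reg], val);
}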
@@ -126,7 +128,7 @@ static bool pfifo_stall_for_flip(NV2AState *d)

    if (qatomic_read(&d->pgraph.waiting_for_flip)) {
        qemu_mutex_lock(&d->pgraph.lock);
        if (!pgraph_is_flip_stall_complete(d)) {
        if (!is_flip_stall_complete(d)) {
            should_stall = true;
        } else {
            d->pgraph.waiting_for_flip = false;
@@ -141,7 +143,7 @@ static bool pfifo_puller_should_stall(NV2AState *d)
{
    return pfifo_stall_for_flip(d) || qatomic_read(&d->pgraph.waiting_for_nop) ||
           qatomic_read(&d->pgraph.waiting_for_context_switch) ||
           !pgraph_can_fifo_access(d);
           !can_fifo_access(d);
}

static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
@@ -187,7 +189,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
    qemu_mutex_lock(&d->pgraph.lock);

    // Switch contexts if necessary
    if (pgraph_can_fifo_access(d)) {
    if (can_fifo_access(d)) {
        pgraph_context_switch(d, entry.channel_id);
        if (!d->pgraph.waiting_for_context_switch) {
            num_proc =
@@ -221,7 +223,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,
    qemu_mutex_unlock(&d->pfifo.lock);
    qemu_mutex_lock(&d->pgraph.lock);

    if (pgraph_can_fifo_access(d)) {
    if (can_fifo_access(d)) {
        num_proc =
            pgraph_method(d, subchannel, method, parameter, parameters,
                          num_words_available, max_lookahead_words, inc);
@@ -242,7 +244,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry,

static bool pfifo_pusher_should_stall(NV2AState *d)
{
    return !pgraph_can_fifo_access(d) ||
    return !can_fifo_access(d) ||
           qatomic_read(&d->pgraph.waiting_for_nop);
}

@@ -447,39 +449,11 @@ static void pfifo_run_pusher(NV2AState *d)
    }
}

static void process_requests(NV2AState *d)
{
    if (qatomic_read(&d->pgraph.downloads_pending) ||
        qatomic_read(&d->pgraph.download_dirty_surfaces_pending) ||
        qatomic_read(&d->pgraph.gl_sync_pending) ||
        qatomic_read(&d->pgraph.flush_pending) ||
        qatomic_read(&d->pgraph.shader_cache_writeback_pending)) {
        qemu_mutex_unlock(&d->pfifo.lock);
        qemu_mutex_lock(&d->pgraph.lock);
        if (qatomic_read(&d->pgraph.downloads_pending)) {
            pgraph_process_pending_downloads(d);
        }
        if (qatomic_read(&d->pgraph.download_dirty_surfaces_pending)) {
            pgraph_download_dirty_surfaces(d);
        }
        if (qatomic_read(&d->pgraph.gl_sync_pending)) {
            pgraph_gl_sync(d);
        }
        if (qatomic_read(&d->pgraph.flush_pending)) {
            pgraph_flush(d);
        }
        if (qatomic_read(&d->pgraph.shader_cache_writeback_pending)) {
            shader_write_cache_reload_list(&d->pgraph);
        }
        qemu_mutex_unlock(&d->pgraph.lock);
        qemu_mutex_lock(&d->pfifo.lock);
    }
}

void *pfifo_thread(void *arg)
{
    NV2AState *d = (NV2AState *)arg;
    glo_set_current(g_nv2a_context_render);

    pgraph_init_thread(d);

    rcu_register_thread();

@@ -487,13 +461,13 @@ void *pfifo_thread(void *arg)
    while (true) {
        d->pfifo.fifo_kick = false;

        process_requests(d);
        d->pgraph.renderer->ops.process_pending(d);

        if (!d->pfifo.halt) {
            pfifo_run_pusher(d);
        }

        pgraph_process_pending_reports(d);
        d->pgraph.renderer->ops.process_pending_reports(d);

        if (!d->pfifo.fifo_kick) {
            qemu_cond_broadcast(&d->pfifo.fifo_idle_cond);
File diff suppressed because it is too large
@@ -0,0 +1,84 @@
/*
 * Geforce NV2A PGRAPH Renderdoc Helpers
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#include <stdint.h>
#include <stdbool.h>

#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"

#include "hw/xbox/nv2a/debug.h"

#ifdef _WIN32
#include <libloaderapi.h>
#else
#include <dlfcn.h>
#endif

static RENDERDOC_API_1_6_0 *rdoc_api = NULL;

int renderdoc_capture_frames = 0;

void nv2a_dbg_renderdoc_init(void)
{
    if (rdoc_api) {
        return;
    }

#ifdef _WIN32
    HMODULE renderdoc = GetModuleHandleA("renderdoc.dll");
    if (renderdoc) {
        pRENDERDOC_GetAPI RENDERDOC_GetAPI =
            (pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI");
#else
    void *renderdoc = dlopen(
#ifdef __APPLE__
        "librenderdoc.dylib",
#else
        "librenderdoc.so",
#endif
        RTLD_LAZY);
    if (renderdoc) {
        pRENDERDOC_GetAPI RENDERDOC_GetAPI =
            (pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI");
#endif // _WIN32
        int ret =
            RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api);
        assert(ret == 1 && "Failed to retrieve RenderDoc API.");
    } else {
        fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", dlerror());
    }
}

void *nv2a_dbg_renderdoc_get_api(void)
{
    return (void*)rdoc_api;
}

bool nv2a_dbg_renderdoc_available(void)
{
    return rdoc_api != NULL;
}

void nv2a_dbg_renderdoc_capture_frames(int num_frames)
{
    renderdoc_capture_frames += num_frames;
}
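The new renderdoc.c above only loads the API and accumulates renderdoc_capture_frames; the consumption side lives in the renderer backends (part of it is visible in the gl/debug.c hunk later in this commit). Below is a minimal sketch of how a backend might drive captures at a frame boundary using the standard RenderDoc in-application API; it is illustrative, not a copy of the backend code.

/* Sketch only: mirrors the pattern used by the renderer backends. */
static void example_frame_boundary(void)
{
    RENDERDOC_API_1_6_0 *api = nv2a_dbg_renderdoc_get_api();
    if (!api) {
        return; /* RenderDoc not attached */
    }
    if (api->IsFrameCapturing()) {
        api->EndFrameCapture(NULL, NULL); /* close the capture started last frame */
    }
    if (renderdoc_capture_frames > 0) {
        api->StartFrameCapture(NULL, NULL); /* begin capturing the next frame */
        renderdoc_capture_frames -= 1;
    }
}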
@ -0,0 +1,174 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
#include "renderer.h"
|
||||
|
||||
// TODO: Optimize. Ideally this should all be done via OpenGL.
|
||||
void pgraph_gl_image_blit(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d;
|
||||
ImageBlitState *image_blit = &pg->image_blit;
|
||||
BetaState *beta = &pg->beta;
|
||||
|
||||
pgraph_gl_surface_update(d, false, true, true);
|
||||
|
||||
assert(context_surfaces->object_instance == image_blit->context_surfaces);
|
||||
|
||||
unsigned int bytes_per_pixel;
|
||||
switch (context_surfaces->color_format) {
|
||||
case NV062_SET_COLOR_FORMAT_LE_Y8:
|
||||
bytes_per_pixel = 1;
|
||||
break;
|
||||
case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
|
||||
bytes_per_pixel = 2;
|
||||
break;
|
||||
case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
|
||||
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
|
||||
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
|
||||
case NV062_SET_COLOR_FORMAT_LE_Y32:
|
||||
bytes_per_pixel = 4;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unknown blit surface format: 0x%x\n",
|
||||
context_surfaces->color_format);
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
hwaddr source_dma_len, dest_dma_len;
|
||||
|
||||
uint8_t *source = (uint8_t *)nv_dma_map(
|
||||
d, context_surfaces->dma_image_source, &source_dma_len);
|
||||
assert(context_surfaces->source_offset < source_dma_len);
|
||||
source += context_surfaces->source_offset;
|
||||
|
||||
uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest,
|
||||
&dest_dma_len);
|
||||
assert(context_surfaces->dest_offset < dest_dma_len);
|
||||
dest += context_surfaces->dest_offset;
|
||||
|
||||
hwaddr source_addr = source - d->vram_ptr;
|
||||
hwaddr dest_addr = dest - d->vram_ptr;
|
||||
|
||||
SurfaceBinding *surf_src = pgraph_gl_surface_get(d, source_addr);
|
||||
if (surf_src) {
|
||||
pgraph_gl_surface_download_if_dirty(d, surf_src);
|
||||
}
|
||||
|
||||
SurfaceBinding *surf_dest = pgraph_gl_surface_get(d, dest_addr);
|
||||
if (surf_dest) {
|
||||
if (image_blit->height < surf_dest->height ||
|
||||
image_blit->width < surf_dest->width) {
|
||||
pgraph_gl_surface_download_if_dirty(d, surf_dest);
|
||||
} else {
|
||||
// The blit will completely replace the surface so any pending
|
||||
// download should be discarded.
|
||||
surf_dest->download_pending = false;
|
||||
surf_dest->draw_dirty = false;
|
||||
}
|
||||
surf_dest->upload_pending = true;
|
||||
pg->draw_time++;
|
||||
}
|
||||
|
||||
hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch +
|
||||
image_blit->in_x * bytes_per_pixel;
|
||||
hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch +
|
||||
image_blit->out_x * bytes_per_pixel;
|
||||
|
||||
hwaddr source_size =
|
||||
(image_blit->height - 1) * context_surfaces->source_pitch +
|
||||
image_blit->width * bytes_per_pixel;
|
||||
hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch +
|
||||
image_blit->width * bytes_per_pixel;
|
||||
|
||||
/* FIXME: What does hardware do in this case? */
|
||||
assert(source_addr + source_offset + source_size <=
|
||||
memory_region_size(d->vram));
|
||||
assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram));
|
||||
|
||||
uint8_t *source_row = source + source_offset;
|
||||
uint8_t *dest_row = dest + dest_offset;
|
||||
|
||||
if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
|
||||
// NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY");
|
||||
for (unsigned int y = 0; y < image_blit->height; y++) {
|
||||
memmove(dest_row, source_row, image_blit->width * bytes_per_pixel);
|
||||
source_row += context_surfaces->source_pitch;
|
||||
dest_row += context_surfaces->dest_pitch;
|
||||
}
|
||||
} else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) {
|
||||
// NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND");
|
||||
uint32_t max_beta_mult = 0x7f80;
|
||||
uint32_t beta_mult = beta->beta >> 16;
|
||||
uint32_t inv_beta_mult = max_beta_mult - beta_mult;
|
||||
for (unsigned int y = 0; y < image_blit->height; y++) {
|
||||
for (unsigned int x = 0; x < image_blit->width; x++) {
|
||||
for (unsigned int ch = 0; ch < 3; ch++) {
|
||||
uint32_t a = source_row[x * 4 + ch] * beta_mult;
|
||||
uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult;
|
||||
dest_row[x * 4 + ch] = (a + b) / max_beta_mult;
|
||||
}
|
||||
}
|
||||
source_row += context_surfaces->source_pitch;
|
||||
dest_row += context_surfaces->dest_pitch;
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Unknown blit operation: 0x%x\n",
|
||||
image_blit->operation);
|
||||
assert(false && "Unknown blit operation");
|
||||
}
|
||||
|
||||
NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr);
|
||||
|
||||
bool needs_alpha_patching;
|
||||
uint8_t alpha_override;
|
||||
switch (context_surfaces->color_format) {
|
||||
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
|
||||
needs_alpha_patching = true;
|
||||
alpha_override = 0xff;
|
||||
break;
|
||||
case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
|
||||
needs_alpha_patching = true;
|
||||
alpha_override = 0;
|
||||
break;
|
||||
default:
|
||||
needs_alpha_patching = false;
|
||||
alpha_override = 0;
|
||||
}
|
||||
|
||||
if (needs_alpha_patching) {
|
||||
dest_row = dest + dest_offset;
|
||||
for (unsigned int y = 0; y < image_blit->height; y++) {
|
||||
for (unsigned int x = 0; x < image_blit->width; x++) {
|
||||
dest_row[x * 4 + 3] = alpha_override;
|
||||
}
|
||||
dest_row += context_surfaces->dest_pitch;
|
||||
}
|
||||
}
|
||||
|
||||
dest_addr += dest_offset;
|
||||
memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
|
||||
DIRTY_MEMORY_VGA);
|
||||
memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
|
||||
DIRTY_MEMORY_NV2A_TEX);
|
||||
}
|
|
@ -0,0 +1,322 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "hw/xbox/nv2a/nv2a_regs.h"
|
||||
#include "gloffscreen.h"
|
||||
|
||||
static const GLenum pgraph_texture_min_filter_gl_map[] = {
|
||||
0,
|
||||
GL_NEAREST,
|
||||
GL_LINEAR,
|
||||
GL_NEAREST_MIPMAP_NEAREST,
|
||||
GL_LINEAR_MIPMAP_NEAREST,
|
||||
GL_NEAREST_MIPMAP_LINEAR,
|
||||
GL_LINEAR_MIPMAP_LINEAR,
|
||||
GL_LINEAR,
|
||||
};
|
||||
|
||||
static const GLenum pgraph_texture_mag_filter_gl_map[] = {
|
||||
0,
|
||||
GL_NEAREST,
|
||||
GL_LINEAR,
|
||||
0,
|
||||
GL_LINEAR /* TODO: Convolution filter... */
|
||||
};
|
||||
|
||||
static const GLenum pgraph_texture_addr_gl_map[] = {
|
||||
0,
|
||||
GL_REPEAT,
|
||||
GL_MIRRORED_REPEAT,
|
||||
GL_CLAMP_TO_EDGE,
|
||||
GL_CLAMP_TO_BORDER,
|
||||
GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */
|
||||
};
|
||||
|
||||
static const GLenum pgraph_blend_factor_gl_map[] = {
|
||||
GL_ZERO,
|
||||
GL_ONE,
|
||||
GL_SRC_COLOR,
|
||||
GL_ONE_MINUS_SRC_COLOR,
|
||||
GL_SRC_ALPHA,
|
||||
GL_ONE_MINUS_SRC_ALPHA,
|
||||
GL_DST_ALPHA,
|
||||
GL_ONE_MINUS_DST_ALPHA,
|
||||
GL_DST_COLOR,
|
||||
GL_ONE_MINUS_DST_COLOR,
|
||||
GL_SRC_ALPHA_SATURATE,
|
||||
0,
|
||||
GL_CONSTANT_COLOR,
|
||||
GL_ONE_MINUS_CONSTANT_COLOR,
|
||||
GL_CONSTANT_ALPHA,
|
||||
GL_ONE_MINUS_CONSTANT_ALPHA,
|
||||
};
|
||||
|
||||
static const GLenum pgraph_blend_equation_gl_map[] = {
|
||||
GL_FUNC_SUBTRACT,
|
||||
GL_FUNC_REVERSE_SUBTRACT,
|
||||
GL_FUNC_ADD,
|
||||
GL_MIN,
|
||||
GL_MAX,
|
||||
GL_FUNC_REVERSE_SUBTRACT,
|
||||
GL_FUNC_ADD,
|
||||
};
|
||||
|
||||
/* FIXME
|
||||
static const GLenum pgraph_blend_logicop_map[] = {
|
||||
GL_CLEAR,
|
||||
GL_AND,
|
||||
GL_AND_REVERSE,
|
||||
GL_COPY,
|
||||
GL_AND_INVERTED,
|
||||
GL_NOOP,
|
||||
GL_XOR,
|
||||
GL_OR,
|
||||
GL_NOR,
|
||||
GL_EQUIV,
|
||||
GL_INVERT,
|
||||
GL_OR_REVERSE,
|
||||
GL_COPY_INVERTED,
|
||||
GL_OR_INVERTED,
|
||||
GL_NAND,
|
||||
GL_SET,
|
||||
};
|
||||
*/
|
||||
|
||||
static const GLenum pgraph_cull_face_gl_map[] = {
|
||||
0,
|
||||
GL_FRONT,
|
||||
GL_BACK,
|
||||
GL_FRONT_AND_BACK
|
||||
};
|
||||
|
||||
static const GLenum pgraph_depth_func_gl_map[] = {
|
||||
GL_NEVER,
|
||||
GL_LESS,
|
||||
GL_EQUAL,
|
||||
GL_LEQUAL,
|
||||
GL_GREATER,
|
||||
GL_NOTEQUAL,
|
||||
GL_GEQUAL,
|
||||
GL_ALWAYS,
|
||||
};
|
||||
|
||||
static const GLenum pgraph_stencil_func_gl_map[] = {
|
||||
GL_NEVER,
|
||||
GL_LESS,
|
||||
GL_EQUAL,
|
||||
GL_LEQUAL,
|
||||
GL_GREATER,
|
||||
GL_NOTEQUAL,
|
||||
GL_GEQUAL,
|
||||
GL_ALWAYS,
|
||||
};
|
||||
|
||||
static const GLenum pgraph_stencil_op_gl_map[] = {
|
||||
0,
|
||||
GL_KEEP,
|
||||
GL_ZERO,
|
||||
GL_REPLACE,
|
||||
GL_INCR,
|
||||
GL_DECR,
|
||||
GL_INVERT,
|
||||
GL_INCR_WRAP,
|
||||
GL_DECR_WRAP,
|
||||
};
|
||||
|
||||
typedef struct ColorFormatInfo {
|
||||
unsigned int bytes_per_pixel;
|
||||
bool linear;
|
||||
GLint gl_internal_format;
|
||||
GLenum gl_format;
|
||||
GLenum gl_type;
|
||||
GLenum gl_swizzle_mask[4];
|
||||
bool depth;
|
||||
} ColorFormatInfo;
|
||||
|
||||
static const ColorFormatInfo kelvin_color_format_gl_map[66] = {
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] =
|
||||
{1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_RED, GL_RED, GL_ONE}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] =
|
||||
{1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_RED, GL_RED, GL_RED}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] =
|
||||
{2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] =
|
||||
{2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] =
|
||||
{2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] =
|
||||
{2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] =
|
||||
{4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] =
|
||||
{4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
|
||||
/* paletted texture */
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] =
|
||||
{1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] =
|
||||
{4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] =
|
||||
{4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] =
|
||||
{4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] =
|
||||
{2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] =
|
||||
{2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] =
|
||||
{4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] =
|
||||
{1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_RED, GL_RED, GL_ONE}},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] =
|
||||
{2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] =
|
||||
{1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
|
||||
{GL_ONE, GL_ONE, GL_ONE, GL_RED}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] =
|
||||
{2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_RED, GL_RED, GL_GREEN}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] =
|
||||
{1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_RED, GL_RED, GL_RED}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] =
|
||||
{2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] =
|
||||
{2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] =
|
||||
{4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] =
|
||||
{1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
|
||||
{GL_ONE, GL_ONE, GL_ONE, GL_RED}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] =
|
||||
{2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_RED, GL_RED, GL_GREEN}},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] =
|
||||
{2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] =
|
||||
{2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
|
||||
{GL_RED, GL_GREEN, GL_RED, GL_GREEN}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] =
|
||||
{2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
|
||||
{GL_GREEN, GL_RED, GL_RED, GL_GREEN}},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] =
|
||||
{2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] =
|
||||
{2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
|
||||
/* Additional information is passed to the pixel shader via the swizzle:
|
||||
* RED: The depth value.
|
||||
* GREEN: 0 for 16-bit, 1 for 24 bit
|
||||
* BLUE: 0 for fixed, 1 for float
|
||||
*/
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] =
|
||||
{2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
|
||||
{GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] =
|
||||
{4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
|
||||
{GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] =
|
||||
/* FIXME: Uses fixed-point format to match surface format hack below. */
|
||||
{4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
|
||||
{GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] =
|
||||
{2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
|
||||
{GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] =
|
||||
{2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT,
|
||||
{GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] =
|
||||
{2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT,
|
||||
{GL_RED, GL_RED, GL_RED, GL_ONE}},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] =
|
||||
{4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] =
|
||||
{4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] =
|
||||
{4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] =
|
||||
{4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] =
|
||||
{4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] =
|
||||
{4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}
|
||||
};
|
||||
|
||||
typedef struct SurfaceFormatInfo {
|
||||
unsigned int bytes_per_pixel;
|
||||
GLint gl_internal_format;
|
||||
GLenum gl_format;
|
||||
GLenum gl_type;
|
||||
GLenum gl_attachment;
|
||||
} SurfaceFormatInfo;
|
||||
|
||||
static const SurfaceFormatInfo kelvin_surface_color_format_gl_map[] = {
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
|
||||
{2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
|
||||
{2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
|
||||
{4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
|
||||
{4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0},
|
||||
|
||||
// FIXME: Map channel color
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
|
||||
{1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
|
||||
{2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0},
|
||||
};
|
||||
|
||||
static const SurfaceFormatInfo kelvin_surface_zeta_float_format_gl_map[] = {
|
||||
[NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
|
||||
{2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT},
|
||||
[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
|
||||
/* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for
|
||||
* now just emulate this with fixed-point Z24S8. Possible compat
|
||||
* improvement with custom conversion.
|
||||
*/
|
||||
{4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
|
||||
};
|
||||
|
||||
static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_gl_map[] = {
|
||||
[NV097_SET_SURFACE_FORMAT_ZETA_Z16] =
|
||||
{2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT},
|
||||
[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] =
|
||||
{4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT},
|
||||
};
|
||||
|
||||
#endif
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* QEMU Geforce NV2A debug helpers
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2012 espes
|
||||
|
@ -18,6 +18,7 @@
|
|||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "renderer.h"
|
||||
#include "debug.h"
|
||||
|
||||
#ifdef DEBUG_NV2A_GL
|
||||
|
@ -28,15 +29,8 @@
|
|||
#include <assert.h>
|
||||
|
||||
#ifdef CONFIG_RENDERDOC
|
||||
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
|
||||
#include "thirdparty/renderdoc_app.h"
|
||||
#ifdef _WIN32
|
||||
#include <libloaderapi.h>
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
static RENDERDOC_API_1_1_2 *rdoc_api = NULL;
|
||||
static int32_t renderdoc_capture_frames = 0;
|
||||
#endif
|
||||
|
||||
#define CHECK_GL_ERROR() do { \
|
||||
|
@ -74,31 +68,7 @@ void gl_debug_initialize(void)
|
|||
}
|
||||
|
||||
#ifdef CONFIG_RENDERDOC
|
||||
const char *renderdoc_lib;
|
||||
void* renderdoc;
|
||||
#ifdef __APPLE__
|
||||
renderdoc_lib = "librenderdoc.dylib";
|
||||
#elif _WIN32
|
||||
renderdoc_lib = "renderdoc.dll";
|
||||
#else
|
||||
renderdoc_lib = "librenderdoc.so";
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
renderdoc = GetModuleHandleA(renderdoc_lib);
|
||||
if (renderdoc) {
|
||||
pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress(
|
||||
renderdoc, "RENDERDOC_GetAPI");
|
||||
#else
|
||||
renderdoc = dlopen(renderdoc_lib, RTLD_NOW | RTLD_NOLOAD);
|
||||
if (renderdoc) {
|
||||
pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym(
|
||||
renderdoc, "RENDERDOC_GetAPI");
|
||||
#endif
|
||||
int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2,
|
||||
(void **)&rdoc_api);
|
||||
assert(ret == 1 && "Failed to retrieve RenderDoc API.");
|
||||
}
|
||||
nv2a_dbg_renderdoc_init();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -179,7 +149,10 @@ void gl_debug_frame_terminator(void)
|
|||
CHECK_GL_ERROR();
|
||||
|
||||
#ifdef CONFIG_RENDERDOC
|
||||
if (rdoc_api) {
|
||||
if (nv2a_dbg_renderdoc_available()) {
|
||||
|
||||
RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api();
|
||||
|
||||
if (rdoc_api->IsTargetControlConnected()) {
|
||||
if (rdoc_api->IsFrameCapturing()) {
|
||||
rdoc_api->EndFrameCapture(NULL, NULL);
|
||||
|
@ -190,7 +163,7 @@ void gl_debug_frame_terminator(void)
|
|||
error);
|
||||
}
|
||||
}
|
||||
if (renderdoc_capture_frames) {
|
||||
if (renderdoc_capture_frames > 0) {
|
||||
rdoc_api->StartFrameCapture(NULL, NULL);
|
||||
GLenum error = glGetError();
|
||||
if (error != GL_NO_ERROR) {
|
||||
|
@ -203,22 +176,10 @@ void gl_debug_frame_terminator(void)
|
|||
}
|
||||
}
|
||||
#endif
|
||||
if (!has_GL_GREMEDY_frame_terminator) {
|
||||
return;
|
||||
if (has_GL_GREMEDY_frame_terminator) {
|
||||
glFrameTerminatorGREMEDY();
|
||||
CHECK_GL_ERROR();
|
||||
}
|
||||
|
||||
glFrameTerminatorGREMEDY();
|
||||
CHECK_GL_ERROR();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RENDERDOC
|
||||
bool nv2a_dbg_renderdoc_available(void) {
|
||||
return rdoc_api != NULL;
|
||||
}
|
||||
|
||||
void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames) {
|
||||
renderdoc_capture_frames = num_frames;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // DEBUG_NV2A_GL
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2012 espes
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H
|
||||
|
||||
// #define DEBUG_NV2A_GL
|
||||
#ifdef DEBUG_NV2A_GL
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "gloffscreen.h"
|
||||
#include "config-host.h"
|
||||
|
||||
void gl_debug_initialize(void);
|
||||
void gl_debug_message(bool cc, const char *fmt, ...);
|
||||
void gl_debug_group_begin(const char *fmt, ...);
|
||||
void gl_debug_group_end(void);
|
||||
void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
|
||||
void gl_debug_frame_terminator(void);
|
||||
|
||||
# define NV2A_GL_DPRINTF(cc, format, ...) \
|
||||
gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__)
|
||||
# define NV2A_GL_DGROUP_BEGIN(format, ...) \
|
||||
gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__)
|
||||
# define NV2A_GL_DGROUP_END() \
|
||||
gl_debug_group_end()
|
||||
# define NV2A_GL_DLABEL(target, name, format, ...) \
|
||||
gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__)
|
||||
# define NV2A_GL_DFRAME_TERMINATOR() \
|
||||
gl_debug_frame_terminator()
|
||||
|
||||
#else
|
||||
|
||||
# define NV2A_GL_DPRINTF(cc, format, ...) do { \
|
||||
if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \
|
||||
} while (0)
|
||||
# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0)
|
||||
# define NV2A_GL_DGROUP_END() do { } while (0)
|
||||
# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0)
|
||||
# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0)
|
||||
#endif
|
||||
|
||||
#endif
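A brief usage illustration (hypothetical call site, not part of this change): with DEBUG_NV2A_GL defined, the group/label macros bracket work so it shows up clearly in capture tools such as RenderDoc, and they compile down to the plain NV2A_DPRINTF path or no-ops otherwise. The surface/binding locals below are assumed for the example.

/* Hypothetical call site. */
NV2A_GL_DGROUP_BEGIN("Present surface @ 0x%" HWADDR_PRIx, surface->vram_addr);
NV2A_GL_DLABEL(GL_TEXTURE, binding->gl_texture, "pitch=%u", surface->pitch);
glDrawArrays(GL_TRIANGLES, 0, 3);
NV2A_GL_DGROUP_END();
NV2A_GL_DFRAME_TERMINATOR(); /* once per presented frame */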
|
|
@ -0,0 +1,407 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
#include "hw/xbox/nv2a/pgraph/util.h"
|
||||
#include "renderer.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
void pgraph_gl_init_display_renderer(NV2AState *d)
|
||||
{
|
||||
struct PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
glGenTextures(1, &r->gl_display_buffer);
|
||||
r->gl_display_buffer_internal_format = 0;
|
||||
r->gl_display_buffer_width = 0;
|
||||
r->gl_display_buffer_height = 0;
|
||||
r->gl_display_buffer_format = 0;
|
||||
r->gl_display_buffer_type = 0;
|
||||
|
||||
const char *vs =
|
||||
"#version 330\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" float x = -1.0 + float((gl_VertexID & 1) << 2);\n"
|
||||
" float y = -1.0 + float((gl_VertexID & 2) << 1);\n"
|
||||
" gl_Position = vec4(x, y, 0, 1);\n"
|
||||
"}\n";
|
||||
/* FIXME: improve interlace handling, pvideo */
|
||||
|
||||
const char *fs =
|
||||
"#version 330\n"
|
||||
"uniform sampler2D tex;\n"
|
||||
"uniform bool pvideo_enable;\n"
|
||||
"uniform sampler2D pvideo_tex;\n"
|
||||
"uniform vec2 pvideo_in_pos;\n"
|
||||
"uniform vec4 pvideo_pos;\n"
|
||||
"uniform vec3 pvideo_scale;\n"
|
||||
"uniform bool pvideo_color_key_enable;\n"
|
||||
"uniform vec4 pvideo_color_key;\n"
|
||||
"uniform vec2 display_size;\n"
|
||||
"uniform float line_offset;\n"
|
||||
"layout(location = 0) out vec4 out_Color;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec2 texCoord = gl_FragCoord.xy/display_size;\n"
|
||||
" float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
|
||||
" texCoord.y = 1 + rel*(texCoord.y - 1);"
|
||||
" out_Color.rgba = texture(tex, texCoord);\n"
|
||||
" if (pvideo_enable) {\n"
|
||||
" vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
|
||||
" vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
|
||||
" bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
|
||||
" greaterThan(screenCoord, output_region.zw));\n"
|
||||
" if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
|
||||
" vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
|
||||
" vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
|
||||
" in_st.y *= -1.0;\n"
|
||||
" out_Color.rgba = texture(pvideo_tex, in_st);\n"
|
||||
" }\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
|
||||
r->disp_rndr.prog = pgraph_gl_compile_shader(vs, fs);
|
||||
r->disp_rndr.tex_loc = glGetUniformLocation(r->disp_rndr.prog, "tex");
|
||||
r->disp_rndr.pvideo_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_enable");
|
||||
r->disp_rndr.pvideo_tex_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_tex");
|
||||
r->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_in_pos");
|
||||
r->disp_rndr.pvideo_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_pos");
|
||||
r->disp_rndr.pvideo_scale_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_scale");
|
||||
r->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key_enable");
|
||||
r->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key");
|
||||
r->disp_rndr.display_size_loc = glGetUniformLocation(r->disp_rndr.prog, "display_size");
|
||||
r->disp_rndr.line_offset_loc = glGetUniformLocation(r->disp_rndr.prog, "line_offset");
|
||||
|
||||
glGenVertexArrays(1, &r->disp_rndr.vao);
|
||||
glBindVertexArray(r->disp_rndr.vao);
|
||||
glGenBuffers(1, &r->disp_rndr.vbo);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
|
||||
glGenFramebuffers(1, &r->disp_rndr.fbo);
|
||||
glGenTextures(1, &r->disp_rndr.pvideo_tex);
|
||||
assert(glGetError() == GL_NO_ERROR);
|
||||
}
|
||||
|
||||
static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data,
|
||||
unsigned int width,
|
||||
unsigned int height,
|
||||
unsigned int pitch)
|
||||
{
|
||||
uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4);
|
||||
int x, y;
|
||||
for (y = 0; y < height; y++) {
|
||||
const uint8_t *line = &data[y * pitch];
|
||||
const uint32_t row_offset = y * width;
|
||||
for (x = 0; x < width; x++) {
|
||||
uint8_t *pixel = &converted_data[(row_offset + x) * 4];
|
||||
convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]);
|
||||
pixel[3] = 255;
|
||||
}
|
||||
}
|
||||
return converted_data;
|
||||
}
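convert_yuy2_to_rgb() is an existing helper elsewhere in the tree, so only the call is visible here. The LE_CR8YB8CB8YA8 name describes a packed 4:2:2 layout: four bytes encode two horizontally adjacent pixels that share one Cb/Cr pair. A rough sketch of the per-pixel math such a helper typically performs is below; the byte order and the integer BT.601-style coefficients are assumptions for illustration, not taken from the real implementation.

/* Sketch only: not the actual convert_yuy2_to_rgb(). */
static void yuy2_to_rgb_sketch(const uint8_t *line, unsigned int x,
                               uint8_t *r, uint8_t *g, uint8_t *b)
{
    const uint8_t *pair = &line[(x & ~1u) * 2]; /* 4 bytes per 2 pixels */
    int y = pair[(x & 1) * 2];                  /* Y0 or Y1 */
    int u = pair[1] - 128;                      /* shared Cb */
    int v = pair[3] - 128;                      /* shared Cr */
    int rr = y + ((359 * v) >> 8);              /* ~1.402 * Cr */
    int gg = y - ((88 * u + 183 * v) >> 8);     /* ~0.344 * Cb + 0.714 * Cr */
    int bb = y + ((454 * u) >> 8);              /* ~1.772 * Cb */
    *r = MIN(MAX(rr, 0), 255);
    *g = MIN(MAX(gg, 0), 255);
    *b = MIN(MAX(bb, 0), 255);
}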
|
||||
|
||||
static float pvideo_calculate_scale(unsigned int din_dout,
|
||||
unsigned int output_size)
|
||||
{
|
||||
float calculated_in = din_dout * (output_size - 1);
|
||||
calculated_in = floorf(calculated_in / (1 << 20) + 0.5f);
|
||||
return (calculated_in + 1.0f) / output_size;
|
||||
}
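NV_PVIDEO_DS_DX / NV_PVIDEO_DT_DY express the input step per output pixel; judging by the 1 << 20 divisor above they are fixed-point values with a 20-bit fraction, NV_PVIDEO_DIN_DOUT_UNITY being a step of exactly 1.0. A worked example with assumed register values:

/* Illustrative numbers (not taken from a real title):
 *   ds_dx     = 0x80000 (0.5 with a 20-bit fractional part)
 *   out_width = 640
 *
 *   calculated_in = 0x80000 * 639 / (1 << 20) ~= 319.5, rounded to 319
 *   scale_x       = (319 + 1) / 640 = 0.5
 *
 * i.e. the fragment shader advances the source by half a texel per output
 * pixel, so a 320-wide overlay fills the 640-wide output region (2x zoom).
 */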
|
||||
|
||||
static void render_display_pvideo_overlay(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
// FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
|
||||
// Many games seem to pass this value when initializing or tearing down
|
||||
// PVIDEO. On its own, this generally does not result in the overlay being
|
||||
// hidden; however, there are certain games (e.g., Ultimate Beach Soccer)
|
||||
// that use an unknown mechanism to hide the overlay without explicitly
|
||||
// stopping it.
|
||||
// Since the value seems to be set to 0xFFFFFFFF only in cases where the
|
||||
// content is not valid, it is probably good enough to treat it as an
|
||||
// implicit stop.
|
||||
bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
|
||||
&& d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
|
||||
glUniform1ui(r->disp_rndr.pvideo_enable_loc, enabled);
|
||||
if (!enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
|
||||
hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
|
||||
hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
|
||||
|
||||
int in_width =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
|
||||
int in_height =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
|
||||
|
||||
int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
|
||||
NV_PVIDEO_POINT_IN_S);
|
||||
int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
|
||||
NV_PVIDEO_POINT_IN_T);
|
||||
|
||||
int in_pitch =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
|
||||
int in_color =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
|
||||
|
||||
unsigned int out_width =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
|
||||
unsigned int out_height =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
|
||||
|
||||
float scale_x = 1.0f;
|
||||
float scale_y = 1.0f;
|
||||
unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
|
||||
unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
|
||||
if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
|
||||
scale_x = pvideo_calculate_scale(ds_dx, out_width);
|
||||
}
|
||||
if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
|
||||
scale_y = pvideo_calculate_scale(dt_dy, out_height);
|
||||
}
|
||||
|
||||
// On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
|
||||
// in them being capped to the output size, content is not scaled. This is
|
||||
// particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
|
||||
// during initialization or teardown.
|
||||
if (in_width > out_width) {
|
||||
in_width = floorf((float)out_width * scale_x + 0.5f);
|
||||
}
|
||||
if (in_height > out_height) {
|
||||
in_height = floorf((float)out_height * scale_y + 0.5f);
|
||||
}
|
||||
|
||||
/* TODO: support other color formats */
|
||||
assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
|
||||
|
||||
unsigned int out_x =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
|
||||
unsigned int out_y =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
|
||||
|
||||
unsigned int color_key_enabled =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
|
||||
glUniform1ui(r->disp_rndr.pvideo_color_key_enable_loc,
|
||||
color_key_enabled);
|
||||
|
||||
// TODO: Verify that masking off the top byte is correct.
|
||||
// SeaBlade sets a color key of 0x80000000 but the texture passed into the
|
||||
// shader is cleared to 0 alpha.
|
||||
unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
|
||||
glUniform4f(r->disp_rndr.pvideo_color_key_loc,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
|
||||
|
||||
assert(offset + in_pitch * in_height <= limit);
|
||||
hwaddr end = base + offset + in_pitch * in_height;
|
||||
assert(end <= memory_region_size(d->vram));
|
||||
|
||||
pgraph_apply_scaling_factor(pg, &out_x, &out_y);
|
||||
pgraph_apply_scaling_factor(pg, &out_width, &out_height);
|
||||
|
||||
// Translate for the GL viewport origin.
|
||||
out_y = MAX(r->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + 1);
|
||||
glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
|
||||
d->vram_ptr + base + offset, in_width, in_height, in_pitch);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, tex_rgba);
|
||||
g_free(tex_rgba);
|
||||
glUniform1i(r->disp_rndr.pvideo_tex_loc, 1);
|
||||
glUniform2f(r->disp_rndr.pvideo_in_pos_loc, in_s, in_t);
|
||||
glUniform4f(r->disp_rndr.pvideo_pos_loc,
|
||||
out_x, out_y, out_width, out_height);
|
||||
glUniform3f(r->disp_rndr.pvideo_scale_loc,
|
||||
scale_x, scale_y, 1.0f / pg->surface_scale_factor);
|
||||
}
|
||||
|
||||
static void render_display(NV2AState *d, SurfaceBinding *surface)
|
||||
{
|
||||
struct PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
unsigned int width, height;
|
||||
uint32_t pline_offset, pstart_addr, pline_compare;
|
||||
d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
|
||||
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
|
||||
int line_offset = surface->pitch / pline_offset;
|
||||
|
||||
/* Adjust viewport height for interlaced mode, used only in 1080i */
|
||||
if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
|
||||
height *= 2;
|
||||
}
|
||||
|
||||
pgraph_apply_scaling_factor(pg, &width, &height);
|
||||
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer);
|
||||
bool recreate = (
|
||||
surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format
|
||||
|| width != r->gl_display_buffer_width
|
||||
|| height != r->gl_display_buffer_height
|
||||
|| surface->fmt.gl_format != r->gl_display_buffer_format
|
||||
|| surface->fmt.gl_type != r->gl_display_buffer_type
|
||||
);
|
||||
|
||||
if (recreate) {
|
||||
/* XXX: There's apparently a bug in some Intel OpenGL drivers for
|
||||
* Windows that will leak this texture when it's orphaned after use in
|
||||
* another context, apparently regardless of which thread it's created
|
||||
* or released on.
|
||||
*
|
||||
* Driver: 27.20.100.8729 9/11/2020 W10 x64
|
||||
* Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423
|
||||
*/
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format;
|
||||
r->gl_display_buffer_width = width;
|
||||
r->gl_display_buffer_height = height;
|
||||
r->gl_display_buffer_format = surface->fmt.gl_format;
|
||||
r->gl_display_buffer_type = surface->fmt.gl_type;
|
||||
glTexImage2D(GL_TEXTURE_2D, 0,
|
||||
r->gl_display_buffer_internal_format,
|
||||
r->gl_display_buffer_width,
|
||||
r->gl_display_buffer_height,
|
||||
0,
|
||||
r->gl_display_buffer_format,
|
||||
r->gl_display_buffer_type,
|
||||
NULL);
|
||||
}
|
||||
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
|
||||
GL_TEXTURE_2D, r->gl_display_buffer, 0);
|
||||
GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0};
|
||||
glDrawBuffers(1, DrawBuffers);
|
||||
assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
|
||||
|
||||
glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
|
||||
glBindVertexArray(r->disp_rndr.vao);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo);
|
||||
glUseProgram(r->disp_rndr.prog);
|
||||
glProgramUniform1i(r->disp_rndr.prog, r->disp_rndr.tex_loc, 0);
|
||||
glUniform2f(r->disp_rndr.display_size_loc, width, height);
|
||||
glUniform1f(r->disp_rndr.line_offset_loc, line_offset);
|
||||
render_display_pvideo_overlay(d);
|
||||
|
||||
glViewport(0, 0, width, height);
|
||||
glColorMask(true, true, true, true);
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
glDisable(GL_BLEND);
|
||||
glDisable(GL_STENCIL_TEST);
|
||||
glDisable(GL_CULL_FACE);
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
|
||||
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glDrawArrays(GL_TRIANGLES, 0, 3);
|
||||
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
|
||||
GL_TEXTURE_2D, 0, 0);
|
||||
}
|
||||
|
||||
static void gl_fence(void)
|
||||
{
|
||||
GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT,
|
||||
(GLuint64)(5000000000));
|
||||
assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED);
|
||||
glDeleteSync(fence);
|
||||
}
|
||||
|
||||
void pgraph_gl_sync(NV2AState *d)
|
||||
{
|
||||
uint32_t pline_offset, pstart_addr, pline_compare;
|
||||
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
|
||||
SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset);
|
||||
if (surface == NULL) {
|
||||
qemu_event_set(&d->pgraph.sync_complete);
|
||||
return;
|
||||
}
|
||||
|
||||
/* FIXME: Sanity check surface dimensions */
|
||||
|
||||
/* Wait for queued commands to complete */
|
||||
pgraph_gl_upload_surface_data(d, surface, !tcg_enabled());
|
||||
gl_fence();
|
||||
assert(glGetError() == GL_NO_ERROR);
|
||||
|
||||
/* Render framebuffer in display context */
|
||||
glo_set_current(g_nv2a_context_display);
|
||||
render_display(d, surface);
|
||||
gl_fence();
|
||||
assert(glGetError() == GL_NO_ERROR);
|
||||
|
||||
/* Switch back to original context */
|
||||
glo_set_current(g_nv2a_context_render);
|
||||
|
||||
qatomic_set(&d->pgraph.sync_pending, false);
|
||||
qemu_event_set(&d->pgraph.sync_complete);
|
||||
}
|
||||
|
||||
int pgraph_gl_get_framebuffer_surface(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
qemu_mutex_lock(&d->pfifo.lock);
|
||||
// FIXME: Possible race condition with pgraph, consider lock
|
||||
uint32_t pline_offset, pstart_addr, pline_compare;
|
||||
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
|
||||
SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset);
|
||||
if (surface == NULL || !surface->color) {
|
||||
qemu_mutex_unlock(&d->pfifo.lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
assert(surface->color);
|
||||
assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
|
||||
assert(surface->fmt.gl_format == GL_RGBA
|
||||
|| surface->fmt.gl_format == GL_RGB
|
||||
|| surface->fmt.gl_format == GL_BGR
|
||||
|| surface->fmt.gl_format == GL_BGRA
|
||||
);
|
||||
|
||||
surface->frame_time = pg->frame_time;
|
||||
qemu_event_reset(&d->pgraph.sync_complete);
|
||||
qatomic_set(&pg->sync_pending, true);
|
||||
pfifo_kick(d);
|
||||
qemu_mutex_unlock(&d->pfifo.lock);
|
||||
qemu_event_wait(&d->pgraph.sync_complete);
|
||||
|
||||
return r->gl_display_buffer;
|
||||
}
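pgraph_gl_get_framebuffer_surface() runs on the display side while all GL surface state is owned by the renderer thread, so it never touches GL directly: it flags sync_pending, kicks PFIFO so the renderer services the request via pgraph_gl_sync() above, and blocks until gl_display_buffer has been updated. Roughly:

/* display/UI thread                    PGRAPH (render) thread
 * -----------------                    ----------------------
 * lock pfifo.lock
 * reset sync_complete
 * sync_pending = true
 * pfifo_kick()  ---------------------> pgraph_gl_process_pending()
 * unlock pfifo.lock                      -> pgraph_gl_sync()
 * wait(sync_complete)                         render_display() into
 *                                             gl_display_buffer
 *                                         set(sync_complete)
 * return gl_display_buffer  <--------------/
 */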
|
|
@ -0,0 +1,528 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/fast-hash.h"
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
#include "debug.h"
|
||||
#include "renderer.h"
|
||||
|
||||
void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
NV2A_DPRINTF("---------PRE CLEAR ------\n");
|
||||
pg->clearing = true;
|
||||
|
||||
GLbitfield gl_mask = 0;
|
||||
|
||||
bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
|
||||
bool write_zeta =
|
||||
(parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
|
||||
|
||||
if (write_zeta) {
|
||||
GLint gl_clear_stencil;
|
||||
GLfloat gl_clear_depth;
|
||||
pgraph_get_clear_depth_stencil_value(pg, &gl_clear_depth,
|
||||
&gl_clear_stencil);
|
||||
|
||||
if (parameter & NV097_CLEAR_SURFACE_Z) {
|
||||
gl_mask |= GL_DEPTH_BUFFER_BIT;
|
||||
glDepthMask(GL_TRUE);
|
||||
glClearDepth(gl_clear_depth);
|
||||
}
|
||||
if (parameter & NV097_CLEAR_SURFACE_STENCIL) {
|
||||
gl_mask |= GL_STENCIL_BUFFER_BIT;
|
||||
glStencilMask(0xff);
|
||||
glClearStencil(gl_clear_stencil);
|
||||
}
|
||||
}
|
||||
if (write_color) {
|
||||
gl_mask |= GL_COLOR_BUFFER_BIT;
|
||||
glColorMask((parameter & NV097_CLEAR_SURFACE_R)
|
||||
? GL_TRUE : GL_FALSE,
|
||||
(parameter & NV097_CLEAR_SURFACE_G)
|
||||
? GL_TRUE : GL_FALSE,
|
||||
(parameter & NV097_CLEAR_SURFACE_B)
|
||||
? GL_TRUE : GL_FALSE,
|
||||
(parameter & NV097_CLEAR_SURFACE_A)
|
||||
? GL_TRUE : GL_FALSE);
|
||||
|
||||
GLfloat rgba[4];
|
||||
pgraph_get_clear_color(pg, rgba);
|
||||
glClearColor(rgba[0], rgba[1], rgba[2], rgba[3]);
|
||||
}
|
||||
|
||||
pgraph_gl_surface_update(d, true, write_color, write_zeta);
|
||||
|
||||
/* FIXME: Needs confirmation */
|
||||
unsigned int xmin =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN);
|
||||
unsigned int xmax =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX);
|
||||
unsigned int ymin =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN);
|
||||
unsigned int ymax =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX);
|
||||
|
||||
NV2A_DPRINTF(
|
||||
"------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n",
|
||||
parameter, xmin, ymin, xmax, ymax,
|
||||
d->pgraph.regs_[NV_PGRAPH_COLORCLEARVALUE]);
|
||||
|
||||
unsigned int scissor_width = xmax - xmin + 1,
|
||||
scissor_height = ymax - ymin + 1;
|
||||
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
|
||||
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
|
||||
ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
|
||||
|
||||
NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin,
|
||||
xmin + scissor_width - 1, ymin + scissor_height - 1);
|
||||
|
||||
bool full_clear = !xmin && !ymin &&
|
||||
scissor_width >= pg->surface_binding_dim.width &&
|
||||
scissor_height >= pg->surface_binding_dim.height;
|
||||
|
||||
pgraph_apply_scaling_factor(pg, &xmin, &ymin);
|
||||
pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
|
||||
|
||||
/* FIXME: Respect window clip?!?! */
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glScissor(xmin, ymin, scissor_width, scissor_height);
|
||||
|
||||
/* Dither */
|
||||
/* FIXME: Maybe also disable it here? + GL implementation dependent */
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_DITHERENABLE) {
|
||||
glEnable(GL_DITHER);
|
||||
} else {
|
||||
glDisable(GL_DITHER);
|
||||
}
|
||||
|
||||
glClear(gl_mask);
|
||||
|
||||
glDisable(GL_SCISSOR_TEST);
|
||||
|
||||
pgraph_gl_set_surface_dirty(pg, write_color, write_zeta);
|
||||
|
||||
if (r->color_binding) {
|
||||
r->color_binding->cleared = full_clear && write_color;
|
||||
}
|
||||
if (r->zeta_binding) {
|
||||
r->zeta_binding->cleared = full_clear && write_zeta;
|
||||
}
|
||||
|
||||
pg->clearing = false;
|
||||
}
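The CLEARRECT registers are specified in the NV2A's top-left-origin space while glScissor() uses a bottom-left origin, hence the ymin = height - (ymin + scissor_height) flip before the resolution scale is applied. A quick worked example (anti-aliasing factor ignored for simplicity):

/* surface_binding_dim.height = 480, CLEARRECTY ymin = 100, ymax = 149:
 *   scissor_height = 149 - 100 + 1 = 50
 *   GL ymin        = 480 - (100 + 50) = 330
 * The 50-pixel band starting 100 pixels below the top edge becomes the band
 * starting 330 pixels above the bottom edge -- the same pixels, GL convention.
 */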
|
||||
|
||||
void pgraph_gl_draw_begin(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", pg->primitive_mode);
|
||||
|
||||
uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
|
||||
bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
|
||||
bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
|
||||
bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
|
||||
bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
|
||||
bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
|
||||
bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
|
||||
bool stencil_test =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
|
||||
bool is_nop_draw = !(color_write || depth_test || stencil_test);
|
||||
|
||||
pgraph_gl_surface_update(d, true, true, depth_test || stencil_test);
|
||||
|
||||
if (is_nop_draw) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(r->color_binding || r->zeta_binding);
|
||||
|
||||
pgraph_gl_bind_textures(d);
|
||||
pgraph_gl_bind_shaders(pg);
|
||||
|
||||
glColorMask(mask_red, mask_green, mask_blue, mask_alpha);
|
||||
glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE));
|
||||
glStencilMask(GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
|
||||
NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE));
|
||||
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
|
||||
glEnable(GL_BLEND);
|
||||
uint32_t sfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
|
||||
NV_PGRAPH_BLEND_SFACTOR);
|
||||
uint32_t dfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
|
||||
NV_PGRAPH_BLEND_DFACTOR);
|
||||
assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
|
||||
assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map));
|
||||
glBlendFunc(pgraph_blend_factor_gl_map[sfactor],
|
||||
pgraph_blend_factor_gl_map[dfactor]);
|
||||
|
||||
uint32_t equation = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND),
|
||||
NV_PGRAPH_BLEND_EQN);
|
||||
assert(equation < ARRAY_SIZE(pgraph_blend_equation_gl_map));
|
||||
glBlendEquation(pgraph_blend_equation_gl_map[equation]);
|
||||
|
||||
uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
|
||||
float gl_blend_color[4];
|
||||
pgraph_argb_pack32_to_rgba_float(blend_color, gl_blend_color);
|
||||
glBlendColor(gl_blend_color[0], gl_blend_color[1], gl_blend_color[2],
|
||||
gl_blend_color[3]);
|
||||
} else {
|
||||
glDisable(GL_BLEND);
|
||||
}
|
||||
|
||||
/* Face culling */
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
|
||||
& NV_PGRAPH_SETUPRASTER_CULLENABLE) {
|
||||
uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
|
||||
NV_PGRAPH_SETUPRASTER_CULLCTRL);
|
||||
assert(cull_face < ARRAY_SIZE(pgraph_cull_face_gl_map));
|
||||
glCullFace(pgraph_cull_face_gl_map[cull_face]);
|
||||
glEnable(GL_CULL_FACE);
|
||||
} else {
|
||||
glDisable(GL_CULL_FACE);
|
||||
}
|
||||
|
||||
/* Clipping */
|
||||
glEnable(GL_CLIP_DISTANCE0);
|
||||
glEnable(GL_CLIP_DISTANCE1);
|
||||
|
||||
/* Front-face select */
|
||||
glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
|
||||
& NV_PGRAPH_SETUPRASTER_FRONTFACE
|
||||
? GL_CCW : GL_CW);
|
||||
|
||||
/* Polygon offset */
|
||||
/* FIXME: GL implementation-specific, maybe do this in VS? */
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
|
||||
NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) {
|
||||
glEnable(GL_POLYGON_OFFSET_FILL);
|
||||
} else {
|
||||
glDisable(GL_POLYGON_OFFSET_FILL);
|
||||
}
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
|
||||
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) {
|
||||
glEnable(GL_POLYGON_OFFSET_LINE);
|
||||
} else {
|
||||
glDisable(GL_POLYGON_OFFSET_LINE);
|
||||
}
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
|
||||
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) {
|
||||
glEnable(GL_POLYGON_OFFSET_POINT);
|
||||
} else {
|
||||
glDisable(GL_POLYGON_OFFSET_POINT);
|
||||
}
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
|
||||
(NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
|
||||
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
|
||||
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
|
||||
uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
|
||||
GLfloat zfactor = *(float*)&zfactor_u32;
|
||||
uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
|
||||
GLfloat zbias = *(float*)&zbias_u32;
|
||||
glPolygonOffset(zfactor, zbias);
|
||||
}
|
||||
|
||||
/* Depth testing */
|
||||
if (depth_test) {
|
||||
glEnable(GL_DEPTH_TEST);
|
||||
|
||||
uint32_t depth_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
|
||||
NV_PGRAPH_CONTROL_0_ZFUNC);
|
||||
assert(depth_func < ARRAY_SIZE(pgraph_depth_func_gl_map));
|
||||
glDepthFunc(pgraph_depth_func_gl_map[depth_func]);
|
||||
} else {
|
||||
glDisable(GL_DEPTH_TEST);
|
||||
}
|
||||
|
||||
if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
|
||||
NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
|
||||
NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
|
||||
glEnable(GL_DEPTH_CLAMP);
|
||||
} else {
|
||||
glDisable(GL_DEPTH_CLAMP);
|
||||
}
|
||||
|
||||
if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
|
||||
NV_PGRAPH_CONTROL_3_SHADEMODE) ==
|
||||
NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) {
|
||||
glProvokingVertex(GL_FIRST_VERTEX_CONVENTION);
|
||||
}
|
||||
|
||||
if (stencil_test) {
|
||||
glEnable(GL_STENCIL_TEST);
|
||||
|
||||
uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
|
||||
NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
|
||||
uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
|
||||
NV_PGRAPH_CONTROL_1_STENCIL_REF);
|
||||
uint32_t func_mask = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
|
||||
NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
|
||||
uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
|
||||
NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
|
||||
uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
|
||||
NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
|
||||
uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
|
||||
NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
|
||||
|
||||
assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_gl_map));
|
||||
assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
|
||||
assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_gl_map));
|
||||
assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_gl_map));
|
||||
|
||||
glStencilFunc(
|
||||
pgraph_stencil_func_gl_map[stencil_func],
|
||||
stencil_ref,
|
||||
func_mask);
|
||||
|
||||
glStencilOp(
|
||||
pgraph_stencil_op_gl_map[op_fail],
|
||||
pgraph_stencil_op_gl_map[op_zfail],
|
||||
pgraph_stencil_op_gl_map[op_zpass]);
|
||||
|
||||
} else {
|
||||
glDisable(GL_STENCIL_TEST);
|
||||
}
|
||||
|
||||
/* Dither */
|
||||
/* FIXME: GL implementation dependent */
|
||||
if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
|
||||
NV_PGRAPH_CONTROL_0_DITHERENABLE) {
|
||||
glEnable(GL_DITHER);
|
||||
} else {
|
||||
glDisable(GL_DITHER);
|
||||
}
|
||||
|
||||
glEnable(GL_PROGRAM_POINT_SIZE);
|
||||
|
||||
bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), NV_PGRAPH_ANTIALIASING_ENABLE);
|
||||
|
||||
/* Edge Antialiasing */
|
||||
if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
|
||||
NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
|
||||
glEnable(GL_LINE_SMOOTH);
|
||||
} else {
|
||||
glDisable(GL_LINE_SMOOTH);
|
||||
}
|
||||
if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
|
||||
NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
|
||||
glEnable(GL_POLYGON_SMOOTH);
|
||||
} else {
|
||||
glDisable(GL_POLYGON_SMOOTH);
|
||||
}
|
||||
|
||||
unsigned int vp_width = pg->surface_binding_dim.width,
|
||||
vp_height = pg->surface_binding_dim.height;
|
||||
pgraph_apply_scaling_factor(pg, &vp_width, &vp_height);
|
||||
glViewport(0, 0, vp_width, vp_height);
|
||||
|
||||
/* Surface clip */
|
||||
/* FIXME: Consider moving to PSH w/ window clip */
|
||||
unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x,
|
||||
ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y;
|
||||
unsigned int xmax = xmin + pg->surface_shape.clip_width - 1,
|
||||
ymax = ymin + pg->surface_shape.clip_height - 1;
|
||||
|
||||
unsigned int scissor_width = xmax - xmin + 1,
|
||||
scissor_height = ymax - ymin + 1;
|
||||
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
|
||||
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
|
||||
ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
|
||||
pgraph_apply_scaling_factor(pg, &xmin, &ymin);
|
||||
pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);
|
||||
|
||||
glEnable(GL_SCISSOR_TEST);
|
||||
glScissor(xmin, ymin, scissor_width, scissor_height);
|
||||
|
||||
/* Visibility testing */
|
||||
if (pg->zpass_pixel_count_enable) {
|
||||
r->gl_zpass_pixel_count_query_count++;
|
||||
r->gl_zpass_pixel_count_queries = (GLuint*)g_realloc(
|
||||
r->gl_zpass_pixel_count_queries,
|
||||
sizeof(GLuint) * r->gl_zpass_pixel_count_query_count);
|
||||
|
||||
GLuint gl_query;
|
||||
glGenQueries(1, &gl_query);
|
||||
r->gl_zpass_pixel_count_queries[
|
||||
r->gl_zpass_pixel_count_query_count - 1] = gl_query;
|
||||
glBeginQuery(GL_SAMPLES_PASSED, gl_query);
|
||||
}
|
||||
}
|
||||
|
||||
void pgraph_gl_draw_end(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
|
||||
bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
|
||||
bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
|
||||
bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
|
||||
bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
|
||||
bool color_write = mask_alpha || mask_red || mask_green || mask_blue;
|
||||
bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
|
||||
bool stencil_test =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
|
||||
bool is_nop_draw = !(color_write || depth_test || stencil_test);
|
||||
|
||||
if (is_nop_draw) {
|
||||
// FIXME: Check PGRAPH register 0x880.
|
||||
// HW uses bit 11 in 0x880 to enable or disable a color/zeta limit
|
||||
// check that will raise an exception in the case that a draw should
|
||||
// modify the color and/or zeta buffer but the target(s) are masked
|
||||
// off. This check only seems to trigger during the fragment
|
||||
// processing; it is legal to attempt a draw that is entirely
|
||||
// clipped regardless of 0x880. See xemu#635 for context.
|
||||
return;
|
||||
}
|
||||
|
||||
pgraph_gl_flush_draw(d);
|
||||
|
||||
/* End of visibility testing */
|
||||
if (pg->zpass_pixel_count_enable) {
|
||||
nv2a_profile_inc_counter(NV2A_PROF_QUERY);
|
||||
glEndQuery(GL_SAMPLES_PASSED);
|
||||
}
|
||||
|
||||
pg->draw_time++;
|
||||
if (r->color_binding && pgraph_color_write_enabled(pg)) {
|
||||
r->color_binding->draw_time = pg->draw_time;
|
||||
}
|
||||
if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) {
|
||||
r->zeta_binding->draw_time = pg->draw_time;
|
||||
}
|
||||
|
||||
pgraph_gl_set_surface_dirty(pg, color_write, depth_test || stencil_test);
|
||||
NV2A_GL_DGROUP_END();
|
||||
}
|
||||
|
||||
void pgraph_gl_flush_draw(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
if (!(r->color_binding || r->zeta_binding)) {
|
||||
return;
|
||||
}
|
||||
assert(r->shader_binding);
|
||||
|
||||
if (pg->draw_arrays_length) {
|
||||
NV2A_GL_DPRINTF(false, "Draw Arrays");
|
||||
nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS);
|
||||
assert(pg->inline_elements_length == 0);
|
||||
assert(pg->inline_buffer_length == 0);
|
||||
assert(pg->inline_array_length == 0);
|
||||
|
||||
pgraph_gl_bind_vertex_attributes(d, pg->draw_arrays_min_start,
|
||||
pg->draw_arrays_max_count - 1,
|
||||
false, 0,
|
||||
pg->draw_arrays_max_count - 1);
|
||||
glMultiDrawArrays(r->shader_binding->gl_primitive_mode,
|
||||
pg->draw_arrays_start,
|
||||
pg->draw_arrays_count,
|
||||
pg->draw_arrays_length);
|
||||
} else if (pg->inline_elements_length) {
|
||||
NV2A_GL_DPRINTF(false, "Inline Elements");
|
||||
nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS);
|
||||
assert(pg->inline_buffer_length == 0);
|
||||
assert(pg->inline_array_length == 0);
|
||||
|
||||
uint32_t min_element = (uint32_t)-1;
|
||||
uint32_t max_element = 0;
|
||||
for (int i=0; i < pg->inline_elements_length; i++) {
|
||||
max_element = MAX(pg->inline_elements[i], max_element);
|
||||
min_element = MIN(pg->inline_elements[i], min_element);
|
||||
}
|
||||
|
||||
pgraph_gl_bind_vertex_attributes(
|
||||
d, min_element, max_element, false, 0,
|
||||
pg->inline_elements[pg->inline_elements_length - 1]);
|
||||
|
||||
VertexKey k;
|
||||
memset(&k, 0, sizeof(VertexKey));
|
||||
k.count = pg->inline_elements_length;
|
||||
k.gl_type = GL_UNSIGNED_INT;
|
||||
k.gl_normalize = GL_FALSE;
|
||||
k.stride = sizeof(uint32_t);
|
||||
uint64_t h = fast_hash((uint8_t*)pg->inline_elements,
|
||||
pg->inline_elements_length * 4);
|
||||
|
||||
LruNode *node = lru_lookup(&r->element_cache, h, &k);
|
||||
VertexLruNode *found = container_of(node, VertexLruNode, node);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer);
|
||||
if (!found->initialized) {
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
|
||||
pg->inline_elements_length * 4,
|
||||
pg->inline_elements, GL_STATIC_DRAW);
|
||||
found->initialized = true;
|
||||
} else {
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY);
|
||||
}
|
||||
glDrawElements(r->shader_binding->gl_primitive_mode,
|
||||
pg->inline_elements_length, GL_UNSIGNED_INT,
|
||||
(void *)0);
|
||||
} else if (pg->inline_buffer_length) {
|
||||
NV2A_GL_DPRINTF(false, "Inline Buffer");
|
||||
nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS);
|
||||
assert(pg->inline_array_length == 0);
|
||||
|
||||
if (pg->compressed_attrs) {
|
||||
pg->compressed_attrs = 0;
|
||||
pgraph_gl_bind_shaders(pg);
|
||||
}
|
||||
|
||||
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
|
||||
VertexAttribute *attr = &pg->vertex_attributes[i];
|
||||
if (attr->inline_buffer_populated) {
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_buffer[i]);
|
||||
glBufferData(GL_ARRAY_BUFFER,
|
||||
pg->inline_buffer_length * sizeof(float) * 4,
|
||||
attr->inline_buffer, GL_STREAM_DRAW);
|
||||
glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
|
||||
glEnableVertexAttribArray(i);
|
||||
attr->inline_buffer_populated = false;
|
||||
memcpy(attr->inline_value,
|
||||
attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
|
||||
sizeof(attr->inline_value));
|
||||
} else {
|
||||
glDisableVertexAttribArray(i);
|
||||
glVertexAttrib4fv(i, attr->inline_value);
|
||||
}
|
||||
}
|
||||
|
||||
glDrawArrays(r->shader_binding->gl_primitive_mode,
|
||||
0, pg->inline_buffer_length);
|
||||
} else if (pg->inline_array_length) {
|
||||
NV2A_GL_DPRINTF(false, "Inline Array");
|
||||
nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS);
|
||||
|
||||
unsigned int index_count = pgraph_gl_bind_inline_array(d);
|
||||
glDrawArrays(r->shader_binding->gl_primitive_mode,
|
||||
0, index_count);
|
||||
} else {
|
||||
NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END");
|
||||
NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END");
|
||||
}
|
||||
}
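For the inline-elements path, the index list itself is the cache key: the indices are hashed with fast_hash() and looked up in the element_cache LRU (which presumably recycles its least recently used GL buffer on a miss), so a title that submits the same index list every frame only pays for the glBufferData() upload once.

/* Inline-elements caching, as wired up above:
 *   h    = fast_hash(inline_elements, inline_elements_length * 4)
 *   node = lru_lookup(&r->element_cache, h, &key)
 *   miss -> glBufferData() upload, counted as NV2A_PROF_GEOM_BUFFER_UPDATE_4
 *   hit  -> rebind only, counted as NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY
 */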
|
|
@ -0,0 +1,12 @@
|
|||
specific_ss.add([sdl, gloffscreen, files(
|
||||
'blit.c',
|
||||
'debug.c',
|
||||
'display.c',
|
||||
'draw.c',
|
||||
'renderer.c',
|
||||
'reports.c',
|
||||
'shaders.c',
|
||||
'surface.c',
|
||||
'texture.c',
|
||||
'vertex.c',
|
||||
)])
|
|
@ -0,0 +1,201 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
#include "hw/xbox/nv2a/pgraph/pgraph.h"
|
||||
#include "debug.h"
|
||||
#include "renderer.h"
|
||||
|
||||
GloContext *g_nv2a_context_render;
|
||||
GloContext *g_nv2a_context_display;
|
||||
|
||||
static void nv2a_gl_context_init(void)
|
||||
{
|
||||
g_nv2a_context_render = glo_context_create();
|
||||
g_nv2a_context_display = glo_context_create();
|
||||
}
|
||||
|
||||
static void pgraph_gl_init_thread(NV2AState *d)
|
||||
{
|
||||
glo_set_current(g_nv2a_context_render);
|
||||
}
|
||||
|
||||
static void pgraph_gl_deinit(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
|
||||
glo_set_current(g_nv2a_context_render);
|
||||
|
||||
pgraph_gl_deinit_surfaces(pg);
|
||||
pgraph_gl_deinit_shader_cache(pg);
|
||||
pgraph_gl_deinit_texture_cache(pg);
|
||||
|
||||
glo_set_current(NULL);
|
||||
glo_context_destroy(g_nv2a_context_render);
|
||||
glo_context_destroy(g_nv2a_context_display);
|
||||
}
|
||||
|
||||
static void pgraph_gl_flip_stall(NV2AState *d)
|
||||
{
|
||||
NV2A_GL_DFRAME_TERMINATOR();
|
||||
glFinish();
|
||||
}
|
||||
|
||||
static void pgraph_gl_flush(NV2AState *d)
|
||||
{
|
||||
pgraph_gl_surface_flush(d);
|
||||
pgraph_gl_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
|
||||
pgraph_gl_update_entire_memory_buffer(d);
|
||||
/* FIXME: Flush more? */
|
||||
|
||||
qatomic_set(&d->pgraph.flush_pending, false);
|
||||
qemu_event_set(&d->pgraph.flush_complete);
|
||||
}
|
||||
|
||||
static void pgraph_gl_process_pending(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
if (qatomic_read(&r->downloads_pending) ||
|
||||
qatomic_read(&r->download_dirty_surfaces_pending) ||
|
||||
qatomic_read(&d->pgraph.sync_pending) ||
|
||||
qatomic_read(&d->pgraph.flush_pending) ||
|
||||
qatomic_read(&r->shader_cache_writeback_pending)) {
|
||||
qemu_mutex_unlock(&d->pfifo.lock);
|
||||
qemu_mutex_lock(&d->pgraph.lock);
|
||||
if (qatomic_read(&r->downloads_pending)) {
|
||||
pgraph_gl_process_pending_downloads(d);
|
||||
}
|
||||
if (qatomic_read(&r->download_dirty_surfaces_pending)) {
|
||||
pgraph_gl_download_dirty_surfaces(d);
|
||||
}
|
||||
if (qatomic_read(&d->pgraph.sync_pending)) {
|
||||
pgraph_gl_sync(d);
|
||||
}
|
||||
if (qatomic_read(&d->pgraph.flush_pending)) {
|
||||
pgraph_gl_flush(d);
|
||||
}
|
||||
if (qatomic_read(&r->shader_cache_writeback_pending)) {
|
||||
pgraph_gl_shader_write_cache_reload_list(&d->pgraph);
|
||||
}
|
||||
qemu_mutex_unlock(&d->pgraph.lock);
|
||||
qemu_mutex_lock(&d->pfifo.lock);
|
||||
}
|
||||
}
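Note the lock dance here: the PFIFO puller calls process_pending with pfifo.lock held, so before taking pgraph.lock the function releases pfifo.lock and reacquires it afterwards, presumably to keep the lock order consistent with the threads that take pgraph.lock first and so avoid a lock-order inversion.

/* Caller (PFIFO puller) holds pfifo.lock:
 *   unlock(pfifo.lock) -> lock(pgraph.lock) -> drain pending work
 *   unlock(pgraph.lock) -> lock(pfifo.lock) -> resume command pulling
 */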
|
||||
|
||||
static void pgraph_gl_pre_savevm_trigger(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
qatomic_set(&r->download_dirty_surfaces_pending, true);
|
||||
qemu_event_reset(&r->dirty_surfaces_download_complete);
|
||||
}
|
||||
|
||||
static void pgraph_gl_pre_savevm_wait(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
qemu_event_wait(&r->dirty_surfaces_download_complete);
|
||||
}
|
||||
|
||||
static void pgraph_gl_pre_shutdown_trigger(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
qatomic_set(&r->shader_cache_writeback_pending, true);
|
||||
qemu_event_reset(&r->shader_cache_writeback_complete);
|
||||
}
|
||||
|
||||
static void pgraph_gl_pre_shutdown_wait(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
qemu_event_wait(&r->shader_cache_writeback_complete);
|
||||
}
|
||||
|
||||
static void pgraph_gl_init(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
|
||||
pg->gl_renderer_state = g_malloc(sizeof(PGRAPHGLState));
|
||||
|
||||
/* fire up opengl */
|
||||
glo_set_current(g_nv2a_context_render);
|
||||
|
||||
#ifdef DEBUG_NV2A_GL
|
||||
gl_debug_initialize();
|
||||
#endif
|
||||
|
||||
/* DXT textures */
|
||||
assert(glo_check_extension("GL_EXT_texture_compression_s3tc"));
|
||||
/* Internal RGB565 texture format */
|
||||
assert(glo_check_extension("GL_ARB_ES2_compatibility"));
|
||||
|
||||
pgraph_gl_init_surfaces(pg);
|
||||
pgraph_gl_init_reports(d);
|
||||
pgraph_gl_init_texture_cache(d);
|
||||
pgraph_gl_init_vertex_cache(d);
|
||||
pgraph_gl_init_shader_cache(pg);
|
||||
|
||||
glo_set_current(g_nv2a_context_display);
|
||||
pgraph_gl_init_display_renderer(d);
|
||||
|
||||
glo_set_current(NULL);
|
||||
}
|
||||
|
||||
static PGRAPHRenderer pgraph_gl_renderer = {
|
||||
.type = CONFIG_DISPLAY_RENDERER_OPENGL,
|
||||
.name = "OpenGL",
|
||||
.ops = {
|
||||
.init = pgraph_gl_init,
|
||||
.early_context_init = nv2a_gl_context_init,
|
||||
.init_thread = pgraph_gl_init_thread,
|
||||
.finalize = pgraph_gl_deinit,
|
||||
.clear_report_value = pgraph_gl_clear_report_value,
|
||||
.clear_surface = pgraph_gl_clear_surface,
|
||||
.draw_begin = pgraph_gl_draw_begin,
|
||||
.draw_end = pgraph_gl_draw_end,
|
||||
.flip_stall = pgraph_gl_flip_stall,
|
||||
.flush_draw = pgraph_gl_flush_draw,
|
||||
.get_report = pgraph_gl_get_report,
|
||||
.image_blit = pgraph_gl_image_blit,
|
||||
.pre_savevm_trigger = pgraph_gl_pre_savevm_trigger,
|
||||
.pre_savevm_wait = pgraph_gl_pre_savevm_wait,
|
||||
.pre_shutdown_trigger = pgraph_gl_pre_shutdown_trigger,
|
||||
.pre_shutdown_wait = pgraph_gl_pre_shutdown_wait,
|
||||
.process_pending = pgraph_gl_process_pending,
|
||||
.process_pending_reports = pgraph_gl_process_pending_reports,
|
||||
.surface_update = pgraph_gl_surface_update,
|
||||
.set_surface_scale_factor = pgraph_gl_set_surface_scale_factor,
|
||||
.get_surface_scale_factor = pgraph_gl_get_surface_scale_factor,
|
||||
.get_framebuffer_surface = pgraph_gl_get_framebuffer_surface,
|
||||
}
|
||||
};
|
||||
|
||||
static void __attribute__((constructor)) register_renderer(void)
|
||||
{
|
||||
pgraph_renderer_register(&pgraph_gl_renderer);
|
||||
}
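Each backend registers itself from a constructor function that runs before main(), so the core nv2a code needs no compile-time list of renderers; it can simply pick the backend matching the display.renderer config value at startup. A minimal sketch of what the registry side could look like -- the table layout and size are assumptions, and the real pgraph_renderer_register() is not part of this diff:

/* Hypothetical registry sketch. */
static const PGRAPHRenderer *renderers[16]; /* indexed by CONFIG_DISPLAY_RENDERER_* */

void pgraph_renderer_register(const PGRAPHRenderer *renderer)
{
    assert(renderer->type < ARRAY_SIZE(renderers));
    renderers[renderer->type] = renderer;
}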
|
|
@ -0,0 +1,283 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "qemu/queue.h"
|
||||
#include "qemu/lru.h"
|
||||
|
||||
#include "hw/hw.h"
|
||||
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
#include "hw/xbox/nv2a/nv2a_regs.h"
|
||||
#include "hw/xbox/nv2a/pgraph/surface.h"
|
||||
#include "hw/xbox/nv2a/pgraph/texture.h"
|
||||
#include "hw/xbox/nv2a/pgraph/shaders.h"
|
||||
|
||||
#include "gloffscreen.h"
|
||||
#include "constants.h"
|
||||
|
||||
typedef struct SurfaceBinding {
|
||||
QTAILQ_ENTRY(SurfaceBinding) entry;
|
||||
MemAccessCallback *access_cb;
|
||||
|
||||
hwaddr vram_addr;
|
||||
|
||||
SurfaceShape shape;
|
||||
uintptr_t dma_addr;
|
||||
uintptr_t dma_len;
|
||||
bool color;
|
||||
bool swizzle;
|
||||
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
unsigned int pitch;
|
||||
size_t size;
|
||||
|
||||
bool cleared;
|
||||
int frame_time;
|
||||
int draw_time;
|
||||
bool draw_dirty;
|
||||
bool download_pending;
|
||||
bool upload_pending;
|
||||
|
||||
GLuint gl_buffer;
|
||||
SurfaceFormatInfo fmt;
|
||||
} SurfaceBinding;
|
||||
|
||||
typedef struct TextureBinding {
|
||||
unsigned int refcnt;
|
||||
int draw_time;
|
||||
uint64_t data_hash;
|
||||
unsigned int scale;
|
||||
unsigned int min_filter;
|
||||
unsigned int mag_filter;
|
||||
unsigned int addru;
|
||||
unsigned int addrv;
|
||||
unsigned int addrp;
|
||||
uint32_t border_color;
|
||||
bool border_color_set;
|
||||
GLenum gl_target;
|
||||
GLuint gl_texture;
|
||||
} TextureBinding;
|
||||
|
||||
typedef struct ShaderBinding {
|
||||
GLuint gl_program;
|
||||
GLenum gl_primitive_mode;
|
||||
|
||||
GLint psh_constant_loc[9][2];
|
||||
GLint alpha_ref_loc;
|
||||
|
||||
GLint bump_mat_loc[NV2A_MAX_TEXTURES];
|
||||
GLint bump_scale_loc[NV2A_MAX_TEXTURES];
|
||||
GLint bump_offset_loc[NV2A_MAX_TEXTURES];
|
||||
GLint tex_scale_loc[NV2A_MAX_TEXTURES];
|
||||
|
||||
GLint surface_size_loc;
|
||||
GLint clip_range_loc;
|
||||
|
||||
GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
|
||||
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
|
||||
|
||||
GLint inv_viewport_loc;
|
||||
GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
|
||||
GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
|
||||
GLint ltc1_loc[NV2A_LTC1_COUNT];
|
||||
|
||||
GLint fog_color_loc;
|
||||
GLint fog_param_loc;
|
||||
GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
|
||||
GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS];
|
||||
GLint light_local_position_loc[NV2A_MAX_LIGHTS];
|
||||
GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
|
||||
|
||||
GLint clip_region_loc[8];
|
||||
|
||||
GLint material_alpha_loc;
|
||||
} ShaderBinding;
|
||||
|
||||
typedef struct ShaderLruNode {
|
||||
LruNode node;
|
||||
bool cached;
|
||||
void *program;
|
||||
size_t program_size;
|
||||
GLenum program_format;
|
||||
ShaderState state;
|
||||
ShaderBinding *binding;
|
||||
QemuThread *save_thread;
|
||||
} ShaderLruNode;
|
||||
|
||||
typedef struct VertexKey {
|
||||
size_t count;
|
||||
size_t stride;
|
||||
hwaddr addr;
|
||||
|
||||
GLboolean gl_normalize;
|
||||
GLuint gl_type;
|
||||
} VertexKey;
|
||||
|
||||
typedef struct VertexLruNode {
|
||||
LruNode node;
|
||||
VertexKey key;
|
||||
bool initialized;
|
||||
|
||||
GLuint gl_buffer;
|
||||
} VertexLruNode;
|
||||
|
||||
typedef struct TextureKey {
|
||||
TextureShape state;
|
||||
hwaddr texture_vram_offset;
|
||||
hwaddr texture_length;
|
||||
hwaddr palette_vram_offset;
|
||||
hwaddr palette_length;
|
||||
} TextureKey;
|
||||
|
||||
typedef struct TextureLruNode {
|
||||
LruNode node;
|
||||
TextureKey key;
|
||||
TextureBinding *binding;
|
||||
bool possibly_dirty;
|
||||
} TextureLruNode;
|
||||
|
||||
typedef struct QueryReport {
|
||||
QSIMPLEQ_ENTRY(QueryReport) entry;
|
||||
bool clear;
|
||||
uint32_t parameter;
|
||||
unsigned int query_count;
|
||||
GLuint *queries;
|
||||
} QueryReport;
|
||||
|
||||
typedef struct PGRAPHGLState {
|
||||
GLuint gl_framebuffer;
|
||||
GLuint gl_display_buffer;
|
||||
GLint gl_display_buffer_internal_format;
|
||||
GLsizei gl_display_buffer_width;
|
||||
GLsizei gl_display_buffer_height;
|
||||
GLenum gl_display_buffer_format;
|
||||
GLenum gl_display_buffer_type;
|
||||
|
||||
Lru element_cache;
|
||||
VertexLruNode *element_cache_entries;
|
||||
GLuint gl_inline_array_buffer;
|
||||
GLuint gl_memory_buffer;
|
||||
GLuint gl_vertex_array;
|
||||
GLuint gl_inline_buffer[NV2A_VERTEXSHADER_ATTRIBUTES];
|
||||
|
||||
QTAILQ_HEAD(, SurfaceBinding) surfaces;
|
||||
SurfaceBinding *color_binding, *zeta_binding;
|
||||
bool downloads_pending;
|
||||
QemuEvent downloads_complete;
|
||||
bool download_dirty_surfaces_pending;
|
||||
QemuEvent dirty_surfaces_download_complete; // common
|
||||
|
||||
TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
|
||||
Lru texture_cache;
|
||||
TextureLruNode *texture_cache_entries;
|
||||
|
||||
Lru shader_cache;
|
||||
ShaderLruNode *shader_cache_entries;
|
||||
ShaderBinding *shader_binding;
|
||||
QemuMutex shader_cache_lock;
|
||||
QemuThread shader_disk_thread;
|
||||
|
||||
unsigned int zpass_pixel_count_result;
|
||||
unsigned int gl_zpass_pixel_count_query_count;
|
||||
GLuint *gl_zpass_pixel_count_queries;
|
||||
QSIMPLEQ_HEAD(, QueryReport) report_queue;
|
||||
|
||||
bool shader_cache_writeback_pending;
|
||||
QemuEvent shader_cache_writeback_complete;
|
||||
|
||||
struct s2t_rndr {
|
||||
GLuint fbo, vao, vbo, prog;
|
||||
GLuint tex_loc, surface_size_loc;
|
||||
} s2t_rndr;
|
||||
|
||||
struct disp_rndr {
|
||||
GLuint fbo, vao, vbo, prog;
|
||||
GLuint display_size_loc;
|
||||
GLuint line_offset_loc;
|
||||
GLuint tex_loc;
|
||||
GLuint pvideo_tex;
|
||||
GLint pvideo_enable_loc;
|
||||
GLint pvideo_tex_loc;
|
||||
GLint pvideo_in_pos_loc;
|
||||
GLint pvideo_pos_loc;
|
||||
GLint pvideo_scale_loc;
|
||||
GLint pvideo_color_key_enable_loc;
|
||||
GLint pvideo_color_key_loc;
|
||||
GLint palette_loc[256];
|
||||
} disp_rndr;
|
||||
} PGRAPHGLState;
|
||||
|
||||
extern GloContext *g_nv2a_context_render;
|
||||
extern GloContext *g_nv2a_context_display;
|
||||
|
||||
unsigned int pgraph_gl_bind_inline_array(NV2AState *d);
|
||||
void pgraph_gl_bind_shaders(PGRAPHState *pg);
|
||||
void pgraph_gl_bind_textures(NV2AState *d);
|
||||
void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element);
|
||||
bool pgraph_gl_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape);
|
||||
GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src);
|
||||
void pgraph_gl_deinit_shader_cache(PGRAPHState *pg);
|
||||
void pgraph_gl_deinit_surfaces(PGRAPHState *pg);
|
||||
void pgraph_gl_deinit_texture_cache(PGRAPHState *pg);
|
||||
void pgraph_gl_download_dirty_surfaces(NV2AState *d);
|
||||
void pgraph_gl_clear_report_value(NV2AState *d);
|
||||
void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter);
|
||||
void pgraph_gl_draw_begin(NV2AState *d);
|
||||
void pgraph_gl_draw_end(NV2AState *d);
|
||||
void pgraph_gl_flush_draw(NV2AState *d);
|
||||
void pgraph_gl_get_report(NV2AState *d, uint32_t parameter);
|
||||
void pgraph_gl_image_blit(NV2AState *d);
|
||||
void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size);
|
||||
void pgraph_gl_process_pending_reports(NV2AState *d);
|
||||
void pgraph_gl_surface_flush(NV2AState *d);
|
||||
void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write);
|
||||
void pgraph_gl_sync(NV2AState *d);
|
||||
void pgraph_gl_update_entire_memory_buffer(NV2AState *d);
|
||||
void pgraph_gl_init_display_renderer(NV2AState *d);
|
||||
void pgraph_gl_init_reports(NV2AState *d);
|
||||
void pgraph_gl_init_shader_cache(PGRAPHState *pg);
|
||||
void pgraph_gl_init_surfaces(PGRAPHState *pg);
|
||||
void pgraph_gl_init_texture_cache(NV2AState *d);
|
||||
void pgraph_gl_init_vertex_cache(NV2AState *d);
|
||||
void pgraph_gl_process_pending_downloads(NV2AState *d);
|
||||
void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg);
|
||||
void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit);
|
||||
void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta);
|
||||
void pgraph_gl_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface);
|
||||
SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr);
|
||||
SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr);
|
||||
void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *e);
|
||||
void pgraph_gl_unbind_surface(NV2AState *d, bool color);
|
||||
void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force);
|
||||
void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode);
|
||||
bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode);
|
||||
void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg);
|
||||
void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale);
|
||||
unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d);
|
||||
int pgraph_gl_get_framebuffer_surface(NV2AState *d);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <hw/xbox/nv2a/nv2a_int.h>
|
||||
#include "renderer.h"
|
||||
|
||||
static void process_pending_report(NV2AState *d, QueryReport *report)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
if (report->clear) {
|
||||
r->zpass_pixel_count_result = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t type = GET_MASK(report->parameter, NV097_GET_REPORT_TYPE);
|
||||
assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
|
||||
|
||||
/* FIXME: Multisampling affects this (both: OGL and Xbox GPU),
|
||||
* not sure if CLEARs also count
|
||||
*/
|
||||
/* FIXME: What about clipping regions etc? */
|
||||
for (int i = 0; i < report->query_count; i++) {
|
||||
GLuint gl_query_result = 0;
|
||||
glGetQueryObjectuiv(report->queries[i], GL_QUERY_RESULT, &gl_query_result);
|
||||
gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor;
|
||||
r->zpass_pixel_count_result += gl_query_result;
|
||||
}
|
||||
|
||||
if (report->query_count) {
|
||||
glDeleteQueries(report->query_count, report->queries);
|
||||
g_free(report->queries);
|
||||
}
|
||||
|
||||
pgraph_write_zpass_pixel_cnt_report(d, report->parameter, r->zpass_pixel_count_result);
|
||||
}
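/* Example: with surface_scale_factor == 2 every guest pixel is rendered as a
 * 2x2 block, so a GL query result of 400 samples is reported back to the
 * guest as 400 / (2 * 2) = 100 zpass pixels. */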
|
||||
|
||||
void pgraph_gl_process_pending_reports(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
QueryReport *report, *next;
|
||||
|
||||
QSIMPLEQ_FOREACH_SAFE(report, &r->report_queue, entry, next) {
|
||||
process_pending_report(d, report);
|
||||
QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry);
|
||||
g_free(report);
|
||||
}
|
||||
}
|
||||
|
||||
void pgraph_gl_clear_report_value(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
/* FIXME: Does this have a value in parameter? Also does this (also?) modify
|
||||
* the report memory block?
|
||||
*/
|
||||
if (r->gl_zpass_pixel_count_query_count) {
|
||||
glDeleteQueries(r->gl_zpass_pixel_count_query_count,
|
||||
r->gl_zpass_pixel_count_queries);
|
||||
r->gl_zpass_pixel_count_query_count = 0;
|
||||
}
|
||||
|
||||
QueryReport *report = g_malloc(sizeof(QueryReport));
|
||||
report->clear = true;
|
||||
QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry);
|
||||
}
|
||||
|
||||
void pgraph_gl_init_reports(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
QSIMPLEQ_INIT(&r->report_queue);
|
||||
}
|
||||
|
||||
void pgraph_gl_get_report(NV2AState *d, uint32_t parameter)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
QueryReport *report = g_malloc(sizeof(QueryReport));
|
||||
report->clear = false;
|
||||
report->parameter = parameter;
|
||||
report->query_count = r->gl_zpass_pixel_count_query_count;
|
||||
report->queries = r->gl_zpass_pixel_count_queries;
|
||||
QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry);
|
||||
|
||||
r->gl_zpass_pixel_count_query_count = 0;
|
||||
r->gl_zpass_pixel_count_queries = NULL;
|
||||
}
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@ -0,0 +1,819 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/fast-hash.h"
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
#include "hw/xbox/nv2a/pgraph/swizzle.h"
|
||||
#include "hw/xbox/nv2a/pgraph/s3tc.h"
|
||||
#include "hw/xbox/nv2a/pgraph/texture.h"
|
||||
#include "debug.h"
|
||||
#include "renderer.h"
|
||||
|
||||
static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
|
||||
static void texture_binding_destroy(gpointer data);
|
||||
|
||||
struct pgraph_texture_possibly_dirty_struct {
|
||||
hwaddr addr, end;
|
||||
};
|
||||
|
||||
static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque)
|
||||
{
|
||||
struct pgraph_texture_possibly_dirty_struct *test =
|
||||
(struct pgraph_texture_possibly_dirty_struct *)opaque;
|
||||
|
||||
struct TextureLruNode *tnode = container_of(node, TextureLruNode, node);
|
||||
if (tnode->binding == NULL || tnode->possibly_dirty) {
|
||||
return;
|
||||
}
|
||||
|
||||
uintptr_t k_tex_addr = tnode->key.texture_vram_offset;
|
||||
uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1;
|
||||
bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end);
|
||||
|
||||
if (tnode->key.palette_length > 0) {
|
||||
uintptr_t k_pal_addr = tnode->key.palette_vram_offset;
|
||||
uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1;
|
||||
overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end);
|
||||
}
|
||||
|
||||
tnode->possibly_dirty |= overlapping;
|
||||
}
|
||||
|
||||
void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d,
|
||||
hwaddr addr, hwaddr size)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1;
|
||||
addr &= TARGET_PAGE_MASK;
|
||||
assert(end <= memory_region_size(d->vram));
|
||||
|
||||
struct pgraph_texture_possibly_dirty_struct test = {
|
||||
.addr = addr,
|
||||
.end = end,
|
||||
};
|
||||
|
||||
lru_visit_active(&r->texture_cache,
|
||||
mark_textures_possibly_dirty_visitor,
|
||||
&test);
|
||||
}
|
||||
|
||||
static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size)
|
||||
{
|
||||
hwaddr end = TARGET_PAGE_ALIGN(addr + size);
|
||||
addr &= TARGET_PAGE_MASK;
|
||||
assert(end < memory_region_size(d->vram));
|
||||
return memory_region_test_and_clear_dirty(d->vram, addr, end - addr,
|
||||
DIRTY_MEMORY_NV2A_TEX);
|
||||
}
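/* Example (assuming 4 KiB target pages): addr = 0x10234, size = 0x10 expands
 * to the page-aligned range [0x10000, 0x11000), so a CPU write anywhere in
 * that page makes the dirty check return true. */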
|
||||
|
||||
// Check if any of the pages spanned by a texture are dirty.
|
||||
static bool check_texture_possibly_dirty(NV2AState *d,
|
||||
hwaddr texture_vram_offset,
|
||||
unsigned int length,
|
||||
hwaddr palette_vram_offset,
|
||||
unsigned int palette_length)
|
||||
{
|
||||
bool possibly_dirty = false;
|
||||
if (check_texture_dirty(d, texture_vram_offset, length)) {
|
||||
possibly_dirty = true;
|
||||
pgraph_gl_mark_textures_possibly_dirty(d, texture_vram_offset, length);
|
||||
}
|
||||
if (palette_length && check_texture_dirty(d, palette_vram_offset,
|
||||
palette_length)) {
|
||||
possibly_dirty = true;
|
||||
pgraph_gl_mark_textures_possibly_dirty(d, palette_vram_offset,
|
||||
palette_length);
|
||||
}
|
||||
return possibly_dirty;
|
||||
}
|
||||
|
||||
static void apply_texture_parameters(TextureBinding *binding,
|
||||
const BasicColorFormatInfo *f,
|
||||
unsigned int dimensionality,
|
||||
unsigned int filter,
|
||||
unsigned int address,
|
||||
bool is_bordered,
|
||||
uint32_t border_color)
|
||||
{
|
||||
unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
|
||||
unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG);
|
||||
unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU);
|
||||
unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV);
|
||||
unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP);
|
||||
|
||||
if (f->linear) {
|
||||
/* Sometimes games try to set mipmap min filters on linear textures.
 * This could indicate a bug... */
|
||||
switch (min_filter) {
|
||||
case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD:
|
||||
case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD:
|
||||
min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0;
|
||||
break;
|
||||
case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD:
|
||||
case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD:
|
||||
min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (min_filter != binding->min_filter) {
|
||||
glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER,
|
||||
pgraph_texture_min_filter_gl_map[min_filter]);
|
||||
binding->min_filter = min_filter;
|
||||
}
|
||||
if (mag_filter != binding->mag_filter) {
|
||||
glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER,
|
||||
pgraph_texture_mag_filter_gl_map[mag_filter]);
|
||||
binding->mag_filter = mag_filter;
|
||||
}
|
||||
|
||||
/* Texture wrapping */
|
||||
assert(addru < ARRAY_SIZE(pgraph_texture_addr_gl_map));
|
||||
if (addru != binding->addru) {
|
||||
glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S,
|
||||
pgraph_texture_addr_gl_map[addru]);
|
||||
binding->addru = addru;
|
||||
}
|
||||
bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
|
||||
if (dimensionality > 1) {
|
||||
if (addrv != binding->addrv) {
|
||||
assert(addrv < ARRAY_SIZE(pgraph_texture_addr_gl_map));
|
||||
glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T,
|
||||
pgraph_texture_addr_gl_map[addrv]);
|
||||
binding->addrv = addrv;
|
||||
}
|
||||
needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
|
||||
}
|
||||
if (dimensionality > 2) {
|
||||
if (addrp != binding->addrp) {
|
||||
assert(addrp < ARRAY_SIZE(pgraph_texture_addr_gl_map));
|
||||
glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R,
|
||||
pgraph_texture_addr_gl_map[addrp]);
|
||||
binding->addrp = addrp;
|
||||
}
|
||||
needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER;
|
||||
}
|
||||
|
||||
if (!is_bordered && needs_border_color) {
|
||||
if (!binding->border_color_set || binding->border_color != border_color) {
|
||||
/* FIXME: Color channels might be wrong order */
|
||||
GLfloat gl_border_color[4];
|
||||
pgraph_argb_pack32_to_rgba_float(border_color, gl_border_color);
|
||||
glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR,
|
||||
gl_border_color);
|
||||
|
||||
binding->border_color_set = true;
|
||||
binding->border_color = border_color;
|
||||
}
|
||||
}
|
||||
}
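/* The binding caches the last applied min/mag filters, wrap modes and border
 * color, so re-binding an unchanged texture issues no redundant
 * glTexParameter calls. */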
|
||||
|
||||
void pgraph_gl_bind_textures(NV2AState *d)
|
||||
{
|
||||
int i;
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
NV2A_GL_DGROUP_BEGIN("%s", __func__);
|
||||
|
||||
for (i=0; i<NV2A_MAX_TEXTURES; i++) {
|
||||
bool enabled = pgraph_is_texture_enabled(pg, i);
|
||||
/* FIXME: What happens if texture is disabled but stage is active? */
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + i);
|
||||
if (!enabled) {
|
||||
glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
|
||||
glBindTexture(GL_TEXTURE_RECTANGLE, 0);
|
||||
glBindTexture(GL_TEXTURE_1D, 0);
|
||||
glBindTexture(GL_TEXTURE_2D, 0);
|
||||
glBindTexture(GL_TEXTURE_3D, 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4);
|
||||
uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4);
|
||||
uint32_t border_color = pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + i*4);
|
||||
|
||||
/* Check for unsupported features */
|
||||
if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED");
|
||||
if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED");
|
||||
if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED");
|
||||
if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED");
|
||||
|
||||
TextureShape state = pgraph_get_texture_shape(pg, i);
|
||||
hwaddr texture_vram_offset, palette_vram_offset;
|
||||
size_t length, palette_length;
|
||||
|
||||
length = pgraph_get_texture_length(pg, &state);
|
||||
texture_vram_offset = pgraph_get_texture_phys_addr(pg, i);
|
||||
palette_vram_offset = pgraph_get_texture_palette_phys_addr_length(pg, i, &palette_length);
|
||||
|
||||
assert((texture_vram_offset + length) < memory_region_size(d->vram));
|
||||
assert((palette_vram_offset + palette_length)
|
||||
< memory_region_size(d->vram));
|
||||
bool is_indexed = (state.color_format ==
|
||||
NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8);
|
||||
bool possibly_dirty = false;
|
||||
bool possibly_dirty_checked = false;
|
||||
|
||||
SurfaceBinding *surface = pgraph_gl_surface_get(d, texture_vram_offset);
|
||||
TextureBinding *tbind = r->texture_binding[i];
|
||||
if (!pg->texture_dirty[i] && tbind) {
|
||||
bool reusable = false;
|
||||
if (surface && tbind->draw_time == surface->draw_time) {
|
||||
reusable = true;
|
||||
} else if (!surface) {
|
||||
possibly_dirty = check_texture_possibly_dirty(
|
||||
d,
|
||||
texture_vram_offset,
|
||||
length,
|
||||
palette_vram_offset,
|
||||
is_indexed ? palette_length : 0);
|
||||
possibly_dirty_checked = true;
|
||||
reusable = !possibly_dirty;
|
||||
}
|
||||
|
||||
if (reusable) {
|
||||
glBindTexture(r->texture_binding[i]->gl_target,
|
||||
r->texture_binding[i]->gl_texture);
|
||||
apply_texture_parameters(r->texture_binding[i],
|
||||
&kelvin_color_format_info_map[state.color_format],
|
||||
state.dimensionality,
|
||||
filter,
|
||||
address,
|
||||
state.border,
|
||||
border_color);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check active surfaces to see if this texture was a render target
|
||||
*/
|
||||
bool surf_to_tex = false;
|
||||
if (surface != NULL) {
|
||||
surf_to_tex = pgraph_gl_check_surface_to_texture_compatibility(
|
||||
surface, &state);
|
||||
|
||||
if (surf_to_tex && surface->upload_pending) {
|
||||
pgraph_gl_upload_surface_data(d, surface, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (!surf_to_tex) {
|
||||
// FIXME: Restructure to support rendering surfaces to cubemap faces
|
||||
|
||||
// Write back any surfaces which this texture may index
|
||||
hwaddr tex_vram_end = texture_vram_offset + length - 1;
|
||||
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
|
||||
hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
|
||||
bool overlapping = !(surface->vram_addr >= tex_vram_end
|
||||
|| texture_vram_offset >= surf_vram_end);
|
||||
if (overlapping) {
|
||||
pgraph_gl_surface_download_if_dirty(d, surface);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TextureKey key;
|
||||
memset(&key, 0, sizeof(TextureKey));
|
||||
key.state = state;
|
||||
key.texture_vram_offset = texture_vram_offset;
|
||||
key.texture_length = length;
|
||||
if (is_indexed) {
|
||||
key.palette_vram_offset = palette_vram_offset;
|
||||
key.palette_length = palette_length;
|
||||
}
|
||||
|
||||
// Search for existing texture binding in cache
|
||||
uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key));
|
||||
LruNode *found = lru_lookup(&r->texture_cache,
|
||||
tex_binding_hash, &key);
|
||||
TextureLruNode *key_out = container_of(found, TextureLruNode, node);
|
||||
possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty;
|
||||
|
||||
if (!surf_to_tex && !possibly_dirty_checked) {
|
||||
possibly_dirty |= check_texture_possibly_dirty(
|
||||
d,
|
||||
texture_vram_offset,
|
||||
length,
|
||||
palette_vram_offset,
|
||||
is_indexed ? palette_length : 0);
|
||||
}
|
||||
|
||||
// Calculate hash of texture data, if necessary
|
||||
void *texture_data = (char*)d->vram_ptr + texture_vram_offset;
|
||||
void *palette_data = (char*)d->vram_ptr + palette_vram_offset;
|
||||
|
||||
uint64_t tex_data_hash = 0;
|
||||
if (!surf_to_tex && possibly_dirty) {
|
||||
tex_data_hash = fast_hash(texture_data, length);
|
||||
if (is_indexed) {
|
||||
tex_data_hash ^= fast_hash(palette_data, palette_length);
|
||||
}
|
||||
}
|
||||
|
||||
// Free existing binding, if texture data has changed
|
||||
bool must_destroy = (key_out->binding != NULL)
|
||||
&& possibly_dirty
|
||||
&& (key_out->binding->data_hash != tex_data_hash);
|
||||
if (must_destroy) {
|
||||
texture_binding_destroy(key_out->binding);
|
||||
key_out->binding = NULL;
|
||||
}
|
||||
|
||||
if (key_out->binding == NULL) {
|
||||
// Must create the texture
|
||||
key_out->binding = generate_texture(state, texture_data, palette_data);
|
||||
key_out->binding->data_hash = tex_data_hash;
|
||||
key_out->binding->scale = 1;
|
||||
} else {
|
||||
// Saved an upload! Reuse existing texture in graphics memory.
|
||||
glBindTexture(key_out->binding->gl_target,
|
||||
key_out->binding->gl_texture);
|
||||
}
|
||||
|
||||
key_out->possibly_dirty = false;
|
||||
TextureBinding *binding = key_out->binding;
|
||||
binding->refcnt++;
|
||||
|
||||
if (surf_to_tex && binding->draw_time < surface->draw_time) {
|
||||
|
||||
trace_nv2a_pgraph_surface_render_to_texture(
|
||||
surface->vram_addr, surface->width, surface->height);
|
||||
pgraph_gl_render_surface_to_texture(d, surface, binding, &state, i);
|
||||
binding->draw_time = surface->draw_time;
|
||||
if (binding->gl_target == GL_TEXTURE_RECTANGLE) {
|
||||
binding->scale = pg->surface_scale_factor;
|
||||
} else {
|
||||
binding->scale = 1;
|
||||
}
|
||||
}
|
||||
|
||||
apply_texture_parameters(binding,
|
||||
&kelvin_color_format_info_map[state.color_format],
|
||||
state.dimensionality,
|
||||
filter,
|
||||
address,
|
||||
state.border,
|
||||
border_color);
|
||||
|
||||
if (r->texture_binding[i]) {
|
||||
if (r->texture_binding[i]->gl_target != binding->gl_target) {
|
||||
glBindTexture(r->texture_binding[i]->gl_target, 0);
|
||||
}
|
||||
texture_binding_destroy(r->texture_binding[i]);
|
||||
}
|
||||
r->texture_binding[i] = binding;
|
||||
pg->texture_dirty[i] = false;
|
||||
}
|
||||
NV2A_GL_DGROUP_END();
|
||||
}
|
||||
|
||||
static enum S3TC_DECOMPRESS_FORMAT
|
||||
gl_internal_format_to_s3tc_enum(GLint gl_internal_format)
|
||||
{
|
||||
switch (gl_internal_format) {
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
|
||||
return S3TC_DECOMPRESS_FORMAT_DXT1;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
|
||||
return S3TC_DECOMPRESS_FORMAT_DXT3;
|
||||
case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
|
||||
return S3TC_DECOMPRESS_FORMAT_DXT5;
|
||||
default:
|
||||
assert(!"Invalid format");
|
||||
}
|
||||
}
|
||||
|
||||
static void upload_gl_texture(GLenum gl_target,
|
||||
const TextureShape s,
|
||||
const uint8_t *texture_data,
|
||||
const uint8_t *palette_data)
|
||||
{
|
||||
ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
|
||||
nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD);
|
||||
|
||||
unsigned int adjusted_width = s.width;
|
||||
unsigned int adjusted_height = s.height;
|
||||
unsigned int adjusted_pitch = s.pitch;
|
||||
unsigned int adjusted_depth = s.depth;
|
||||
if (!f.linear && s.border) {
|
||||
adjusted_width = MAX(16, adjusted_width * 2);
|
||||
adjusted_height = MAX(16, adjusted_height * 2);
|
||||
adjusted_pitch = adjusted_width * (s.pitch / s.width);
|
||||
adjusted_depth = MAX(16, s.depth * 2);
|
||||
}
|
||||
|
||||
switch(gl_target) {
|
||||
case GL_TEXTURE_1D:
|
||||
assert(false);
|
||||
break;
|
||||
case GL_TEXTURE_RECTANGLE: {
|
||||
/* Can't handle strides unaligned to pixels */
|
||||
assert(s.pitch % f.bytes_per_pixel == 0);
|
||||
|
||||
uint8_t *converted = pgraph_convert_texture_data(
|
||||
s, texture_data, palette_data, adjusted_width, adjusted_height, 1,
|
||||
adjusted_pitch, 0, NULL);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH,
|
||||
converted ? 0 : adjusted_pitch / f.bytes_per_pixel);
|
||||
glTexImage2D(gl_target, 0, f.gl_internal_format,
|
||||
adjusted_width, adjusted_height, 0,
|
||||
f.gl_format, f.gl_type,
|
||||
converted ? converted : texture_data);
|
||||
|
||||
if (converted) {
|
||||
g_free(converted);
|
||||
}
|
||||
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
break;
|
||||
}
|
||||
case GL_TEXTURE_2D:
|
||||
case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
|
||||
case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
|
||||
case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
|
||||
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
|
||||
case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
|
||||
case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
|
||||
|
||||
unsigned int width = adjusted_width, height = adjusted_height;
|
||||
|
||||
int level;
|
||||
for (level = 0; level < s.levels; level++) {
|
||||
width = MAX(width, 1);
|
||||
height = MAX(height, 1);
|
||||
|
||||
if (f.gl_format == 0) { /* compressed */
|
||||
// https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size
|
||||
unsigned int block_size =
|
||||
f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ?
|
||||
8 : 16;
|
||||
unsigned int physical_width = (width + 3) & ~3,
|
||||
physical_height = (height + 3) & ~3;
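/* Example: a 62x30 DXT1 mip level has a 64x32 physical size, i.e.
 * (64/4) * (32/4) = 128 blocks of 8 bytes = 1024 bytes of source data. */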
|
||||
if (physical_width != width) {
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width);
|
||||
}
|
||||
uint8_t *converted = s3tc_decompress_2d(
|
||||
gl_internal_format_to_s3tc_enum(f.gl_internal_format),
|
||||
texture_data, physical_width, physical_height);
|
||||
unsigned int tex_width = width;
|
||||
unsigned int tex_height = height;
|
||||
|
||||
if (s.cubemap && adjusted_width != s.width) {
|
||||
// FIXME: Consider preserving the border.
|
||||
// There does not seem to be a way to reference the border
|
||||
// texels in a cubemap, so they are discarded.
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4);
|
||||
glPixelStorei(GL_UNPACK_SKIP_ROWS, 4);
|
||||
tex_width = s.width;
|
||||
tex_height = s.height;
|
||||
if (physical_width == width) {
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
|
||||
}
|
||||
}
|
||||
|
||||
glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0,
|
||||
GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, converted);
|
||||
g_free(converted);
|
||||
if (physical_width != width) {
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
}
|
||||
if (s.cubemap && adjusted_width != s.width) {
|
||||
glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0);
|
||||
glPixelStorei(GL_UNPACK_SKIP_ROWS, 0);
|
||||
if (physical_width == width) {
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
}
|
||||
}
|
||||
texture_data +=
|
||||
physical_width / 4 * physical_height / 4 * block_size;
|
||||
} else {
|
||||
unsigned int pitch = width * f.bytes_per_pixel;
|
||||
uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch);
|
||||
unswizzle_rect(texture_data, width, height,
|
||||
unswizzled, pitch, f.bytes_per_pixel);
|
||||
uint8_t *converted = pgraph_convert_texture_data(
|
||||
s, unswizzled, palette_data, width, height, 1, pitch, 0,
|
||||
NULL);
|
||||
uint8_t *pixel_data = converted ? converted : unswizzled;
|
||||
unsigned int tex_width = width;
|
||||
unsigned int tex_height = height;
|
||||
|
||||
if (s.cubemap && adjusted_width != s.width) {
|
||||
// FIXME: Consider preserving the border.
|
||||
// There does not seem to be a way to reference the border
|
||||
// texels in a cubemap, so they are discarded.
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width);
|
||||
tex_width = s.width;
|
||||
tex_height = s.height;
|
||||
pixel_data += 4 * f.bytes_per_pixel + 4 * pitch;
|
||||
}
|
||||
|
||||
glTexImage2D(gl_target, level, f.gl_internal_format, tex_width,
|
||||
tex_height, 0, f.gl_format, f.gl_type,
|
||||
pixel_data);
|
||||
if (s.cubemap && s.border) {
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
}
|
||||
if (converted) {
|
||||
g_free(converted);
|
||||
}
|
||||
g_free(unswizzled);
|
||||
|
||||
texture_data += width * height * f.bytes_per_pixel;
|
||||
}
|
||||
|
||||
width /= 2;
|
||||
height /= 2;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case GL_TEXTURE_3D: {
|
||||
|
||||
unsigned int width = adjusted_width;
|
||||
unsigned int height = adjusted_height;
|
||||
unsigned int depth = adjusted_depth;
|
||||
|
||||
assert(f.linear == false);
|
||||
|
||||
int level;
|
||||
for (level = 0; level < s.levels; level++) {
|
||||
if (f.gl_format == 0) { /* compressed */
|
||||
assert(width % 4 == 0 && height % 4 == 0 &&
|
||||
"Compressed 3D texture virtual size");
|
||||
width = MAX(width, 4);
|
||||
height = MAX(height, 4);
|
||||
depth = MAX(depth, 1);
|
||||
|
||||
unsigned int block_size;
|
||||
if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
|
||||
block_size = 8;
|
||||
} else {
|
||||
block_size = 16;
|
||||
}
|
||||
|
||||
size_t texture_size = width/4 * height/4 * depth * block_size;
|
||||
|
||||
uint8_t *converted = s3tc_decompress_3d(
|
||||
gl_internal_format_to_s3tc_enum(f.gl_internal_format),
|
||||
texture_data, width, height, depth);
|
||||
|
||||
glTexImage3D(gl_target, level, GL_RGBA8,
|
||||
width, height, depth, 0,
|
||||
GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
converted);
|
||||
|
||||
g_free(converted);
|
||||
|
||||
texture_data += texture_size;
|
||||
} else {
|
||||
width = MAX(width, 1);
|
||||
height = MAX(height, 1);
|
||||
depth = MAX(depth, 1);
|
||||
|
||||
unsigned int row_pitch = width * f.bytes_per_pixel;
|
||||
unsigned int slice_pitch = row_pitch * height;
|
||||
uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth);
|
||||
unswizzle_box(texture_data, width, height, depth, unswizzled,
|
||||
row_pitch, slice_pitch, f.bytes_per_pixel);
|
||||
|
||||
uint8_t *converted = pgraph_convert_texture_data(
|
||||
s, unswizzled, palette_data, width, height, depth,
|
||||
row_pitch, slice_pitch, NULL);
|
||||
|
||||
glTexImage3D(gl_target, level, f.gl_internal_format,
|
||||
width, height, depth, 0,
|
||||
f.gl_format, f.gl_type,
|
||||
converted ? converted : unswizzled);
|
||||
|
||||
if (converted) {
|
||||
g_free(converted);
|
||||
}
|
||||
g_free(unswizzled);
|
||||
|
||||
texture_data += width * height * depth * f.bytes_per_pixel;
|
||||
}
|
||||
|
||||
width /= 2;
|
||||
height /= 2;
|
||||
depth /= 2;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static TextureBinding* generate_texture(const TextureShape s,
|
||||
const uint8_t *texture_data,
|
||||
const uint8_t *palette_data)
|
||||
{
|
||||
ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
|
||||
|
||||
/* Create a new opengl texture */
|
||||
GLuint gl_texture;
|
||||
glGenTextures(1, &gl_texture);
|
||||
|
||||
GLenum gl_target;
|
||||
if (s.cubemap) {
|
||||
assert(f.linear == false);
|
||||
assert(s.dimensionality == 2);
|
||||
gl_target = GL_TEXTURE_CUBE_MAP;
|
||||
} else {
|
||||
if (f.linear) {
|
||||
/* linear textures use unnormalised texcoords.
|
||||
* GL_TEXTURE_RECTANGLE_ARB conveniently also does, but
|
||||
* does not allow repeat and mirror wrap modes.
|
||||
* (or mipmapping, but xbox d3d says 'Non swizzled and non
|
||||
* compressed textures cannot be mip mapped.')
|
||||
* Not sure if that'll be an issue. */
|
||||
|
||||
/* FIXME: GLSL 330 provides us with textureSize()! Use that? */
|
||||
gl_target = GL_TEXTURE_RECTANGLE;
|
||||
assert(s.dimensionality == 2);
|
||||
} else {
|
||||
switch(s.dimensionality) {
|
||||
case 1: gl_target = GL_TEXTURE_1D; break;
|
||||
case 2: gl_target = GL_TEXTURE_2D; break;
|
||||
case 3: gl_target = GL_TEXTURE_3D; break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
glBindTexture(gl_target, gl_texture);
|
||||
|
||||
NV2A_GL_DLABEL(GL_TEXTURE, gl_texture,
|
||||
"offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, "
|
||||
"width: %d, height: %d, depth: %d",
|
||||
texture_data - g_nv2a->vram_ptr,
|
||||
s.color_format, f.linear ? "" : " (SZ)",
|
||||
s.dimensionality, s.cubemap ? " (Cubemap)" : "",
|
||||
s.width, s.height, s.depth);
|
||||
|
||||
if (gl_target == GL_TEXTURE_CUBE_MAP) {
|
||||
|
||||
ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format];
|
||||
unsigned int block_size;
|
||||
if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
|
||||
block_size = 8;
|
||||
} else {
|
||||
block_size = 16;
|
||||
}
|
||||
|
||||
size_t length = 0;
|
||||
unsigned int w = s.width;
|
||||
unsigned int h = s.height;
|
||||
if (!f.linear && s.border) {
|
||||
w = MAX(16, w * 2);
|
||||
h = MAX(16, h * 2);
|
||||
}
|
||||
|
||||
int level;
|
||||
for (level = 0; level < s.levels; level++) {
|
||||
if (f.gl_format == 0) {
|
||||
length += w/4 * h/4 * block_size;
|
||||
} else {
|
||||
length += w * h * f.bytes_per_pixel;
|
||||
}
|
||||
|
||||
w /= 2;
|
||||
h /= 2;
|
||||
}
|
||||
|
||||
length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
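/* Example: a 4-level 32x32 DXT1 face sums to 512 + 128 + 32 + 8 = 680 bytes,
 * which is then rounded up to the NV2A_CUBEMAP_FACE_ALIGNMENT boundary and
 * used as the per-face stride below. */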
|
||||
|
||||
upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X,
|
||||
s, texture_data + 0 * length, palette_data);
|
||||
upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
|
||||
s, texture_data + 1 * length, palette_data);
|
||||
upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
|
||||
s, texture_data + 2 * length, palette_data);
|
||||
upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
|
||||
s, texture_data + 3 * length, palette_data);
|
||||
upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
|
||||
s, texture_data + 4 * length, palette_data);
|
||||
upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
|
||||
s, texture_data + 5 * length, palette_data);
|
||||
} else {
|
||||
upload_gl_texture(gl_target, s, texture_data, palette_data);
|
||||
}
|
||||
|
||||
/* Linear textures don't support mipmapping */
|
||||
if (!f.linear) {
|
||||
glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL,
|
||||
s.min_mipmap_level);
|
||||
glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL,
|
||||
s.levels - 1);
|
||||
}
|
||||
|
||||
if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0
|
||||
|| f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) {
|
||||
glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA,
|
||||
(const GLint *)f.gl_swizzle_mask);
|
||||
}
|
||||
|
||||
TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding));
|
||||
ret->gl_target = gl_target;
|
||||
ret->gl_texture = gl_texture;
|
||||
ret->refcnt = 1;
|
||||
ret->draw_time = 0;
|
||||
ret->data_hash = 0;
|
||||
ret->min_filter = 0xFFFFFFFF;
|
||||
ret->mag_filter = 0xFFFFFFFF;
|
||||
ret->addru = 0xFFFFFFFF;
|
||||
ret->addrv = 0xFFFFFFFF;
|
||||
ret->addrp = 0xFFFFFFFF;
|
||||
ret->border_color_set = false;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void texture_binding_destroy(gpointer data)
|
||||
{
|
||||
TextureBinding *binding = (TextureBinding *)data;
|
||||
assert(binding->refcnt > 0);
|
||||
binding->refcnt--;
|
||||
if (binding->refcnt == 0) {
|
||||
glDeleteTextures(1, &binding->gl_texture);
|
||||
g_free(binding);
|
||||
}
|
||||
}
|
||||
|
||||
/* functions for texture LRU cache */
|
||||
static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key)
|
||||
{
|
||||
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
|
||||
memcpy(&tnode->key, key, sizeof(TextureKey));
|
||||
|
||||
tnode->binding = NULL;
|
||||
tnode->possibly_dirty = false;
|
||||
}
|
||||
|
||||
static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
|
||||
{
|
||||
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
|
||||
if (tnode->binding) {
|
||||
texture_binding_destroy(tnode->binding);
|
||||
tnode->binding = NULL;
|
||||
tnode->possibly_dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
|
||||
{
|
||||
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
|
||||
return memcmp(&tnode->key, key, sizeof(TextureKey));
|
||||
}
|
||||
|
||||
void pgraph_gl_init_texture_cache(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
const size_t texture_cache_size = 512;
|
||||
lru_init(&r->texture_cache);
|
||||
r->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode));
|
||||
assert(r->texture_cache_entries != NULL);
|
||||
for (int i = 0; i < texture_cache_size; i++) {
|
||||
lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node);
|
||||
}
|
||||
|
||||
r->texture_cache.init_node = texture_cache_entry_init;
|
||||
r->texture_cache.compare_nodes = texture_cache_entry_compare;
|
||||
r->texture_cache.post_node_evict = texture_cache_entry_post_evict;
|
||||
}
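/* The cache holds up to 512 TextureLruNode entries; when a node is evicted,
 * texture_cache_entry_post_evict drops its binding via
 * texture_binding_destroy, which frees the GL texture once the reference
 * count reaches zero. */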
|
||||
|
||||
void pgraph_gl_deinit_texture_cache(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
// Clear out texture cache
|
||||
lru_flush(&r->texture_cache);
|
||||
free(r->texture_cache_entries);
|
||||
}
|
|
@ -0,0 +1,283 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/nv2a_regs.h"
|
||||
#include <hw/xbox/nv2a/nv2a_int.h>
|
||||
#include "debug.h"
|
||||
#include "renderer.h"
|
||||
|
||||
static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size,
|
||||
bool quick)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
|
||||
|
||||
hwaddr end = TARGET_PAGE_ALIGN(addr + size);
|
||||
addr &= TARGET_PAGE_MASK;
|
||||
assert(end < memory_region_size(d->vram));
|
||||
|
||||
static hwaddr last_addr, last_end;
|
||||
if (quick && (addr >= last_addr) && (end <= last_end)) {
|
||||
return;
|
||||
}
|
||||
last_addr = addr;
|
||||
last_end = end;
|
||||
|
||||
size = end - addr;
|
||||
if (memory_region_test_and_clear_dirty(d->vram, addr, size,
|
||||
DIRTY_MEMORY_NV2A)) {
|
||||
glBufferSubData(GL_ARRAY_BUFFER, addr, size,
|
||||
d->vram_ptr + addr);
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1);
|
||||
}
|
||||
}
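/* With quick == true, a range that lies entirely inside the window covered by
 * the previous call returns early without re-checking the dirty bitmap, so
 * per-attribute updates stay cheap when several attributes share the same
 * pages. */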
|
||||
|
||||
void pgraph_gl_update_entire_memory_buffer(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr);
|
||||
}
|
||||
|
||||
void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
|
||||
unsigned int max_element, bool inline_data,
|
||||
unsigned int inline_stride,
|
||||
unsigned int provoking_element)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
bool updated_memory_buffer = false;
|
||||
unsigned int num_elements = max_element - min_element + 1;
|
||||
|
||||
if (inline_data) {
|
||||
NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
|
||||
__func__, num_elements, inline_stride);
|
||||
} else {
|
||||
NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
|
||||
}
|
||||
|
||||
pg->compressed_attrs = 0;
|
||||
|
||||
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
|
||||
VertexAttribute *attr = &pg->vertex_attributes[i];
|
||||
|
||||
if (!attr->count) {
|
||||
glDisableVertexAttribArray(i);
|
||||
glVertexAttrib4fv(i, attr->inline_value);
|
||||
continue;
|
||||
}
|
||||
|
||||
NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n",
|
||||
attr->format, attr->count, attr->stride);
|
||||
|
||||
GLint gl_count = attr->count;
|
||||
GLenum gl_type;
|
||||
GLboolean gl_normalize;
|
||||
bool needs_conversion = false;
|
||||
|
||||
switch (attr->format) {
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
|
||||
gl_type = GL_UNSIGNED_BYTE;
|
||||
gl_normalize = GL_TRUE;
|
||||
// http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt
|
||||
gl_count = GL_BGRA;
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
|
||||
gl_type = GL_UNSIGNED_BYTE;
|
||||
gl_normalize = GL_TRUE;
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
|
||||
gl_type = GL_SHORT;
|
||||
gl_normalize = GL_TRUE;
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
|
||||
gl_type = GL_FLOAT;
|
||||
gl_normalize = GL_FALSE;
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
|
||||
gl_type = GL_SHORT;
|
||||
gl_normalize = GL_FALSE;
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
|
||||
/* 3 signed, normalized components packed in 32-bits. (11,11,10) */
|
||||
gl_type = GL_INT;
|
||||
assert(attr->count == 1);
|
||||
needs_conversion = true;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND);
|
||||
hwaddr attrib_data_addr;
|
||||
size_t stride;
|
||||
|
||||
if (needs_conversion) {
|
||||
pg->compressed_attrs |= (1 << i);
|
||||
}
|
||||
|
||||
hwaddr start = 0;
|
||||
if (inline_data) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer);
|
||||
attrib_data_addr = attr->inline_array_offset;
|
||||
stride = inline_stride;
|
||||
} else {
|
||||
hwaddr dma_len;
|
||||
uint8_t *attr_data = (uint8_t *)nv_dma_map(
|
||||
d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a,
|
||||
&dma_len);
|
||||
assert(attr->offset < dma_len);
|
||||
attrib_data_addr = attr_data + attr->offset - d->vram_ptr;
|
||||
stride = attr->stride;
|
||||
start = attrib_data_addr + min_element * stride;
|
||||
update_memory_buffer(d, start, num_elements * stride,
|
||||
updated_memory_buffer);
|
||||
updated_memory_buffer = true;
|
||||
}
|
||||
|
||||
uint32_t provoking_element_index = provoking_element - min_element;
|
||||
size_t element_size = attr->size * attr->count;
|
||||
assert(element_size <= sizeof(attr->inline_value));
|
||||
const uint8_t *last_entry;
|
||||
|
||||
if (inline_data) {
|
||||
last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset;
|
||||
} else {
|
||||
last_entry = d->vram_ptr + start;
|
||||
}
|
||||
if (!stride) {
|
||||
// Stride of 0 indicates that only the first element should be used.
|
||||
pgraph_update_inline_value(attr, last_entry);
|
||||
glDisableVertexAttribArray(i);
|
||||
glVertexAttrib4fv(i, attr->inline_value);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (needs_conversion) {
|
||||
glVertexAttribIPointer(i, gl_count, gl_type, stride,
|
||||
(void *)attrib_data_addr);
|
||||
} else {
|
||||
glVertexAttribPointer(i, gl_count, gl_type, gl_normalize, stride,
|
||||
(void *)attrib_data_addr);
|
||||
}
|
||||
|
||||
glEnableVertexAttribArray(i);
|
||||
last_entry += stride * provoking_element_index;
|
||||
pgraph_update_inline_value(attr, last_entry);
|
||||
}
|
||||
|
||||
NV2A_GL_DGROUP_END();
|
||||
}
|
||||
|
||||
unsigned int pgraph_gl_bind_inline_array(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
unsigned int offset = 0;
|
||||
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
|
||||
VertexAttribute *attr = &pg->vertex_attributes[i];
|
||||
if (attr->count == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* FIXME: Double check */
|
||||
offset = ROUND_UP(offset, attr->size);
|
||||
attr->inline_array_offset = offset;
|
||||
NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n",
|
||||
i, attr->size, attr->count);
|
||||
offset += attr->size * attr->count;
|
||||
offset = ROUND_UP(offset, attr->size);
|
||||
}
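/* Example: a 4-component float position followed by a 2-component float
 * texcoord packs at offsets 0 and 16, giving vertex_size = 24 bytes per
 * interleaved vertex. */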
|
||||
|
||||
unsigned int vertex_size = offset;
|
||||
unsigned int index_count = pg->inline_array_length*4 / vertex_size;
|
||||
|
||||
NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
|
||||
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t),
|
||||
NULL, GL_STREAM_DRAW);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array);
|
||||
pgraph_gl_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size,
|
||||
index_count-1);
|
||||
|
||||
return index_count;
|
||||
}
|
||||
|
||||
static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key)
|
||||
{
|
||||
VertexLruNode *vnode = container_of(node, VertexLruNode, node);
|
||||
memcpy(&vnode->key, key, sizeof(struct VertexKey));
|
||||
vnode->initialized = false;
|
||||
}
|
||||
|
||||
static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key)
|
||||
{
|
||||
VertexLruNode *vnode = container_of(node, VertexLruNode, node);
|
||||
return memcmp(&vnode->key, key, sizeof(VertexKey));
|
||||
}
|
||||
|
||||
void pgraph_gl_init_vertex_cache(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHGLState *r = pg->gl_renderer_state;
|
||||
|
||||
const size_t element_cache_size = 50*1024;
|
||||
lru_init(&r->element_cache);
|
||||
r->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode));
|
||||
assert(r->element_cache_entries != NULL);
|
||||
GLuint element_cache_buffers[element_cache_size];
|
||||
glGenBuffers(element_cache_size, element_cache_buffers);
|
||||
for (int i = 0; i < element_cache_size; i++) {
|
||||
r->element_cache_entries[i].gl_buffer = element_cache_buffers[i];
|
||||
lru_add_free(&r->element_cache, &r->element_cache_entries[i].node);
|
||||
}
|
||||
|
||||
r->element_cache.init_node = vertex_cache_entry_init;
|
||||
r->element_cache.compare_nodes = vertex_cache_entry_compare;
|
||||
|
||||
GLint max_vertex_attributes;
|
||||
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes);
|
||||
assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES);
|
||||
|
||||
glGenBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer);
|
||||
glGenBuffers(1, &r->gl_inline_array_buffer);
|
||||
|
||||
glGenBuffers(1, &r->gl_memory_buffer);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer);
|
||||
glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram),
|
||||
NULL, GL_DYNAMIC_DRAW);
|
||||
|
||||
glGenVertexArrays(1, &r->gl_vertex_array);
|
||||
glBindVertexArray(r->gl_vertex_array);
|
||||
|
||||
assert(glGetError() == GL_NO_ERROR);
|
||||
}
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH GLSL Shader Generator
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
||||
MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array)
|
||||
{
|
||||
const char *flat_s = "flat";
|
||||
const char *noperspective_s = "noperspective";
|
||||
const char *qualifier_s = smooth ? noperspective_s : flat_s;
|
||||
const char *qualifiers[11] = {
|
||||
noperspective_s, flat_s, qualifier_s, qualifier_s,
|
||||
qualifier_s, qualifier_s, noperspective_s, noperspective_s,
|
||||
noperspective_s, noperspective_s, noperspective_s
|
||||
};
|
||||
|
||||
const char *in_out_s = in ? "in" : "out";
|
||||
|
||||
const char *float_s = "float";
|
||||
const char *vec4_s = "vec4";
|
||||
const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s,
|
||||
float_s, vec4_s, vec4_s, vec4_s, vec4_s };
|
||||
|
||||
const char *prefix_s = prefix ? "v_" : "";
|
||||
const char *names[11] = {
|
||||
"vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1",
|
||||
"vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3",
|
||||
};
|
||||
const char *suffix_s = array ? "[]" : "";
|
||||
|
||||
for (int i = 0; i < 11; i++) {
|
||||
if (location) {
|
||||
mstring_append_fmt(out, "layout(location = %d) ", i);
|
||||
}
|
||||
mstring_append_fmt(out, "%s %s %s %s%s%s;\n",
|
||||
qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s);
|
||||
}
|
||||
|
||||
return out;
|
||||
}
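/* Example output (location=true, smooth=true, in=true, prefix=true,
 * array=true):
 *   layout(location = 0) noperspective in float v_vtx_inv_w[];
 *   layout(location = 2) noperspective in vec4 v_vtxD0[];
 */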
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH GLSL Shader Generator
|
||||
*
|
||||
* Copyright (c) 2015 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_NV2A_SHADERS_COMMON_H
|
||||
#define HW_NV2A_SHADERS_COMMON_H
|
||||
|
||||
#include "qemu/mstring.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
#define GLSL_C(idx) "c[" stringify(idx) "]"
|
||||
#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]"
|
||||
|
||||
#define GLSL_C_MAT4(idx) \
|
||||
"mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \
|
||||
GLSL_C(idx+2) ", " GLSL_C(idx+3) ")"
|
||||
|
||||
#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n"
|
||||
|
||||
MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,228 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH GLSL Shader Generator
|
||||
*
|
||||
* Copyright (c) 2015 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2020-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/pgraph/shaders.h"
|
||||
#include "common.h"
|
||||
#include "geom.h"
|
||||
|
||||
MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
|
||||
enum ShaderPolygonMode polygon_back_mode,
|
||||
enum ShaderPrimitiveMode primitive_mode,
|
||||
bool smooth_shading,
|
||||
bool vulkan)
|
||||
{
|
||||
/* FIXME: Missing support for 2-sided-poly mode */
|
||||
assert(polygon_front_mode == polygon_back_mode);
|
||||
enum ShaderPolygonMode polygon_mode = polygon_front_mode;
|
||||
|
||||
/* POINT mode shouldn't require any special work */
|
||||
if (polygon_mode == POLY_MODE_POINT) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Handle LINE and FILL mode */
|
||||
const char *layout_in = NULL;
|
||||
const char *layout_out = NULL;
|
||||
const char *body = NULL;
|
||||
switch (primitive_mode) {
|
||||
case PRIM_TYPE_POINTS: return NULL;
|
||||
case PRIM_TYPE_LINES: return NULL;
|
||||
case PRIM_TYPE_LINE_LOOP: return NULL;
|
||||
case PRIM_TYPE_LINE_STRIP: return NULL;
|
||||
case PRIM_TYPE_TRIANGLES:
|
||||
if (polygon_mode == POLY_MODE_FILL) { return NULL; }
|
||||
assert(polygon_mode == POLY_MODE_LINE);
|
||||
layout_in = "layout(triangles) in;\n";
|
||||
layout_out = "layout(line_strip, max_vertices = 4) out;\n";
|
||||
body = " emit_vertex(0, 0);\n"
|
||||
" emit_vertex(1, 0);\n"
|
||||
" emit_vertex(2, 0);\n"
|
||||
" emit_vertex(0, 0);\n"
|
||||
" EndPrimitive();\n";
|
||||
break;
|
||||
case PRIM_TYPE_TRIANGLE_STRIP:
|
||||
if (polygon_mode == POLY_MODE_FILL) { return NULL; }
|
||||
assert(polygon_mode == POLY_MODE_LINE);
|
||||
layout_in = "layout(triangles) in;\n";
|
||||
layout_out = "layout(line_strip, max_vertices = 4) out;\n";
|
||||
/* Imagine a quad made of a tristrip; the comments tell you which
 * vertex we are using */
|
||||
body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n"
|
||||
" if (gl_PrimitiveIDIn == 0) {\n"
|
||||
" emit_vertex(0, 0);\n" /* bottom right */
|
||||
" }\n"
|
||||
" emit_vertex(1, 0);\n" /* top right */
|
||||
" emit_vertex(2, 0);\n" /* bottom left */
|
||||
" emit_vertex(0, 0);\n" /* bottom right */
|
||||
" } else {\n"
|
||||
" emit_vertex(2, 0);\n" /* bottom left */
|
||||
" emit_vertex(1, 0);\n" /* top left */
|
||||
" emit_vertex(0, 0);\n" /* top right */
|
||||
" }\n"
|
||||
" EndPrimitive();\n";
|
||||
break;
|
||||
case PRIM_TYPE_TRIANGLE_FAN:
|
||||
if (polygon_mode == POLY_MODE_FILL) { return NULL; }
|
||||
assert(polygon_mode == POLY_MODE_LINE);
|
||||
layout_in = "layout(triangles) in;\n";
|
||||
layout_out = "layout(line_strip, max_vertices = 4) out;\n";
|
||||
body = " if (gl_PrimitiveIDIn == 0) {\n"
|
||||
" emit_vertex(0, 0);\n"
|
||||
" }\n"
|
||||
" emit_vertex(1, 0);\n"
|
||||
" emit_vertex(2, 0);\n"
|
||||
" emit_vertex(0, 0);\n"
|
||||
" EndPrimitive();\n";
|
||||
break;
|
||||
case PRIM_TYPE_QUADS:
|
||||
layout_in = "layout(lines_adjacency) in;\n";
|
||||
if (polygon_mode == POLY_MODE_LINE) {
|
||||
layout_out = "layout(line_strip, max_vertices = 5) out;\n";
|
||||
body = " emit_vertex(0, 3);\n"
|
||||
" emit_vertex(1, 3);\n"
|
||||
" emit_vertex(2, 3);\n"
|
||||
" emit_vertex(3, 3);\n"
|
||||
" emit_vertex(0, 3);\n"
|
||||
" EndPrimitive();\n";
|
||||
} else if (polygon_mode == POLY_MODE_FILL) {
|
||||
layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
|
||||
body = " emit_vertex(3, 3);\n"
|
||||
" emit_vertex(0, 3);\n"
|
||||
" emit_vertex(2, 3);\n"
|
||||
" emit_vertex(1, 3);\n"
|
||||
" EndPrimitive();\n";
|
||||
} else {
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
break;
|
||||
case PRIM_TYPE_QUAD_STRIP:
|
||||
layout_in = "layout(lines_adjacency) in;\n";
|
||||
if (polygon_mode == POLY_MODE_LINE) {
|
||||
layout_out = "layout(line_strip, max_vertices = 5) out;\n";
|
||||
body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
|
||||
" if (gl_PrimitiveIDIn == 0) {\n"
|
||||
" emit_vertex(0, 3);\n"
|
||||
" }\n"
|
||||
" emit_vertex(1, 3);\n"
|
||||
" emit_vertex(3, 3);\n"
|
||||
" emit_vertex(2, 3);\n"
|
||||
" emit_vertex(0, 3);\n"
|
||||
" EndPrimitive();\n";
|
||||
} else if (polygon_mode == POLY_MODE_FILL) {
|
||||
layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
|
||||
body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
|
||||
" emit_vertex(0, 3);\n"
|
||||
" emit_vertex(1, 3);\n"
|
||||
" emit_vertex(2, 3);\n"
|
||||
" emit_vertex(3, 3);\n"
|
||||
" EndPrimitive();\n";
|
||||
} else {
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
break;
|
||||
case PRIM_TYPE_POLYGON:
|
||||
if (polygon_mode == POLY_MODE_LINE) {
|
||||
return NULL;
|
||||
}
|
||||
if (polygon_mode == POLY_MODE_FILL) {
|
||||
if (smooth_shading) {
|
||||
return NULL;
|
||||
}
|
||||
layout_in = "layout(triangles) in;\n";
|
||||
layout_out = "layout(triangle_strip, max_vertices = 3) out;\n";
|
||||
body = " emit_vertex(0, 2);\n"
|
||||
" emit_vertex(1, 2);\n"
|
||||
" emit_vertex(2, 2);\n"
|
||||
" EndPrimitive();\n";
|
||||
} else {
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* generate a geometry shader to support deprecated primitive types */
|
||||
assert(layout_in);
|
||||
assert(layout_out);
|
||||
assert(body);
|
||||
MString *s = mstring_new();
|
||||
mstring_append_fmt(s, "#version %d\n\n", vulkan ? 450 : 400);
|
||||
mstring_append(s, layout_in);
|
||||
mstring_append(s, layout_out);
|
||||
mstring_append(s, "\n");
|
||||
pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, true, true, true);
|
||||
pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, false, false, false);
|
||||
|
||||
if (smooth_shading) {
|
||||
mstring_append(s,
|
||||
"void emit_vertex(int index, int _unused) {\n"
|
||||
" gl_Position = gl_in[index].gl_Position;\n"
|
||||
" gl_PointSize = gl_in[index].gl_PointSize;\n"
|
||||
// " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
|
||||
// " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
|
||||
" vtx_inv_w = v_vtx_inv_w[index];\n"
|
||||
" vtx_inv_w_flat = v_vtx_inv_w[index];\n"
|
||||
" vtxD0 = v_vtxD0[index];\n"
|
||||
" vtxD1 = v_vtxD1[index];\n"
|
||||
" vtxB0 = v_vtxB0[index];\n"
|
||||
" vtxB1 = v_vtxB1[index];\n"
|
||||
" vtxFog = v_vtxFog[index];\n"
|
||||
" vtxT0 = v_vtxT0[index];\n"
|
||||
" vtxT1 = v_vtxT1[index];\n"
|
||||
" vtxT2 = v_vtxT2[index];\n"
|
||||
" vtxT3 = v_vtxT3[index];\n"
|
||||
" EmitVertex();\n"
|
||||
"}\n");
|
||||
} else {
|
||||
mstring_append(s,
|
||||
"void emit_vertex(int index, int provoking_index) {\n"
|
||||
" gl_Position = gl_in[index].gl_Position;\n"
|
||||
" gl_PointSize = gl_in[index].gl_PointSize;\n"
|
||||
// " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n"
|
||||
// " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n"
|
||||
" vtx_inv_w = v_vtx_inv_w[index];\n"
|
||||
" vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n"
|
||||
" vtxD0 = v_vtxD0[provoking_index];\n"
|
||||
" vtxD1 = v_vtxD1[provoking_index];\n"
|
||||
" vtxB0 = v_vtxB0[provoking_index];\n"
|
||||
" vtxB1 = v_vtxB1[provoking_index];\n"
|
||||
" vtxFog = v_vtxFog[index];\n"
|
||||
" vtxT0 = v_vtxT0[index];\n"
|
||||
" vtxT1 = v_vtxT1[index];\n"
|
||||
" vtxT2 = v_vtxT2[index];\n"
|
||||
" vtxT3 = v_vtxT3[index];\n"
|
||||
" EmitVertex();\n"
|
||||
"}\n");
|
||||
}
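/* In the non-smooth variant the color attributes (vtxD0, vtxD1, vtxB0, vtxB1)
 * and the flat 1/w value are taken from the provoking vertex, so the whole
 * primitive receives a single flat-shaded value. */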
|
||||
|
||||
mstring_append(s, "\n"
|
||||
"void main() {\n");
|
||||
mstring_append(s, body);
|
||||
mstring_append(s, "}\n");
|
||||
|
||||
return s;
|
||||
}
|
|
@@ -0,0 +1,34 @@
/*
 * Geforce NV2A PGRAPH GLSL Shader Generator
 *
 * Copyright (c) 2015 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2020-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H

#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"

MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
                              enum ShaderPolygonMode polygon_back_mode,
                              enum ShaderPrimitiveMode primitive_mode,
                              bool smooth_shading,
                              bool vulkan);

#endif
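The generator declared here only emits a geometry shader when the primitive type and polygon mode actually need one; otherwise it returns NULL and the caller can skip the stage. A minimal caller sketch follows; compile_geometry_shader() is a hypothetical placeholder, while pgraph_gen_geom_glsl, the enum values, and the MString helpers come from this change.

    /* Illustrative caller only: compile_geometry_shader() is a placeholder,
     * not part of this change. Everything else is declared above. */
    MString *geom = pgraph_gen_geom_glsl(POLY_MODE_FILL, POLY_MODE_FILL,
                                         PRIM_TYPE_POLYGON,
                                         false /* smooth_shading */,
                                         true /* vulkan */);
    if (geom != NULL) {
        compile_geometry_shader(mstring_get_str(geom)); /* backend-specific */
        mstring_unref(geom);
    }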
@@ -0,0 +1,8 @@
specific_ss.add([files(
  'common.c',
  'geom.c',
  'psh.c',
  'vsh.c',
  'vsh-ff.c',
  'vsh-prog.c',
)])
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2013 espes
  * Copyright (c) 2015 Jannik Vogel
- * Copyright (c) 2020-2021 Matt Borgerson
+ * Copyright (c) 2020-2024 Matt Borgerson
  *
  * Based on:
  *   Cxbx, PixelShader.cpp
@@ -34,9 +34,9 @@
#include <stdbool.h>
#include <stdint.h>

#include "qapi/qmp/qstring.h"

#include "shaders_common.h"
#include "common.h"
#include "hw/xbox/nv2a/debug.h"
#include "hw/xbox/nv2a/pgraph/psh.h"
#include "psh.h"

/*
@@ -575,7 +575,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s
         return NULL;
 
     case PS_TEXTUREMODES_PROJECT2D:
-        return state->rect_tex[i] ? sampler2DRect : sampler2D;
+        return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
 
     case PS_TEXTUREMODES_BUMPENVMAP:
     case PS_TEXTUREMODES_BUMPENVMAP_LUM:
@@ -584,12 +584,15 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s
             fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode);
             assert(!"Shadow map support not implemented for this mode");
         }
-        return state->rect_tex[i] ? sampler2DRect : sampler2D;
+        return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
 
     case PS_TEXTUREMODES_PROJECT3D:
     case PS_TEXTUREMODES_DOT_STR_3D:
+        if (state->tex_x8y24[i] && state->vulkan) {
+            return "usampler2D";
+        }
         if (state->shadow_map[i]) {
-            return state->rect_tex[i] ? sampler2DRect : sampler2D;
+            return (state->rect_tex[i] && !state->vulkan) ? sampler2DRect : sampler2D;
         }
         return sampler3D;
 
@@ -634,12 +637,28 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa
         return;
     }
 
-    mstring_append_fmt(vars,
-                       "pT%d.xy *= texScale%d;\n"
-                       "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n",
-                       i, i, i, i, i);
-
+    mstring_append_fmt(vars, "pT%d.xy *= texScale%d;\n", i, i);
+    const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func];
+    if (ps->state.rect_tex[i] && ps->state.vulkan) {
+        if (ps->state.tex_x8y24[i]) {
+            mstring_append_fmt(
+                vars,
+                "uvec4 t%d_depth_raw = texture(texSamp%d, pT%d.xy/pT%d.w);\n", i, i, i, i);
+            mstring_append_fmt(
+                vars,
+                "vec4 t%d_depth = vec4(float(t%d_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);",
+                i, i);
+        } else {
+            mstring_append_fmt(
+                vars,
+                "vec4 t%d_depth = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n", i,
+                i, i, i);
+        }
+    } else {
+        mstring_append_fmt(
+            vars, "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", i, i,
+            i);
+    }
 
     // Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float.
     if (compare_z) {
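For concreteness, with i == 0 and an X8_D24 rectangle texture on the Vulkan path, the format strings above expand to roughly the following generated fragment-shader code (a sketch obtained by substituting i = 0; not a literal quote from a build):

    /* Generated GLSL for i == 0 on the Vulkan x8y24 path: the 0xFFFFFF mask
     * keeps the 24-bit depth component, and t0_depth.y = 1.0 feeds the
     * "Depth.y != 0 indicates 24 bit" convention used by the comparison
     * code that follows.
     *
     *   pT0.xy *= texScale0;
     *   uvec4 t0_depth_raw = texture(texSamp0, pT0.xy/pT0.w);
     *   vec4 t0_depth = vec4(float(t0_depth_raw.x & 0xFFFFFF), 1.0, 0.0, 0.0);
     */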
@ -685,18 +704,69 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars,
|
|||
var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1], ps->state.border_inv_real_size[i][2]);
|
||||
}
|
||||
|
||||
static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex)
|
||||
{
|
||||
// FIXME: Convolution for 2D textures
|
||||
// FIXME: Quincunx
|
||||
assert(ps->state.rect_tex[tex]);
|
||||
|
||||
if (ps->state.vulkan) {
|
||||
mstring_append_fmt(vars,
|
||||
"vec4 t%d = vec4(0.0);\n"
|
||||
"for (int i = 0; i < 9; i++) {\n"
|
||||
" vec2 texCoord = pT%d.xy/pT%d.w + convolution3x3[i];\n"
|
||||
" t%d += textureLod(texSamp%d, texCoord, 0) * gaussian3x3[i];\n"
|
||||
"}\n", tex, tex, tex, tex, tex);
|
||||
} else {
|
||||
mstring_append_fmt(vars,
|
||||
"vec4 t%d = vec4(0.0);\n"
|
||||
"for (int i = 0; i < 9; i++) {\n"
|
||||
" vec3 texCoord = pT%d.xyw + vec3(convolution3x3[i], 0);\n"
|
||||
" t%d += textureProj(texSamp%d, texCoord) * gaussian3x3[i];\n"
|
||||
"}\n", tex, tex, tex, tex, tex);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
static MString* psh_convert(struct PixelShader *ps)
|
||||
{
|
||||
int i;
|
||||
|
||||
const char *u = ps->state.vulkan ? "" : "uniform "; // FIXME: Remove
|
||||
|
||||
MString *preflight = mstring_new();
|
||||
mstring_append(preflight, ps->state.smooth_shading ?
|
||||
STRUCT_VERTEX_DATA_IN_SMOOTH :
|
||||
STRUCT_VERTEX_DATA_IN_FLAT);
|
||||
mstring_append(preflight, "\n");
|
||||
mstring_append(preflight, "out vec4 fragColor;\n");
|
||||
mstring_append(preflight, "\n");
|
||||
mstring_append(preflight, "uniform vec4 fogColor;\n");
|
||||
pgraph_get_glsl_vtx_header(preflight, ps->state.vulkan,
|
||||
ps->state.smooth_shading, true, false, false);
|
||||
|
||||
if (ps->state.vulkan) {
|
||||
mstring_append_fmt(preflight,
|
||||
"layout(location = 0) out vec4 fragColor;\n"
|
||||
"layout(binding = %d, std140) uniform PshUniforms {\n", PSH_UBO_BINDING);
|
||||
} else {
|
||||
mstring_append_fmt(preflight,
|
||||
"layout(location = 0) out vec4 fragColor;\n");
|
||||
}
|
||||
|
||||
mstring_append_fmt(preflight, "%sfloat alphaRef;\n"
|
||||
"%svec4 fogColor;\n"
|
||||
"%sivec4 clipRegion[8];\n",
|
||||
u, u, u);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n"
|
||||
"%sfloat bumpScale%d;\n"
|
||||
"%sfloat bumpOffset%d;\n"
|
||||
"%sfloat texScale%d;\n",
|
||||
u, i, u, i, u, i, u, i);
|
||||
}
|
||||
for (int i = 0; i < 9; i++) {
|
||||
for (int j = 0; j < 2; j++) {
|
||||
mstring_append_fmt(preflight, "%svec4 c%d_%d;\n", u, j, i);
|
||||
}
|
||||
}
|
||||
|
||||
if (ps->state.vulkan) {
|
||||
mstring_append(preflight, "};\n");
|
||||
}
|
||||
|
||||
const char *dotmap_funcs[] = {
|
||||
"dotmap_zero_to_one",
|
||||
|
@@ -766,22 +836,12 @@ static MString* psh_convert(struct PixelShader *ps)
         " vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0),\n"
         " vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0),\n"
         " vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0));\n"
-        "vec4 gaussianFilter2DRectProj(sampler2DRect sampler, vec3 texCoord) {\n"
-        "  vec4 sum = vec4(0.0);\n"
-        "  for (int i = 0; i < 9; i++) {\n"
-        "    sum += gaussian3x3[i]*textureProj(sampler,\n"
-        "                                      texCoord + vec3(convolution3x3[i], 0.0));\n"
-        "  }\n"
-        "  return sum;\n"
-        "}\n"
         );
 
     /* Window Clipping */
     MString *clip = mstring_new();
-    mstring_append(preflight, "uniform ivec4 clipRegion[8];\n");
-    mstring_append_fmt(clip, "/* Window-clip (%s) */\n",
-                       ps->state.window_clip_exclusive ?
-                           "Exclusive" : "Inclusive");
+    mstring_append_fmt(clip, "/* Window-clip (%slusive) */\n",
+                       ps->state.window_clip_exclusive ? "Exc" : "Inc");
     if (!ps->state.window_clip_exclusive) {
         mstring_append(clip, "bool clipContained = false;\n");
     }
@ -856,23 +916,27 @@ static MString* psh_convert(struct PixelShader *ps)
|
|||
if (ps->state.shadow_map[i]) {
|
||||
psh_append_shadowmap(ps, i, false, vars);
|
||||
} else {
|
||||
const char *lookup = "textureProj";
|
||||
if ((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN)
|
||||
|| (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX)) {
|
||||
/* FIXME: Quincunx looks better than Linear and costs less than
|
||||
* Gaussian, but Gaussian should be plenty fast so use it for
|
||||
* now.
|
||||
*/
|
||||
if (ps->state.rect_tex[i]) {
|
||||
lookup = "gaussianFilter2DRectProj";
|
||||
} else {
|
||||
NV2A_UNIMPLEMENTED("Convolution for 2D textures");
|
||||
}
|
||||
}
|
||||
apply_border_adjustment(ps, vars, i, "pT%d");
|
||||
mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i);
|
||||
mstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, pT%d.xyw);\n",
|
||||
i, lookup, i, i);
|
||||
if (ps->state.rect_tex[i]) {
|
||||
if ((ps->state.conv_tex[i] ==
|
||||
CONVOLUTION_FILTER_GAUSSIAN) ||
|
||||
(ps->state.conv_tex[i] ==
|
||||
CONVOLUTION_FILTER_QUINCUNX)) {
|
||||
apply_convolution_filter(ps, vars, i);
|
||||
} else {
|
||||
if (ps->state.vulkan) {
|
||||
mstring_append_fmt(vars, "vec4 t%d = textureLod(texSamp%d, pT%d.xy/pT%d.w, 0);\n",
|
||||
i, i, i, i);
|
||||
} else {
|
||||
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n",
|
||||
i, i, i);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n",
|
||||
i, i, i);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@@ -880,6 +944,7 @@ static MString* psh_convert(struct PixelShader *ps)
             if (ps->state.shadow_map[i]) {
                 psh_append_shadowmap(ps, i, true, vars);
             } else {
+                assert(!ps->state.rect_tex[i]);
                 apply_border_adjustment(ps, vars, i, "pT%d");
                 mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n",
                                    i, i, i);
@@ -906,7 +971,6 @@ static MString* psh_convert(struct PixelShader *ps)
             }
         case PS_TEXTUREMODES_BUMPENVMAP:
             assert(i >= 1);
-            mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
 
             if (ps->state.snorm_tex[ps->input_tex[i]]) {
                 /* Input color channels already signed (FIXME: May not always want signed textures in this case) */
@@ -925,9 +989,6 @@ static MString* psh_convert(struct PixelShader *ps)
             break;
         case PS_TEXTUREMODES_BUMPENVMAP_LUM:
             assert(i >= 1);
-            mstring_append_fmt(preflight, "uniform float bumpScale%d;\n", i);
-            mstring_append_fmt(preflight, "uniform float bumpOffset%d;\n", i);
-            mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
 
             if (ps->state.snorm_tex[ps->input_tex[i]]) {
                 /* Input color channels already signed (FIXME: May not always want signed textures in this case) */
@@ -1060,8 +1121,10 @@ static MString* psh_convert(struct PixelShader *ps)
             break;
         }
 
-        mstring_append_fmt(preflight, "uniform float texScale%d;\n", i);
         if (sampler_type != NULL) {
+            if (ps->state.vulkan) {
+                mstring_append_fmt(preflight, "layout(binding = %d) ", PSH_TEX_BINDING + i);
+            }
             mstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i);
 
             /* As this means a texture fetch does happen, do alphakill */
@@ -1091,7 +1154,6 @@ static MString* psh_convert(struct PixelShader *ps)
     }
 
     if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) {
-        mstring_append_fmt(preflight, "uniform float alphaRef;\n");
         if (ps->state.alpha_func == ALPHA_FUNC_NEVER) {
             mstring_append(ps->code, "discard;\n");
         } else {
@@ -1112,10 +1174,6 @@ static MString* psh_convert(struct PixelShader *ps)
         }
     }
 
-    for (i = 0; i < ps->num_const_refs; i++) {
-        mstring_append_fmt(preflight, "uniform vec4 %s;\n", ps->const_refs[i]);
-    }
-
     for (i = 0; i < ps->num_var_refs; i++) {
         mstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]);
         if (strcmp(ps->var_refs[i], "r0") == 0) {
@@ -1128,7 +1186,7 @@ static MString* psh_convert(struct PixelShader *ps)
     }
 
     MString *final = mstring_new();
-    mstring_append(final, "#version 330\n\n");
+    mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 450 : 400);
     mstring_append(final, mstring_get_str(preflight));
     mstring_append(final, "void main() {\n");
     mstring_append(final, mstring_get_str(clip));
@@ -1175,7 +1233,7 @@ static void parse_combiner_output(uint32_t value, struct OutputInfo *out)
     out->cd_alphablue = flags & 0x40;
 }
 
-MString *psh_translate(const PshState state)
+MString *pgraph_gen_psh_glsl(const PshState state)
 {
     int i;
     struct PixelShader ps;
@@ -0,0 +1,41 @@
/*
 * Geforce NV2A PGRAPH GLSL Shader Generator
 *
 * Copyright (c) 2013 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2020-2024 Matt Borgerson
 *
 * Based on:
 *   Cxbx, PixelShader.cpp
 *   Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
 *                      Kingofc <kingofc@freenet.de>
 *   Xeon, XBD3DPixelShader.cpp
 *   Copyright (c) 2003 _SF_
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 or
 * (at your option) version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H

#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"

// FIXME: Move to struct
#define PSH_UBO_BINDING 1
#define PSH_TEX_BINDING 2

MString *pgraph_gen_psh_glsl(const PshState state);

#endif
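Read together with the psh_convert() preflight code earlier in this change, the two binding constants correspond to declarations like the following in the emitted Vulkan fragment shader. This is a sketch assembled from the generator's format strings; the exact UBO member list and sampler types depend on the pixel-shader state.

    /* Sketch of what pgraph_gen_psh_glsl() emits on the Vulkan path:
     *
     *   layout(location = 0) out vec4 fragColor;
     *   layout(binding = 1, std140) uniform PshUniforms {  // PSH_UBO_BINDING
     *       float alphaRef;
     *       vec4 fogColor;
     *       ivec4 clipRegion[8];
     *       ...                                            // bumpMat/bumpScale/bumpOffset/texScale and c0_*/c1_* entries
     *   };
     *   layout(binding = 2) uniform sampler2D texSamp0;    // PSH_TEX_BINDING + 0
     */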
@ -0,0 +1,497 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH GLSL Shader Generator
|
||||
*
|
||||
* Copyright (c) 2015 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2020-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "hw/xbox/nv2a/pgraph/shaders.h"
|
||||
#include "common.h"
|
||||
#include "vsh-ff.h"
|
||||
|
||||
static void append_skinning_code(MString* str, bool mix,
|
||||
unsigned int count, const char* type,
|
||||
const char* output, const char* input,
|
||||
const char* matrix, const char* swizzle);
|
||||
|
||||
void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
|
||||
MString *body, MString *uniforms)
|
||||
{
|
||||
int i, j;
|
||||
const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
|
||||
|
||||
/* generate vertex shader mimicking fixed function */
|
||||
mstring_append(header,
|
||||
"#define position v0\n"
|
||||
"#define weight v1\n"
|
||||
"#define normal v2.xyz\n"
|
||||
"#define diffuse v3\n"
|
||||
"#define specular v4\n"
|
||||
"#define fogCoord v5.x\n"
|
||||
"#define pointSize v6\n"
|
||||
"#define backDiffuse v7\n"
|
||||
"#define backSpecular v8\n"
|
||||
"#define texture0 v9\n"
|
||||
"#define texture1 v10\n"
|
||||
"#define texture2 v11\n"
|
||||
"#define texture3 v12\n"
|
||||
"#define reserved1 v13\n"
|
||||
"#define reserved2 v14\n"
|
||||
"#define reserved3 v15\n"
|
||||
"\n");
|
||||
mstring_append_fmt(uniforms,
|
||||
"%svec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n"
|
||||
"%svec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n"
|
||||
"%svec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n", u, u, u
|
||||
);
|
||||
mstring_append(header,
|
||||
"\n"
|
||||
GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0))
|
||||
GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0))
|
||||
"\n"
|
||||
GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0))
|
||||
GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1))
|
||||
GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2))
|
||||
GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3))
|
||||
"\n"
|
||||
GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0))
|
||||
GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1))
|
||||
GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2))
|
||||
GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3))
|
||||
"\n"
|
||||
GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0))
|
||||
GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1))
|
||||
GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2))
|
||||
GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3))
|
||||
"\n"
|
||||
GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0))
|
||||
GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1))
|
||||
GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2))
|
||||
GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3))
|
||||
"\n"
|
||||
GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0))
|
||||
GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1))
|
||||
GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2))
|
||||
GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3))
|
||||
"\n"
|
||||
GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0))
|
||||
GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1))
|
||||
GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2))
|
||||
GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3))
|
||||
"\n"
|
||||
GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP))
|
||||
"\n"
|
||||
"#define lightAmbientColor(i) "
|
||||
"ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n"
|
||||
"#define lightDiffuseColor(i) "
|
||||
"ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n"
|
||||
"#define lightSpecularColor(i) "
|
||||
"ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n"
|
||||
"\n"
|
||||
"#define lightSpotFalloff(i) "
|
||||
"ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n"
|
||||
"#define lightSpotDirection(i) "
|
||||
"ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n"
|
||||
"\n"
|
||||
"#define lightLocalRange(i) "
|
||||
"ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n"
|
||||
"\n"
|
||||
GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
|
||||
GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz")
|
||||
"\n"
|
||||
);
|
||||
mstring_append_fmt(uniforms,
|
||||
"%smat4 invViewport;\n", u);
|
||||
|
||||
/* Skinning */
|
||||
unsigned int count;
|
||||
bool mix;
|
||||
switch (state->skinning) {
|
||||
case SKINNING_OFF:
|
||||
mix = false; count = 0; break;
|
||||
case SKINNING_1WEIGHTS:
|
||||
mix = true; count = 2; break;
|
||||
case SKINNING_2WEIGHTS2MATRICES:
|
||||
mix = false; count = 2; break;
|
||||
case SKINNING_2WEIGHTS:
|
||||
mix = true; count = 3; break;
|
||||
case SKINNING_3WEIGHTS3MATRICES:
|
||||
mix = false; count = 3; break;
|
||||
case SKINNING_3WEIGHTS:
|
||||
mix = true; count = 4; break;
|
||||
case SKINNING_4WEIGHTS4MATRICES:
|
||||
mix = false; count = 4; break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
mstring_append_fmt(body, "/* Skinning mode %d */\n",
|
||||
state->skinning);
|
||||
|
||||
append_skinning_code(body, mix, count, "vec4",
|
||||
"tPosition", "position",
|
||||
"modelViewMat", "xyzw");
|
||||
append_skinning_code(body, mix, count, "vec3",
|
||||
"tNormal", "vec4(normal, 0.0)",
|
||||
"invModelViewMat", "xyz");
|
||||
|
||||
/* Normalization */
|
||||
if (state->normalization) {
|
||||
mstring_append(body, "tNormal = normalize(tNormal);\n");
|
||||
}
|
||||
|
||||
/* Texgen */
|
||||
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
|
||||
mstring_append_fmt(body, "/* Texgen for stage %d */\n",
|
||||
i);
|
||||
/* Set each component individually */
|
||||
/* FIXME: could be nicer if some channels share the same texgen */
|
||||
for (j = 0; j < 4; j++) {
|
||||
/* TODO: TexGen View Model missing! */
|
||||
char c = "xyzw"[j];
|
||||
char cSuffix = "STRQ"[j];
|
||||
switch (state->texgen[i][j]) {
|
||||
case TEXGEN_DISABLE:
|
||||
mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n",
|
||||
i, c, i, c);
|
||||
break;
|
||||
case TEXGEN_EYE_LINEAR:
|
||||
mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n",
|
||||
i, c, cSuffix, i);
|
||||
break;
|
||||
case TEXGEN_OBJECT_LINEAR:
|
||||
mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n",
|
||||
i, c, cSuffix, i);
|
||||
break;
|
||||
case TEXGEN_SPHERE_MAP:
|
||||
assert(j < 2); /* Channels S,T only! */
|
||||
mstring_append(body, "{\n");
|
||||
/* FIXME: u, r and m only have to be calculated once */
|
||||
mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
|
||||
//FIXME: tNormal before or after normalization? Always normalize?
|
||||
mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
|
||||
|
||||
/* FIXME: This would consume 1 division fewer and *might* be
|
||||
* faster than length:
|
||||
* // [z=1/(2*x) => z=1/x*0.5]
|
||||
* vec3 ro = r + vec3(0.0, 0.0, 1.0);
|
||||
* float m = inversesqrt(dot(ro,ro))*0.5;
|
||||
*/
|
||||
|
||||
mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n");
|
||||
mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n",
|
||||
i, c, c);
|
||||
mstring_append(body, "}\n");
|
||||
break;
|
||||
case TEXGEN_REFLECTION_MAP:
|
||||
assert(j < 3); /* Channels S,T,R only! */
|
||||
mstring_append(body, "{\n");
|
||||
/* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */
|
||||
mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n");
|
||||
mstring_append(body, " vec3 r = reflect(u, tNormal);\n");
|
||||
mstring_append_fmt(body, " oT%d.%c = r.%c;\n",
|
||||
i, c, c);
|
||||
mstring_append(body, "}\n");
|
||||
break;
|
||||
case TEXGEN_NORMAL_MAP:
|
||||
assert(j < 3); /* Channels S,T,R only! */
|
||||
mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n",
|
||||
i, c, c);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Apply texture matrices */
|
||||
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
|
||||
if (state->texture_matrix_enable[i]) {
|
||||
mstring_append_fmt(body,
|
||||
"oT%d = oT%d * texMat%d;\n",
|
||||
i, i, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* Lighting */
|
||||
if (state->lighting) {
|
||||
|
||||
//FIXME: Do 2 passes if we want 2 sided-lighting?
|
||||
|
||||
static char alpha_source_diffuse[] = "diffuse.a";
|
||||
static char alpha_source_specular[] = "specular.a";
|
||||
static char alpha_source_material[] = "material_alpha";
|
||||
const char *alpha_source = alpha_source_diffuse;
|
||||
if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) {
|
||||
mstring_append_fmt(uniforms, "%sfloat material_alpha;\n", u);
|
||||
alpha_source = alpha_source_material;
|
||||
} else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) {
|
||||
alpha_source = alpha_source_specular;
|
||||
}
|
||||
|
||||
if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) {
|
||||
mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source);
|
||||
} else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) {
|
||||
mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source);
|
||||
} else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) {
|
||||
mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source);
|
||||
}
|
||||
|
||||
mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n");
|
||||
if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) {
|
||||
mstring_append(body, "oD0.rgb += sceneAmbientColor;\n");
|
||||
} else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) {
|
||||
mstring_append(body, "oD0.rgb += diffuse.rgb;\n");
|
||||
} else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) {
|
||||
mstring_append(body, "oD0.rgb += specular.rgb;\n");
|
||||
}
|
||||
|
||||
mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n");
|
||||
|
||||
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
|
||||
if (state->light[i] == LIGHT_OFF) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* FIXME: It seems that we only have to handle the surface colors if
|
||||
* they are not part of the material [= vertex colors].
|
||||
* If they are material the cpu will premultiply light
|
||||
* colors
|
||||
*/
|
||||
|
||||
mstring_append_fmt(body, "/* Light %d */ {\n", i);
|
||||
|
||||
if (state->light[i] == LIGHT_LOCAL
|
||||
|| state->light[i] == LIGHT_SPOT) {
|
||||
|
||||
mstring_append_fmt(uniforms,
|
||||
"%svec3 lightLocalPosition%d;\n"
|
||||
"%svec3 lightLocalAttenuation%d;\n",
|
||||
u, i, u, i);
|
||||
mstring_append_fmt(body,
|
||||
" vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n"
|
||||
" float d = length(VP);\n"
|
||||
//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights?
|
||||
" VP = normalize(VP);\n"
|
||||
" float attenuation = 1.0 / (lightLocalAttenuation%d.x\n"
|
||||
" + lightLocalAttenuation%d.y * d\n"
|
||||
" + lightLocalAttenuation%d.z * d * d);\n"
|
||||
" vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */
|
||||
" float nDotVP = max(0.0, dot(tNormal, VP));\n"
|
||||
" float nDotHV = max(0.0, dot(tNormal, halfVector));\n",
|
||||
i, i, i, i);
|
||||
|
||||
}
|
||||
|
||||
switch(state->light[i]) {
|
||||
case LIGHT_INFINITE:
|
||||
|
||||
/* lightLocalRange will be 1e+30 here */
|
||||
|
||||
mstring_append_fmt(uniforms,
|
||||
"%svec3 lightInfiniteHalfVector%d;\n"
|
||||
"%svec3 lightInfiniteDirection%d;\n",
|
||||
u, i, u, i);
|
||||
mstring_append_fmt(body,
|
||||
" float attenuation = 1.0;\n"
|
||||
" float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n"
|
||||
" float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n",
|
||||
i, i);
|
||||
|
||||
/* FIXME: Do specular */
|
||||
|
||||
/* FIXME: tBackDiffuse */
|
||||
|
||||
break;
|
||||
case LIGHT_LOCAL:
|
||||
/* Everything done already */
|
||||
break;
|
||||
case LIGHT_SPOT:
|
||||
/* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */
|
||||
mstring_append_fmt(body,
|
||||
" vec4 spotDir = lightSpotDirection(%d);\n"
|
||||
" float invScale = 1/length(spotDir.xyz);\n"
|
||||
" float cosHalfPhi = -invScale*spotDir.w;\n"
|
||||
" float cosHalfTheta = invScale + cosHalfPhi;\n"
|
||||
" float spotDirDotVP = dot(spotDir.xyz, VP);\n"
|
||||
" float rho = invScale*spotDirDotVP;\n"
|
||||
" if (rho > cosHalfTheta) {\n"
|
||||
" } else if (rho <= cosHalfPhi) {\n"
|
||||
" attenuation = 0.0;\n"
|
||||
" } else {\n"
|
||||
" attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */
|
||||
" }\n",
|
||||
i);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
mstring_append_fmt(body,
|
||||
" float pf;\n"
|
||||
" if (nDotVP == 0.0) {\n"
|
||||
" pf = 0.0;\n"
|
||||
" } else {\n"
|
||||
" pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n"
|
||||
" }\n"
|
||||
" vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n"
|
||||
" vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n"
|
||||
" vec3 lightSpecular = lightSpecularColor(%d) * pf;\n",
|
||||
i, i, i);
|
||||
|
||||
mstring_append(body,
|
||||
" oD0.xyz += lightAmbient;\n");
|
||||
|
||||
switch (state->diffuse_src) {
|
||||
case MATERIAL_COLOR_SRC_MATERIAL:
|
||||
mstring_append(body,
|
||||
" oD0.xyz += lightDiffuse;\n");
|
||||
break;
|
||||
case MATERIAL_COLOR_SRC_DIFFUSE:
|
||||
mstring_append(body,
|
||||
" oD0.xyz += diffuse.xyz * lightDiffuse;\n");
|
||||
break;
|
||||
case MATERIAL_COLOR_SRC_SPECULAR:
|
||||
mstring_append(body,
|
||||
" oD0.xyz += specular.xyz * lightDiffuse;\n");
|
||||
break;
|
||||
}
|
||||
|
||||
mstring_append(body,
|
||||
" oD1.xyz += specular.xyz * lightSpecular;\n");
|
||||
|
||||
mstring_append(body, "}\n");
|
||||
}
|
||||
} else {
|
||||
mstring_append(body, " oD0 = diffuse;\n");
|
||||
mstring_append(body, " oD1 = specular;\n");
|
||||
}
|
||||
mstring_append(body, " oB0 = backDiffuse;\n");
|
||||
mstring_append(body, " oB1 = backSpecular;\n");
|
||||
|
||||
/* Fog */
|
||||
if (state->fog_enable) {
|
||||
|
||||
/* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */
|
||||
switch(state->foggen) {
|
||||
case FOGGEN_SPEC_ALPHA:
|
||||
/* FIXME: Do we have to clamp here? */
|
||||
mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n");
|
||||
break;
|
||||
case FOGGEN_RADIAL:
|
||||
mstring_append(body, " float fogDistance = length(tPosition.xyz);\n");
|
||||
break;
|
||||
case FOGGEN_PLANAR:
|
||||
case FOGGEN_ABS_PLANAR:
|
||||
mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n");
|
||||
if (state->foggen == FOGGEN_ABS_PLANAR) {
|
||||
mstring_append(body, " fogDistance = abs(fogDistance);\n");
|
||||
}
|
||||
break;
|
||||
case FOGGEN_FOG_X:
|
||||
mstring_append(body, " float fogDistance = fogCoord;\n");
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* If skinning is off the composite matrix already includes the MV matrix */
|
||||
if (state->skinning == SKINNING_OFF) {
|
||||
mstring_append(body, " tPosition = position;\n");
|
||||
}
|
||||
|
||||
mstring_append(body,
|
||||
" oPos = invViewport * (tPosition * compositeMat);\n"
|
||||
);
|
||||
|
||||
if (state->vulkan) {
|
||||
mstring_append(body, " oPos.y *= -1;\n");
|
||||
} else {
|
||||
mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n");
|
||||
}
|
||||
|
||||
/* FIXME: Testing */
|
||||
if (state->point_params_enable) {
|
||||
mstring_append_fmt(
|
||||
body,
|
||||
" float d_e = length(position * modelViewMat0);\n"
|
||||
" oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n",
|
||||
state->point_params[0], state->point_params[1], state->point_params[2],
|
||||
state->point_params[6]);
|
||||
mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n",
|
||||
state->point_params[3], state->point_params[7],
|
||||
state->surface_scale_factor);
|
||||
} else {
|
||||
mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size,
|
||||
state->surface_scale_factor);
|
||||
}
|
||||
|
||||
mstring_append(body,
|
||||
" if (oPos.w == 0.0 || isinf(oPos.w)) {\n"
|
||||
" vtx_inv_w = 1.0;\n"
|
||||
" } else {\n"
|
||||
" vtx_inv_w = 1.0 / oPos.w;\n"
|
||||
" }\n"
|
||||
" vtx_inv_w_flat = vtx_inv_w;\n");
|
||||
}
|
||||
|
||||
static void append_skinning_code(MString* str, bool mix,
|
||||
unsigned int count, const char* type,
|
||||
const char* output, const char* input,
|
||||
const char* matrix, const char* swizzle)
|
||||
{
|
||||
if (count == 0) {
|
||||
mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n",
|
||||
type, output, input, matrix, swizzle);
|
||||
} else {
|
||||
mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type);
|
||||
if (mix) {
|
||||
/* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */
|
||||
mstring_append(str, "{\n"
|
||||
" float weight_i;\n"
|
||||
" float weight_n = 1.0;\n");
|
||||
int i;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (i < (count - 1)) {
|
||||
char c = "xyzw"[i];
|
||||
mstring_append_fmt(str, " weight_i = weight.%c;\n"
|
||||
" weight_n -= weight_i;\n",
|
||||
c);
|
||||
} else {
|
||||
mstring_append(str, " weight_i = weight_n;\n");
|
||||
}
|
||||
mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n",
|
||||
output, input, matrix, i, swizzle);
|
||||
}
|
||||
mstring_append(str, "}\n");
|
||||
} else {
|
||||
/* Individual weights */
|
||||
int i;
|
||||
for (i = 0; i < count; i++) {
|
||||
char c = "xyzw"[i];
|
||||
mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n",
|
||||
output, input, matrix, i, swizzle, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
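As a concrete illustration of append_skinning_code() in the fixed-function path above, the SKINNING_2WEIGHTS2MATRICES case (mix = false, count = 2) with the "tPosition"/"position"/"modelViewMat"/"xyzw" arguments expands to GLSL along these lines (derived by substituting into the format strings; shown as a comment, not generated output):

    /* Generated skinning code for SKINNING_2WEIGHTS2MATRICES:
     *
     *   vec4 tPosition = vec4(0.0);
     *   tPosition += (position * modelViewMat0).xyzw * weight.x;
     *   tPosition += (position * modelViewMat1).xyzw * weight.y;
     *
     * The normal gets the same treatment with invModelViewMat and an .xyz
     * swizzle, per the second append_skinning_code() call above.
     */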
@@ -0,0 +1,31 @@
/*
 * Geforce NV2A PGRAPH GLSL Shader Generator
 *
 * Copyright (c) 2015 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2020-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H

#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"

void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
                            MString *body, MString *uniforms);

#endif
|
@@ -1,5 +1,5 @@
 /*
- * QEMU Geforce NV2A vertex shader translation
+ * Geforce NV2A PGRAPH GLSL Shader Generator
  *
  * Copyright (c) 2014 Jannik Vogel
  * Copyright (c) 2012 espes
@@ -32,8 +32,9 @@
 #include <stdbool.h>
 #include <assert.h>
 
-#include "shaders_common.h"
-#include "vsh.h"
+#include "hw/xbox/nv2a/pgraph/vsh.h"
+#include "common.h"
+#include "vsh-prog.h"
 
 #define VSH_D3DSCM_CORRECTION 96
 
@@ -794,10 +795,11 @@ static const char* vsh_header =
     "  return t;\n"
     "}\n";
 
-void vsh_translate(uint16_t version,
+void pgraph_gen_vsh_prog_glsl(uint16_t version,
                    const uint32_t *tokens,
                    unsigned int length,
                    bool z_perspective,
+                   bool vulkan,
                    MString *header, MString *body)
 {
 
|
|||
* TODO: the pixel-center co-ordinate differences should handled
|
||||
*/
|
||||
" oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n"
|
||||
" oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;\n"
|
||||
);
|
||||
);
|
||||
|
||||
if (vulkan) {
|
||||
mstring_append(body,
|
||||
" oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n");
|
||||
} else {
|
||||
mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) "
|
||||
"/ surfaceSize.y;\n");
|
||||
}
|
||||
|
||||
if (z_perspective) {
|
||||
mstring_append(body, " oPos.z = oPos.w;\n");
|
||||
}
|
||||
|
||||
mstring_append(body,
|
||||
" if (clipRange.y != clipRange.x) {\n");
|
||||
if (vulkan) {
|
||||
mstring_append(body, " oPos.z /= clipRange.y;\n");
|
||||
} else {
|
||||
mstring_append(body,
|
||||
" oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y "
|
||||
"- clipRange.x)) - 1;\n");
|
||||
}
|
||||
mstring_append(body,
|
||||
" if (clipRange.y != clipRange.x) {\n"
|
||||
" oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y - clipRange.x)) - 1;\n"
|
||||
" }\n"
|
||||
|
||||
/* Correct for the perspective divide */
|
|
@@ -0,0 +1,35 @@
/*
 * Geforce NV2A PGRAPH GLSL Shader Generator
 *
 * Copyright (c) 2014 Jannik Vogel
 * Copyright (c) 2012 espes
 *
 * Based on:
 *   Cxbx, VertexShader.cpp
 *   Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
 *                      Kingofc <kingofc@freenet.de>
 *   Dxbx, uPushBuffer.pas
 *   Copyright (c) 2007 Shadow_tj, PatrickvL
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 or
 * (at your option) version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H

void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens,
                              unsigned int length, bool z_perspective,
                              bool vulkan, MString *header, MString *body);

#endif
@ -0,0 +1,274 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH GLSL Shader Generator
|
||||
*
|
||||
* Copyright (c) 2015 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2020-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "hw/xbox/nv2a/pgraph/shaders.h"
|
||||
#include "common.h"
|
||||
#include "vsh.h"
|
||||
#include "vsh-ff.h"
|
||||
#include "vsh-prog.h"
|
||||
#include <stdbool.h>
|
||||
|
||||
MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs)
|
||||
{
|
||||
int i;
|
||||
MString *output = mstring_new();
|
||||
mstring_append_fmt(output, "#version %d\n\n", state->vulkan ? 450 : 400);
|
||||
|
||||
MString *header = mstring_from_str("");
|
||||
|
||||
MString *uniforms = mstring_from_str("");
|
||||
|
||||
const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
|
||||
|
||||
mstring_append_fmt(uniforms,
|
||||
"%svec4 clipRange;\n"
|
||||
"%svec2 surfaceSize;\n"
|
||||
"%svec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n"
|
||||
"%svec2 fogParam;\n",
|
||||
u, u, u, u
|
||||
);
|
||||
|
||||
mstring_append(header,
|
||||
GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG))
|
||||
GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT))
|
||||
GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT))
|
||||
GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT))
|
||||
GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT))
|
||||
|
||||
"\n"
|
||||
"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n"
|
||||
"\n"
|
||||
"vec4 decompress_11_11_10(int cmp) {\n"
|
||||
" float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n"
|
||||
" float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n"
|
||||
" float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n"
|
||||
" return vec4(x, y, z, 1);\n"
|
||||
"}\n");
|
||||
|
||||
pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading,
|
||||
false, prefix_outputs, false);
|
||||
|
||||
if (prefix_outputs) {
|
||||
mstring_append(header,
|
||||
"#define vtx_inv_w v_vtx_inv_w\n"
|
||||
"#define vtx_inv_w_flat v_vtx_inv_w_flat\n"
|
||||
"#define vtxD0 v_vtxD0\n"
|
||||
"#define vtxD1 v_vtxD1\n"
|
||||
"#define vtxB0 v_vtxB0\n"
|
||||
"#define vtxB1 v_vtxB1\n"
|
||||
"#define vtxFog v_vtxFog\n"
|
||||
"#define vtxT0 v_vtxT0\n"
|
||||
"#define vtxT1 v_vtxT1\n"
|
||||
"#define vtxT2 v_vtxT2\n"
|
||||
"#define vtxT3 v_vtxT3\n"
|
||||
);
|
||||
}
|
||||
mstring_append(header, "\n");
|
||||
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
|
||||
|
||||
bool is_uniform = state->uniform_attrs & (1 << i);
|
||||
bool is_compressed = state->compressed_attrs & (1 << i);
|
||||
|
||||
assert(!(is_uniform && is_compressed));
|
||||
|
||||
if (is_uniform) {
|
||||
mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, i);
|
||||
} else {
|
||||
if (state->compressed_attrs & (1 << i)) {
|
||||
mstring_append_fmt(header,
|
||||
"layout(location = %d) in int v%d_cmp;\n", i, i);
|
||||
} else if (state->swizzle_attrs & (1 << i)) {
|
||||
mstring_append_fmt(header, "layout(location = %d) in vec4 v%d_sw;\n",
|
||||
i, i);
|
||||
} else {
|
||||
mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n",
|
||||
i, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
mstring_append(header, "\n");
|
||||
|
||||
MString *body = mstring_from_str("void main() {\n");
|
||||
|
||||
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
|
||||
if (state->compressed_attrs & (1 << i)) {
|
||||
mstring_append_fmt(
|
||||
body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i);
|
||||
}
|
||||
|
||||
if (state->swizzle_attrs & (1 << i)) {
|
||||
mstring_append_fmt(body, "vec4 v%d = v%d_sw.bgra;\n", i, i);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (state->fixed_function) {
|
||||
pgraph_gen_vsh_ff_glsl(state, header, body, uniforms);
|
||||
} else if (state->vertex_program) {
|
||||
pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS,
|
||||
(uint32_t *)state->program_data,
|
||||
state->program_length, state->z_perspective,
|
||||
state->vulkan, header, body);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
|
||||
/* Fog */
|
||||
|
||||
if (state->fog_enable) {
|
||||
|
||||
if (state->vertex_program) {
|
||||
/* FIXME: Does foggen do something here? Let's do some tracking..
|
||||
*
|
||||
* "RollerCoaster Tycoon" has
|
||||
* state->vertex_program = true; state->foggen == FOGGEN_PLANAR
|
||||
* but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z
|
||||
*/
|
||||
mstring_append(body, " float fogDistance = oFog.x;\n");
|
||||
}
|
||||
|
||||
/* FIXME: Do this per pixel? */
|
||||
|
||||
switch (state->fog_mode) {
|
||||
case FOG_MODE_LINEAR:
|
||||
case FOG_MODE_LINEAR_ABS:
|
||||
|
||||
/* f = (end - d) / (end - start)
|
||||
* fogParam.y = -1 / (end - start)
|
||||
* fogParam.x = 1 - end * fogParam.y;
|
||||
*/
|
||||
|
||||
mstring_append(body,
|
||||
" if (isinf(fogDistance)) {\n"
|
||||
" fogDistance = 0.0;\n"
|
||||
" }\n"
|
||||
);
|
||||
mstring_append(body, " float fogFactor = fogParam.x + fogDistance * fogParam.y;\n");
|
||||
mstring_append(body, " fogFactor -= 1.0;\n");
|
||||
break;
|
||||
case FOG_MODE_EXP:
|
||||
mstring_append(body,
|
||||
" if (isinf(fogDistance)) {\n"
|
||||
" fogDistance = 0.0;\n"
|
||||
" }\n"
|
||||
);
|
||||
/* fallthru */
|
||||
case FOG_MODE_EXP_ABS:
|
||||
|
||||
/* f = 1 / (e^(d * density))
|
||||
* fogParam.y = -density / (2 * ln(256))
|
||||
* fogParam.x = 1.5
|
||||
*/
|
||||
|
||||
mstring_append(body, " float fogFactor = fogParam.x + exp2(fogDistance * fogParam.y * 16.0);\n");
|
||||
mstring_append(body, " fogFactor -= 1.5;\n");
|
||||
break;
|
||||
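    /* Quick check on the EXP constants above (a verification sketch, not
     * generated code): with fogParam.y = -density / (2 * ln(256)) and
     * ln(256) = 8 * ln(2), the exponent fogDistance * fogParam.y * 16.0
     * equals -d * density / ln(2), so exp2(...) = e^(-d * density).
     * With fogParam.x = 1.5 and the subsequent "fogFactor -= 1.5", the
     * emitted fogFactor reduces to 1 / e^(d * density), matching the
     * formula in the comment above. */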
case FOG_MODE_EXP2:
|
||||
case FOG_MODE_EXP2_ABS:
|
||||
|
||||
/* f = 1 / (e^((d * density)^2))
|
||||
* fogParam.y = -density / (2 * sqrt(ln(256)))
|
||||
* fogParam.x = 1.5
|
||||
*/
|
||||
|
||||
mstring_append(body, " float fogFactor = fogParam.x + exp2(-fogDistance * fogDistance * fogParam.y * fogParam.y * 32.0);\n");
|
||||
mstring_append(body, " fogFactor -= 1.5;\n");
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
/* Calculate absolute for the modes which need it */
|
||||
switch (state->fog_mode) {
|
||||
case FOG_MODE_LINEAR_ABS:
|
||||
case FOG_MODE_EXP_ABS:
|
||||
case FOG_MODE_EXP2_ABS:
|
||||
mstring_append(body, " fogFactor = abs(fogFactor);\n");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n");
|
||||
} else {
|
||||
/* FIXME: Is the fog still calculated / passed somehow?!
|
||||
*/
|
||||
mstring_append(body, " oFog.xyzw = vec4(1.0);\n");
|
||||
}
|
||||
|
||||
/* Set outputs */
|
||||
const char *shade_model_mult = state->smooth_shading ? "vtx_inv_w" : "vtx_inv_w_flat";
|
||||
mstring_append_fmt(body, "\n"
|
||||
" vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n"
|
||||
" vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n"
|
||||
" vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n"
|
||||
" vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n"
|
||||
" vtxFog = oFog.x * vtx_inv_w;\n"
|
||||
" vtxT0 = oT0 * vtx_inv_w;\n"
|
||||
" vtxT1 = oT1 * vtx_inv_w;\n"
|
||||
" vtxT2 = oT2 * vtx_inv_w;\n"
|
||||
" vtxT3 = oT3 * vtx_inv_w;\n"
|
||||
" gl_Position = oPos;\n"
|
||||
" gl_PointSize = oPts.x;\n"
|
||||
// " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near
|
||||
// " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far
|
||||
"\n"
|
||||
"}\n",
|
||||
shade_model_mult,
|
||||
shade_model_mult,
|
||||
shade_model_mult,
|
||||
shade_model_mult);
|
||||
|
||||
|
||||
/* Return combined header + source */
|
||||
if (state->vulkan) {
|
||||
mstring_append_fmt(
|
||||
output, "layout(binding = %d, std140) uniform VshUniforms {\n%s};\n\n",
|
||||
VSH_UBO_BINDING, mstring_get_str(uniforms));
|
||||
// FIXME: Only needed for vk, for gl we use glVertexAttrib
|
||||
mstring_append_fmt(output,
|
||||
"layout(push_constant) uniform PushConstants {\n"
|
||||
"vec4 inlineValue[" stringify(NV2A_VERTEXSHADER_ATTRIBUTES) "];\n"
|
||||
"};\n\n");
|
||||
} else {
|
||||
mstring_append(
|
||||
output, mstring_get_str(uniforms));
|
||||
}
|
||||
|
||||
mstring_append(output, mstring_get_str(header));
|
||||
mstring_unref(header);
|
||||
|
||||
mstring_append(output, mstring_get_str(body));
|
||||
mstring_unref(body);
|
||||
return output;
|
||||
}
|
|
@@ -0,0 +1,33 @@
/*
 * Geforce NV2A PGRAPH GLSL Shader Generator
 *
 * Copyright (c) 2015 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2020-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H

#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"

// FIXME: Move to struct
#define VSH_UBO_BINDING 0

MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs);

#endif
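On the Vulkan path, pgraph_gen_vsh_glsl() (earlier in this change) wraps the accumulated uniform declarations in a UBO at this binding and passes per-attribute inline values via push constants. A rough sketch of the emitted preamble, based on the generator's format strings, is shown below; the stringify'd counts appear as literal numbers in real output.

    /* Sketch of the Vulkan vertex-shader preamble emitted by pgraph_gen_vsh_glsl():
     *
     *   layout(binding = 0, std140) uniform VshUniforms {  // VSH_UBO_BINDING
     *       vec4 clipRange;
     *       vec2 surfaceSize;
     *       vec4 c[NV2A_VERTEXSHADER_CONSTANTS];
     *       vec2 fogParam;
     *       ...
     *   };
     *   layout(push_constant) uniform PushConstants {
     *       vec4 inlineValue[NV2A_VERTEXSHADER_ATTRIBUTES];
     *   };
     */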
@@ -0,0 +1,19 @@
specific_ss.add(files(
  'pgraph.c',
  'profile.c',
  'rdi.c',
  's3tc.c',
  'shaders.c',
  'swizzle.c',
  'texture.c',
  'vertex.c',
))
if have_renderdoc
  specific_ss.add(files('debug_renderdoc.c'))
endif
subdir('thirdparty')
subdir('null')
subdir('gl')
subdir('glsl')
subdir('vk')
specific_ss.add(nv2a_vsh_cpu)
@@ -0,0 +1,3 @@
specific_ss.add([sdl, files(
  'renderer.c',
)])
@ -0,0 +1,146 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH Null Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "hw/hw.h"
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
|
||||
static void pgraph_null_sync(NV2AState *d)
|
||||
{
|
||||
qatomic_set(&d->pgraph.sync_pending, false);
|
||||
qemu_event_set(&d->pgraph.sync_complete);
|
||||
}
|
||||
|
||||
static void pgraph_null_flush(NV2AState *d)
|
||||
{
|
||||
qatomic_set(&d->pgraph.flush_pending, false);
|
||||
qemu_event_set(&d->pgraph.flush_complete);
|
||||
}
|
||||
|
||||
static void pgraph_null_process_pending(NV2AState *d)
|
||||
{
|
||||
if (
|
||||
qatomic_read(&d->pgraph.sync_pending) ||
|
||||
qatomic_read(&d->pgraph.flush_pending)
|
||||
) {
|
||||
qemu_mutex_unlock(&d->pfifo.lock);
|
||||
qemu_mutex_lock(&d->pgraph.lock);
|
||||
if (qatomic_read(&d->pgraph.sync_pending)) {
|
||||
pgraph_null_sync(d);
|
||||
}
|
||||
if (qatomic_read(&d->pgraph.flush_pending)) {
|
||||
pgraph_null_flush(d);
|
||||
}
|
||||
qemu_mutex_unlock(&d->pgraph.lock);
|
||||
qemu_mutex_lock(&d->pfifo.lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void pgraph_null_clear_report_value(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_clear_surface(NV2AState *d, uint32_t parameter)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_draw_begin(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_draw_end(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_flip_stall(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_flush_draw(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_get_report(NV2AState *d, uint32_t parameter)
|
||||
{
|
||||
pgraph_write_zpass_pixel_cnt_report(d, parameter, 0);
|
||||
}
|
||||
|
||||
static void pgraph_null_image_blit(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_pre_savevm_trigger(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_pre_savevm_wait(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_pre_shutdown_trigger(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_pre_shutdown_wait(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_process_pending_reports(NV2AState *d)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_surface_update(NV2AState *d, bool upload,
|
||||
bool color_write, bool zeta_write)
|
||||
{
|
||||
}
|
||||
|
||||
static void pgraph_null_init(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
pg->null_renderer_state = NULL;
|
||||
}
|
||||
|
||||
static PGRAPHRenderer pgraph_null_renderer = {
|
||||
.type = CONFIG_DISPLAY_RENDERER_NULL,
|
||||
.name = "Null",
|
||||
.ops = {
|
||||
.init = pgraph_null_init,
|
||||
.clear_report_value = pgraph_null_clear_report_value,
|
||||
.clear_surface = pgraph_null_clear_surface,
|
||||
.draw_begin = pgraph_null_draw_begin,
|
||||
.draw_end = pgraph_null_draw_end,
|
||||
.flip_stall = pgraph_null_flip_stall,
|
||||
.flush_draw = pgraph_null_flush_draw,
|
||||
.get_report = pgraph_null_get_report,
|
||||
.image_blit = pgraph_null_image_blit,
|
||||
.pre_savevm_trigger = pgraph_null_pre_savevm_trigger,
|
||||
.pre_savevm_wait = pgraph_null_pre_savevm_wait,
|
||||
.pre_shutdown_trigger = pgraph_null_pre_shutdown_trigger,
|
||||
.pre_shutdown_wait = pgraph_null_pre_shutdown_wait,
|
||||
.process_pending = pgraph_null_process_pending,
|
||||
.process_pending_reports = pgraph_null_process_pending_reports,
|
||||
.surface_update = pgraph_null_surface_update,
|
||||
}
|
||||
};
|
||||
|
||||
static void __attribute__((constructor)) register_renderer(void)
|
||||
{
|
||||
pgraph_renderer_register(&pgraph_null_renderer);
|
||||
}
|
File diff suppressed because it is too large
@ -0,0 +1,383 @@
|
|||
/*
|
||||
* QEMU Geforce NV2A PGRAPH internal definitions
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_H
|
||||
|
||||
#include "xemu-config.h"
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/bitmap.h"
|
||||
#include "qemu/units.h"
|
||||
#include "qemu/thread.h"
|
||||
#include "cpu.h"
|
||||
|
||||
#include "shaders.h"
|
||||
#include "surface.h"
|
||||
#include "util.h"
|
||||
|
||||
typedef struct NV2AState NV2AState;
|
||||
typedef struct PGRAPHNullState PGRAPHNullState;
|
||||
typedef struct PGRAPHGLState PGRAPHGLState;
|
||||
typedef struct PGRAPHVkState PGRAPHVkState;
|
||||
|
||||
typedef struct VertexAttribute {
|
||||
bool dma_select;
|
||||
hwaddr offset;
|
||||
|
||||
/* inline arrays are packed in order?
|
||||
* Need to pass the offset to converted attributes */
|
||||
unsigned int inline_array_offset;
|
||||
|
||||
float inline_value[4];
|
||||
|
||||
unsigned int format;
|
||||
unsigned int size; /* size of the data type */
|
||||
unsigned int count; /* number of components */
|
||||
uint32_t stride;
|
||||
|
||||
bool needs_conversion;
|
||||
|
||||
float *inline_buffer;
|
||||
bool inline_buffer_populated;
|
||||
} VertexAttribute;
|
||||
|
||||
typedef struct Surface {
|
||||
bool draw_dirty;
|
||||
bool buffer_dirty;
|
||||
bool write_enabled_cache;
|
||||
unsigned int pitch;
|
||||
|
||||
hwaddr offset;
|
||||
} Surface;
|
||||
|
||||
typedef struct KelvinState {
|
||||
hwaddr object_instance;
|
||||
} KelvinState;
|
||||
|
||||
typedef struct ContextSurfaces2DState {
|
||||
hwaddr object_instance;
|
||||
hwaddr dma_image_source;
|
||||
hwaddr dma_image_dest;
|
||||
unsigned int color_format;
|
||||
unsigned int source_pitch, dest_pitch;
|
||||
hwaddr source_offset, dest_offset;
|
||||
} ContextSurfaces2DState;
|
||||
|
||||
typedef struct ImageBlitState {
|
||||
hwaddr object_instance;
|
||||
hwaddr context_surfaces;
|
||||
unsigned int operation;
|
||||
unsigned int in_x, in_y;
|
||||
unsigned int out_x, out_y;
|
||||
unsigned int width, height;
|
||||
} ImageBlitState;
|
||||
|
||||
typedef struct BetaState {
|
||||
hwaddr object_instance;
|
||||
uint32_t beta;
|
||||
} BetaState;
|
||||
|
||||
typedef struct PGRAPHRenderer {
|
||||
CONFIG_DISPLAY_RENDERER type;
|
||||
const char *name;
|
||||
struct {
|
||||
void (*early_context_init)(void);
|
||||
void (*init)(NV2AState *d);
|
||||
void (*init_thread)(NV2AState *d);
|
||||
void (*finalize)(NV2AState *d);
|
||||
void (*clear_report_value)(NV2AState *d);
|
||||
void (*clear_surface)(NV2AState *d, uint32_t parameter);
|
||||
void (*draw_begin)(NV2AState *d);
|
||||
void (*draw_end)(NV2AState *d);
|
||||
void (*flip_stall)(NV2AState *d);
|
||||
void (*flush_draw)(NV2AState *d);
|
||||
void (*get_report)(NV2AState *d, uint32_t parameter);
|
||||
void (*image_blit)(NV2AState *d);
|
||||
void (*pre_savevm_trigger)(NV2AState *d);
|
||||
void (*pre_savevm_wait)(NV2AState *d);
|
||||
void (*pre_shutdown_trigger)(NV2AState *d);
|
||||
void (*pre_shutdown_wait)(NV2AState *d);
|
||||
void (*process_pending)(NV2AState *d);
|
||||
void (*process_pending_reports)(NV2AState *d);
|
||||
void (*surface_flush)(NV2AState *d);
|
||||
void (*surface_update)(NV2AState *d, bool upload, bool color_write, bool zeta_write);
|
||||
void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale);
|
||||
unsigned int (*get_surface_scale_factor)(NV2AState *d);
|
||||
int (*get_framebuffer_surface)(NV2AState *d);
|
||||
} ops;
|
||||
} PGRAPHRenderer;
|
||||
|
||||
typedef struct PGRAPHState {
|
||||
QemuMutex lock;
|
||||
|
||||
uint32_t pending_interrupts;
|
||||
uint32_t enabled_interrupts;
|
||||
|
||||
int frame_time;
|
||||
int draw_time;
|
||||
|
||||
/* subchannels state we're not sure the location of... */
|
||||
ContextSurfaces2DState context_surfaces_2d;
|
||||
ImageBlitState image_blit;
|
||||
KelvinState kelvin;
|
||||
BetaState beta;
|
||||
|
||||
hwaddr dma_color, dma_zeta;
|
||||
Surface surface_color, surface_zeta;
|
||||
unsigned int surface_type;
|
||||
SurfaceShape surface_shape;
|
||||
SurfaceShape last_surface_shape;
|
||||
|
||||
struct {
|
||||
int clip_x;
|
||||
int clip_width;
|
||||
int clip_y;
|
||||
int clip_height;
|
||||
int width;
|
||||
int height;
|
||||
} surface_binding_dim; // FIXME: Refactor
|
||||
|
||||
hwaddr dma_a, dma_b;
|
||||
bool texture_dirty[NV2A_MAX_TEXTURES];
|
||||
|
||||
bool texture_matrix_enable[NV2A_MAX_TEXTURES];
|
||||
|
||||
hwaddr dma_state;
|
||||
hwaddr dma_notifies;
|
||||
hwaddr dma_semaphore;
|
||||
|
||||
hwaddr dma_report;
|
||||
hwaddr report_offset;
|
||||
bool zpass_pixel_count_enable;
|
||||
|
||||
hwaddr dma_vertex_a, dma_vertex_b;
|
||||
|
||||
uint32_t primitive_mode;
|
||||
|
||||
bool enable_vertex_program_write; // FIXME: Not used anywhere???
|
||||
|
||||
uint32_t vertex_state_shader_v0[4];
|
||||
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
|
||||
bool program_data_dirty;
|
||||
|
||||
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
|
||||
bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
|
||||
|
||||
/* lighting constant arrays */
|
||||
uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
|
||||
bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
|
||||
uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
|
||||
bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
|
||||
uint32_t ltc1[NV2A_LTC1_COUNT][4];
|
||||
bool ltc1_dirty[NV2A_LTC1_COUNT];
|
||||
|
||||
float material_alpha;
|
||||
|
||||
// should figure out where these are in lighting context
|
||||
float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
|
||||
float light_infinite_direction[NV2A_MAX_LIGHTS][3];
|
||||
float light_local_position[NV2A_MAX_LIGHTS][3];
|
||||
float light_local_attenuation[NV2A_MAX_LIGHTS][3];
|
||||
|
||||
float point_params[8];
|
||||
|
||||
VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
|
||||
uint16_t compressed_attrs;
|
||||
uint16_t uniform_attrs;
|
||||
uint16_t swizzle_attrs;
|
||||
|
||||
unsigned int inline_array_length;
|
||||
uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
|
||||
|
||||
unsigned int inline_elements_length;
|
||||
uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
|
||||
|
||||
unsigned int inline_buffer_length;
|
||||
|
||||
unsigned int draw_arrays_length;
|
||||
unsigned int draw_arrays_min_start;
|
||||
unsigned int draw_arrays_max_count;
|
||||
/* FIXME: Unknown size, possibly endless, 1250 will do for now */
|
||||
/* Keep in sync with size used in nv2a.c */
|
||||
int32_t draw_arrays_start[1250];
|
||||
int32_t draw_arrays_count[1250];
|
||||
bool draw_arrays_prevent_connect;
|
||||
|
||||
uint32_t regs_[0x2000];
|
||||
DECLARE_BITMAP(regs_dirty, 0x2000 / sizeof(uint32_t));
|
||||
|
||||
bool clearing;
|
||||
bool waiting_for_nop;
|
||||
bool waiting_for_flip;
|
||||
bool waiting_for_context_switch;
|
||||
|
||||
bool flush_pending;
|
||||
QemuEvent flush_complete;
|
||||
|
||||
bool sync_pending;
|
||||
QemuEvent sync_complete;
|
||||
|
||||
unsigned int surface_scale_factor;
|
||||
uint8_t *scale_buf;
|
||||
|
||||
const PGRAPHRenderer *renderer;
|
||||
union {
|
||||
PGRAPHNullState *null_renderer_state;
|
||||
PGRAPHGLState *gl_renderer_state;
|
||||
PGRAPHVkState *vk_renderer_state;
|
||||
};
|
||||
} PGRAPHState;
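/* Illustrative sketch only (the real call sites live in the PGRAPH core, not
 * in this header): rendering work is dispatched through the selected
 * backend's ops table, and optional callbacks may be left NULL by a backend.
 * The helper name below is hypothetical. */
static inline void pgraph_example_flip_stall(NV2AState *d, PGRAPHState *pg)
{
    if (pg->renderer && pg->renderer->ops.flip_stall) {
        pg->renderer->ops.flip_stall(d);
    }
}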
|
||||
|
||||
void pgraph_init(NV2AState *d);
|
||||
void pgraph_init_thread(NV2AState *d);
|
||||
void pgraph_destroy(PGRAPHState *pg);
|
||||
void pgraph_context_switch(NV2AState *d, unsigned int channel_id);
|
||||
int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method,
|
||||
uint32_t parameter, uint32_t *parameters,
|
||||
size_t num_words_available, size_t max_lookahead_words,
|
||||
bool inc);
|
||||
void pgraph_check_within_begin_end_block(PGRAPHState *pg);
|
||||
|
||||
void *pfifo_thread(void *arg);
|
||||
void pfifo_kick(NV2AState *d);
|
||||
|
||||
void pgraph_renderer_register(const PGRAPHRenderer *renderer);
|
||||
|
||||
// FIXME: Move from here
|
||||
|
||||
extern NV2AState *g_nv2a;
|
||||
|
||||
// FIXME: Add new function pgraph_is_texture_sampler_active()
|
||||
|
||||
static inline uint32_t pgraph_reg_r(PGRAPHState *pg, unsigned int r)
{
    assert(r % 4 == 0);
    return pg->regs_[r];
}

static inline void pgraph_reg_w(PGRAPHState *pg, unsigned int r, uint32_t v)
{
    assert(r % 4 == 0);
    if (pg->regs_[r] != v) {
        bitmap_set(pg->regs_dirty, r / sizeof(uint32_t), 1);
    }
    pg->regs_[r] = v;
}

void pgraph_clear_dirty_reg_map(PGRAPHState *pg);

static inline bool pgraph_is_reg_dirty(PGRAPHState *pg, unsigned int reg)
{
    return test_bit(reg / sizeof(uint32_t), pg->regs_dirty);
}
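/* Usage sketch (hypothetical helper, not part of the API): updating a shadow
 * register through pgraph_reg_r()/pgraph_reg_w() so the dirty bitmap is only
 * touched when the value actually changes. */
static inline void pgraph_example_set_alpha_test(PGRAPHState *pg, bool enable)
{
    uint32_t ctrl = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);

    if (enable) {
        ctrl |= NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
    } else {
        ctrl &= ~NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
    }
    pgraph_reg_w(pg, NV_PGRAPH_CONTROL_0, ctrl);
}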
|
||||
|
||||
static inline bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage)
|
||||
{
|
||||
assert(stage < NV2A_MAX_TEXTURES);
|
||||
uint32_t mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (stage * 5)) & 0x1F;
|
||||
return mode != 0 && mode != 4;// && mode != 0x11 && mode != 0x0a && mode != 0x09 && mode != 5;
|
||||
}
|
||||
|
||||
static inline bool pgraph_is_texture_enabled(PGRAPHState *pg, int texture_idx)
|
||||
{
|
||||
uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + texture_idx*4);
|
||||
return // pgraph_is_texture_stage_active(pg, texture_idx) &&
|
||||
GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE);
|
||||
}
|
||||
|
||||
static inline bool pgraph_is_texture_format_compressed(PGRAPHState *pg, int color_format)
|
||||
{
|
||||
return color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ||
|
||||
color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8 ||
|
||||
color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8;
|
||||
}
|
||||
|
||||
static inline bool pgraph_color_write_enabled(PGRAPHState *pg)
{
    return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & (
        NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE
        | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE
        | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE
        | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE);
}

static inline bool pgraph_zeta_write_enabled(PGRAPHState *pg)
{
    return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & (
        NV_PGRAPH_CONTROL_0_ZWRITEENABLE
        | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE);
}
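/* Sketch of how a backend's surface_update callback is typically gated on the
 * write-enable helpers above (paraphrased; the exact call sites may differ). */
static inline void pgraph_example_upload_surface(NV2AState *d, PGRAPHState *pg)
{
    pg->renderer->ops.surface_update(d, /* upload */ true,
                                     pgraph_color_write_enabled(pg),
                                     pgraph_zeta_write_enabled(pg));
}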
|
||||
|
||||
static inline void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg,
                                                     unsigned int *width,
                                                     unsigned int *height)
{
    switch (pg->surface_shape.anti_aliasing) {
    case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1:
        break;
    case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2:
        if (width) { *width *= 2; }
        break;
    case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4:
        if (width) { *width *= 2; }
        if (height) { *height *= 2; }
        break;
    default:
        assert(false);
        break;
    }
}

static inline void pgraph_apply_scaling_factor(PGRAPHState *pg,
                                               unsigned int *width,
                                               unsigned int *height)
{
    *width *= pg->surface_scale_factor;
    *height *= pg->surface_scale_factor;
}
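/* Sketch (hypothetical helper): the physical extent of the bound surface is
 * obtained by applying the anti-aliasing multiplier first and then the
 * user-selected surface scale factor to the logical dimensions. */
static inline void pgraph_example_surface_phys_dim(PGRAPHState *pg,
                                                   unsigned int *width,
                                                   unsigned int *height)
{
    *width = pg->surface_binding_dim.width;
    *height = pg->surface_binding_dim.height;
    pgraph_apply_anti_aliasing_factor(pg, width, height);
    pgraph_apply_scaling_factor(pg, width, height);
}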
|
||||
|
||||
void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]);
|
||||
void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, int *stencil);
|
||||
|
||||
/* Vertex */
|
||||
void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr);
|
||||
void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg);
|
||||
void pgraph_reset_inline_buffers(PGRAPHState *pg);
|
||||
void pgraph_reset_draw_arrays(PGRAPHState *pg);
|
||||
void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data);
|
||||
|
||||
/* RDI */
|
||||
uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select,
|
||||
unsigned int address);
|
||||
void pgraph_rdi_write(PGRAPHState *pg, unsigned int select,
|
||||
unsigned int address, uint32_t val);
|
||||
|
||||
static inline void pgraph_argb_pack32_to_rgba_float(uint32_t argb, float *rgba)
|
||||
{
|
||||
rgba[0] = ((argb >> 16) & 0xFF) / 255.0f; /* red */
|
||||
rgba[1] = ((argb >> 8) & 0xFF) / 255.0f; /* green */
|
||||
rgba[2] = (argb & 0xFF) / 255.0f; /* blue */
|
||||
rgba[3] = ((argb >> 24) & 0xFF) / 255.0f; /* alpha */
|
||||
}
|
||||
|
||||
void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, uint32_t result);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* QEMU Geforce NV2A profiling helpers
|
||||
*
|
||||
* Copyright (c) 2020-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "../nv2a_int.h"
|
||||
|
||||
NV2AStats g_nv2a_stats;
|
||||
|
||||
void nv2a_profile_increment(void)
|
||||
{
|
||||
int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
||||
const int64_t fps_update_interval = 250000;
|
||||
g_nv2a_stats.last_flip_time = now;
|
||||
|
||||
static int64_t frame_count = 0;
|
||||
frame_count++;
|
||||
|
||||
static int64_t ts = 0;
|
||||
int64_t delta = now - ts;
|
||||
if (delta >= fps_update_interval) {
|
||||
g_nv2a_stats.increment_fps = frame_count * 1000000 / delta;
|
||||
ts = now;
|
||||
frame_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void nv2a_profile_flip_stall(void)
|
||||
{
|
||||
int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
|
||||
int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000;
|
||||
|
||||
g_nv2a_stats.frame_working.mspf = render_time;
|
||||
g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] =
|
||||
g_nv2a_stats.frame_working;
|
||||
g_nv2a_stats.frame_ptr =
|
||||
(g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES;
|
||||
g_nv2a_stats.frame_count++;
|
||||
memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working));
|
||||
}
|
||||
|
||||
const char *nv2a_profile_get_counter_name(unsigned int cnt)
{
    const char *default_names[NV2A_PROF__COUNT] = {
#define _X(x) stringify(x),
        NV2A_PROF_COUNTERS_XMAC
#undef _X
    };

    assert(cnt < NV2A_PROF__COUNT);
    return default_names[cnt] + 10; /* 'NV2A_PROF_' */
}

int nv2a_profile_get_counter_value(unsigned int cnt)
{
    assert(cnt < NV2A_PROF__COUNT);
    unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) %
                       NV2A_PROF_NUM_FRAMES;
    return g_nv2a_stats.frame_history[idx].counters[cnt];
}
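/* Usage sketch: a HUD or overlay could sample the published flip rate and the
 * previous frame's counters like this. Counter index 0 is used purely for
 * illustration; counter names are generated from NV2A_PROF_COUNTERS_XMAC. */
#include <stdio.h>

void nv2a_profile_example_dump(void)
{
    printf("flips/s: %d\n", (int)g_nv2a_stats.increment_fps);
    printf("%s: %d\n", nv2a_profile_get_counter_name(0),
           nv2a_profile_get_counter_value(0));
}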
|
|
@ -20,7 +20,8 @@
|
|||
#ifndef HW_NV2A_PSH_H
|
||||
#define HW_NV2A_PSH_H
|
||||
|
||||
#include "shaders_common.h"
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
enum PshAlphaFunc {
|
||||
ALPHA_FUNC_NEVER,
|
||||
|
@ -51,6 +52,8 @@ enum ConvolutionFilter {
|
|||
};
|
||||
|
||||
typedef struct PshState {
|
||||
bool vulkan;
|
||||
|
||||
/* fragment shader - register combiner stuff */
|
||||
uint32_t combiner_control;
|
||||
uint32_t shader_stage_program;
|
||||
|
@ -67,6 +70,7 @@ typedef struct PshState {
|
|||
bool compare_mode[4][4];
|
||||
bool alphakill[4];
|
||||
enum ConvolutionFilter conv_tex[4];
|
||||
bool tex_x8y24[4];
|
||||
|
||||
float border_logical_size[4][3];
|
||||
float border_inv_real_size[4][3];
|
||||
|
@ -82,6 +86,4 @@ typedef struct PshState {
|
|||
bool smooth_shading;
|
||||
} PshState;
|
||||
|
||||
MString *psh_translate(const PshState state);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* QEMU Geforce NV2A implementation
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "../nv2a_int.h"
|
||||
|
||||
uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select,
|
||||
unsigned int address)
|
||||
{
|
||||
uint32_t r = 0;
|
||||
switch(select) {
|
||||
case RDI_INDEX_VTX_CONSTANTS0:
|
||||
case RDI_INDEX_VTX_CONSTANTS1:
|
||||
assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
|
||||
r = pg->vsh_constants[address / 4][3 - address % 4];
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n",
|
||||
select, address);
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
void pgraph_rdi_write(PGRAPHState *pg, unsigned int select,
                      unsigned int address, uint32_t val)
{
    switch(select) {
    case RDI_INDEX_VTX_CONSTANTS0:
    case RDI_INDEX_VTX_CONSTANTS1:
        assert(false); /* Untested */
        assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS);
        pg->vsh_constants_dirty[address / 4] |=
            (val != pg->vsh_constants[address / 4][3 - address % 4]);
        pg->vsh_constants[address / 4][3 - address % 4] = val;
        break;
    default:
        NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n",
                     select, address, val);
        break;
    }
}
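/* Worked example of the address decode used above: RDI address 0x1E maps to
 * vertex-shader constant 0x1E / 4 = 7 and component 3 - (0x1E % 4) = 1, i.e.
 * the four components of each constant are stored in reverse order inside the
 * RDI window. The helper below is purely illustrative. */
static inline unsigned int pgraph_example_rdi_component(unsigned int address)
{
    return 3 - (address % 4);
}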
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* QEMU texture decompression routines
|
||||
* S3TC Texture Decompression
|
||||
*
|
||||
* Copyright (c) 2020 Wilhelm Kovatch
|
||||
*
|
||||
|
@ -25,13 +25,9 @@
|
|||
#include "qemu/osdep.h"
|
||||
#include "s3tc.h"
|
||||
|
||||
static inline void decode_bc1_colors(uint16_t c0,
|
||||
uint16_t c1,
|
||||
uint8_t r[4],
|
||||
uint8_t g[4],
|
||||
uint8_t b[4],
|
||||
uint8_t a[16],
|
||||
bool transparent)
|
||||
static void decode_bc1_colors(uint16_t c0, uint16_t c1, uint8_t r[4],
|
||||
uint8_t g[4], uint8_t b[4], uint8_t a[16],
|
||||
bool transparent)
|
||||
{
|
||||
r[0] = ((c0 & 0xF800) >> 8) * 0xFF / 0xF8,
|
||||
g[0] = ((c0 & 0x07E0) >> 3) * 0xFF / 0xFC,
|
||||
|
@ -66,15 +62,10 @@ static inline void decode_bc1_colors(uint16_t c0,
|
|||
}
|
||||
}
|
||||
|
||||
static inline void write_block_to_texture(uint8_t *converted_data,
|
||||
uint32_t indices,
|
||||
int i, int j, int width,
|
||||
int z_pos_factor,
|
||||
uint8_t r[4],
|
||||
uint8_t g[4],
|
||||
uint8_t b[4],
|
||||
uint8_t a[16],
|
||||
bool separate_alpha)
|
||||
static void write_block_to_texture(uint8_t *converted_data, uint32_t indices,
|
||||
int i, int j, int width, int z_pos_factor,
|
||||
uint8_t r[4], uint8_t g[4], uint8_t b[4],
|
||||
uint8_t a[16], bool separate_alpha)
|
||||
{
|
||||
int x0 = i * 4,
|
||||
y0 = j * 4;
|
||||
|
@ -89,16 +80,18 @@ static inline void write_block_to_texture(uint8_t *converted_data,
|
|||
int xy_index = y_index + x - x0;
|
||||
uint8_t index = (indices >> 2 * xy_index) & 0x03;
|
||||
uint8_t alpha_index = separate_alpha ? xy_index : index;
|
||||
uint32_t color = (r[index] << 24) | (g[index] << 16) | (b[index] << 8) | a[alpha_index];
|
||||
*(uint32_t*)(converted_data + (z_plus_y_pos_factor + x) * 4) = color;
|
||||
uint8_t *p = converted_data + (z_plus_y_pos_factor + x) * 4;
|
||||
*p++ = r[index];
|
||||
*p++ = g[index];
|
||||
*p++ = b[index];
|
||||
*p++ = a[alpha_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static inline void decompress_dxt1_block(const uint8_t block_data[8],
|
||||
uint8_t *converted_data,
|
||||
int i, int j, int width,
|
||||
int z_pos_factor)
|
||||
static void decompress_dxt1_block(const uint8_t block_data[8],
|
||||
uint8_t *converted_data, int i, int j,
|
||||
int width, int z_pos_factor)
|
||||
{
|
||||
uint16_t c0 = ((uint16_t*)block_data)[0],
|
||||
c1 = ((uint16_t*)block_data)[1];
|
||||
|
@ -111,10 +104,9 @@ static inline void decompress_dxt1_block(const uint8_t block_data[8],
|
|||
r, g, b, a, false);
|
||||
}
|
||||
|
||||
static inline void decompress_dxt3_block(const uint8_t block_data[16],
|
||||
uint8_t *converted_data,
|
||||
int i, int j, int width,
|
||||
int z_pos_factor)
|
||||
static void decompress_dxt3_block(const uint8_t block_data[16],
|
||||
uint8_t *converted_data, int i, int j,
|
||||
int width, int z_pos_factor)
|
||||
{
|
||||
uint16_t c0 = ((uint16_t*)block_data)[4],
|
||||
c1 = ((uint16_t*)block_data)[5];
|
||||
|
@ -132,10 +124,9 @@ static inline void decompress_dxt3_block(const uint8_t block_data[16],
|
|||
r, g, b, a, true);
|
||||
}
|
||||
|
||||
static inline void decompress_dxt5_block(const uint8_t block_data[16],
|
||||
uint8_t *converted_data,
|
||||
int i, int j, int width,
|
||||
int z_pos_factor)
|
||||
static void decompress_dxt5_block(const uint8_t block_data[16],
|
||||
uint8_t *converted_data, int i, int j,
|
||||
int width, int z_pos_factor)
|
||||
{
|
||||
uint16_t c0 = ((uint16_t*)block_data)[4],
|
||||
c1 = ((uint16_t*)block_data)[5];
|
||||
|
@ -173,11 +164,9 @@ static inline void decompress_dxt5_block(const uint8_t block_data[16],
|
|||
r, g, b, a, true);
|
||||
}
|
||||
|
||||
uint8_t *decompress_3d_texture_data(GLint color_format,
|
||||
const uint8_t *data,
|
||||
unsigned int width,
|
||||
unsigned int height,
|
||||
unsigned int depth)
|
||||
uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format,
|
||||
const uint8_t *data, unsigned int width,
|
||||
unsigned int height, unsigned int depth)
|
||||
{
|
||||
assert((width > 0) && (width % 4 == 0));
|
||||
assert((height > 0) && (height % 4 == 0));
|
||||
|
@ -196,13 +185,13 @@ uint8_t *decompress_3d_texture_data(GLint color_format,
|
|||
int sub_block_index = block_index * block_depth + slice;
|
||||
int z_pos_factor = (k * block_depth + slice) * width * height;
|
||||
|
||||
if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
|
||||
if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) {
|
||||
decompress_dxt1_block(data + 8 * sub_block_index, converted_data,
|
||||
i, j, width, z_pos_factor);
|
||||
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) {
|
||||
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) {
|
||||
decompress_dxt3_block(data + 16 * sub_block_index, converted_data,
|
||||
i, j, width, z_pos_factor);
|
||||
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) {
|
||||
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) {
|
||||
decompress_dxt5_block(data + 16 * sub_block_index, converted_data,
|
||||
i, j, width, z_pos_factor);
|
||||
} else {
|
||||
|
@ -216,8 +205,9 @@ uint8_t *decompress_3d_texture_data(GLint color_format,
|
|||
return converted_data;
|
||||
}
|
||||
|
||||
uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
|
||||
unsigned int width, unsigned int height)
|
||||
uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format,
|
||||
const uint8_t *data, unsigned int width,
|
||||
unsigned int height)
|
||||
{
|
||||
assert((width > 0) && (width % 4 == 0));
|
||||
assert((height > 0) && (height % 4 == 0));
|
||||
|
@ -226,13 +216,13 @@ uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
|
|||
for (int j = 0; j < num_blocks_y; j++) {
|
||||
for (int i = 0; i < num_blocks_x; i++) {
|
||||
int block_index = j * num_blocks_x + i;
|
||||
if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
|
||||
if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) {
|
||||
decompress_dxt1_block(data + 8 * block_index,
|
||||
converted_data, i, j, width, 0);
|
||||
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) {
|
||||
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) {
|
||||
decompress_dxt3_block(data + 16 * block_index,
|
||||
converted_data, i, j, width, 0);
|
||||
} else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) {
|
||||
} else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) {
|
||||
decompress_dxt5_block(data + 16 * block_index,
|
||||
converted_data, i, j, width, 0);
|
||||
} else {
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* QEMU texture decompression routines
|
||||
* S3TC Texture Decompression
|
||||
*
|
||||
* Copyright (c) 2020 Wilhelm Kovatch
|
||||
*
|
||||
|
@ -22,18 +22,23 @@
|
|||
* THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef S3TC_H
|
||||
#define S3TC_H
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_S3TC_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_S3TC_H
|
||||
|
||||
#include "gl/gloffscreen.h"
|
||||
#include <stdint.h>
|
||||
|
||||
uint8_t *decompress_3d_texture_data(GLint color_format,
|
||||
const uint8_t *data,
|
||||
unsigned int width,
|
||||
unsigned int height,
|
||||
unsigned int depth);
|
||||
enum S3TC_DECOMPRESS_FORMAT {
    S3TC_DECOMPRESS_FORMAT_DXT1,
    S3TC_DECOMPRESS_FORMAT_DXT3,
    S3TC_DECOMPRESS_FORMAT_DXT5,
};

uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data,
                                    unsigned int width, unsigned int height);
uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format,
                            const uint8_t *data, unsigned int width,
                            unsigned int height, unsigned int depth);

uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format,
                            const uint8_t *data, unsigned int width,
                            unsigned int height);

#endif
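/* Usage sketch: expanding one DXT1 mip level to RGBA8888 before upload. The
 * dimensions are illustrative, and the returned buffer is assumed to be
 * allocated with GLib (g_malloc) as elsewhere in this code, so it is released
 * with g_free(). DXT1 packs each 4x4 texel block into 8 bytes; DXT3/DXT5 use
 * 16-byte blocks. */
#include <glib.h>

static void s3tc_example_decode(const uint8_t *dxt1_blocks)
{
    unsigned int width = 64, height = 64;
    uint8_t *rgba = s3tc_decompress_2d(S3TC_DECOMPRESS_FORMAT_DXT1,
                                       dxt1_blocks, width, height);
    /* ... hand the width * height * 4 byte buffer to the texture backend ... */
    g_free(rgba);
}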
|
|
@ -0,0 +1,295 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH OpenGL Renderer
|
||||
*
|
||||
* Copyright (c) 2015 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2020-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/debug.h"
|
||||
#include "texture.h"
|
||||
#include "pgraph.h"
|
||||
#include "shaders.h"
|
||||
|
||||
ShaderState pgraph_get_shader_state(PGRAPHState *pg)
|
||||
{
|
||||
bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
|
||||
NV_PGRAPH_CSV0_D_MODE) == 2;
|
||||
|
||||
bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
|
||||
NV_PGRAPH_CSV0_D_MODE) == 0;
|
||||
|
||||
int program_start = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
|
||||
NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START);
|
||||
|
||||
pg->program_data_dirty = false;
|
||||
|
||||
ShaderState state;
|
||||
|
||||
// We will hash it, so make sure any padding is zeroed
|
||||
memset(&state, 0, sizeof(ShaderState));
|
||||
|
||||
state.vulkan = pg->renderer->type == CONFIG_DISPLAY_RENDERER_VULKAN;
|
||||
state.surface_scale_factor = pg->surface_scale_factor;
|
||||
|
||||
state.compressed_attrs = pg->compressed_attrs;
|
||||
state.uniform_attrs = pg->uniform_attrs;
|
||||
state.swizzle_attrs = pg->swizzle_attrs;
|
||||
|
||||
/* register combiner stuff */
|
||||
state.psh.vulkan = state.vulkan;
|
||||
state.psh.window_clip_exclusive =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE;
|
||||
state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL);
|
||||
state.psh.shader_stage_program = pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG);
|
||||
state.psh.other_stage_input = pgraph_reg_r(pg, NV_PGRAPH_SHADERCTL);
|
||||
state.psh.final_inputs_0 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG0);
|
||||
state.psh.final_inputs_1 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG1);
|
||||
|
||||
state.psh.alpha_test =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
|
||||
state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ALPHAFUNC);
|
||||
|
||||
state.psh.point_sprite = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
|
||||
NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE;
|
||||
|
||||
state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_SHADOWCTL), NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC);
|
||||
|
||||
state.fixed_function = fixed_function;
|
||||
|
||||
/* fixed function stuff */
|
||||
if (fixed_function) {
|
||||
state.skinning = (enum VshSkinning)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
|
||||
NV_PGRAPH_CSV0_D_SKIN);
|
||||
state.lighting =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING);
|
||||
state.normalization =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE;
|
||||
|
||||
/* color material */
|
||||
state.emission_src = (enum MaterialColorSource)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION);
|
||||
state.ambient_src = (enum MaterialColorSource)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT);
|
||||
state.diffuse_src = (enum MaterialColorSource)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE);
|
||||
state.specular_src = (enum MaterialColorSource)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR);
|
||||
}
|
||||
|
||||
/* vertex program stuff */
|
||||
state.vertex_program = vertex_program,
|
||||
state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
|
||||
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;
|
||||
|
||||
state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
|
||||
NV_PGRAPH_CSV0_D_POINTPARAMSENABLE);
|
||||
state.point_size =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE), NV097_SET_POINT_SIZE_V) / 8.0f;
|
||||
if (state.point_params_enable) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
state.point_params[i] = pg->point_params[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* geometry shader stuff */
|
||||
state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode;
|
||||
state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_FRONTFACEMODE);
|
||||
state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_BACKFACEMODE);
|
||||
|
||||
state.smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
|
||||
NV_PGRAPH_CONTROL_3_SHADEMODE) ==
|
||||
NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;
|
||||
state.psh.smooth_shading = state.smooth_shading;
|
||||
|
||||
state.program_length = 0;
|
||||
|
||||
if (vertex_program) {
|
||||
// copy in vertex program tokens
|
||||
for (int i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH;
|
||||
i++) {
|
||||
uint32_t *cur_token = (uint32_t *)&pg->program_data[i];
|
||||
memcpy(&state.program_data[state.program_length], cur_token,
|
||||
VSH_TOKEN_SIZE * sizeof(uint32_t));
|
||||
state.program_length++;
|
||||
|
||||
if (vsh_get_field(cur_token, FLD_FINAL)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Texgen */
|
||||
for (int i = 0; i < 4; i++) {
|
||||
unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B;
|
||||
for (int j = 0; j < 4; j++) {
|
||||
unsigned int masks[] = {
|
||||
(i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S,
|
||||
(i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T,
|
||||
(i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R,
|
||||
(i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q
|
||||
};
|
||||
state.texgen[i][j] =
|
||||
(enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fog */
|
||||
state.fog_enable =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE;
|
||||
if (state.fog_enable) {
|
||||
/*FIXME: Use CSV0_D? */
|
||||
state.fog_mode = (enum VshFogMode)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_FOG_MODE);
|
||||
state.foggen = (enum VshFoggen)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
|
||||
NV_PGRAPH_CSV0_D_FOGGENMODE);
|
||||
} else {
|
||||
/* FIXME: Do we still pass the fogmode? */
|
||||
state.fog_mode = (enum VshFogMode)0;
|
||||
state.foggen = (enum VshFoggen)0;
|
||||
}
|
||||
|
||||
/* Texture matrices */
|
||||
for (int i = 0; i < 4; i++) {
|
||||
state.texture_matrix_enable[i] = pg->texture_matrix_enable[i];
|
||||
}
|
||||
|
||||
/* Lighting */
|
||||
if (state.lighting) {
|
||||
for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
|
||||
state.light[i] = (enum VshLight)GET_MASK(
|
||||
pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2));
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy content of enabled combiner stages */
|
||||
int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
|
||||
for (int i = 0; i < num_stages; i++) {
|
||||
state.psh.rgb_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4);
|
||||
state.psh.rgb_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4);
|
||||
state.psh.alpha_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4);
|
||||
state.psh.alpha_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4);
|
||||
// constant_0[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
|
||||
// constant_1[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
state.psh.compare_mode[i][j] =
|
||||
(pgraph_reg_r(pg, NV_PGRAPH_SHADERCLIPMODE) >> (4 * i + j)) & 1;
|
||||
}
|
||||
|
||||
uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i * 4);
|
||||
bool enabled = pgraph_is_texture_stage_active(pg, i) &&
|
||||
(ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE);
|
||||
if (!enabled) {
|
||||
continue;
|
||||
}
|
||||
|
||||
state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN;
|
||||
|
||||
uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4);
|
||||
unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR);
|
||||
BasicColorFormatInfo f = kelvin_color_format_info_map[color_format];
|
||||
state.psh.rect_tex[i] = f.linear;
|
||||
state.psh.tex_x8y24[i] = color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED ||
|
||||
color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT;
|
||||
|
||||
uint32_t border_source =
|
||||
GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
|
||||
bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
|
||||
state.psh.border_logical_size[i][0] = 0.0f;
|
||||
state.psh.border_logical_size[i][1] = 0.0f;
|
||||
state.psh.border_logical_size[i][2] = 0.0f;
|
||||
if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) {
|
||||
if (!f.linear && !cubemap) {
|
||||
// The actual texture will be (at least) double the reported
|
||||
// size and shifted by a 4 texel border but texture coordinates
|
||||
// will still be relative to the reported size.
|
||||
unsigned int reported_width =
|
||||
1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
|
||||
unsigned int reported_height =
|
||||
1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
|
||||
unsigned int reported_depth =
|
||||
1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
|
||||
|
||||
state.psh.border_logical_size[i][0] = reported_width;
|
||||
state.psh.border_logical_size[i][1] = reported_height;
|
||||
state.psh.border_logical_size[i][2] = reported_depth;
|
||||
|
||||
if (reported_width < 8) {
|
||||
state.psh.border_inv_real_size[i][0] = 0.0625f;
|
||||
} else {
|
||||
state.psh.border_inv_real_size[i][0] =
|
||||
1.0f / (reported_width * 2.0f);
|
||||
}
|
||||
if (reported_height < 8) {
|
||||
state.psh.border_inv_real_size[i][1] = 0.0625f;
|
||||
} else {
|
||||
state.psh.border_inv_real_size[i][1] =
|
||||
1.0f / (reported_height * 2.0f);
|
||||
}
|
||||
if (reported_depth < 8) {
|
||||
state.psh.border_inv_real_size[i][2] = 0.0625f;
|
||||
} else {
|
||||
state.psh.border_inv_real_size[i][2] =
|
||||
1.0f / (reported_depth * 2.0f);
|
||||
}
|
||||
} else {
|
||||
NV2A_UNIMPLEMENTED(
|
||||
"Border source texture with linear %d cubemap %d", f.linear,
|
||||
cubemap);
|
||||
}
|
||||
}
|
||||
|
||||
/* Keep track of whether texture data has been loaded as signed
|
||||
* normalized integers or not. This dictates whether or not we will need
|
||||
* to re-map in fragment shader for certain texture modes (e.g.
|
||||
* bumpenvmap).
|
||||
*
|
||||
* FIXME: When signed texture data is loaded as unsigned and remapped in
|
||||
* fragment shader, there may be interpolation artifacts. Fix this to
|
||||
* support signed textures more appropriately.
|
||||
*/
|
||||
#if 0 // FIXME
|
||||
state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM)
|
||||
|| (f.gl_internal_format == GL_RG8_SNORM);
|
||||
#endif
|
||||
state.psh.shadow_map[i] = f.depth;
|
||||
|
||||
uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i * 4);
|
||||
unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
|
||||
enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED;
|
||||
/* FIXME: We do not distinguish between min and mag when
|
||||
* performing convolution. Just use it if specified for min (common AA
|
||||
* case).
|
||||
*/
|
||||
if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) {
|
||||
int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL);
|
||||
assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX ||
|
||||
k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3);
|
||||
kernel = (enum ConvolutionFilter)k;
|
||||
}
|
||||
|
||||
state.psh.conv_tex[i] = kernel;
|
||||
}
|
||||
|
||||
return state;
|
||||
}
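/* Sketch: because the state is fully zero-initialised above, it can be hashed
 * byte-wise and used as a shader cache key. xxHash (already vendored in this
 * tree) is assumed here; the include path and helper name are illustrative. */
#include "xxhash.h"

static uint64_t example_shader_state_hash(const ShaderState *state)
{
    return XXH3_64bits(state, sizeof(*state));
}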
|
|
@ -18,17 +18,14 @@
|
|||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_NV2A_SHADERS_H
|
||||
#define HW_NV2A_SHADERS_H
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_SHADERS_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_SHADERS_H
|
||||
|
||||
#include "qemu/thread.h"
|
||||
#include "qapi/qmp/qstring.h"
|
||||
#include "gl/gloffscreen.h"
|
||||
#include <stdint.h>
|
||||
#include "hw/xbox/nv2a/nv2a_regs.h"
|
||||
|
||||
#include "nv2a_regs.h"
|
||||
#include "vsh.h"
|
||||
#include "psh.h"
|
||||
#include "lru.h"
|
||||
|
||||
enum ShaderPrimitiveMode {
|
||||
PRIM_TYPE_INVALID,
|
||||
|
@ -57,10 +54,13 @@ enum MaterialColorSource {
|
|||
};
|
||||
|
||||
typedef struct ShaderState {
|
||||
bool vulkan;
|
||||
unsigned int surface_scale_factor;
|
||||
|
||||
PshState psh;
|
||||
uint16_t compressed_attrs;
|
||||
uint16_t uniform_attrs;
|
||||
uint16_t swizzle_attrs;
|
||||
|
||||
bool texture_matrix_enable[4];
|
||||
enum VshTexgen texgen[4][4];
|
||||
|
@ -101,61 +101,8 @@ typedef struct ShaderState {
|
|||
bool smooth_shading;
|
||||
} ShaderState;
|
||||
|
||||
typedef struct ShaderBinding {
|
||||
GLuint gl_program;
|
||||
GLenum gl_primitive_mode;
|
||||
|
||||
GLint psh_constant_loc[9][2];
|
||||
GLint alpha_ref_loc;
|
||||
|
||||
GLint bump_mat_loc[NV2A_MAX_TEXTURES];
|
||||
GLint bump_scale_loc[NV2A_MAX_TEXTURES];
|
||||
GLint bump_offset_loc[NV2A_MAX_TEXTURES];
|
||||
GLint tex_scale_loc[NV2A_MAX_TEXTURES];
|
||||
|
||||
GLint surface_size_loc;
|
||||
GLint clip_range_loc;
|
||||
|
||||
GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
|
||||
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
|
||||
|
||||
GLint inv_viewport_loc;
|
||||
GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
|
||||
GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
|
||||
GLint ltc1_loc[NV2A_LTC1_COUNT];
|
||||
|
||||
GLint fog_color_loc;
|
||||
GLint fog_param_loc[2];
|
||||
GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
|
||||
GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS];
|
||||
GLint light_local_position_loc[NV2A_MAX_LIGHTS];
|
||||
GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
|
||||
|
||||
GLint clip_region_loc[8];
|
||||
|
||||
GLint material_alpha_loc;
|
||||
} ShaderBinding;
|
||||
|
||||
typedef struct ShaderLruNode {
|
||||
LruNode node;
|
||||
bool cached;
|
||||
void *program;
|
||||
size_t program_size;
|
||||
GLenum program_format;
|
||||
ShaderState state;
|
||||
ShaderBinding *binding;
|
||||
QemuThread *save_thread;
|
||||
} ShaderLruNode;
|
||||
|
||||
typedef struct PGRAPHState PGRAPHState;
|
||||
|
||||
GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode);
|
||||
void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state);
|
||||
ShaderBinding *generate_shaders(const ShaderState *state);
|
||||
|
||||
void shader_cache_init(PGRAPHState *pg);
|
||||
void shader_write_cache_reload_list(PGRAPHState *pg);
|
||||
bool shader_load_from_memory(ShaderLruNode *snode);
|
||||
void shader_cache_to_disk(ShaderLruNode *snode);
|
||||
ShaderState pgraph_get_shader_state(PGRAPHState *pg);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,35 @@
|
|||
/*
|
||||
* QEMU Geforce NV2A implementation
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_SURFACE_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_SURFACE_H
|
||||
|
||||
typedef struct SurfaceShape {
|
||||
unsigned int z_format;
|
||||
unsigned int color_format;
|
||||
unsigned int zeta_format;
|
||||
unsigned int log_width, log_height;
|
||||
unsigned int clip_x, clip_y;
|
||||
unsigned int clip_width, clip_height;
|
||||
unsigned int anti_aliasing;
|
||||
} SurfaceShape;
|
||||
|
||||
#endif
|
|
@ -18,8 +18,10 @@
|
|||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_SWIZZLE_H
|
||||
#define HW_XBOX_SWIZZLE_H
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_SWIZZLE_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_SWIZZLE_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void swizzle_box(
|
||||
const uint8_t *src_buf,
|
|
@ -0,0 +1,405 @@
|
|||
/*
|
||||
* QEMU Geforce NV2A implementation
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/nv2a_int.h"
|
||||
#include "texture.h"
|
||||
#include "util.h"
|
||||
|
||||
const BasicColorFormatInfo kelvin_color_format_info_map[66] = {
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { 1, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { 1, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { 2, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { 2, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { 2, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { 2, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { 4, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { 4, false },
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { 1, false },
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { 4, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { 4, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { 4, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { 2, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { 2, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { 4, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { 1, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { 2, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { 1, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { 2, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { 1, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { 2, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { 2, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { 4, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { 1, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { 2, true },
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { 2, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { 2, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { 2, false },
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { 2, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { 2, true },
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { 2, false, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { 4, true,
|
||||
true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { 4, true,
|
||||
true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { 2, true,
|
||||
true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { 2, true,
|
||||
true },
|
||||
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { 2, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { 4, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { 4, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { 4, false },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { 4, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { 4, true },
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { 4, true },
|
||||
};
|
||||
|
||||
hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx)
|
||||
{
|
||||
NV2AState *d = container_of(pg, NV2AState, pgraph);
|
||||
int i = texture_idx;
|
||||
|
||||
uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4);
|
||||
unsigned int dma_select =
|
||||
GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA);
|
||||
|
||||
hwaddr offset = pgraph_reg_r(pg, NV_PGRAPH_TEXOFFSET0 + i*4);
|
||||
|
||||
hwaddr dma_len;
|
||||
uint8_t *texture_data;
|
||||
if (dma_select) {
|
||||
texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len);
|
||||
} else {
|
||||
texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len);
|
||||
}
|
||||
assert(offset < dma_len);
|
||||
texture_data += offset;
|
||||
|
||||
return texture_data - d->vram_ptr;
|
||||
}
|
||||
|
||||
hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length)
|
||||
{
|
||||
NV2AState *d = container_of(pg, NV2AState, pgraph);
|
||||
int i = texture_idx;
|
||||
|
||||
uint32_t palette = pgraph_reg_r(pg, NV_PGRAPH_TEXPALETTE0 + i*4);
|
||||
bool palette_dma_select =
|
||||
GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA);
|
||||
unsigned int palette_length_index =
|
||||
GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH);
|
||||
unsigned int palette_offset =
|
||||
palette & NV_PGRAPH_TEXPALETTE0_OFFSET;
|
||||
|
||||
unsigned int palette_length = 0;
|
||||
switch (palette_length_index) {
|
||||
case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break;
|
||||
case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break;
|
||||
case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break;
|
||||
case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break;
|
||||
default: assert(false); break;
|
||||
}
|
||||
if (length) {
|
||||
*length = palette_length;
|
||||
}
|
||||
|
||||
hwaddr palette_dma_len;
|
||||
uint8_t *palette_data;
|
||||
if (palette_dma_select) {
|
||||
palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len);
|
||||
} else {
|
||||
palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len);
|
||||
}
|
||||
assert(palette_offset < palette_dma_len);
|
||||
palette_data += palette_offset;
|
||||
|
||||
return palette_data - d->vram_ptr;
|
||||
}
|
||||
|
||||
size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape)
|
||||
{
|
||||
BasicColorFormatInfo f = kelvin_color_format_info_map[shape->color_format];
|
||||
size_t length = 0;
|
||||
|
||||
if (f.linear) {
|
||||
assert(shape->cubemap == false);
|
||||
assert(shape->dimensionality == 2);
|
||||
length = shape->height * shape->pitch;
|
||||
} else {
|
||||
if (shape->dimensionality >= 2) {
|
||||
unsigned int w = shape->width, h = shape->height;
|
||||
int level;
|
||||
if (!pgraph_is_texture_format_compressed(pg, shape->color_format)) {
|
||||
for (level = 0; level < shape->levels; level++) {
|
||||
w = MAX(w, 1);
|
||||
h = MAX(h, 1);
|
||||
length += w * h * f.bytes_per_pixel;
|
||||
w /= 2;
|
||||
h /= 2;
|
||||
}
|
||||
} else {
|
||||
/* Compressed textures are a bit different */
|
||||
unsigned int block_size =
|
||||
shape->color_format ==
|
||||
NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ?
|
||||
8 : 16;
|
||||
for (level = 0; level < shape->levels; level++) {
|
||||
w = MAX(w, 1);
|
||||
h = MAX(h, 1);
|
||||
unsigned int phys_w = (w + 3) & ~3,
|
||||
phys_h = (h + 3) & ~3;
|
||||
length += phys_w/4 * phys_h/4 * block_size;
|
||||
w /= 2;
|
||||
h /= 2;
|
||||
}
|
||||
}
|
||||
if (shape->cubemap) {
|
||||
assert(shape->dimensionality == 2);
|
||||
length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1);
|
||||
length *= 6;
|
||||
}
|
||||
if (shape->dimensionality >= 3) {
|
||||
length *= shape->depth;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return length;
|
||||
}
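/* Worked example of the compressed path above: a 16x16 DXT1 texture with
 * 3 mipmap levels uses 4x4 blocks of 8 bytes each, i.e.
 * (4*4 + 2*2 + 1*1) * 8 = 128 + 32 + 8 = 168 bytes; DXT3/DXT5 use 16-byte
 * blocks, doubling each term. */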
|
||||
|
||||
TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx)
|
||||
{
|
||||
int i = texture_idx;
|
||||
|
||||
uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i*4);
|
||||
uint32_t ctl_1 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL1_0 + i*4);
|
||||
uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4);
|
||||
|
||||
#if DEBUG_NV2A
|
||||
uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4);
|
||||
uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4);
|
||||
#endif
|
||||
|
||||
unsigned int min_mipmap_level =
|
||||
GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP);
|
||||
unsigned int max_mipmap_level =
|
||||
GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP);
|
||||
|
||||
unsigned int pitch =
|
||||
GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH);
|
||||
|
||||
bool cubemap =
|
||||
GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
|
||||
unsigned int dimensionality =
|
||||
GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY);
|
||||
|
||||
int tex_mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (texture_idx * 5)) & 0x1F;
|
||||
if (tex_mode == 0x02) {
|
||||
assert(pgraph_is_texture_enabled(pg, texture_idx));
|
||||
// assert(state.dimensionality == 3);
|
||||
|
||||
// OVERRIDE
|
||||
// dimensionality = 3;
|
||||
}
|
||||
|
||||
unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR);
|
||||
unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS);
|
||||
unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
|
||||
unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
|
||||
unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
|
||||
|
||||
unsigned int rect_width =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4),
|
||||
NV_PGRAPH_TEXIMAGERECT0_WIDTH);
|
||||
unsigned int rect_height =
|
||||
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4),
|
||||
NV_PGRAPH_TEXIMAGERECT0_HEIGHT);
|
||||
#ifdef DEBUG_NV2A
|
||||
unsigned int lod_bias =
|
||||
GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS);
|
||||
#endif
|
||||
unsigned int border_source = GET_MASK(fmt,
|
||||
NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
|
||||
|
||||
NV2A_DPRINTF(" texture %d is format 0x%x, "
|
||||
"off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s),"
|
||||
" filter %x %x, levels %d-%d %d bias %d\n",
|
||||
i, color_format, address,
|
||||
rect_width, rect_height,
|
||||
1 << log_width, 1 << log_height, 1 << log_depth,
|
||||
pitch,
|
||||
cubemap ? "; cubemap" : "",
|
||||
GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN),
|
||||
GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG),
|
||||
min_mipmap_level, max_mipmap_level, levels,
|
||||
lod_bias);
|
||||
|
||||
assert(color_format < ARRAY_SIZE(kelvin_color_format_info_map));
|
||||
BasicColorFormatInfo f = kelvin_color_format_info_map[color_format];
|
||||
if (f.bytes_per_pixel == 0) {
|
||||
fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n",
|
||||
color_format);
|
||||
abort();
|
||||
}
|
||||
|
||||
unsigned int width, height, depth;
|
||||
if (f.linear) {
|
||||
assert(dimensionality == 2);
|
||||
width = rect_width;
|
||||
height = rect_height;
|
||||
depth = 1;
|
||||
} else {
|
||||
width = 1 << log_width;
|
||||
height = 1 << log_height;
|
||||
depth = 1 << log_depth;
|
||||
pitch = 0;
|
||||
|
||||
levels = MIN(levels, max_mipmap_level + 1);
|
||||
|
||||
/* Discard mipmap levels that would be smaller than 1x1.
|
||||
* FIXME: Is this actually needed?
|
||||
*
|
||||
* >> Level 0: 32 x 4
|
||||
* Level 1: 16 x 2
|
||||
* Level 2: 8 x 1
|
||||
* Level 3: 4 x 1
|
||||
* Level 4: 2 x 1
|
||||
* Level 5: 1 x 1
|
||||
*/
|
||||
levels = MIN(levels, MAX(log_width, log_height) + 1);
|
||||
assert(levels > 0);
|
||||
|
||||
if (dimensionality == 3) {
|
||||
/* FIXME: What about 3D mipmaps? */
|
||||
if (log_width < 2 || log_height < 2) {
|
||||
/* Base level is smaller than 4x4... */
|
||||
levels = 1;
|
||||
} else {
|
||||
levels = MIN(levels, MIN(log_width, log_height) - 1);
|
||||
}
|
||||
}
|
||||
min_mipmap_level = MIN(levels-1, min_mipmap_level);
|
||||
max_mipmap_level = MIN(levels-1, max_mipmap_level);
|
||||
}
|
||||
|
||||
TextureShape shape;
|
||||
|
||||
// We will hash it, so make sure any padding is zero
|
||||
memset(&shape, 0, sizeof(shape));
|
||||
|
||||
shape.cubemap = cubemap;
|
||||
shape.dimensionality = dimensionality;
|
||||
shape.color_format = color_format;
|
||||
shape.levels = levels;
|
||||
shape.width = width;
|
||||
shape.height = height;
|
||||
shape.depth = depth;
|
||||
shape.min_mipmap_level = min_mipmap_level;
|
||||
shape.max_mipmap_level = max_mipmap_level;
|
||||
shape.pitch = pitch;
|
||||
shape.border = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR;
|
||||
return shape;
|
||||
}
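/* Worked example of the level clamping above (ignoring the LOD clamp
 * registers): a swizzled 2D texture with log_width = 5 and log_height = 2
 * (32x4) reporting 8 mipmap levels is clamped to MIN(8, MAX(5, 2) + 1) = 6
 * levels, matching the 32x4 ... 1x1 chain in the comment; the same base size
 * as a 3D texture would be clamped further to MIN(6, MIN(5, 2) - 1) = 1
 * level. */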
|
||||
|
||||
uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data,
|
||||
const uint8_t *palette_data,
|
||||
unsigned int width, unsigned int height,
|
||||
unsigned int depth, unsigned int row_pitch,
|
||||
unsigned int slice_pitch,
|
||||
size_t *converted_size)
|
||||
{
|
||||
size_t size = 0;
|
||||
uint8_t *converted_data;
|
||||
|
||||
if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) {
|
||||
size = width * height * depth * 4;
|
||||
converted_data = g_malloc(size);
|
||||
const uint8_t *src = data;
|
||||
uint32_t *dst = (uint32_t *)converted_data;
|
||||
for (int z = 0; z < depth; z++) {
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
uint8_t index = src[y * row_pitch + x];
|
||||
uint32_t color = *(uint32_t *)(palette_data + index * 4);
|
||||
*dst++ = color;
|
||||
}
|
||||
}
|
||||
src += slice_pitch;
|
||||
}
|
||||
} else if (s.color_format ==
|
||||
NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 ||
|
||||
s.color_format ==
|
||||
NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) {
|
||||
// TODO: Investigate whether a non-1 depth is possible.
|
||||
// Generally the hardware asserts when attempting to use volumetric
|
||||
// textures in linear formats.
|
||||
assert(depth == 1); /* FIXME */
|
||||
// FIXME: only valid if control0 register allows for colorspace
|
||||
// conversion
|
||||
size = width * height * 4;
|
||||
converted_data = g_malloc(size);
|
||||
uint8_t *pixel = converted_data;
|
||||
for (int y = 0; y < height; y++) {
|
||||
const uint8_t *line = &data[y * row_pitch * depth];
|
||||
for (int x = 0; x < width; x++, pixel += 4) {
|
||||
if (s.color_format ==
|
||||
NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) {
|
||||
convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1],
|
||||
&pixel[2]);
|
||||
} else {
|
||||
convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1],
|
||||
&pixel[2]);
|
||||
}
|
||||
pixel[3] = 255;
|
||||
}
|
||||
}
|
||||
} else if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) {
|
||||
assert(depth == 1); /* FIXME */
|
||||
size = width * height * 3;
|
||||
converted_data = g_malloc(size);
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
uint16_t rgb655 = *(uint16_t *)(data + y * row_pitch + x * 2);
|
||||
int8_t *pixel = (int8_t *)&converted_data[(y * width + x) * 3];
|
||||
/* Maps 5 bit G and B signed value range to 8 bit
|
||||
* signed values. R is probably unsigned.
|
||||
*/
|
||||
rgb655 ^= (1 << 9) | (1 << 4);
|
||||
pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F;
|
||||
pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80;
|
||||
pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (converted_size) {
|
||||
*converted_size = size;
|
||||
}
|
||||
return converted_data;
|
||||
}
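
/* Usage sketch (caller-side names here are illustrative, not from this file):
 *
 *     size_t conv_size;
 *     uint8_t *conv = pgraph_convert_texture_data(s, data, palette_data,
 *                                                 w, h, d, row_pitch,
 *                                                 slice_pitch, &conv_size);
 *     if (conv) {
 *         // upload conv/conv_size instead of the raw texture data...
 *         g_free(conv);  // buffer comes from g_malloc()
 *     } else {
 *         // format needs no CPU-side conversion; use data directly
 *     }
 */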

@@ -0,0 +1,67 @@
/*
 * QEMU Geforce NV2A implementation
 *
 * Copyright (c) 2012 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2018-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_TEXTURE_H
#define HW_XBOX_NV2A_PGRAPH_TEXTURE_H

#include "qemu/osdep.h"
#include "cpu.h"

#include <stdbool.h>
#include <stdint.h>

#include "hw/xbox/nv2a/nv2a_regs.h"

typedef struct PGRAPHState PGRAPHState;

typedef struct TextureShape {
    bool cubemap;
    unsigned int dimensionality;
    unsigned int color_format;
    unsigned int levels;
    unsigned int width, height, depth;
    bool border;

    unsigned int min_mipmap_level, max_mipmap_level;
    unsigned int pitch;
} TextureShape;

typedef struct BasicColorFormatInfo {
    unsigned int bytes_per_pixel;
    bool linear;
    bool depth;
} BasicColorFormatInfo;

extern const BasicColorFormatInfo kelvin_color_format_info_map[66];

uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data,
                                     const uint8_t *palette_data,
                                     unsigned int width, unsigned int height,
                                     unsigned int depth, unsigned int row_pitch,
                                     unsigned int slice_pitch,
                                     size_t *converted_size);

hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx);
hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length);
TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx);
size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape);

#endif

@@ -1,7 +1,7 @@
/*
 * Offscreen OpenGL abstraction layer -- SDL based
 *
 * Copyright (c) 2018-2021 Matt Borgerson
 * Copyright (c) 2018-2024 Matt Borgerson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

@@ -10,3 +10,9 @@ libnv2a_vsh_cpu = static_library('nv2a_vsh_cpu',
                                 include_directories: ['.', 'nv2a_vsh_cpu/src'])
nv2a_vsh_cpu = declare_dependency(link_with: libnv2a_vsh_cpu,
                                  include_directories: ['nv2a_vsh_cpu/src'])

libgloffscreen = static_library('libgloffscreen',
    sources: files('gloffscreen/common.c', 'gloffscreen/sdl.c'),
    dependencies: sdl)
gloffscreen = declare_dependency(link_with: libgloffscreen,
                                 include_directories: ['gloffscreen'])

@@ -0,0 +1,86 @@
/*
 * QEMU Geforce NV2A implementation
 *
 * Copyright (c) 2012 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2018-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_UTIL_H
#define HW_XBOX_NV2A_PGRAPH_UTIL_H

static const float f16_max = 511.9375f;
static const float f24_max = 1.0E30;

/* 16 bit to [0.0, F16_MAX = 511.9375] */
static inline
float convert_f16_to_float(uint16_t f16) {
    if (f16 == 0x0000) { return 0.0; }
    uint32_t i = (f16 << 11) + 0x3C000000;
    return *(float*)&i;
}

/* 24 bit to [0.0, F24_MAX] */
static inline
float convert_f24_to_float(uint32_t f24) {
    assert(!(f24 >> 24));
    f24 &= 0xFFFFFF;
    if (f24 == 0x000000) { return 0.0; }
    uint32_t i = f24 << 7;
    return *(float*)&i;
}
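
/* Worked example for the f16 helper above: f16 = 0xFFFF gives
 * i = (0xFFFF << 11) + 0x3C000000 = 0x43FFF800, which reinterprets to
 * 511.9375f -- exactly f16_max. Zero is special-cased because the bias
 * alone would otherwise decode to 2^-7 rather than 0.0.
 */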

static inline
uint8_t cliptobyte(int x)
{
    return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 255 : x));
}

static inline
void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix,
                         uint8_t *r, uint8_t *g, uint8_t* b) {
    int c, d, e;
    c = (int)line[ix * 2] - 16;
    if (ix % 2) {
        d = (int)line[ix * 2 - 1] - 128;
        e = (int)line[ix * 2 + 1] - 128;
    } else {
        d = (int)line[ix * 2 + 1] - 128;
        e = (int)line[ix * 2 + 3] - 128;
    }
    *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
    *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
    *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
}

static inline
void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix,
                         uint8_t *r, uint8_t *g, uint8_t* b) {
    int c, d, e;
    c = (int)line[ix * 2 + 1] - 16;
    if (ix % 2) {
        d = (int)line[ix * 2 - 2] - 128;
        e = (int)line[ix * 2 + 0] - 128;
    } else {
        d = (int)line[ix * 2 + 0] - 128;
        e = (int)line[ix * 2 + 2] - 128;
    }
    *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
    *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
    *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
}
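
/* Both converters use the standard integer BT.601 limited-range weights.
 * Quick sanity check (not from the original source): for a white sample
 * with Y = 235 and Cb = Cr = 128, c = 219 and d = e = 0, so every channel
 * evaluates to cliptobyte((298 * 219 + 128) >> 8) = 255.
 */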

#endif

@@ -0,0 +1,131 @@
/*
 * QEMU Geforce NV2A implementation
 *
 * Copyright (c) 2012 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2018-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "hw/xbox/nv2a/nv2a_int.h"

void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data)
{
    assert(attr->count <= 4);
    attr->inline_value[0] = 0.0f;
    attr->inline_value[1] = 0.0f;
    attr->inline_value[2] = 0.0f;
    attr->inline_value[3] = 1.0f;

    switch (attr->format) {
    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
        for (uint32_t i = 0; i < attr->count; ++i) {
            attr->inline_value[i] = (float)data[i] / 255.0f;
        }
        break;
    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: {
        const int16_t *val = (const int16_t *) data;
        for (uint32_t i = 0; i < attr->count; ++i, ++val) {
            attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f);
        }
        break;
    }
    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
        memcpy(attr->inline_value, data, attr->size * attr->count);
        break;
    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: {
        const int16_t *val = (const int16_t *) data;
        for (uint32_t i = 0; i < attr->count; ++i, ++val) {
            attr->inline_value[i] = (float)*val;
        }
        break;
    }
    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: {
        /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
        const int32_t val = *(const int32_t *)data;
        int32_t x = val & 0x7FF;
        if (x & 0x400) {
            x |= 0xFFFFF800;
        }
        int32_t y = (val >> 11) & 0x7FF;
        if (y & 0x400) {
            y |= 0xFFFFF800;
        }
        int32_t z = (val >> 22) & 0x7FF;
        if (z & 0x200) {
            z |= 0xFFFFFC00;
        }

        attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f);
        attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f);
        attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f);
        break;
    }
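    /* Example of the decode above (values chosen only for illustration):
     * an 11-bit field of 0x3FF (+1023) maps to 1.0f, while 0x400 is
     * sign-extended to -1024 and clamped to -1.0f; the 10-bit Z field is
     * divided by 511 for the same reason.
     */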
    default:
        fprintf(stderr, "Unknown vertex attribute type: for format 0x%x\n",
                attr->format);
        assert(!"Unsupported attribute type");
        break;
    }
}

void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr)
{
    VertexAttribute *attribute = &pg->vertex_attributes[attr];

    if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) {
        return;
    }

    /* Now upload the previous attribute value */
    attribute->inline_buffer_populated = true;
    for (int i = 0; i < pg->inline_buffer_length; i++) {
        memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value,
               sizeof(float) * 4);
    }
}

void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg)
{
    pgraph_check_within_begin_end_block(pg);
    assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH);

    for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
        VertexAttribute *attribute = &pg->vertex_attributes[i];
        if (attribute->inline_buffer_populated) {
            memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4],
                   attribute->inline_value, sizeof(float) * 4);
        }
    }

    pg->inline_buffer_length++;
}

void pgraph_reset_inline_buffers(PGRAPHState *pg)
{
    pg->inline_elements_length = 0;
    pg->inline_array_length = 0;
    pg->inline_buffer_length = 0;
    pgraph_reset_draw_arrays(pg);
}

void pgraph_reset_draw_arrays(PGRAPHState *pg)
{
    pg->draw_arrays_length = 0;
    pg->draw_arrays_min_start = -1;
    pg->draw_arrays_max_count = 0;
    pg->draw_arrays_prevent_connect = false;
}

@@ -0,0 +1,177 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * Based on GL implementation:
 *
 * Copyright (c) 2012 espes
 * Copyright (c) 2015 Jannik Vogel
 * Copyright (c) 2018-2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "hw/xbox/nv2a/nv2a_int.h"
#include "renderer.h"

void pgraph_vk_image_blit(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d;
    ImageBlitState *image_blit = &pg->image_blit;
    BetaState *beta = &pg->beta;

    pgraph_vk_surface_update(d, false, true, true);

    assert(context_surfaces->object_instance == image_blit->context_surfaces);

    unsigned int bytes_per_pixel;
    switch (context_surfaces->color_format) {
    case NV062_SET_COLOR_FORMAT_LE_Y8:
        bytes_per_pixel = 1;
        break;
    case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
        bytes_per_pixel = 2;
        break;
    case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
    case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
    case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
    case NV062_SET_COLOR_FORMAT_LE_Y32:
        bytes_per_pixel = 4;
        break;
    default:
        fprintf(stderr, "Unknown blit surface format: 0x%x\n",
                context_surfaces->color_format);
        assert(false);
        break;
    }

    hwaddr source_dma_len, dest_dma_len;

    uint8_t *source = (uint8_t *)nv_dma_map(
        d, context_surfaces->dma_image_source, &source_dma_len);
    assert(context_surfaces->source_offset < source_dma_len);
    source += context_surfaces->source_offset;

    uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest,
                                          &dest_dma_len);
    assert(context_surfaces->dest_offset < dest_dma_len);
    dest += context_surfaces->dest_offset;

    hwaddr source_addr = source - d->vram_ptr;
    hwaddr dest_addr = dest - d->vram_ptr;

    SurfaceBinding *surf_src = pgraph_vk_surface_get(d, source_addr);
    if (surf_src) {
        pgraph_vk_surface_download_if_dirty(d, surf_src);
    }

    SurfaceBinding *surf_dest = pgraph_vk_surface_get(d, dest_addr);
    if (surf_dest) {
        if (image_blit->height < surf_dest->height ||
            image_blit->width < surf_dest->width) {
            pgraph_vk_surface_download_if_dirty(d, surf_dest);
        } else {
            // The blit will completely replace the surface so any pending
            // download should be discarded.
            surf_dest->download_pending = false;
            surf_dest->draw_dirty = false;
        }
        surf_dest->upload_pending = true;
        pg->draw_time++;
    }

    hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch +
                           image_blit->in_x * bytes_per_pixel;
    hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch +
                         image_blit->out_x * bytes_per_pixel;

    hwaddr source_size =
        (image_blit->height - 1) * context_surfaces->source_pitch +
        image_blit->width * bytes_per_pixel;
    hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch +
                       image_blit->width * bytes_per_pixel;

    /* FIXME: What does hardware do in this case? */
    assert(source_addr + source_offset + source_size <=
           memory_region_size(d->vram));
    assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram));

    uint8_t *source_row = source + source_offset;
    uint8_t *dest_row = dest + dest_offset;

    if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
        // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY");
        for (unsigned int y = 0; y < image_blit->height; y++) {
            memmove(dest_row, source_row, image_blit->width * bytes_per_pixel);
            source_row += context_surfaces->source_pitch;
            dest_row += context_surfaces->dest_pitch;
        }
    } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) {
        // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND");
        uint32_t max_beta_mult = 0x7f80;
        uint32_t beta_mult = beta->beta >> 16;
        uint32_t inv_beta_mult = max_beta_mult - beta_mult;
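        /* The loop below computes, per channel,
         * dest = (src * beta_mult + dest * inv_beta_mult) / max_beta_mult,
         * i.e. a plain linear blend; for example a beta_mult of 0x3fc0
         * (half of max_beta_mult) averages source and destination.
         */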
        for (unsigned int y = 0; y < image_blit->height; y++) {
            for (unsigned int x = 0; x < image_blit->width; x++) {
                for (unsigned int ch = 0; ch < 3; ch++) {
                    uint32_t a = source_row[x * 4 + ch] * beta_mult;
                    uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult;
                    dest_row[x * 4 + ch] = (a + b) / max_beta_mult;
                }
            }
            source_row += context_surfaces->source_pitch;
            dest_row += context_surfaces->dest_pitch;
        }
    } else {
        fprintf(stderr, "Unknown blit operation: 0x%x\n",
                image_blit->operation);
        assert(false && "Unknown blit operation");
    }

    NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr);

    bool needs_alpha_patching;
    uint8_t alpha_override;
    switch (context_surfaces->color_format) {
    case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8:
        needs_alpha_patching = true;
        alpha_override = 0xff;
        break;
    case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8:
        needs_alpha_patching = true;
        alpha_override = 0;
        break;
    default:
        needs_alpha_patching = false;
        alpha_override = 0;
    }

    if (needs_alpha_patching) {
        dest_row = dest + dest_offset;
        for (unsigned int y = 0; y < image_blit->height; y++) {
            for (unsigned int x = 0; x < image_blit->width; x++) {
                dest_row[x * 4 + 3] = alpha_override;
            }
            dest_row += context_surfaces->dest_pitch;
        }
    }

    dest_addr += dest_offset;
    memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
                                   DIRTY_MEMORY_VGA);
    memory_region_set_client_dirty(d->vram, dest_addr, dest_size,
                                   DIRTY_MEMORY_NV2A_TEX);
}

@@ -0,0 +1,206 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "renderer.h"
#include <vulkan/vulkan_core.h>

static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkBufferCreateInfo buffer_create_info = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = buffer->buffer_size,
        .usage = buffer->usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
    };
    VK_CHECK(vmaCreateBuffer(r->allocator, &buffer_create_info,
                             &buffer->alloc_info, &buffer->buffer,
                             &buffer->allocation, NULL));
}

static void destroy_buffer(PGRAPHState *pg, StorageBuffer *buffer)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vmaDestroyBuffer(r->allocator, buffer->buffer, buffer->allocation);
    buffer->buffer = VK_NULL_HANDLE;
    buffer->allocation = VK_NULL_HANDLE;
}

void pgraph_vk_init_buffers(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    // FIXME: Profile buffer sizes

    VmaAllocationCreateInfo host_alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
    };
    VmaAllocationCreateInfo device_alloc_create_info = {
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
    };

    r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
        .buffer_size = 4096 * 4096 * 4,
    };

    r->storage_buffers[BUFFER_STAGING_SRC] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_STAGING_DST].buffer_size,
    };

    r->storage_buffers[BUFFER_COMPUTE_DST] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
        .buffer_size = (1024 * 10) * (1024 * 10) * 8,
    };

    r->storage_buffers[BUFFER_COMPUTE_SRC] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
                 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
        .buffer_size = r->storage_buffers[BUFFER_COMPUTE_DST].buffer_size,
    };

    r->storage_buffers[BUFFER_INDEX] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
        .buffer_size = sizeof(pg->inline_elements) * 100,
    };

    r->storage_buffers[BUFFER_INDEX_STAGING] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_INDEX].buffer_size,
    };

    // FIXME: Don't assume that we can render with host mapped buffer
    r->storage_buffers[BUFFER_VERTEX_RAM] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        .buffer_size = memory_region_size(d->vram),
    };

    r->bitmap_size = memory_region_size(d->vram) / 4096;
    r->uploaded_bitmap = bitmap_new(r->bitmap_size);
    bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size);

    r->storage_buffers[BUFFER_VERTEX_INLINE] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
        .buffer_size = NV2A_VERTEXSHADER_ATTRIBUTES * NV2A_MAX_BATCH_LENGTH *
                       4 * sizeof(float) * 10,
    };

    r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_VERTEX_INLINE].buffer_size,
    };

    r->storage_buffers[BUFFER_UNIFORM] = (StorageBuffer){
        .alloc_info = device_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
        .buffer_size = 8 * 1024 * 1024,
    };

    r->storage_buffers[BUFFER_UNIFORM_STAGING] = (StorageBuffer){
        .alloc_info = host_alloc_create_info,
        .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        .buffer_size = r->storage_buffers[BUFFER_UNIFORM].buffer_size,
    };

    for (int i = 0; i < BUFFER_COUNT; i++) {
        create_buffer(pg, &r->storage_buffers[i]);
    }

    // FIXME: Add fallback path for device using host mapped memory

    int buffers_to_map[] = { BUFFER_VERTEX_RAM,
                             BUFFER_INDEX_STAGING,
                             BUFFER_VERTEX_INLINE_STAGING,
                             BUFFER_UNIFORM_STAGING };

    for (int i = 0; i < ARRAY_SIZE(buffers_to_map); i++) {
        VK_CHECK(vmaMapMemory(
            r->allocator, r->storage_buffers[buffers_to_map[i]].allocation,
            (void **)&r->storage_buffers[buffers_to_map[i]].mapped));
    }
}

void pgraph_vk_finalize_buffers(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    for (int i = 0; i < BUFFER_COUNT; i++) {
        if (r->storage_buffers[i].mapped) {
            vmaUnmapMemory(r->allocator, r->storage_buffers[i].allocation);
        }
        destroy_buffer(pg, &r->storage_buffers[i]);
    }

    g_free(r->uploaded_bitmap);
    r->uploaded_bitmap = NULL;
}

bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index,
                                    VkDeviceSize size,
                                    VkDeviceAddress alignment)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    StorageBuffer *b = &r->storage_buffers[index];
    return (ROUND_UP(b->buffer_offset, alignment) + size) <= b->buffer_size;
}

VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data,
                                        VkDeviceSize *sizes, size_t count,
                                        VkDeviceAddress alignment)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkDeviceSize total_size = 0;
    for (int i = 0; i < count; i++) {
        total_size += sizes[i];
    }
    assert(pgraph_vk_buffer_has_space_for(pg, index, total_size, alignment));

    StorageBuffer *b = &r->storage_buffers[index];
    VkDeviceSize starting_offset = ROUND_UP(b->buffer_offset, alignment);

    assert(b->mapped);

    for (int i = 0; i < count; i++) {
        b->buffer_offset = ROUND_UP(b->buffer_offset, alignment);
        memcpy(b->mapped + b->buffer_offset, data[i], sizes[i]);
        b->buffer_offset += sizes[i];
    }

    return starting_offset;
}
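
/* Usage sketch (chunk names are illustrative): staging two blocks of data
 * and remembering the aligned offset they start at:
 *
 *     void *chunks[] = { positions, colors };
 *     VkDeviceSize sizes[] = { positions_size, colors_size };
 *     VkDeviceSize offset = pgraph_vk_append_to_buffer(
 *         pg, BUFFER_VERTEX_INLINE_STAGING, chunks, sizes, 2, 16);
 */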

@@ -0,0 +1,119 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "renderer.h"

static void create_command_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    QueueFamilyIndices indices =
        pgraph_vk_find_queue_families(r->physical_device);

    VkCommandPoolCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = indices.queue_family,
    };
    VK_CHECK(
        vkCreateCommandPool(r->device, &create_info, NULL, &r->command_pool));
}

static void destroy_command_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyCommandPool(r->device, r->command_pool, NULL);
}

static void create_command_buffers(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkCommandBufferAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .commandPool = r->command_pool,
        .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = ARRAY_SIZE(r->command_buffers),
    };
    VK_CHECK(
        vkAllocateCommandBuffers(r->device, &alloc_info, r->command_buffers));

    r->command_buffer = r->command_buffers[0];
    r->aux_command_buffer = r->command_buffers[1];
}

static void destroy_command_buffers(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkFreeCommandBuffers(r->device, r->command_pool,
                         ARRAY_SIZE(r->command_buffers), r->command_buffers);

    r->command_buffer = VK_NULL_HANDLE;
    r->aux_command_buffer = VK_NULL_HANDLE;
}

VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(!r->in_aux_command_buffer);
    r->in_aux_command_buffer = true;

    VkCommandBufferBeginInfo begin_info = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
    VK_CHECK(vkBeginCommandBuffer(r->aux_command_buffer, &begin_info));

    return r->aux_command_buffer;
}

void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    assert(r->in_aux_command_buffer);

    VK_CHECK(vkEndCommandBuffer(cmd));

    VkSubmitInfo submit_info = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount = 1,
        .pCommandBuffers = &cmd,
    };
    VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE));
    nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_AUX);
    VK_CHECK(vkQueueWaitIdle(r->queue));

    r->in_aux_command_buffer = false;
}
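
/* Typical one-shot use (sketch; the copy region is illustrative):
 *
 *     VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
 *     vkCmdCopyBuffer(cmd, staging_buffer, device_buffer, 1, &region);
 *     pgraph_vk_end_single_time_commands(pg, cmd); // submits, then waits idle
 */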

void pgraph_vk_init_command_buffers(PGRAPHState *pg)
{
    create_command_pool(pg);
    create_command_buffers(pg);
}

void pgraph_vk_finalize_command_buffers(PGRAPHState *pg)
{
    destroy_command_buffers(pg);
    destroy_command_pool(pg);
}
|
|
@ -0,0 +1,418 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H
|
||||
|
||||
#include "hw/xbox/nv2a/nv2a_regs.h"
|
||||
#include "hw/xbox/nv2a/pgraph/shaders.h"
|
||||
#include <vulkan/vulkan.h>
|
||||
|
||||
static const VkFilter pgraph_texture_min_filter_vk_map[] = {
|
||||
0,
|
||||
VK_FILTER_NEAREST,
|
||||
VK_FILTER_LINEAR,
|
||||
VK_FILTER_NEAREST,
|
||||
VK_FILTER_LINEAR,
|
||||
VK_FILTER_NEAREST,
|
||||
VK_FILTER_LINEAR,
|
||||
VK_FILTER_LINEAR,
|
||||
};
|
||||
|
||||
static const VkFilter pgraph_texture_mag_filter_vk_map[] = {
|
||||
0,
|
||||
VK_FILTER_NEAREST,
|
||||
VK_FILTER_LINEAR,
|
||||
0,
|
||||
VK_FILTER_LINEAR /* TODO: Convolution filter... */
|
||||
};
|
||||
|
||||
static const VkSamplerAddressMode pgraph_texture_addr_vk_map[] = {
|
||||
0,
|
||||
VK_SAMPLER_ADDRESS_MODE_REPEAT,
|
||||
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */
|
||||
};
|
||||
|
||||
static const VkBlendFactor pgraph_blend_factor_vk_map[] = {
|
||||
VK_BLEND_FACTOR_ZERO,
|
||||
VK_BLEND_FACTOR_ONE,
|
||||
VK_BLEND_FACTOR_SRC_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR,
|
||||
VK_BLEND_FACTOR_SRC_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
|
||||
VK_BLEND_FACTOR_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA,
|
||||
VK_BLEND_FACTOR_DST_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR,
|
||||
VK_BLEND_FACTOR_SRC_ALPHA_SATURATE,
|
||||
0,
|
||||
VK_BLEND_FACTOR_CONSTANT_COLOR,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR,
|
||||
VK_BLEND_FACTOR_CONSTANT_ALPHA,
|
||||
VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA,
|
||||
};
|
||||
|
||||
static const VkBlendOp pgraph_blend_equation_vk_map[] = {
|
||||
VK_BLEND_OP_SUBTRACT,
|
||||
VK_BLEND_OP_REVERSE_SUBTRACT,
|
||||
VK_BLEND_OP_ADD,
|
||||
VK_BLEND_OP_MIN,
|
||||
VK_BLEND_OP_MAX,
|
||||
VK_BLEND_OP_REVERSE_SUBTRACT,
|
||||
VK_BLEND_OP_ADD,
|
||||
};
|
||||
|
||||
/* FIXME
|
||||
static const GLenum pgraph_blend_logicop_map[] = {
|
||||
GL_CLEAR,
|
||||
GL_AND,
|
||||
GL_AND_REVERSE,
|
||||
GL_COPY,
|
||||
GL_AND_INVERTED,
|
||||
GL_NOOP,
|
||||
GL_XOR,
|
||||
GL_OR,
|
||||
GL_NOR,
|
||||
GL_EQUIV,
|
||||
GL_INVERT,
|
||||
GL_OR_REVERSE,
|
||||
GL_COPY_INVERTED,
|
||||
GL_OR_INVERTED,
|
||||
GL_NAND,
|
||||
GL_SET,
|
||||
};
|
||||
*/
|
||||
|
||||
static const VkCullModeFlags pgraph_cull_face_vk_map[] = {
|
||||
0,
|
||||
VK_CULL_MODE_FRONT_BIT,
|
||||
VK_CULL_MODE_BACK_BIT,
|
||||
VK_CULL_MODE_FRONT_AND_BACK,
|
||||
};
|
||||
|
||||
static const VkCompareOp pgraph_depth_func_vk_map[] = {
|
||||
VK_COMPARE_OP_NEVER,
|
||||
VK_COMPARE_OP_LESS,
|
||||
VK_COMPARE_OP_EQUAL,
|
||||
VK_COMPARE_OP_LESS_OR_EQUAL,
|
||||
VK_COMPARE_OP_GREATER,
|
||||
VK_COMPARE_OP_NOT_EQUAL,
|
||||
VK_COMPARE_OP_GREATER_OR_EQUAL,
|
||||
VK_COMPARE_OP_ALWAYS,
|
||||
};
|
||||
|
||||
static const VkCompareOp pgraph_stencil_func_vk_map[] = {
|
||||
VK_COMPARE_OP_NEVER,
|
||||
VK_COMPARE_OP_LESS,
|
||||
VK_COMPARE_OP_EQUAL,
|
||||
VK_COMPARE_OP_LESS_OR_EQUAL,
|
||||
VK_COMPARE_OP_GREATER,
|
||||
VK_COMPARE_OP_NOT_EQUAL,
|
||||
VK_COMPARE_OP_GREATER_OR_EQUAL,
|
||||
VK_COMPARE_OP_ALWAYS,
|
||||
};
|
||||
|
||||
static const VkStencilOp pgraph_stencil_op_vk_map[] = {
|
||||
0,
|
||||
VK_STENCIL_OP_KEEP,
|
||||
VK_STENCIL_OP_ZERO,
|
||||
VK_STENCIL_OP_REPLACE,
|
||||
VK_STENCIL_OP_INCREMENT_AND_CLAMP,
|
||||
VK_STENCIL_OP_DECREMENT_AND_CLAMP,
|
||||
VK_STENCIL_OP_INVERT,
|
||||
VK_STENCIL_OP_INCREMENT_AND_WRAP,
|
||||
VK_STENCIL_OP_DECREMENT_AND_WRAP,
|
||||
};
|
||||
|
||||
static const VkPolygonMode pgraph_polygon_mode_vk_map[] = {
|
||||
[POLY_MODE_FILL] = VK_POLYGON_MODE_FILL,
|
||||
[POLY_MODE_POINT] = VK_POLYGON_MODE_POINT,
|
||||
[POLY_MODE_LINE] = VK_POLYGON_MODE_LINE,
|
||||
};
|
||||
|
||||
typedef struct VkColorFormatInfo {
|
||||
VkFormat vk_format;
|
||||
VkComponentMapping component_map;
|
||||
} VkColorFormatInfo;
|
||||
|
||||
static const VkColorFormatInfo kelvin_color_format_vk_map[66] = {
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = {
|
||||
VK_FORMAT_R8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = {
|
||||
VK_FORMAT_R8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = {
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = {
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = {
|
||||
VK_FORMAT_A4R4G4B4_UNORM_PACK16,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = {
|
||||
VK_FORMAT_R5G6B5_UNORM_PACK16,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = {
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = {
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = {
|
||||
VK_FORMAT_B8G8R8A8_UNORM, // Converted
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM, // Converted
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM, // Converted
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM, // Converted
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = {
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = {
|
||||
VK_FORMAT_R5G6B5_UNORM_PACK16,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = {
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = {
|
||||
VK_FORMAT_R8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = {
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = {
|
||||
VK_FORMAT_R8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = {
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = {
|
||||
VK_FORMAT_R8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = {
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = {
|
||||
VK_FORMAT_A4R4G4B4_UNORM_PACK16,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = {
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = {
|
||||
VK_FORMAT_R8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = {
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = {
|
||||
VK_FORMAT_R8G8B8_SNORM, // Converted
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = {
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = {
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM, // Converted
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM, // Converted
|
||||
},
|
||||
|
||||
/* Additional information is passed to the pixel shader via the swizzle:
|
||||
* RED: The depth value.
|
||||
* GREEN: 0 for 16-bit, 1 for 24 bit
|
||||
* BLUE: 0 for fixed, 1 for float
|
||||
*/
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = {
|
||||
VK_FORMAT_R16_UNORM, // FIXME
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = {
|
||||
// FIXME
|
||||
// {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}},
|
||||
VK_FORMAT_R32_UINT,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = {
|
||||
// FIXME
|
||||
// {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}},
|
||||
VK_FORMAT_R32_UINT,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = {
|
||||
VK_FORMAT_R16_UNORM, // FIXME
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = {
|
||||
VK_FORMAT_R16_SFLOAT,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO },
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = {
|
||||
VK_FORMAT_R16_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R }
|
||||
},
|
||||
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
{ VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R }
|
||||
},
|
||||
};
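/* Usage sketch (the table fields are from above; the view-info fields are
 * standard Vulkan): a texture binding can consume an entry as
 *
 *     VkColorFormatInfo f = kelvin_color_format_vk_map[s.color_format];
 *     VkImageViewCreateInfo view_info = {
 *         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 *         .format = f.vk_format,
 *         .components = f.component_map,
 *     };
 */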
|
||||
|
||||
typedef struct BasicSurfaceFormatInfo {
|
||||
unsigned int bytes_per_pixel;
|
||||
} BasicSurfaceFormatInfo;
|
||||
|
||||
typedef struct SurfaceFormatInfo {
|
||||
unsigned int host_bytes_per_pixel;
|
||||
VkFormat vk_format;
|
||||
VkImageUsageFlags usage;
|
||||
VkImageAspectFlags aspect;
|
||||
} SurfaceFormatInfo;
|
||||
|
||||
static const BasicSurfaceFormatInfo kelvin_surface_color_format_map[] = {
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = { 2 },
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = { 2 },
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = { 4 },
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = { 4 },
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = { 1 },
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = { 2 },
|
||||
};
|
||||
|
||||
static const SurfaceFormatInfo kelvin_surface_color_format_vk_map[] = {
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
|
||||
{
|
||||
// FIXME: Force alpha to zero
|
||||
2,
|
||||
VK_FORMAT_A1R5G5B5_UNORM_PACK16,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
|
||||
{
|
||||
2,
|
||||
VK_FORMAT_R5G6B5_UNORM_PACK16,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
|
||||
{
|
||||
// FIXME: Force alpha to zero
|
||||
4,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
|
||||
{
|
||||
4,
|
||||
VK_FORMAT_B8G8R8A8_UNORM,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
|
||||
{
|
||||
// FIXME: Map channel color
|
||||
1,
|
||||
VK_FORMAT_R8_UNORM,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
},
|
||||
[NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
|
||||
{
|
||||
// FIXME: Map channel color
|
||||
2,
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
},
|
||||
};
|
||||
|
||||
static const BasicSurfaceFormatInfo kelvin_surface_zeta_format_map[] = {
|
||||
[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = { 2 },
|
||||
[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = { 4 },
|
||||
};
|
||||
|
||||
// FIXME: Actually support stored float format
|
||||
|
||||
static const SurfaceFormatInfo zeta_d16 = {
|
||||
2,
|
||||
VK_FORMAT_D16_UNORM,
|
||||
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT,
|
||||
};
|
||||
|
||||
static const SurfaceFormatInfo zeta_d32_sfloat_s8_uint = {
|
||||
8,
|
||||
VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
};
|
||||
|
||||
static const SurfaceFormatInfo zeta_d24_unorm_s8_uint = {
|
||||
4,
|
||||
VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
|
||||
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
|
||||
};
|
||||
|
||||
#endif
|
|
@@ -0,0 +1,59 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "renderer.h"
#include "debug.h"

#ifndef _WIN32
#include <dlfcn.h>
#endif

#ifdef CONFIG_RENDERDOC
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"
#endif

int nv2a_vk_dgroup_indent = 0;

void pgraph_vk_debug_init(void)
{
#ifdef CONFIG_RENDERDOC
    nv2a_dbg_renderdoc_init();
#endif
}

void pgraph_vk_debug_frame_terminator(void)
{
#ifdef CONFIG_RENDERDOC
    if (nv2a_dbg_renderdoc_available()) {
        RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api();

        PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state;
        if (rdoc_api->IsTargetControlConnected()) {
            if (rdoc_api->IsFrameCapturing()) {
                rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
            }
            if (renderdoc_capture_frames > 0) {
                rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
                --renderdoc_capture_frames;
            }
        }
    }
#endif
}

@@ -0,0 +1,61 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H
#define HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H

#define DEBUG_VK 0

extern int nv2a_vk_dgroup_indent;

#define NV2A_VK_XDPRINTF(x, fmt, ...) \
    do { \
        if (x) { \
            for (int i = 0; i < nv2a_vk_dgroup_indent; i++) \
                fprintf(stderr, " "); \
            fprintf(stderr, fmt "\n", ##__VA_ARGS__); \
        } \
    } while (0)

#define NV2A_VK_DPRINTF(fmt, ...) NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__)

#define NV2A_VK_DGROUP_BEGIN(fmt, ...) \
    do { \
        NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__); \
        nv2a_vk_dgroup_indent++; \
    } while (0)

#define NV2A_VK_DGROUP_END(...) \
    do { \
        nv2a_vk_dgroup_indent--; \
        assert(nv2a_vk_dgroup_indent >= 0); \
    } while (0)

#define VK_CHECK(x) \
    do { \
        VkResult vk_result = (x); \
        if (vk_result != VK_SUCCESS) { \
            fprintf(stderr, "vk_result = %d\n", vk_result); \
        } \
        assert(vk_result == VK_SUCCESS && "vk check failed"); \
    } while (0)
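
/* Example (variable names illustrative):
 *
 *     VK_CHECK(vkCreateFence(r->device, &fence_create_info, NULL, &fence));
 *
 * prints the failing VkResult and asserts if the call does not return
 * VK_SUCCESS.
 */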

void pgraph_vk_debug_frame_terminator(void);

#endif
|
|
@ -0,0 +1,896 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "renderer.h"
|
||||
|
||||
static const char *display_frag_glsl =
|
||||
"#version 450\n"
|
||||
"layout(binding = 0) uniform sampler2D tex;\n"
|
||||
"layout(binding = 1) uniform sampler2D pvideo_tex;\n"
|
||||
"layout(push_constant, std430) uniform PushConstants {\n"
|
||||
" bool pvideo_enable;\n"
|
||||
" vec2 pvideo_in_pos;\n"
|
||||
" vec4 pvideo_pos;\n"
|
||||
" vec3 pvideo_scale;\n"
|
||||
" bool pvideo_color_key_enable;\n"
|
||||
" vec2 display_size;\n"
|
||||
" float line_offset;\n"
|
||||
" vec4 pvideo_color_key;\n"
|
||||
"};\n"
|
||||
"layout(location = 0) out vec4 out_Color;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec2 texCoord = gl_FragCoord.xy/display_size;\n"
|
||||
" texCoord.y = 1 - texCoord.y;\n" // GL compat
|
||||
" float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
|
||||
" texCoord.y = 1 + rel*(texCoord.y - 1);"
|
||||
" out_Color.rgba = texture(tex, texCoord);\n"
|
||||
// " if (pvideo_enable) {\n"
|
||||
// " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
|
||||
// " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
|
||||
// " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n"
|
||||
// " greaterThan(screenCoord, output_region.zw));\n"
|
||||
// " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
|
||||
// " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n"
|
||||
// " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
|
||||
// " in_st.y *= -1.0;\n"
|
||||
// " out_Color.rgba = texture(pvideo_tex, in_st);\n"
|
||||
// " }\n"
|
||||
// " }\n"
|
||||
"}\n";
|
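/* Note (assumption about intended use, not stated in this file): the
 * push_constant block in the shader above is std430, so the C side must
 * supply a matching, correctly padded struct when it records
 * vkCmdPushConstants for this fragment stage.
 */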
||||
|
||||
static void create_descriptor_pool(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkDescriptorPoolSize pool_sizes = {
|
||||
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = 2,
|
||||
};
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
.poolSizeCount = 1,
|
||||
.pPoolSizes = &pool_sizes,
|
||||
.maxSets = 1,
|
||||
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||
};
|
||||
VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
|
||||
&r->display.descriptor_pool));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_pool(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkDestroyDescriptorPool(r->device, r->display.descriptor_pool, NULL);
|
||||
r->display.descriptor_pool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static void create_descriptor_set_layout(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkDescriptorSetLayoutBinding bindings[2];
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(bindings); i++) {
|
||||
bindings[i] = (VkDescriptorSetLayoutBinding){
|
||||
.binding = i,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
};
|
||||
}
|
||||
VkDescriptorSetLayoutCreateInfo layout_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.bindingCount = ARRAY_SIZE(bindings),
|
||||
.pBindings = bindings,
|
||||
};
|
||||
VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
|
||||
&r->display.descriptor_set_layout));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_set_layout(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkDestroyDescriptorSetLayout(r->device, r->display.descriptor_set_layout,
|
||||
NULL);
|
||||
r->display.descriptor_set_layout = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static void create_descriptor_sets(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkDescriptorSetLayout layout = r->display.descriptor_set_layout;
|
||||
|
||||
VkDescriptorSetAllocateInfo alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||
.descriptorPool = r->display.descriptor_pool,
|
||||
.descriptorSetCount = 1,
|
||||
.pSetLayouts = &layout,
|
||||
};
|
||||
VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
|
||||
&r->display.descriptor_set));
|
||||
}
|
||||
|
||||
static void create_render_pass(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkAttachmentDescription attachment;

    VkAttachmentReference color_reference;
    attachment = (VkAttachmentDescription){
        .format = VK_FORMAT_R8G8B8A8_UNORM,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
        .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    };
    color_reference = (VkAttachmentReference){
        0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
    };

    VkSubpassDependency dependency = {
        .srcSubpass = VK_SUBPASS_EXTERNAL,
    };

    dependency.srcStageMask |=
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    dependency.dstStageMask |=
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;

    VkSubpassDescription subpass = {
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
        .colorAttachmentCount = 1,
        .pColorAttachments = &color_reference,
    };

    VkRenderPassCreateInfo renderpass_create_info = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        .attachmentCount = 1,
        .pAttachments = &attachment,
        .subpassCount = 1,
        .pSubpasses = &subpass,
        .dependencyCount = 1,
        .pDependencies = &dependency,
    };
    VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL,
                                &r->display.render_pass));
}

static void destroy_render_pass(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    vkDestroyRenderPass(r->device, r->display.render_pass, NULL);
    r->display.render_pass = VK_NULL_HANDLE;
}

static void create_display_pipeline(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
r->display.display_frag =
|
||||
pgraph_vk_create_shader_module_from_glsl(
|
||||
r, VK_SHADER_STAGE_FRAGMENT_BIT, display_frag_glsl);
|
||||
|
||||
VkPipelineShaderStageCreateInfo shader_stages[] = {
|
||||
(VkPipelineShaderStageCreateInfo){
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_VERTEX_BIT,
|
||||
.module = r->quad_vert_module->module,
|
||||
.pName = "main",
|
||||
},
|
||||
(VkPipelineShaderStageCreateInfo){
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
.module = r->display.display_frag->module,
|
||||
.pName = "main",
|
||||
},
|
||||
};
|
||||
|
||||
VkPipelineVertexInputStateCreateInfo vertex_input = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
};
|
||||
|
||||
VkPipelineInputAssemblyStateCreateInfo input_assembly = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
|
||||
.primitiveRestartEnable = VK_FALSE,
|
||||
};
|
||||
|
||||
VkPipelineViewportStateCreateInfo viewport_state = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.viewportCount = 1,
|
||||
.scissorCount = 1,
|
||||
};
|
||||
|
||||
VkPipelineRasterizationStateCreateInfo rasterizer = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.depthClampEnable = VK_FALSE,
|
||||
.rasterizerDiscardEnable = VK_FALSE,
|
||||
.polygonMode = VK_POLYGON_MODE_FILL,
|
||||
.lineWidth = 1.0f,
|
||||
.cullMode = VK_CULL_MODE_BACK_BIT,
|
||||
.frontFace = VK_FRONT_FACE_CLOCKWISE,
|
||||
.depthBiasEnable = VK_FALSE,
|
||||
};
|
||||
|
||||
VkPipelineMultisampleStateCreateInfo multisampling = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.sampleShadingEnable = VK_FALSE,
|
||||
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||||
};
|
||||
|
||||
VkPipelineDepthStencilStateCreateInfo depth_stencil = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.depthTestEnable = VK_FALSE,
|
||||
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
|
||||
.depthBoundsTestEnable = VK_FALSE,
|
||||
};
|
||||
|
||||
VkPipelineColorBlendAttachmentState color_blend_attachment = {
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
.blendEnable = VK_FALSE,
|
||||
};
|
||||
|
||||
VkPipelineColorBlendStateCreateInfo color_blending = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = VK_LOGIC_OP_COPY,
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &color_blend_attachment,
|
||||
};
|
||||
|
||||
VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
|
||||
VK_DYNAMIC_STATE_SCISSOR };
|
||||
VkPipelineDynamicStateCreateInfo dynamic_state = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.dynamicStateCount = 2,
|
||||
.pDynamicStates = dynamic_states,
|
||||
};
|
||||
|
||||
VkPushConstantRange push_constant_range = {
|
||||
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
.offset = 0,
|
||||
.size = r->display.display_frag->push_constants.total_size,
|
||||
};
|
||||
|
||||
VkPipelineLayoutCreateInfo pipeline_layout_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &r->display.descriptor_set_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &push_constant_range,
|
||||
};
|
||||
VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
|
||||
&r->display.pipeline_layout));
|
||||
|
||||
VkGraphicsPipelineCreateInfo pipeline_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.stageCount = ARRAY_SIZE(shader_stages),
|
||||
.pStages = shader_stages,
|
||||
.pVertexInputState = &vertex_input,
|
||||
.pInputAssemblyState = &input_assembly,
|
||||
.pViewportState = &viewport_state,
|
||||
.pRasterizationState = &rasterizer,
|
||||
.pMultisampleState = &multisampling,
|
||||
.pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
|
||||
.pColorBlendState = &color_blending,
|
||||
.pDynamicState = &dynamic_state,
|
||||
.layout = r->display.pipeline_layout,
|
||||
.renderPass = r->display.render_pass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
};
|
||||
VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
|
||||
&pipeline_info, NULL,
|
||||
&r->display.pipeline));
|
||||
}
|
||||
|
||||
static void destroy_display_pipeline(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyPipeline(r->device, r->display.pipeline, NULL);
    r->display.pipeline = VK_NULL_HANDLE;
}

static void create_frame_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkFramebufferCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .renderPass = r->display.render_pass,
        .attachmentCount = 1,
        .pAttachments = &r->display.image_view,
        .width = r->display.width,
        .height = r->display.height,
        .layers = 1,
    };
    VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL,
                                 &r->display.framebuffer));
}

static void destroy_frame_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    vkDestroyFramebuffer(r->device, r->display.framebuffer, NULL);
    r->display.framebuffer = VK_NULL_HANDLE;
}

static void destroy_current_display_image(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    PGRAPHVkDisplayState *d = &r->display;

    if (d->image == VK_NULL_HANDLE) {
        return;
    }

    destroy_frame_buffer(pg);

#if HAVE_EXTERNAL_MEMORY
    glDeleteTextures(1, &d->gl_texture_id);
    d->gl_texture_id = 0;

    glDeleteMemoryObjectsEXT(1, &d->gl_memory_obj);
    d->gl_memory_obj = 0;

#ifdef WIN32
    CloseHandle(d->handle);
    d->handle = 0;
#endif
#endif

    vkDestroyImageView(r->device, d->image_view, NULL);
    d->image_view = VK_NULL_HANDLE;

    vkDestroyImage(r->device, d->image, NULL);
    d->image = VK_NULL_HANDLE;

    vkFreeMemory(r->device, d->memory, NULL);
    d->memory = VK_NULL_HANDLE;

    d->draw_time = 0;
}

// FIXME: We may need to use two images. One for actually rendering display,
|
||||
// and another for GL in the correct tiling mode
|
||||
|
||||
static void create_display_image_from_surface(PGRAPHState *pg,
|
||||
SurfaceBinding *surface)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
PGRAPHVkDisplayState *d = &r->display;
|
||||
|
||||
if (r->display.image != VK_NULL_HANDLE) {
|
||||
destroy_current_display_image(pg);
|
||||
}
|
||||
|
||||
const GLint gl_internal_format = GL_RGBA8;
|
||||
bool use_optimal_tiling = true;
|
||||
|
||||
#if HAVE_EXTERNAL_MEMORY
|
||||
GLint num_tiling_types;
|
||||
glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
|
||||
GL_NUM_TILING_TYPES_EXT, 1, &num_tiling_types);
|
||||
// XXX: Apparently on AMD GL_OPTIMAL_TILING_EXT is reported to be
|
||||
// supported, but doesn't work? On nVidia, GL_LINEAR_TILING_EXT may not
|
||||
// be supported so we must use optimal. Default to optimal unless
|
||||
// linear is explicitly specified...
|
||||
GLint tiling_types[num_tiling_types];
|
||||
glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format,
|
||||
GL_TILING_TYPES_EXT, num_tiling_types, tiling_types);
|
||||
for (int i = 0; i < num_tiling_types; i++) {
|
||||
if (tiling_types[i] == GL_LINEAR_TILING_EXT) {
|
||||
use_optimal_tiling = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Create image
|
||||
VkImageCreateInfo image_create_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.imageType = VK_IMAGE_TYPE_2D,
|
||||
.extent.width = surface->width,
|
||||
.extent.height = surface->height,
|
||||
.extent.depth = 1,
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.format = VK_FORMAT_R8G8B8A8_UNORM,
|
||||
.tiling = use_optimal_tiling ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
};
|
||||
pgraph_apply_scaling_factor(pg, &image_create_info.extent.width,
|
||||
&image_create_info.extent.height);
|
||||
|
||||
VkExternalMemoryImageCreateInfo external_memory_image_create_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
|
||||
.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
|
||||
};
|
||||
image_create_info.pNext = &external_memory_image_create_info;
|
||||
|
||||
VK_CHECK(vkCreateImage(r->device, &image_create_info, NULL, &d->image));
|
||||
|
||||
// Allocate and bind image memory
|
||||
VkMemoryRequirements memory_requirements;
|
||||
vkGetImageMemoryRequirements(r->device, d->image, &memory_requirements);
|
||||
|
||||
VkMemoryAllocateInfo alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.allocationSize = memory_requirements.size,
|
||||
.memoryTypeIndex =
|
||||
pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
|
||||
};
|
||||
|
||||
VkExportMemoryAllocateInfo export_memory_alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
|
||||
.handleTypes =
|
||||
#ifdef WIN32
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
|
||||
#else
|
||||
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT
|
||||
#endif
|
||||
,
|
||||
};
|
||||
alloc_info.pNext = &export_memory_alloc_info;
|
||||
|
||||
VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &d->memory));
|
||||
|
||||
vkBindImageMemory(r->device, d->image, d->memory, 0);
|
||||
|
||||
// Create Image View
|
||||
VkImageViewCreateInfo image_view_create_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.image = d->image,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = image_create_info.format,
|
||||
.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.subresourceRange.levelCount = 1,
|
||||
.subresourceRange.layerCount = 1,
|
||||
};
|
||||
VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL,
|
||||
&d->image_view));
|
||||
|
||||
#if HAVE_EXTERNAL_MEMORY
|
||||
|
||||
#ifdef WIN32
|
||||
|
||||
VkMemoryGetWin32HandleInfoKHR handle_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR,
|
||||
.memory = d->memory,
|
||||
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
|
||||
};
|
||||
VK_CHECK(vkGetMemoryWin32HandleKHR(r->device, &handle_info, &d->handle));
|
||||
|
||||
glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
|
||||
glImportMemoryWin32HandleEXT(d->gl_memory_obj, memory_requirements.size, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, d->handle);
|
||||
assert(glGetError() == GL_NO_ERROR);
|
||||
|
||||
#else
|
||||
|
||||
VkMemoryGetFdInfoKHR fd_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
|
||||
.memory = d->memory,
|
||||
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
|
||||
};
|
||||
VK_CHECK(vkGetMemoryFdKHR(r->device, &fd_info, &d->fd));
|
||||
|
||||
glCreateMemoryObjectsEXT(1, &d->gl_memory_obj);
|
||||
glImportMemoryFdEXT(d->gl_memory_obj, memory_requirements.size,
|
||||
GL_HANDLE_TYPE_OPAQUE_FD_EXT, d->fd);
|
||||
assert(glIsMemoryObjectEXT(d->gl_memory_obj));
|
||||
assert(glGetError() == GL_NO_ERROR);
|
||||
|
||||
#endif // WIN32
|
||||
|
||||
glGenTextures(1, &d->gl_texture_id);
|
||||
glBindTexture(GL_TEXTURE_2D, d->gl_texture_id);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT,
|
||||
use_optimal_tiling ? GL_OPTIMAL_TILING_EXT :
|
||||
GL_LINEAR_TILING_EXT);
|
||||
glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, gl_internal_format,
|
||||
image_create_info.extent.width,
|
||||
image_create_info.extent.height, d->gl_memory_obj, 0);
|
||||
assert(glGetError() == GL_NO_ERROR);
|
||||
|
||||
#endif // HAVE_EXTERNAL_MEMORY
|
||||
|
||||
d->width = image_create_info.extent.width;
|
||||
d->height = image_create_info.extent.height;
|
||||
|
||||
create_frame_buffer(pg);
|
||||
}
|
||||
|
||||
static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkDescriptorImageInfo image_infos[2];
|
||||
VkWriteDescriptorSet descriptor_writes[2];
|
||||
|
||||
// Display surface
|
||||
image_infos[0] = (VkDescriptorImageInfo){
|
||||
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
||||
.imageView = surface->image_view,
|
||||
.sampler = r->display.sampler,
|
||||
};
|
||||
descriptor_writes[0] = (VkWriteDescriptorSet){
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.dstSet = r->display.descriptor_set,
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = 1,
|
||||
.pImageInfo = &image_infos[0],
|
||||
};
|
||||
|
||||
// FIXME: PVIDEO Overlay
|
||||
image_infos[1] = (VkDescriptorImageInfo){
|
||||
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
||||
.imageView = r->dummy_texture.image_view,
|
||||
.sampler = r->dummy_texture.sampler,
|
||||
};
|
||||
descriptor_writes[1] = (VkWriteDescriptorSet){
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.dstSet = r->display.descriptor_set,
|
||||
.dstBinding = 1,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = 1,
|
||||
.pImageInfo = &image_infos[1],
|
||||
};
|
||||
|
||||
vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
|
||||
descriptor_writes, 0, NULL);
|
||||
}
|
||||
|
||||
static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface)
|
||||
{
|
||||
NV2AState *d = container_of(pg, NV2AState, pgraph);
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
unsigned int width, height;
|
||||
uint32_t pline_offset, pstart_addr, pline_compare;
|
||||
d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
|
||||
d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
|
||||
int line_offset = surface->pitch / pline_offset;
|
||||
|
||||
/* Adjust viewport height for interlaced mode, used only in 1080i */
|
||||
if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
|
||||
height *= 2;
|
||||
}
|
||||
|
||||
pgraph_apply_scaling_factor(pg, &width, &height);
|
||||
|
||||
ShaderUniformLayout *l = &r->display.display_frag->push_constants;
|
||||
int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache
|
||||
int line_offset_loc = uniform_index(l, "line_offset");
|
||||
uniform2f(l, display_size_loc, width, height);
|
||||
uniform1f(l, line_offset_loc, line_offset);
|
||||
|
||||
#if 0 // FIXME: PVIDEO overlay
|
||||
// FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
|
||||
// Many games seem to pass this value when initializing or tearing down
|
||||
// PVIDEO. On its own, this generally does not result in the overlay being
|
||||
// hidden, however there are certain games (e.g., Ultimate Beach Soccer)
|
||||
// that use an unknown mechanism to hide the overlay without explicitly
|
||||
// stopping it.
|
||||
// Since the value seems to be set to 0xFFFFFFFF only in cases where the
|
||||
// content is not valid, it is probably good enough to treat it as an
|
||||
// implicit stop.
|
||||
bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
|
||||
&& d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
|
||||
glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_enable_loc, enabled);
|
||||
if (!enabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
|
||||
hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
|
||||
hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
|
||||
|
||||
int in_width =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
|
||||
int in_height =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);
|
||||
|
||||
int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
|
||||
NV_PVIDEO_POINT_IN_S);
|
||||
int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
|
||||
NV_PVIDEO_POINT_IN_T);
|
||||
|
||||
int in_pitch =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
|
||||
int in_color =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);
|
||||
|
||||
unsigned int out_width =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
|
||||
unsigned int out_height =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);
|
||||
|
||||
float scale_x = 1.0f;
|
||||
float scale_y = 1.0f;
|
||||
unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
|
||||
unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
|
||||
if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) {
|
||||
scale_x = pvideo_calculate_scale(ds_dx, out_width);
|
||||
}
|
||||
if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) {
|
||||
scale_y = pvideo_calculate_scale(dt_dy, out_height);
|
||||
}
|
||||
|
||||
// On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
|
||||
// in them being capped to the output size, content is not scaled. This is
|
||||
// particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
|
||||
// during initialization or teardown.
|
||||
if (in_width > out_width) {
|
||||
in_width = floorf((float)out_width * scale_x + 0.5f);
|
||||
}
|
||||
if (in_height > out_height) {
|
||||
in_height = floorf((float)out_height * scale_y + 0.5f);
|
||||
}
|
||||
|
||||
/* TODO: support other color formats */
|
||||
assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
|
||||
|
||||
unsigned int out_x =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
|
||||
unsigned int out_y =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);
|
||||
|
||||
unsigned int color_key_enabled =
|
||||
GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);
|
||||
glUniform1ui(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_enable_loc,
|
||||
color_key_enabled);
|
||||
|
||||
// TODO: Verify that masking off the top byte is correct.
|
||||
// SeaBlade sets a color key of 0x80000000 but the texture passed into the
|
||||
// shader is cleared to 0 alpha.
|
||||
unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;
|
||||
glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_color_key_loc,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
|
||||
GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
|
||||
|
||||
assert(offset + in_pitch * in_height <= limit);
|
||||
hwaddr end = base + offset + in_pitch * in_height;
|
||||
assert(end <= memory_region_size(d->vram));
|
||||
|
||||
pgraph_apply_scaling_factor(pg, &out_x, &out_y);
|
||||
pgraph_apply_scaling_factor(pg, &out_width, &out_height);
|
||||
|
||||
// Translate for the GL viewport origin.
|
||||
out_y = MAX(pg->renderer_state->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0);
|
||||
|
||||
glActiveTexture(GL_TEXTURE0 + 1);
|
||||
glBindTexture(GL_TEXTURE_2D, d->pgraph.renderer_state->disp_rndr.pvideo_tex);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8(
|
||||
d->vram_ptr + base + offset, in_width, in_height, in_pitch);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, tex_rgba);
|
||||
g_free(tex_rgba);
|
||||
glUniform1i(d->pgraph.renderer_state->disp_rndr.pvideo_tex_loc, 1);
|
||||
glUniform2f(d->pgraph.renderer_state->disp_rndr.pvideo_in_pos_loc, in_s, in_t);
|
||||
glUniform4f(d->pgraph.renderer_state->disp_rndr.pvideo_pos_loc,
|
||||
out_x, out_y, out_width, out_height);
|
||||
glUniform3f(d->pgraph.renderer_state->disp_rndr.pvideo_scale_loc,
|
||||
scale_x, scale_y, 1.0f / pg->surface_scale_factor);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void render_display(PGRAPHState *pg, SurfaceBinding *surface)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
PGRAPHVkDisplayState *disp = &r->display;
|
||||
|
||||
if (disp->draw_time >= surface->draw_time) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (r->in_command_buffer &&
|
||||
surface->draw_time >= r->command_buffer_start_time) {
|
||||
pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING);
|
||||
}
|
||||
|
||||
update_uniforms(pg, surface);
|
||||
update_descriptor_set(pg, surface);
|
||||
|
||||
VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg);
|
||||
|
||||
pgraph_vk_transition_image_layout(pg, cmd, surface->image,
|
||||
surface->host_fmt.vk_format,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
pgraph_vk_transition_image_layout(
|
||||
pg, cmd, disp->image, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||
|
||||
VkRenderPassBeginInfo render_pass_begin_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = disp->render_pass,
|
||||
.framebuffer = disp->framebuffer,
|
||||
.renderArea.extent.width = disp->width,
|
||||
.renderArea.extent.height = disp->height,
|
||||
};
|
||||
vkCmdBeginRenderPass(cmd, &render_pass_begin_info,
|
||||
VK_SUBPASS_CONTENTS_INLINE);
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
disp->pipeline);
|
||||
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
disp->pipeline_layout, 0, 1, &disp->descriptor_set,
|
||||
0, NULL);
|
||||
|
||||
VkViewport viewport = {
|
||||
.width = disp->width,
|
||||
.height = disp->height,
|
||||
.minDepth = 0.0,
|
||||
.maxDepth = 1.0,
|
||||
};
|
||||
vkCmdSetViewport(cmd, 0, 1, &viewport);
|
||||
|
||||
VkRect2D scissor = {
|
||||
.extent.width = disp->width,
|
||||
.extent.height = disp->height,
|
||||
};
|
||||
vkCmdSetScissor(cmd, 0, 1, &scissor);
|
||||
|
||||
vkCmdPushConstants(cmd, disp->pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
0, disp->display_frag->push_constants.total_size,
|
||||
disp->display_frag->push_constants.allocation);
|
||||
|
||||
vkCmdDraw(cmd, 3, 1, 0, 0);
|
||||
|
||||
vkCmdEndRenderPass(cmd);
|
||||
|
||||
#if 0
|
||||
VkImageCopy region = {
|
||||
.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.srcSubresource.layerCount = 1,
|
||||
.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.dstSubresource.layerCount = 1,
|
||||
.extent.width = surface->width,
|
||||
.extent.height = surface->height,
|
||||
.extent.depth = 1,
|
||||
};
|
||||
pgraph_apply_scaling_factor(pg, ®ion.extent.width,
|
||||
®ion.extent.height);
|
||||
|
||||
vkCmdCopyImage(cmd, surface->image,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, disp->image,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion);
|
||||
#endif
|
||||
|
||||
pgraph_vk_transition_image_layout(pg, cmd, surface->image,
|
||||
surface->host_fmt.vk_format,
|
||||
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||
|
||||
    pgraph_vk_transition_image_layout(pg, cmd, disp->image,
                                      VK_FORMAT_R8G8B8A8_UNORM,
                                      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                                      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
|
||||
pgraph_vk_end_single_time_commands(pg, cmd);
|
||||
nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_5);
|
||||
|
||||
disp->draw_time = surface->draw_time;
|
||||
}
|
||||
|
||||
static void create_surface_sampler(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkSamplerCreateInfo sampler_create_info = {
        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter = VK_FILTER_NEAREST,
        .minFilter = VK_FILTER_NEAREST,
        .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
        .anisotropyEnable = VK_FALSE,
        .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE,
        .unnormalizedCoordinates = VK_FALSE,
        .compareEnable = VK_FALSE,
        .compareOp = VK_COMPARE_OP_ALWAYS,
        .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
    };

    VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL,
                             &r->display.sampler));
}

static void destroy_surface_sampler(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroySampler(r->device, r->display.sampler, NULL);
    r->display.sampler = VK_NULL_HANDLE;
}

void pgraph_vk_init_display(PGRAPHState *pg)
{
    create_descriptor_pool(pg);
    create_descriptor_set_layout(pg);
    create_descriptor_sets(pg);
    create_render_pass(pg);
    create_display_pipeline(pg);
    create_surface_sampler(pg);
}

void pgraph_vk_finalize_display(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (r->display.image != VK_NULL_HANDLE) {
        destroy_current_display_image(pg);
    }

    destroy_surface_sampler(pg);
    destroy_display_pipeline(pg);
    destroy_render_pass(pg);
    destroy_descriptor_set_layout(pg);
    destroy_descriptor_pool(pg);
}

void pgraph_vk_render_display(PGRAPHState *pg)
{
    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint32_t pline_offset, pstart_addr, pline_compare;
    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
    SurfaceBinding *surface =
        pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
    if (surface == NULL || !surface->color) {
        return;
    }

    unsigned int width = surface->width, height = surface->height;
    pgraph_apply_scaling_factor(pg, &width, &height);

    PGRAPHVkDisplayState *disp = &r->display;
    if (!disp->image || disp->width != width || disp->height != height) {
        create_display_image_from_surface(pg, surface);
    }

    render_display(pg, surface);
}

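For orientation, here is a minimal sketch (an editorial illustration, not part of the patch) of how the three public entry points above are meant to be driven. The wrapper function and its call sites are hypothetical; pg is assumed to refer to an already-initialized Vulkan PGRAPH state.

/* Illustrative only -- not part of the patch. */
void example_display_lifecycle(PGRAPHState *pg)
{
    pgraph_vk_init_display(pg);       /* once, after the Vulkan device is up */

    for (int frame = 0; frame < 3; frame++) {
        /* per scanout: finds the surface at PCRTC start and renders it into
         * the shared display image consumed by the GL presenter */
        pgraph_vk_render_display(pg);
    }

    pgraph_vk_finalize_display(pg);   /* on renderer teardown */
}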
File diff suppressed because it is too large
|
@@ -0,0 +1,380 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "renderer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <glslang/Include/glslang_c_interface.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static const glslang_resource_t
|
||||
resource_limits = { .max_lights = 32,
|
||||
.max_clip_planes = 6,
|
||||
.max_texture_units = 32,
|
||||
.max_texture_coords = 32,
|
||||
.max_vertex_attribs = 64,
|
||||
.max_vertex_uniform_components = 4096,
|
||||
.max_varying_floats = 64,
|
||||
.max_vertex_texture_image_units = 32,
|
||||
.max_combined_texture_image_units = 80,
|
||||
.max_texture_image_units = 32,
|
||||
.max_fragment_uniform_components = 4096,
|
||||
.max_draw_buffers = 32,
|
||||
.max_vertex_uniform_vectors = 128,
|
||||
.max_varying_vectors = 8,
|
||||
.max_fragment_uniform_vectors = 16,
|
||||
.max_vertex_output_vectors = 16,
|
||||
.max_fragment_input_vectors = 15,
|
||||
.min_program_texel_offset = -8,
|
||||
.max_program_texel_offset = 7,
|
||||
.max_clip_distances = 8,
|
||||
.max_compute_work_group_count_x = 65535,
|
||||
.max_compute_work_group_count_y = 65535,
|
||||
.max_compute_work_group_count_z = 65535,
|
||||
.max_compute_work_group_size_x = 1024,
|
||||
.max_compute_work_group_size_y = 1024,
|
||||
.max_compute_work_group_size_z = 64,
|
||||
.max_compute_uniform_components = 1024,
|
||||
.max_compute_texture_image_units = 16,
|
||||
.max_compute_image_uniforms = 8,
|
||||
.max_compute_atomic_counters = 8,
|
||||
.max_compute_atomic_counter_buffers = 1,
|
||||
.max_varying_components = 60,
|
||||
.max_vertex_output_components = 64,
|
||||
.max_geometry_input_components = 64,
|
||||
.max_geometry_output_components = 128,
|
||||
.max_fragment_input_components = 128,
|
||||
.max_image_units = 8,
|
||||
.max_combined_image_units_and_fragment_outputs = 8,
|
||||
.max_combined_shader_output_resources = 8,
|
||||
.max_image_samples = 0,
|
||||
.max_vertex_image_uniforms = 0,
|
||||
.max_tess_control_image_uniforms = 0,
|
||||
.max_tess_evaluation_image_uniforms = 0,
|
||||
.max_geometry_image_uniforms = 0,
|
||||
.max_fragment_image_uniforms = 8,
|
||||
.max_combined_image_uniforms = 8,
|
||||
.max_geometry_texture_image_units = 16,
|
||||
.max_geometry_output_vertices = 256,
|
||||
.max_geometry_total_output_components = 1024,
|
||||
.max_geometry_uniform_components = 1024,
|
||||
.max_geometry_varying_components = 64,
|
||||
.max_tess_control_input_components = 128,
|
||||
.max_tess_control_output_components = 128,
|
||||
.max_tess_control_texture_image_units = 16,
|
||||
.max_tess_control_uniform_components = 1024,
|
||||
.max_tess_control_total_output_components = 4096,
|
||||
.max_tess_evaluation_input_components = 128,
|
||||
.max_tess_evaluation_output_components = 128,
|
||||
.max_tess_evaluation_texture_image_units = 16,
|
||||
.max_tess_evaluation_uniform_components = 1024,
|
||||
.max_tess_patch_components = 120,
|
||||
.max_patch_vertices = 32,
|
||||
.max_tess_gen_level = 64,
|
||||
.max_viewports = 16,
|
||||
.max_vertex_atomic_counters = 0,
|
||||
.max_tess_control_atomic_counters = 0,
|
||||
.max_tess_evaluation_atomic_counters = 0,
|
||||
.max_geometry_atomic_counters = 0,
|
||||
.max_fragment_atomic_counters = 8,
|
||||
.max_combined_atomic_counters = 8,
|
||||
.max_atomic_counter_bindings = 1,
|
||||
.max_vertex_atomic_counter_buffers = 0,
|
||||
.max_tess_control_atomic_counter_buffers = 0,
|
||||
.max_tess_evaluation_atomic_counter_buffers = 0,
|
||||
.max_geometry_atomic_counter_buffers = 0,
|
||||
.max_fragment_atomic_counter_buffers = 1,
|
||||
.max_combined_atomic_counter_buffers = 1,
|
||||
.max_atomic_counter_buffer_size = 16384,
|
||||
.max_transform_feedback_buffers = 4,
|
||||
.max_transform_feedback_interleaved_components = 64,
|
||||
.max_cull_distances = 8,
|
||||
.max_combined_clip_and_cull_distances = 8,
|
||||
.max_samples = 4,
|
||||
.max_mesh_output_vertices_nv = 256,
|
||||
.max_mesh_output_primitives_nv = 512,
|
||||
.max_mesh_work_group_size_x_nv = 32,
|
||||
.max_mesh_work_group_size_y_nv = 1,
|
||||
.max_mesh_work_group_size_z_nv = 1,
|
||||
.max_task_work_group_size_x_nv = 32,
|
||||
.max_task_work_group_size_y_nv = 1,
|
||||
.max_task_work_group_size_z_nv = 1,
|
||||
.max_mesh_view_count_nv = 4,
|
||||
.maxDualSourceDrawBuffersEXT = 1,
|
||||
.limits = {
|
||||
.non_inductive_for_loops = 1,
|
||||
.while_loops = 1,
|
||||
.do_while_loops = 1,
|
||||
.general_uniform_indexing = 1,
|
||||
.general_attribute_matrix_vector_indexing = 1,
|
||||
.general_varying_indexing = 1,
|
||||
.general_sampler_indexing = 1,
|
||||
.general_variable_indexing = 1,
|
||||
.general_constant_matrix_vector_indexing = 1,
|
||||
} };
|
||||
|
||||
void pgraph_vk_init_glsl_compiler(void)
{
    glslang_initialize_process();
}

void pgraph_vk_finalize_glsl_compiler(void)
{
    glslang_finalize_process();
}

GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage,
|
||||
const char *glsl_source)
|
||||
{
|
||||
const glslang_input_t input = {
|
||||
.language = GLSLANG_SOURCE_GLSL,
|
||||
.stage = stage,
|
||||
.client = GLSLANG_CLIENT_VULKAN,
|
||||
.client_version = GLSLANG_TARGET_VULKAN_1_3,
|
||||
.target_language = GLSLANG_TARGET_SPV,
|
||||
.target_language_version = GLSLANG_TARGET_SPV_1_5,
|
||||
.code = glsl_source,
|
||||
.default_version = 460,
|
||||
.default_profile = GLSLANG_NO_PROFILE,
|
||||
.force_default_version_and_profile = false,
|
||||
.forward_compatible = false,
|
||||
.messages = GLSLANG_MSG_DEFAULT_BIT,
|
||||
.resource = &resource_limits,
|
||||
};
|
||||
|
||||
glslang_shader_t *shader = glslang_shader_create(&input);
|
||||
|
||||
if (!glslang_shader_preprocess(shader, &input)) {
|
||||
fprintf(stderr,
|
||||
"GLSL preprocessing failed\n"
|
||||
"[INFO]: %s\n"
|
||||
"[DEBUG]: %s\n"
|
||||
"%s\n",
|
||||
glslang_shader_get_info_log(shader),
|
||||
glslang_shader_get_info_debug_log(shader), input.code);
|
||||
assert(!"glslang preprocess failed");
|
||||
glslang_shader_delete(shader);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!glslang_shader_parse(shader, &input)) {
|
||||
fprintf(stderr,
|
||||
"GLSL parsing failed\n"
|
||||
"[INFO]: %s\n"
|
||||
"[DEBUG]: %s\n"
|
||||
"%s\n",
|
||||
glslang_shader_get_info_log(shader),
|
||||
glslang_shader_get_info_debug_log(shader),
|
||||
glslang_shader_get_preprocessed_code(shader));
|
||||
assert(!"glslang parse failed");
|
||||
glslang_shader_delete(shader);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
glslang_program_t *program = glslang_program_create();
|
||||
glslang_program_add_shader(program, shader);
|
||||
|
||||
if (!glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT |
|
||||
GLSLANG_MSG_VULKAN_RULES_BIT)) {
|
||||
fprintf(stderr,
|
||||
"GLSL linking failed\n"
|
||||
"[INFO]: %s\n"
|
||||
"[DEBUG]: %s\n",
|
||||
glslang_program_get_info_log(program),
|
||||
glslang_program_get_info_debug_log(program));
|
||||
assert(!"glslang link failed");
|
||||
glslang_program_delete(program);
|
||||
glslang_shader_delete(shader);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
glslang_spv_options_t spv_options = {
|
||||
.validate = true,
|
||||
|
||||
#if defined(CONFIG_RENDERDOC)
|
||||
.disable_optimizer = true,
|
||||
.generate_debug_info = true,
|
||||
.emit_nonsemantic_shader_debug_info = true,
|
||||
.emit_nonsemantic_shader_debug_source = true,
|
||||
#endif
|
||||
};
|
||||
glslang_program_SPIRV_generate_with_options(program, stage, &spv_options);
|
||||
|
||||
const char *spirv_messages = glslang_program_SPIRV_get_messages(program);
|
||||
if (spirv_messages) {
|
||||
printf("%s\b", spirv_messages);
|
||||
}
|
||||
|
||||
size_t num_program_bytes =
|
||||
glslang_program_SPIRV_get_size(program) * sizeof(uint32_t);
|
||||
|
||||
guint8 *data = g_malloc(num_program_bytes);
|
||||
glslang_program_SPIRV_get(program, (unsigned int *)data);
|
||||
|
||||
glslang_program_delete(program);
|
||||
glslang_shader_delete(shader);
|
||||
|
||||
return g_byte_array_new_take(data, num_program_bytes);
|
||||
}
|
||||
|
||||
VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, GByteArray *spv)
{
    VkShaderModuleCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .codeSize = spv->len,
        .pCode = (uint32_t *)spv->data,
    };
    VkShaderModule module;
    VK_CHECK(
        vkCreateShaderModule(r->device, &create_info, NULL, &module));
    return module;
}

static void block_to_uniforms(const SpvReflectBlockVariable *block, ShaderUniformLayout *layout)
|
||||
{
|
||||
assert(!layout->uniforms);
|
||||
|
||||
layout->num_uniforms = block->member_count;
|
||||
layout->uniforms = g_malloc0_n(block->member_count, sizeof(ShaderUniform));
|
||||
layout->total_size = block->size;
|
||||
layout->allocation = g_malloc0(block->size);
|
||||
|
||||
for (uint32_t k = 0; k < block->member_count; ++k) {
|
||||
const SpvReflectBlockVariable *member = &block->members[k];
|
||||
|
||||
assert(member->array.dims_count < 2);
|
||||
|
||||
layout->uniforms[k] = (ShaderUniform){
|
||||
.name = strdup(member->name),
|
||||
.offset = member->offset,
|
||||
.dim_v = MAX(1, member->numeric.vector.component_count),
|
||||
.dim_a = MAX(member->array.dims_count ? member->array.dims[0] : 1, member->numeric.matrix.column_count),
|
||||
.stride = MAX(member->array.stride, member->numeric.matrix.stride),
|
||||
};
|
||||
|
||||
// fprintf(stderr, "<%s offset=%zd dim_v=%zd dim_a=%zd stride=%zd>\n",
|
||||
// layout->uniforms[k].name,
|
||||
// layout->uniforms[k].offset,
|
||||
// layout->uniforms[k].dim_v,
|
||||
// layout->uniforms[k].dim_a,
|
||||
// layout->uniforms[k].stride
|
||||
// );
|
||||
}
|
||||
// fprintf(stderr, "--\n");
|
||||
}
|
||||
|
||||
static void init_layout_from_spv(ShaderModuleInfo *info)
|
||||
{
|
||||
SpvReflectResult result = spvReflectCreateShaderModule(
|
||||
info->spirv->len, info->spirv->data, &info->reflect_module);
|
||||
assert(result == SPV_REFLECT_RESULT_SUCCESS &&
|
||||
"Failed to create SPIR-V shader module");
|
||||
|
||||
uint32_t descriptor_set_count = 0;
|
||||
result = spvReflectEnumerateDescriptorSets(&info->reflect_module,
|
||||
&descriptor_set_count, NULL);
|
||||
assert(result == SPV_REFLECT_RESULT_SUCCESS &&
|
||||
"Failed to enumerate descriptor sets");
|
||||
|
||||
info->descriptor_sets =
|
||||
g_malloc_n(descriptor_set_count, sizeof(SpvReflectDescriptorSet *));
|
||||
result = spvReflectEnumerateDescriptorSets(
|
||||
&info->reflect_module, &descriptor_set_count, info->descriptor_sets);
|
||||
assert(result == SPV_REFLECT_RESULT_SUCCESS &&
|
||||
"Failed to enumerate descriptor sets");
|
||||
|
||||
info->uniforms.num_uniforms = 0;
|
||||
info->uniforms.uniforms = NULL;
|
||||
|
||||
for (uint32_t i = 0; i < descriptor_set_count; ++i) {
|
||||
const SpvReflectDescriptorSet *descriptor_set =
|
||||
info->descriptor_sets[i];
|
||||
for (uint32_t j = 0; j < descriptor_set->binding_count; ++j) {
|
||||
const SpvReflectDescriptorBinding *binding =
|
||||
descriptor_set->bindings[j];
|
||||
if (binding->descriptor_type !=
|
||||
SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const SpvReflectBlockVariable *block = &binding->block;
|
||||
block_to_uniforms(block, &info->uniforms);
|
||||
}
|
||||
}
|
||||
|
||||
info->push_constants.num_uniforms = 0;
|
||||
info->push_constants.uniforms = NULL;
|
||||
assert(info->reflect_module.push_constant_block_count < 2);
|
||||
if (info->reflect_module.push_constant_block_count) {
|
||||
block_to_uniforms(&info->reflect_module.push_constant_blocks[0],
|
||||
&info->push_constants);
|
||||
}
|
||||
}
|
||||
|
||||
static glslang_stage_t vk_shader_stage_to_glslang_stage(VkShaderStageFlagBits stage)
|
||||
{
|
||||
switch (stage) {
|
||||
case VK_SHADER_STAGE_GEOMETRY_BIT:
|
||||
return GLSLANG_STAGE_GEOMETRY;
|
||||
case VK_SHADER_STAGE_VERTEX_BIT:
|
||||
return GLSLANG_STAGE_VERTEX;
|
||||
case VK_SHADER_STAGE_FRAGMENT_BIT:
|
||||
return GLSLANG_STAGE_FRAGMENT;
|
||||
case VK_SHADER_STAGE_COMPUTE_BIT:
|
||||
return GLSLANG_STAGE_COMPUTE;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
}
|
||||
|
||||
ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
|
||||
PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl)
|
||||
{
|
||||
ShaderModuleInfo *info = g_malloc0(sizeof(*info));
|
||||
info->glsl = strdup(glsl);
|
||||
info->spirv = pgraph_vk_compile_glsl_to_spv(
|
||||
vk_shader_stage_to_glslang_stage(stage), glsl);
|
||||
info->module = pgraph_vk_create_shader_module_from_spv(r, info->spirv);
|
||||
init_layout_from_spv(info);
|
||||
return info;
|
||||
}
|
||||
|
||||
static void finalize_uniform_layout(ShaderUniformLayout *layout)
|
||||
{
|
||||
for (int i = 0; i < layout->num_uniforms; i++) {
|
||||
free((void*)layout->uniforms[i].name);
|
||||
}
|
||||
if (layout->uniforms) {
|
||||
g_free(layout->uniforms);
|
||||
}
|
||||
}
|
||||
|
||||
void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info)
|
||||
{
|
||||
if (info->glsl) {
|
||||
free(info->glsl);
|
||||
}
|
||||
finalize_uniform_layout(&info->uniforms);
|
||||
finalize_uniform_layout(&info->push_constants);
|
||||
free(info->descriptor_sets);
|
||||
spvReflectDestroyShaderModule(&info->reflect_module);
|
||||
vkDestroyShaderModule(r->device, info->module, NULL);
|
||||
g_byte_array_unref(info->spirv);
|
||||
g_free(info);
|
||||
}
|
|
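As a usage illustration (not part of the patch), the sketch below compiles a trivial fragment shader with the helpers above and writes one push-constant value. The shader source and its "color" member are hypothetical; r is assumed to be a live PGRAPHVkState.

/* Illustrative only -- not part of the patch. */
static const char *example_frag_src =
    "#version 450\n"
    "layout(push_constant) uniform PushConstants { vec4 color; };\n"
    "layout(location = 0) out vec4 out_Color;\n"
    "void main() { out_Color = color; }\n";

void example_shader_module(PGRAPHVkState *r)
{
    ShaderModuleInfo *m = pgraph_vk_create_shader_module_from_glsl(
        r, VK_SHADER_STAGE_FRAGMENT_BIT, example_frag_src);

    /* Push-constant members are discovered via SPIRV-Reflect above;
     * uniform_index() returns a 1-based handle. */
    int color_loc = uniform_index(&m->push_constants, "color");
    uniform4f(&m->push_constants, color_loc, 1.0f, 0.0f, 0.0f, 1.0f);

    /* m->module can now be plugged into a VkPipelineShaderStageCreateInfo;
     * m->push_constants.allocation holds the packed push-constant bytes. */

    pgraph_vk_destroy_shader_module(r, m);
}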
@@ -0,0 +1,205 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HW_XBOX_NV2A_PGRAPH_VK_GLSL_H
|
||||
#define HW_XBOX_NV2A_PGRAPH_VK_GLSL_H
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
typedef struct ShaderUniform {
|
||||
const char *name;
|
||||
size_t dim_v;
|
||||
size_t dim_a;
|
||||
size_t align;
|
||||
size_t stride;
|
||||
size_t offset;
|
||||
} ShaderUniform;
|
||||
|
||||
typedef struct ShaderUniformLayout {
|
||||
ShaderUniform *uniforms;
|
||||
size_t num_uniforms;
|
||||
size_t total_size;
|
||||
void *allocation;
|
||||
} ShaderUniformLayout;
|
||||
|
||||
static inline void uniform_std140(ShaderUniformLayout *layout)
|
||||
{
|
||||
size_t offset = 0;
|
||||
|
||||
for (int i = 0; i < layout->num_uniforms; i++) {
|
||||
ShaderUniform *u = &layout->uniforms[i];
|
||||
size_t size = sizeof(float); // float or int
|
||||
size_t align = size;
|
||||
size_t stride = 0;
|
||||
|
||||
size *= u->dim_v;
|
||||
align *= u->dim_v == 3 ? 4 : u->dim_v;
|
||||
|
||||
// If an array, each element is padded to vec4.
|
||||
if (u->dim_a > 1) {
|
||||
align = 4 * sizeof(float);
|
||||
stride = align;
|
||||
size = u->dim_a * align;
|
||||
} else {
|
||||
align = size;
|
||||
stride = 0;
|
||||
}
|
||||
|
||||
offset = ROUND_UP(offset, align);
|
||||
|
||||
u->align = align;
|
||||
u->offset = offset;
|
||||
u->stride = stride;
|
||||
|
||||
offset += size;
|
||||
}
|
||||
|
||||
layout->total_size = offset;
|
||||
assert(layout->total_size);
|
||||
}
|
||||
|
||||
static inline void uniform_std430(ShaderUniformLayout *layout)
|
||||
{
|
||||
size_t offset = 0;
|
||||
|
||||
for (int i = 0; i < layout->num_uniforms; i++) {
|
||||
ShaderUniform *u = &layout->uniforms[i];
|
||||
size_t size = sizeof(float); // float or int
|
||||
size *= u->dim_v;
|
||||
size_t align = size;
|
||||
size *= u->dim_a;
|
||||
|
||||
offset = ROUND_UP(offset, align);
|
||||
|
||||
u->align = align;
|
||||
u->offset = offset;
|
||||
u->stride = u->dim_a > 1 ? (size * u->dim_v) : 0;
|
||||
|
||||
offset += size;
|
||||
}
|
||||
|
||||
layout->total_size = offset;
|
||||
assert(layout->total_size);
|
||||
}
|
||||
|
||||
static inline int uniform_index(ShaderUniformLayout *layout, const char *name)
|
||||
{
|
||||
for (int i = 0; i < layout->num_uniforms; i++) {
|
||||
if (!strcmp(layout->uniforms[i].name, name)) {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static inline
|
||||
void *uniform_ptr(ShaderUniformLayout *layout, int idx)
|
||||
{
|
||||
assert(idx > 0 && "invalid uniform index");
|
||||
|
||||
return (char *)layout->allocation + layout->uniforms[idx - 1].offset;
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform_copy(ShaderUniformLayout *layout, int idx, void *values, size_t value_size, size_t count)
|
||||
{
|
||||
assert(idx > 0 && "invalid uniform index");
|
||||
|
||||
ShaderUniform *u = &layout->uniforms[idx - 1];
|
||||
const size_t element_size = value_size * u->dim_v;
|
||||
|
||||
size_t bytes_remaining = value_size * count;
|
||||
char *p_out = uniform_ptr(layout, idx);
|
||||
char *p_max = p_out + layout->total_size;
|
||||
char *p_in = (char *)values;
|
||||
|
||||
int index = 0;
|
||||
while (bytes_remaining) {
|
||||
assert(p_out < p_max);
|
||||
assert(index < u->dim_a);
|
||||
memcpy(p_out, p_in, element_size);
|
||||
bytes_remaining -= element_size;
|
||||
p_out += u->stride;
|
||||
p_in += element_size;
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform1fv(ShaderUniformLayout *layout, int idx, size_t count, float *values)
|
||||
{
|
||||
uniform_copy(layout, idx, values, sizeof(float), count);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform1f(ShaderUniformLayout *layout, int idx, float value)
|
||||
{
|
||||
uniform1fv(layout, idx, 1, &value);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform2f(ShaderUniformLayout *layout, int idx, float v0, float v1)
|
||||
{
|
||||
float values[] = { v0, v1 };
|
||||
uniform1fv(layout, idx, 2, values);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform4f(ShaderUniformLayout *layout, int idx, float v0, float v1, float v2, float v3)
|
||||
{
|
||||
float values[] = { v0, v1, v2, v3 };
|
||||
uniform1fv(layout, idx, 4, values);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniformMatrix2fv(ShaderUniformLayout *layout, int idx, float *values)
|
||||
{
|
||||
uniform1fv(layout, idx, 4, values);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniformMatrix4fv(ShaderUniformLayout *layout, int idx, float *values)
|
||||
{
|
||||
uniform1fv(layout, idx, 4 * 4, values);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform1iv(ShaderUniformLayout *layout, int idx, size_t count, int32_t *values)
|
||||
{
|
||||
uniform_copy(layout, idx, values, sizeof(int32_t), count);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform1i(ShaderUniformLayout *layout, int idx, int32_t value)
|
||||
{
|
||||
uniform1iv(layout, idx, 1, &value);
|
||||
}
|
||||
|
||||
static inline
|
||||
void uniform4i(ShaderUniformLayout *layout, int idx, int v0, int v1, int v2, int v3)
|
||||
{
|
||||
int values[] = { v0, v1, v2, v3 };
|
||||
uniform1iv(layout, idx, 4, values);
|
||||
}
|
||||
|
||||
#endif
|
|
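To make the packing rules above concrete, here is a small illustrative sketch (not part of the patch) that runs a made-up three-member layout through uniform_std140() and writes two values. The offsets and sizes noted in the comments follow directly from the code above.

/* Illustrative only -- not part of the patch. */
ShaderUniform example_uniforms[] = {
    { .name = "display_size", .dim_v = 2, .dim_a = 1 }, /* vec2    -> offset 0            */
    { .name = "line_offset",  .dim_v = 1, .dim_a = 1 }, /* float   -> offset 8            */
    { .name = "bump_mat",     .dim_v = 4, .dim_a = 4 }, /* vec4[4] -> offset 16, stride 16 */
};

void example_std140(void)
{
    ShaderUniformLayout l = {
        .uniforms = example_uniforms,
        .num_uniforms = ARRAY_SIZE(example_uniforms),
    };
    uniform_std140(&l);              /* total_size = 16 + 4 * 16 = 80 bytes */
    l.allocation = g_malloc0(l.total_size);

    /* uniform_index() returns a 1-based handle, or -1 if not found. */
    uniform2f(&l, uniform_index(&l, "display_size"), 640.0f, 480.0f);
    uniform1f(&l, uniform_index(&l, "line_offset"), 1.0f);

    g_free(l.allocation);
}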
@@ -0,0 +1,209 @@
|
|||
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "renderer.h"
|
||||
|
||||
static bool check_format_has_depth_component(VkFormat format)
{
    return format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
           format == VK_FORMAT_D24_UNORM_S8_UINT ||
           format == VK_FORMAT_D16_UNORM;
}

static bool check_format_has_stencil_component(VkFormat format)
{
    return format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
           format == VK_FORMAT_D24_UNORM_S8_UINT;
}

void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd,
|
||||
VkImage image, VkFormat format,
|
||||
VkImageLayout oldLayout,
|
||||
VkImageLayout newLayout)
|
||||
{
|
||||
VkImageMemoryBarrier barrier = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.oldLayout = oldLayout,
|
||||
.newLayout = newLayout,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange.baseMipLevel = 0,
|
||||
.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.subresourceRange.baseArrayLayer = 0,
|
||||
.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
};
|
||||
|
||||
if (check_format_has_depth_component(format)) {
|
||||
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
|
||||
if (check_format_has_stencil_component(format)) {
|
||||
barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
}
|
||||
} else {
|
||||
barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
}
|
||||
|
||||
VkPipelineStageFlags sourceStage;
|
||||
VkPipelineStageFlags destinationStage;
|
||||
|
||||
// Undefined -> Dst
|
||||
if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
|
||||
newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
|
||||
barrier.srcAccessMask = 0;
|
||||
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
||||
destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
|
||||
|
||||
// Undefined -> Color
|
||||
} else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
|
||||
newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
|
||||
barrier.srcAccessMask = 0;
|
||||
barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
||||
destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
|
||||
// Undefined -> Depth
|
||||
} else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED &&
|
||||
newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
|
||||
barrier.srcAccessMask = 0;
|
||||
barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
||||
destinationStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
|
||||
|
||||
// Dst -> Shader Read
|
||||
} else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
|
||||
newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
|
||||
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||
sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
|
||||
destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||
|
||||
// Dst -> Color
|
||||
    } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

    // Dst -> Depth
    } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;

    // Dst -> Src
    } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    // Shader Read -> Dst
    } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    // Shader Read -> Color
    } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;
        barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
        destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

    // Color -> Src
    } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
        sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    // Color -> Dst
    } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    // Color -> Shader Read
    } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
        sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
        destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

    // Depth -> Src
    } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;

        sourceStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    // Depth -> Dst
    } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    // Src -> Color
    } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

    // Src -> Depth
    } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
        barrier.srcAccessMask = 0;
        barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT;

    // Src -> Dst
    } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL &&
               newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
        barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
        barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
        sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT;
        destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT;

    } else {
        assert(!"unsupported layout transition!");
    }

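    /* Record the transition: a single image memory barrier carrying the
     * access masks selected above, ordered between sourceStage and
     * destinationStage. */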
    vkCmdPipelineBarrier(cmd, sourceStage, destinationStage, 0, 0,
                         NULL, 0, NULL, 1, &barrier);
}

@ -0,0 +1,662 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "ui/xemu-settings.h"
#include "renderer.h"
#include "xemu-version.h"

#include <SDL.h>
#include <SDL_syswm.h>
#include <SDL_vulkan.h>

#include <volk.h>

typedef GArray VkExtensionPropertiesArray;
typedef GArray StringArray;

static bool enable_validation = false;

static char const *const validation_layers[] = {
    "VK_LAYER_KHRONOS_validation",
};

static char const *const required_instance_extensions[] = {
    VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
    VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
    VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
};

static char const *const required_device_extensions[] = {
    VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
    VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME,
#ifdef WIN32
    VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
    VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME,
#else
    VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
    VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
#endif
};

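/* Debug messenger callback: every message from the validation layers is
 * logged; validation warnings and errors are treated as fatal. */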
static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback(
    VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity,
    VkDebugUtilsMessageTypeFlagsEXT messageType,
    const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData)
{
    NV2A_VK_DPRINTF("[vk] %s", pCallbackData->pMessage);
    fprintf(stderr, "[vk] %s\n", pCallbackData->pMessage);

    if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) &&
        (messageSeverity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                            VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) {
        exit(1);
    }
    return VK_FALSE;
}

static bool check_validation_layer_support(void)
{
    uint32_t num_available_layers;
    vkEnumerateInstanceLayerProperties(&num_available_layers, NULL);

    g_autofree VkLayerProperties *available_layers =
        g_malloc_n(num_available_layers, sizeof(VkLayerProperties));
    vkEnumerateInstanceLayerProperties(&num_available_layers, available_layers);

    for (int i = 0; i < ARRAY_SIZE(validation_layers); i++) {
        bool found = false;
        for (int j = 0; j < num_available_layers; j++) {
            if (!strcmp(validation_layers[i], available_layers[j].layerName)) {
                found = true;
                break;
            }
        }
        if (!found) {
            fprintf(stderr, "desired validation layer not found: %s\n",
                    validation_layers[i]);
            return false;
        }
    }

    return true;
}

static SDL_Window *create_window(void)
{
    SDL_Window *window = SDL_CreateWindow(
        "SDL Offscreen Window", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED,
        640, 480, SDL_WINDOW_VULKAN | SDL_WINDOW_HIDDEN);

    if (window == NULL) {
        fprintf(stderr, "%s: Failed to create window\n", __func__);
        SDL_Quit();
        exit(1);
    }

    return window;
}

static VkExtensionPropertiesArray *
get_available_instance_extensions(PGRAPHState *pg)
{
    uint32_t num_extensions = 0;

    VK_CHECK(
        vkEnumerateInstanceExtensionProperties(NULL, &num_extensions, NULL));

    VkExtensionPropertiesArray *extensions = g_array_sized_new(
        FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions);

    g_array_set_size(extensions, num_extensions);
    VK_CHECK(vkEnumerateInstanceExtensionProperties(
        NULL, &num_extensions, (VkExtensionProperties *)extensions->data));

    return extensions;
}

static bool
is_extension_available(VkExtensionPropertiesArray *available_extensions,
                       const char *extension_name)
{
    for (int i = 0; i < available_extensions->len; i++) {
        VkExtensionProperties *e =
            &g_array_index(available_extensions, VkExtensionProperties, i);
        if (!strcmp(e->extensionName, extension_name)) {
            return true;
        }
    }

    return false;
}

static StringArray *get_required_instance_extension_names(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    // Add instance extensions SDL lists as required
    unsigned int sdl_count = 0;
    SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, NULL);

    StringArray *extensions =
        g_array_sized_new(FALSE, FALSE, sizeof(char *),
                          sdl_count + ARRAY_SIZE(required_instance_extensions));

    if (sdl_count) {
        g_array_set_size(extensions, sdl_count);
        SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count,
                                         (const char **)extensions->data);
    }

    // Add additional required extensions
    g_array_append_vals(extensions, required_instance_extensions,
                        ARRAY_SIZE(required_instance_extensions));

    return extensions;
}

static bool
add_extension_if_available(VkExtensionPropertiesArray *available_extensions,
                           StringArray *enabled_extension_names,
                           const char *desired_extension_name)
{
    if (is_extension_available(available_extensions, desired_extension_name)) {
        g_array_append_val(enabled_extension_names, desired_extension_name);
        return true;
    }

    fprintf(stderr, "Warning: extension not available: %s\n",
            desired_extension_name);
    return false;
}

static void
add_optional_instance_extension_names(PGRAPHState *pg,
                                      VkExtensionPropertiesArray *available_extensions,
                                      StringArray *enabled_extension_names)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    r->debug_utils_extension_enabled =
        g_config.display.vulkan.validation_layers &&
        add_extension_if_available(available_extensions, enabled_extension_names,
                                   VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}

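/* Create the VkInstance: a hidden SDL window is used to query the instance
 * extensions SDL requires, volk loads the loader entry points, and the
 * validation layers plus debug messenger are enabled when requested in the
 * settings. */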
static void create_instance(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    r->window = create_window();

    VK_CHECK(volkInitialize());

    VkApplicationInfo app_info = {
        .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
        .pApplicationName = "xemu",
        .applicationVersion = VK_MAKE_VERSION(
            xemu_version_major, xemu_version_minor, xemu_version_patch),
        .pEngineName = "No Engine",
        .engineVersion = VK_MAKE_VERSION(1, 0, 0),
        .apiVersion = VK_API_VERSION_1_3,
    };

    g_autofree VkExtensionPropertiesArray *available_extensions =
        get_available_instance_extensions(pg);

    g_autofree StringArray *enabled_extension_names =
        get_required_instance_extension_names(pg);

    bool all_required_extensions_available = true;
    for (int i = 0; i < enabled_extension_names->len; i++) {
        const char *required_extension =
            g_array_index(enabled_extension_names, const char *, i);
        if (!is_extension_available(available_extensions, required_extension)) {
            fprintf(stderr,
                    "Error: Required instance extension not available: %s\n",
                    required_extension);
            all_required_extensions_available = false;
        }
    }
    assert(all_required_extensions_available);

    add_optional_instance_extension_names(pg, available_extensions,
                                          enabled_extension_names);

    fprintf(stderr, "Enabled instance extensions:\n");
    for (int i = 0; i < enabled_extension_names->len; i++) {
        fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i));
    }

    VkInstanceCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
        .pApplicationInfo = &app_info,
        .enabledExtensionCount = enabled_extension_names->len,
        .ppEnabledExtensionNames =
            &g_array_index(enabled_extension_names, const char *, 0),
    };

    VkDebugUtilsMessengerCreateInfoEXT dbg_create_info;
    if (r->debug_utils_extension_enabled) {
        dbg_create_info = (VkDebugUtilsMessengerCreateInfoEXT){
            .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
            .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                               VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
            .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                           VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
            .pfnUserCallback = debugCallback,
        };
    }

    enable_validation = g_config.display.vulkan.validation_layers;

    if (enable_validation) {
        if (check_validation_layer_support()) {
            fprintf(stderr, "Warning: Validation layers enabled. Expect performance impact.\n");
            create_info.enabledLayerCount = ARRAY_SIZE(validation_layers);
            create_info.ppEnabledLayerNames = validation_layers;
            if (r->debug_utils_extension_enabled) {
                create_info.pNext =
                    (VkDebugUtilsMessengerCreateInfoEXT *)&dbg_create_info;
            }
        } else {
            fprintf(stderr, "Warning: validation layers not available\n");
            enable_validation = false;
        }
    }

    VK_CHECK(vkCreateInstance(&create_info, NULL, &r->instance));

    volkLoadInstance(r->instance);
}

static bool is_queue_family_indicies_complete(QueueFamilyIndices indices)
{
    return indices.queue_family >= 0;
}

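/* Find a queue family supporting both graphics and compute; queue_family is
 * left at -1 if the device has none. */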
QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device)
{
    QueueFamilyIndices indices = {
        .queue_family = -1,
    };

    uint32_t num_queue_families = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, NULL);

    g_autofree VkQueueFamilyProperties *queue_families =
        g_malloc_n(num_queue_families, sizeof(VkQueueFamilyProperties));
    vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families,
                                             queue_families);

    for (int i = 0; i < num_queue_families; i++) {
        VkQueueFamilyProperties queueFamily = queue_families[i];
        // FIXME: Support independent graphics, compute queues
        int required_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
        if ((queueFamily.queueFlags & required_flags) == required_flags) {
            indices.queue_family = i;
        }
        if (is_queue_family_indicies_complete(indices)) {
            break;
        }
    }

    return indices;
}

static VkExtensionPropertiesArray *
get_available_device_extensions(VkPhysicalDevice device)
{
    uint32_t num_extensions = 0;

    VK_CHECK(vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions,
                                                  NULL));

    VkExtensionPropertiesArray *extensions = g_array_sized_new(
        FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions);

    g_array_set_size(extensions, num_extensions);
    VK_CHECK(vkEnumerateDeviceExtensionProperties(
        device, NULL, &num_extensions,
        (VkExtensionProperties *)extensions->data));

    return extensions;
}

static StringArray *get_required_device_extension_names(void)
{
    StringArray *extensions =
        g_array_sized_new(FALSE, FALSE, sizeof(char *),
                          ARRAY_SIZE(required_device_extensions));

    g_array_append_vals(extensions, required_device_extensions,
                        ARRAY_SIZE(required_device_extensions));

    return extensions;
}

static void add_optional_device_extension_names(
    PGRAPHState *pg, VkExtensionPropertiesArray *available_extensions,
    StringArray *enabled_extension_names)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    r->custom_border_color_extension_enabled =
        add_extension_if_available(available_extensions, enabled_extension_names,
                                   VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);

    r->provoking_vertex_extension_enabled =
        add_extension_if_available(available_extensions, enabled_extension_names,
                                   VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME);

    r->memory_budget_extension_enabled = add_extension_if_available(
        available_extensions, enabled_extension_names,
        VK_EXT_MEMORY_BUDGET_EXTENSION_NAME);
}

static bool check_device_support_required_extensions(VkPhysicalDevice device)
{
    g_autofree VkExtensionPropertiesArray *available_extensions =
        get_available_device_extensions(device);

    for (int i = 0; i < ARRAY_SIZE(required_device_extensions); i++) {
        if (!is_extension_available(available_extensions,
                                    required_device_extensions[i])) {
            fprintf(stderr, "required device extension not found: %s\n",
                    required_device_extensions[i]);
            return false;
        }
    }

    return true;
}

static bool is_device_compatible(VkPhysicalDevice device)
{
    QueueFamilyIndices indices = pgraph_vk_find_queue_families(device);

    return is_queue_family_indicies_complete(indices) &&
           check_device_support_required_extensions(device);
    // FIXME: Check formats
    // FIXME: Check vram
}

static void select_physical_device(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint32_t num_physical_devices = 0;

    vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL);
    if (num_physical_devices == 0) {
        assert(!"failed to find GPUs with Vulkan support");
    }

    g_autofree VkPhysicalDevice *devices =
        g_malloc_n(num_physical_devices, sizeof(VkPhysicalDevice));
    vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, devices);

    fprintf(stderr, "Available physical devices:\n");
    for (int i = 0; i < num_physical_devices; i++) {
        vkGetPhysicalDeviceProperties(devices[i], &r->device_props);
        fprintf(stderr, "- %s\n", r->device_props.deviceName);
    }

    // FIXME: Store preferred device

    r->physical_device = VK_NULL_HANDLE;
    for (int i = 0; i < num_physical_devices; i++) {
        if (is_device_compatible(devices[i])) {
            r->physical_device = devices[i];
            break;
        }
    }
    if (r->physical_device == VK_NULL_HANDLE) {
        assert(!"failed to find a suitable GPU");
    }

    vkGetPhysicalDeviceProperties(r->physical_device, &r->device_props);
    fprintf(stderr,
            "Selected physical device: %s\n"
            "- Vendor: %x, Device: %x\n"
            "- Driver Version: %d.%d.%d\n",
            r->device_props.deviceName,
            r->device_props.vendorID,
            r->device_props.deviceID,
            VK_VERSION_MAJOR(r->device_props.driverVersion),
            VK_VERSION_MINOR(r->device_props.driverVersion),
            VK_VERSION_PATCH(r->device_props.driverVersion));

    size_t vsh_attr_values_size =
        NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
    assert(r->device_props.limits.maxPushConstantsSize >= vsh_attr_values_size);
}

static void create_logical_device(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    QueueFamilyIndices indices =
        pgraph_vk_find_queue_families(r->physical_device);

    g_autofree VkExtensionPropertiesArray *available_extensions =
        get_available_device_extensions(r->physical_device);

    g_autofree StringArray *enabled_extension_names =
        get_required_device_extension_names();

    add_optional_device_extension_names(pg, available_extensions,
                                        enabled_extension_names);

    fprintf(stderr, "Enabled device extensions:\n");
    for (int i = 0; i < enabled_extension_names->len; i++) {
        fprintf(stderr, "- %s\n", g_array_index(enabled_extension_names, char *, i));
    }

    float queuePriority = 1.0f;

    VkDeviceQueueCreateInfo queue_create_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
        .queueFamilyIndex = indices.queue_family,
        .queueCount = 1,
        .pQueuePriorities = &queuePriority,
    };

    // Ensure device supports required features
    VkPhysicalDeviceFeatures available_features, enabled_features;
    vkGetPhysicalDeviceFeatures(r->physical_device, &available_features);
    memset(&enabled_features, 0, sizeof(enabled_features));

    struct {
        const char *name;
        VkBool32 available, *enabled;
    } required_features[] = {
#define F(n) { #n, available_features.n, &enabled_features.n }
        F(shaderClipDistance),
        F(geometryShader),
        F(shaderTessellationAndGeometryPointSize),
        F(depthClamp),
        F(occlusionQueryPrecise),
#undef F
    };

    bool all_features_available = true;
    for (int i = 0; i < ARRAY_SIZE(required_features); i++) {
        if (required_features[i].available != VK_TRUE) {
            fprintf(stderr, "Error: Device does not support required feature %s\n", required_features[i].name);
            all_features_available = false;
        }
        *required_features[i].enabled = VK_TRUE;
    }
    assert(all_features_available);

    void *next_struct = NULL;

    VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features;
    if (r->provoking_vertex_extension_enabled) {
        provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){
            .sType =
                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT,
            .provokingVertexLast = VK_TRUE,
            .pNext = next_struct,
        };
        next_struct = &provoking_vertex_features;
    }

    VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features;
    if (r->custom_border_color_extension_enabled) {
        custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){
            .sType =
                VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT,
            .customBorderColors = VK_TRUE,
            .pNext = next_struct,
        };
        next_struct = &custom_border_features;
    }

    VkDeviceCreateInfo device_create_info = {
        .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos = &queue_create_info,
        .pEnabledFeatures = &enabled_features,
        .enabledExtensionCount = enabled_extension_names->len,
        .ppEnabledExtensionNames =
            &g_array_index(enabled_extension_names, const char *, 0),
        .pNext = next_struct,
    };

    if (enable_validation) {
        device_create_info.enabledLayerCount = ARRAY_SIZE(validation_layers);
        device_create_info.ppEnabledLayerNames = validation_layers;
    }

    VK_CHECK(vkCreateDevice(r->physical_device, &device_create_info, NULL,
                            &r->device));

    vkGetDeviceQueue(r->device, indices.queue_family, 0, &r->queue);
}

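/* Returns the index of the first memory type allowed by type_bits that has
 * all of the requested property flags, or 0xFFFFFFFF if there is none. */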
uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits,
                                   VkMemoryPropertyFlags properties)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkPhysicalDeviceMemoryProperties prop;
    vkGetPhysicalDeviceMemoryProperties(r->physical_device, &prop);
    for (uint32_t i = 0; i < prop.memoryTypeCount; i++) {
        if ((prop.memoryTypes[i].propertyFlags & properties) == properties &&
            type_bits & (1 << i)) {
            return i;
        }
    }
    return 0xFFFFFFFF; // Unable to find memoryType
}

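/* Create the VMA allocator. The volk-resolved function pointers are passed in
 * explicitly, and VMA's budget tracking is enabled when VK_EXT_memory_budget
 * is available. */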
static void init_allocator(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VmaVulkanFunctions vulkanFunctions = {
        /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
        .vkGetInstanceProcAddr = vkGetInstanceProcAddr,
        /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS.
        .vkGetDeviceProcAddr = vkGetDeviceProcAddr,
        .vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties,
        .vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties,
        .vkAllocateMemory = vkAllocateMemory,
        .vkFreeMemory = vkFreeMemory,
        .vkMapMemory = vkMapMemory,
        .vkUnmapMemory = vkUnmapMemory,
        .vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges,
        .vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges,
        .vkBindBufferMemory = vkBindBufferMemory,
        .vkBindImageMemory = vkBindImageMemory,
        .vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements,
        .vkGetImageMemoryRequirements = vkGetImageMemoryRequirements,
        .vkCreateBuffer = vkCreateBuffer,
        .vkDestroyBuffer = vkDestroyBuffer,
        .vkCreateImage = vkCreateImage,
        .vkDestroyImage = vkDestroyImage,
        .vkCmdCopyBuffer = vkCmdCopyBuffer,
#if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000
        /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
        .vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2,
        /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension.
        .vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2,
#endif
#if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000
        /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension.
        .vkBindBufferMemory2KHR = vkBindBufferMemory2,
        /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension.
        .vkBindImageMemory2KHR = vkBindImageMemory2,
#endif
#if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000
        /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2.
        .vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2KHR,
#endif
#if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000
        /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
        .vkGetDeviceBufferMemoryRequirements = vkGetDeviceBufferMemoryRequirements,
        /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4.
        .vkGetDeviceImageMemoryRequirements = vkGetDeviceImageMemoryRequirements,
#endif
    };

    VmaAllocatorCreateInfo create_info = {
        .flags = (r->memory_budget_extension_enabled ?
                      VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT :
                      0),
        .vulkanApiVersion = VK_API_VERSION_1_3,
        .instance = r->instance,
        .physicalDevice = r->physical_device,
        .device = r->device,
        .pVulkanFunctions = &vulkanFunctions,
    };

    VK_CHECK(vmaCreateAllocator(&create_info, &r->allocator));
}

static void finalize_allocator(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vmaDestroyAllocator(r->allocator);
}

void pgraph_vk_init_instance(PGRAPHState *pg)
{
    create_instance(pg);
    select_physical_device(pg);
    create_logical_device(pg);
    init_allocator(pg);
}

void pgraph_vk_finalize_instance(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    finalize_allocator(pg);
    vkDestroyDevice(r->device, NULL);
    r->device = VK_NULL_HANDLE;

    vkDestroyInstance(r->instance, NULL);
    r->instance = VK_NULL_HANDLE;
}

@ -0,0 +1,24 @@
if vulkan.found()

specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen,
  files(
    'blit.c',
    'buffer.c',
    'command.c',
    'debug.c',
    'display.c',
    'draw.c',
    'glsl.c',
    'image.c',
    'instance.c',
    'renderer.c',
    'reports.c',
    'shaders.c',
    'surface-compute.c',
    'surface.c',
    'texture.c',
    'vertex.c',
  )
])

endif

@ -0,0 +1,266 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "hw/xbox/nv2a/nv2a_int.h"
#include "renderer.h"

#include "gloffscreen.h"

#if HAVE_EXTERNAL_MEMORY
static GloContext *g_gl_context;

static void gl_context_init(void)
{
    g_gl_context = glo_context_create();
}
#endif

static void pgraph_vk_init_thread(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;

#if HAVE_EXTERNAL_MEMORY
    glo_set_current(g_gl_context);
#endif

    pgraph_vk_init_instance(pg);
    pgraph_vk_init_command_buffers(pg);
    pgraph_vk_init_buffers(d);
    pgraph_vk_init_surfaces(pg);
    pgraph_vk_init_shaders(pg);
    pgraph_vk_init_pipelines(pg);
    pgraph_vk_init_textures(pg);
    pgraph_vk_init_reports(pg);
    pgraph_vk_init_compute(pg);
    pgraph_vk_init_display(pg);
}

static void pgraph_vk_finalize(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;

    pgraph_vk_finalize_display(pg);
    pgraph_vk_finalize_compute(pg);
    pgraph_vk_finalize_reports(pg);
    pgraph_vk_finalize_textures(pg);
    pgraph_vk_finalize_pipelines(pg);
    pgraph_vk_finalize_shaders(pg);
    pgraph_vk_finalize_surfaces(pg);
    pgraph_vk_finalize_buffers(d);
    pgraph_vk_finalize_command_buffers(pg);
    pgraph_vk_finalize_instance(pg);
}

static void pgraph_vk_flush(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;

    pgraph_vk_finish(pg, VK_FINISH_REASON_FLUSH);
    pgraph_vk_surface_flush(d);
    pgraph_vk_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram));
    pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr,
                                       memory_region_size(d->vram));
    for (int i = 0; i < 4; i++) {
        pg->texture_dirty[i] = true;
    }

    /* FIXME: Flush more? */

    qatomic_set(&d->pgraph.flush_pending, false);
    qemu_event_set(&d->pgraph.flush_complete);
}

static void pgraph_vk_sync(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    pgraph_vk_render_display(pg);

    qatomic_set(&d->pgraph.sync_pending, false);
    qemu_event_set(&d->pgraph.sync_complete);
}

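/* Called from the PFIFO thread: temporarily trades pfifo.lock for pgraph.lock
 * to service pending surface downloads, dirty-surface downloads, sync and
 * flush requests raised by other threads. */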
static void pgraph_vk_process_pending(NV2AState *d)
{
    PGRAPHVkState *r = d->pgraph.vk_renderer_state;

    if (qatomic_read(&r->downloads_pending) ||
        qatomic_read(&r->download_dirty_surfaces_pending) ||
        qatomic_read(&d->pgraph.sync_pending) ||
        qatomic_read(&d->pgraph.flush_pending)
        ) {
        qemu_mutex_unlock(&d->pfifo.lock);
        qemu_mutex_lock(&d->pgraph.lock);
        if (qatomic_read(&r->downloads_pending)) {
            pgraph_vk_process_pending_downloads(d);
        }
        if (qatomic_read(&r->download_dirty_surfaces_pending)) {
            pgraph_vk_download_dirty_surfaces(d);
        }
        if (qatomic_read(&d->pgraph.sync_pending)) {
            pgraph_vk_sync(d);
        }
        if (qatomic_read(&d->pgraph.flush_pending)) {
            pgraph_vk_flush(d);
        }
        qemu_mutex_unlock(&d->pgraph.lock);
        qemu_mutex_lock(&d->pfifo.lock);
    }
}

static void pgraph_vk_flip_stall(NV2AState *d)
{
    pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_FLIP_STALL);
    pgraph_vk_debug_frame_terminator();
}

static void pgraph_vk_pre_savevm_trigger(NV2AState *d)
{
    qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true);
    qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
}

static void pgraph_vk_pre_savevm_wait(NV2AState *d)
{
    qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete);
}

static void pgraph_vk_pre_shutdown_trigger(NV2AState *d)
{
    // qatomic_set(&d->pgraph.vk_renderer_state->shader_cache_writeback_pending, true);
    // qemu_event_reset(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete);
}

static void pgraph_vk_pre_shutdown_wait(NV2AState *d)
{
    // qemu_event_wait(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete);
}

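/* Look up the surface backing the current CRTC scanout address. With
 * external-memory interop the PGRAPH thread is kicked to render the display
 * and the shared GL texture id is returned; otherwise the surface is
 * downloaded and 0 is returned. */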
static int pgraph_vk_get_framebuffer_surface(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    qemu_mutex_lock(&d->pfifo.lock);
    // FIXME: Possible race condition with pgraph, consider lock
    uint32_t pline_offset, pstart_addr, pline_compare;
    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
    SurfaceBinding *surface = pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
    if (surface == NULL || !surface->color) {
        qemu_mutex_unlock(&d->pfifo.lock);
        return 0;
    }

    assert(surface->color);

    surface->frame_time = pg->frame_time;

#if HAVE_EXTERNAL_MEMORY
    qemu_event_reset(&d->pgraph.sync_complete);
    qatomic_set(&pg->sync_pending, true);
    pfifo_kick(d);
    qemu_mutex_unlock(&d->pfifo.lock);
    qemu_event_wait(&d->pgraph.sync_complete);
    return r->display.gl_texture_id;
#else
    qemu_mutex_unlock(&d->pfifo.lock);
    pgraph_vk_wait_for_surface_download(surface);
    return 0;
#endif
}

static void pgraph_vk_init(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;

    pg->vk_renderer_state = (PGRAPHVkState *)g_malloc0(sizeof(PGRAPHVkState));

    pgraph_vk_debug_init();
}

static PGRAPHRenderer pgraph_vk_renderer = {
    .type = CONFIG_DISPLAY_RENDERER_VULKAN,
    .name = "Vulkan",
    .ops = {
        .init = pgraph_vk_init,
#if HAVE_EXTERNAL_MEMORY
        .early_context_init = gl_context_init,
#endif
        .init_thread = pgraph_vk_init_thread,
        .finalize = pgraph_vk_finalize,
        .clear_report_value = pgraph_vk_clear_report_value,
        .clear_surface = pgraph_vk_clear_surface,
        .draw_begin = pgraph_vk_draw_begin,
        .draw_end = pgraph_vk_draw_end,
        .flip_stall = pgraph_vk_flip_stall,
        .flush_draw = pgraph_vk_flush_draw,
        .get_report = pgraph_vk_get_report,
        .image_blit = pgraph_vk_image_blit,
        .pre_savevm_trigger = pgraph_vk_pre_savevm_trigger,
        .pre_savevm_wait = pgraph_vk_pre_savevm_wait,
        .pre_shutdown_trigger = pgraph_vk_pre_shutdown_trigger,
        .pre_shutdown_wait = pgraph_vk_pre_shutdown_wait,
        .process_pending = pgraph_vk_process_pending,
        .process_pending_reports = pgraph_vk_process_pending_reports,
        .surface_update = pgraph_vk_surface_update,
        .set_surface_scale_factor = pgraph_vk_set_surface_scale_factor,
        .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor,
        .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface,
    }
};

static void __attribute__((constructor)) register_renderer(void)
{
    pgraph_renderer_register(&pgraph_vk_renderer);
}

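/* Poll VMA's per-heap budgets and trim the texture cache when any heap is
 * more than 80% utilized. */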
void pgraph_vk_check_memory_budget(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkPhysicalDeviceMemoryProperties const *props;
    vmaGetMemoryProperties(r->allocator, &props);

    g_autofree VmaBudget *budgets = g_malloc_n(props->memoryHeapCount, sizeof(VmaBudget));
    vmaGetHeapBudgets(r->allocator, budgets);

    const float budget_threshold = 0.8;
    bool near_budget = false;

    for (int i = 0; i < props->memoryHeapCount; i++) {
        VmaBudget *b = &budgets[i];
        float use_to_budget_ratio =
            (double)b->statistics.allocationBytes / (double)b->budget;
        NV2A_VK_DPRINTF("Heap %d: used %lu/%lu MiB (%.2f%%)", i,
                        b->statistics.allocationBytes / (1024 * 1024),
                        b->budget / (1024 * 1024), use_to_budget_ratio * 100);
        near_budget |= use_to_budget_ratio > budget_threshold;
    }

    // If any heaps are near budget, free up some resources
    if (near_budget) {
        pgraph_vk_trim_texture_cache(pg);
    }

#if 0
    char *s;
    vmaBuildStatsString(r->allocator, &s, VK_TRUE);
    puts(s);
    vmaFreeStatsString(r->allocator, s);
#endif
}

@ -0,0 +1,526 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H
#define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H

#define VK_NO_PROTOTYPES 1

#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "qemu/queue.h"
#include "qemu/lru.h"
#include "hw/hw.h"
#include "hw/xbox/nv2a/nv2a_int.h"
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "hw/xbox/nv2a/pgraph/surface.h"
#include "hw/xbox/nv2a/pgraph/texture.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"

#include <vulkan/vulkan.h>
#include <glslang/Include/glslang_c_interface.h>
#include <volk.h>
#include <spirv_reflect.h>

#define VMA_STATIC_VULKAN_FUNCTIONS 1
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0
#include <vk_mem_alloc.h>

#include "debug.h"
#include "constants.h"
#include "glsl.h"

#define HAVE_EXTERNAL_MEMORY 1

typedef struct QueueFamilyIndices {
    int queue_family;
} QueueFamilyIndices;

typedef struct MemorySyncRequirement {
    hwaddr addr, size;
} MemorySyncRequirement;

typedef struct RenderPassState {
    VkFormat color_format;
    VkFormat zeta_format;
} RenderPassState;

typedef struct RenderPass {
    RenderPassState state;
    VkRenderPass render_pass;
} RenderPass;

typedef struct PipelineKey {
    bool clear;
    RenderPassState render_pass_state;
    ShaderState shader_state;
    uint32_t regs[10];
    VkVertexInputBindingDescription binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
    VkVertexInputAttributeDescription attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
} PipelineKey;

typedef struct PipelineBinding {
    LruNode node;
    PipelineKey key;
    VkPipelineLayout layout;
    VkPipeline pipeline;
    VkRenderPass render_pass;
    unsigned int draw_time;
} PipelineBinding;

enum Buffer {
    BUFFER_STAGING_DST,
    BUFFER_STAGING_SRC,
    BUFFER_COMPUTE_DST,
    BUFFER_COMPUTE_SRC,
    BUFFER_INDEX,
    BUFFER_INDEX_STAGING,
    BUFFER_VERTEX_RAM,
    BUFFER_VERTEX_INLINE,
    BUFFER_VERTEX_INLINE_STAGING,
    BUFFER_UNIFORM,
    BUFFER_UNIFORM_STAGING,
    BUFFER_COUNT
};

typedef struct StorageBuffer {
    VkBuffer buffer;
    VkBufferUsageFlags usage;
    VmaAllocationCreateInfo alloc_info;
    VmaAllocation allocation;
    VkMemoryPropertyFlags properties;
    size_t buffer_offset;
    size_t buffer_size;
    uint8_t *mapped;
} StorageBuffer;

typedef struct SurfaceBinding {
    QTAILQ_ENTRY(SurfaceBinding) entry;
    MemAccessCallback *access_cb;

    hwaddr vram_addr;

    SurfaceShape shape;
    uintptr_t dma_addr;
    uintptr_t dma_len;
    bool color;
    bool swizzle;

    unsigned int width;
    unsigned int height;
    unsigned int pitch;
    size_t size;

    bool cleared;
    int frame_time;
    int draw_time;
    bool draw_dirty;
    bool download_pending;
    bool upload_pending;

    BasicSurfaceFormatInfo fmt;
    SurfaceFormatInfo host_fmt;

    VkImage image;
    VkImageView image_view;
    VmaAllocation allocation;

    // Used for scaling
    VkImage image_scratch;
    VkImageLayout image_scratch_current_layout;
    VmaAllocation allocation_scratch;

    bool initialized;
} SurfaceBinding;

typedef struct ShaderModuleInfo {
    char *glsl;
    GByteArray *spirv;
    VkShaderModule module;
    SpvReflectShaderModule reflect_module;
    SpvReflectDescriptorSet **descriptor_sets;
    ShaderUniformLayout uniforms;
    ShaderUniformLayout push_constants;
} ShaderModuleInfo;

typedef struct ShaderBinding {
    LruNode node;
    ShaderState state;
    ShaderModuleInfo *geometry;
    ShaderModuleInfo *vertex;
    ShaderModuleInfo *fragment;

    int psh_constant_loc[9][2];
    int alpha_ref_loc;

    int bump_mat_loc[NV2A_MAX_TEXTURES];
    int bump_scale_loc[NV2A_MAX_TEXTURES];
    int bump_offset_loc[NV2A_MAX_TEXTURES];
    int tex_scale_loc[NV2A_MAX_TEXTURES];

    int surface_size_loc;
    int clip_range_loc;

    int vsh_constant_loc;
    uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];

    int inv_viewport_loc;
    int ltctxa_loc;
    int ltctxb_loc;
    int ltc1_loc;

    int fog_color_loc;
    int fog_param_loc;
    int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
    int light_infinite_direction_loc[NV2A_MAX_LIGHTS];
    int light_local_position_loc[NV2A_MAX_LIGHTS];
    int light_local_attenuation_loc[NV2A_MAX_LIGHTS];

    int clip_region_loc;

    int material_alpha_loc;
} ShaderBinding;

typedef struct TextureKey {
    TextureShape state;
    hwaddr texture_vram_offset;
    hwaddr texture_length;
    hwaddr palette_vram_offset;
    hwaddr palette_length;
    float scale;
} TextureKey;

typedef struct TextureBinding {
    LruNode node;
    TextureKey key;
    VkImage image;
    VkImageLayout current_layout;
    VkImageView image_view;
    VmaAllocation allocation;
    VkSampler sampler;
    bool possibly_dirty;
    uint64_t hash;
    unsigned int draw_time;
    uint32_t submit_time;
} TextureBinding;

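/* Queued occlusion-query report. Entries with clear set reset the accumulated
 * zpass pixel count; other entries emit a report once query_count results
 * have been consumed. */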
typedef struct QueryReport {
    QSIMPLEQ_ENTRY(QueryReport) entry;
    bool clear;
    uint32_t parameter;
    unsigned int query_count;
} QueryReport;

typedef struct PGRAPHVkDisplayState {
    ShaderModuleInfo *display_frag;

    VkDescriptorPool descriptor_pool;
    VkDescriptorSetLayout descriptor_set_layout;
    VkDescriptorSet descriptor_set;

    VkPipelineLayout pipeline_layout;
    VkPipeline pipeline;

    VkRenderPass render_pass;
    VkFramebuffer framebuffer;

    VkImage image;
    VkImageView image_view;
    VkDeviceMemory memory;
    VkSampler sampler;

    int width, height;
    int draw_time;

    // OpenGL Interop
#ifdef WIN32
    HANDLE handle;
#else
    int fd;
#endif
    GLuint gl_memory_obj;
    GLuint gl_texture_id;
} PGRAPHVkDisplayState;

typedef struct PGRAPHVkComputeState {
    VkDescriptorPool descriptor_pool;
    VkDescriptorSetLayout descriptor_set_layout;
    VkDescriptorSet descriptor_sets[1];
    VkPipelineLayout pipeline_layout;
    VkPipeline pipeline_pack_d24s8;
    VkPipeline pipeline_unpack_d24s8;
    VkPipeline pipeline_pack_f32s8;
    VkPipeline pipeline_unpack_f32s8;
} PGRAPHVkComputeState;

typedef struct PGRAPHVkState {
    void *window;
    VkInstance instance;

    bool debug_utils_extension_enabled;
    bool custom_border_color_extension_enabled;
    bool provoking_vertex_extension_enabled;
    bool memory_budget_extension_enabled;

    VkPhysicalDevice physical_device;
    VkPhysicalDeviceProperties device_props;
    VkDevice device;
    VmaAllocator allocator;
    uint32_t allocator_last_submit_index;

    VkQueue queue;
    VkCommandPool command_pool;
    VkCommandBuffer command_buffers[2];

    VkCommandBuffer command_buffer;
    VkSemaphore command_buffer_semaphore;
    VkFence command_buffer_fence;
    unsigned int command_buffer_start_time;
    bool in_command_buffer;
    uint32_t submit_count;

    VkCommandBuffer aux_command_buffer;
    bool in_aux_command_buffer;

    VkFramebuffer framebuffers[50];
    int framebuffer_index;
    bool framebuffer_dirty;

    VkRenderPass render_pass;
    RenderPass *render_passes;
    int render_passes_index;
    int render_passes_capacity;
    bool in_render_pass;
    bool in_draw;

    Lru pipeline_cache;
    VkPipelineCache vk_pipeline_cache;
    PipelineBinding *pipeline_cache_entries;
    PipelineBinding *pipeline_binding;
    bool pipeline_binding_changed;

    VkDescriptorPool descriptor_pool;
    VkDescriptorSetLayout descriptor_set_layout;
    VkDescriptorSet descriptor_sets[1024];
    int descriptor_set_index;

    StorageBuffer storage_buffers[BUFFER_COUNT];

    MemorySyncRequirement vertex_ram_buffer_syncs[NV2A_VERTEXSHADER_ATTRIBUTES];
    size_t num_vertex_ram_buffer_syncs;
    unsigned long *uploaded_bitmap;
    size_t bitmap_size;

    VkVertexInputAttributeDescription vertex_attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
    int vertex_attribute_to_description_location[NV2A_VERTEXSHADER_ATTRIBUTES];
    int num_active_vertex_attribute_descriptions;

    VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES];
    int num_active_vertex_binding_descriptions;
    hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES];

    QTAILQ_HEAD(, SurfaceBinding) surfaces;
    QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces;
    SurfaceBinding *color_binding, *zeta_binding;
    bool downloads_pending;
    QemuEvent downloads_complete;
    bool download_dirty_surfaces_pending;
    QemuEvent dirty_surfaces_download_complete; // common

    Lru texture_cache;
    TextureBinding *texture_cache_entries;
    TextureBinding *texture_bindings[NV2A_MAX_TEXTURES];
    TextureBinding dummy_texture;
    bool texture_bindings_changed;

    Lru shader_cache;
    ShaderBinding *shader_cache_entries;
    ShaderBinding *shader_binding;
    ShaderModuleInfo *quad_vert_module, *solid_frag_module;
    bool shader_bindings_changed;

    // FIXME: Merge these into a structure
    uint64_t uniform_buffer_hashes[2];
    size_t uniform_buffer_offsets[2];
    bool uniforms_changed;

    VkQueryPool query_pool;
    int max_queries_in_flight; // FIXME: Move out to constant
    int num_queries_in_flight;
    bool new_query_needed;
    bool query_in_flight;
    uint32_t zpass_pixel_count_result;
    QSIMPLEQ_HEAD(, QueryReport) report_queue; // FIXME: Statically allocate

    SurfaceFormatInfo kelvin_surface_zeta_vk_map[3];

    uint32_t clear_parameter;

    PGRAPHVkDisplayState display;
    PGRAPHVkComputeState compute;
} PGRAPHVkState;

// renderer.c
void pgraph_vk_check_memory_budget(PGRAPHState *pg);

// debug.c
void pgraph_vk_debug_init(void);

// instance.c
void pgraph_vk_init_instance(PGRAPHState *pg);
void pgraph_vk_finalize_instance(PGRAPHState *pg);
QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device);
uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits,
                                   VkMemoryPropertyFlags properties);

// glsl.c
void pgraph_vk_init_glsl_compiler(void);
void pgraph_vk_finalize_glsl_compiler(void);
GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage,
                                          const char *glsl_source);
VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r,
                                                       GByteArray *spv);
ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
    PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl);
void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info);

// buffer.c
void pgraph_vk_init_buffers(NV2AState *d);
void pgraph_vk_finalize_buffers(NV2AState *d);
bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index,
                                    VkDeviceSize size,
                                    VkDeviceAddress alignment);
VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data,
                                        VkDeviceSize *sizes, size_t count,
                                        VkDeviceAddress alignment);

// command.c
void pgraph_vk_init_command_buffers(PGRAPHState *pg);
void pgraph_vk_finalize_command_buffers(PGRAPHState *pg);
VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg);
void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd);

// image.c
void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd,
                                       VkImage image, VkFormat format,
                                       VkImageLayout oldLayout,
                                       VkImageLayout newLayout);

// vertex.c
void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
                                      unsigned int max_element,
                                      bool inline_data,
                                      unsigned int inline_stride,
                                      unsigned int provoking_element);
void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d);
void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, void *data,
                                        VkDeviceSize size);
VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data,
                                           VkDeviceSize size);
VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data,
                                                   VkDeviceSize *sizes,
                                                   size_t count);

// surface.c
void pgraph_vk_init_surfaces(PGRAPHState *pg);
void pgraph_vk_finalize_surfaces(PGRAPHState *pg);
void pgraph_vk_surface_flush(NV2AState *d);
void pgraph_vk_process_pending_downloads(NV2AState *d);
void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface);
SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr);
void pgraph_vk_wait_for_surface_download(SurfaceBinding *e);
void pgraph_vk_download_dirty_surfaces(NV2AState *d);
void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
                                   bool force);
void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write,
                              bool zeta_write);
SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr);
void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta);
void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale);
unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d);
void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg);

// surface-compute.c
void pgraph_vk_init_compute(PGRAPHState *pg);
void pgraph_vk_finalize_compute(PGRAPHState *pg);
void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
                                  VkCommandBuffer cmd, VkBuffer src,
                                  VkBuffer dst, bool downscale);
void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
                                    VkCommandBuffer cmd, VkBuffer src,
                                    VkBuffer dst);

// display.c
void pgraph_vk_init_display(PGRAPHState *pg);
void pgraph_vk_finalize_display(PGRAPHState *pg);
void pgraph_vk_render_display(PGRAPHState *pg);

// texture.c
void pgraph_vk_init_textures(PGRAPHState *pg);
void pgraph_vk_finalize_textures(PGRAPHState *pg);
void pgraph_vk_bind_textures(NV2AState *d);
void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr,
                                            hwaddr size);
void pgraph_vk_trim_texture_cache(PGRAPHState *pg);

// shaders.c
void pgraph_vk_init_shaders(PGRAPHState *pg);
void pgraph_vk_finalize_shaders(PGRAPHState *pg);
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg);
void pgraph_vk_bind_shaders(PGRAPHState *pg);
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg);

// reports.c
void pgraph_vk_init_reports(PGRAPHState *pg);
void pgraph_vk_finalize_reports(PGRAPHState *pg);
void pgraph_vk_clear_report_value(NV2AState *d);
void pgraph_vk_get_report(NV2AState *d, uint32_t parameter);
void pgraph_vk_process_pending_reports(NV2AState *d);
void pgraph_vk_process_pending_reports_internal(NV2AState *d);

typedef enum FinishReason {
    VK_FINISH_REASON_VERTEX_BUFFER_DIRTY,
    VK_FINISH_REASON_SURFACE_CREATE,
    VK_FINISH_REASON_SURFACE_DOWN,
    VK_FINISH_REASON_NEED_BUFFER_SPACE,
    VK_FINISH_REASON_FRAMEBUFFER_DIRTY,
    VK_FINISH_REASON_PRESENTING,
    VK_FINISH_REASON_FLIP_STALL,
    VK_FINISH_REASON_FLUSH,
} FinishReason;

// draw.c
void pgraph_vk_init_pipelines(PGRAPHState *pg);
void pgraph_vk_finalize_pipelines(PGRAPHState *pg);
void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter);
void pgraph_vk_draw_begin(NV2AState *d);
void pgraph_vk_draw_end(NV2AState *d);
void pgraph_vk_finish(PGRAPHState *pg, FinishReason why);
void pgraph_vk_flush_draw(NV2AState *d);
void pgraph_vk_begin_command_buffer(PGRAPHState *pg);
void pgraph_vk_ensure_command_buffer(PGRAPHState *pg);
void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg);

VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg);
void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd);

// blit.c
void pgraph_vk_image_blit(NV2AState *d);

#endif

@ -0,0 +1,134 @@
/*
 * Geforce NV2A PGRAPH Vulkan Renderer
 *
 * Copyright (c) 2024 Matt Borgerson
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "renderer.h"

void pgraph_vk_init_reports(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    QSIMPLEQ_INIT(&r->report_queue);
    r->num_queries_in_flight = 0;
    r->max_queries_in_flight = 1024;
    r->new_query_needed = true;
    r->query_in_flight = false;
    r->zpass_pixel_count_result = 0;

    VkQueryPoolCreateInfo pool_create_info = (VkQueryPoolCreateInfo){
        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
        .queryType = VK_QUERY_TYPE_OCCLUSION,
        .queryCount = r->max_queries_in_flight,
    };
    VK_CHECK(
        vkCreateQueryPool(r->device, &pool_create_info, NULL, &r->query_pool));
}

void pgraph_vk_finalize_reports(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    vkDestroyQueryPool(r->device, r->query_pool, NULL);
}

void pgraph_vk_clear_report_value(NV2AState *d)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
    q->clear = true;
    QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);
}

void pgraph_vk_get_report(NV2AState *d, uint32_t parameter)
{
    PGRAPHState *pg = &d->pgraph;
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE);
    assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);

    QueryReport *q = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate
    q->clear = false;
    q->parameter = parameter;
    q->query_count = r->num_queries_in_flight;
    QSIMPLEQ_INSERT_TAIL(&r->report_queue, q, entry);

    r->new_query_needed = true;
}

void pgraph_vk_process_pending_reports_internal(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
NV2A_VK_DGROUP_BEGIN("Processing queries");
|
||||
|
||||
assert(!r->in_command_buffer);
|
||||
|
||||
// Fetch all query results
|
||||
g_autofree uint64_t *query_results = NULL;
|
||||
|
||||
if (r->num_queries_in_flight > 0) {
|
||||
size_t size_of_results = r->num_queries_in_flight * sizeof(uint64_t);
|
||||
query_results = g_malloc_n(r->num_queries_in_flight,
|
||||
sizeof(uint64_t)); // FIXME: Pre-allocate
|
||||
VkResult result;
|
||||
do {
|
||||
result = vkGetQueryPoolResults(
|
||||
r->device, r->query_pool, 0, r->num_queries_in_flight,
|
||||
size_of_results, query_results, sizeof(uint64_t),
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
|
||||
} while (result == VK_NOT_READY);
|
||||
}
|
||||
|
||||
// Write out queries
|
||||
QueryReport *q, *next;
|
||||
int num_results_counted = 0;
|
||||
|
||||
int result_divisor = pg->surface_scale_factor * pg->surface_scale_factor;
|
||||
|
||||
QSIMPLEQ_FOREACH_SAFE (q, &r->report_queue, entry, next) {
|
||||
if (q->clear) {
|
||||
NV2A_VK_DPRINTF("Cleared");
|
||||
r->zpass_pixel_count_result = 0;
|
||||
} else {
|
||||
assert(q->query_count >= num_results_counted);
|
||||
assert(q->query_count <= r->num_queries_in_flight);
|
||||
|
||||
while (num_results_counted < q->query_count) {
|
||||
r->zpass_pixel_count_result +=
|
||||
query_results[num_results_counted++];
|
||||
}
|
||||
|
||||
pgraph_write_zpass_pixel_cnt_report(
|
||||
d, q->parameter,
|
||||
r->zpass_pixel_count_result / result_divisor);
|
||||
}
|
||||
QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry);
|
||||
g_free(q);
|
||||
}
|
||||
|
||||
r->num_queries_in_flight = 0;
|
||||
NV2A_VK_DGROUP_END();
|
||||
}
|
||||
|
||||
void pgraph_vk_process_pending_reports(NV2AState *d)
|
||||
{
|
||||
}
|
|
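/*
 * Worked example (illustrative, not part of the renderer): why the
 * accumulated occlusion-query result above is divided by
 * surface_scale_factor^2 before being reported to the guest. With 2x
 * supersampling, a fully visible 640x480 draw covers 1280 * 960 = 1,228,800
 * host samples, while the guest expects 640 * 480 = 307,200 pixels.
 */
static inline uint64_t example_guest_zpass_count(uint64_t host_samples,
                                                 int surface_scale_factor)
{
    int divisor = surface_scale_factor * surface_scale_factor;
    return host_samples / divisor; /* 1228800 / 4 == 307200 */
}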
@@ -0,0 +1,797 @@
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* Based on GL implementation:
|
||||
*
|
||||
* Copyright (c) 2015 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "qemu/osdep.h"
|
||||
#include "hw/xbox/nv2a/pgraph/shaders.h"
|
||||
#include "hw/xbox/nv2a/pgraph/util.h"
|
||||
#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
|
||||
#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
|
||||
#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
|
||||
#include "qemu/fast-hash.h"
|
||||
#include "qemu/mstring.h"
|
||||
#include "renderer.h"
|
||||
#include <locale.h>
|
||||
|
||||
static void create_descriptor_pool(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
size_t num_sets = ARRAY_SIZE(r->descriptor_sets);
|
||||
|
||||
VkDescriptorPoolSize pool_sizes[] = {
|
||||
{
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.descriptorCount = 2 * num_sets,
|
||||
},
|
||||
{
|
||||
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = NV2A_MAX_TEXTURES * num_sets,
|
||||
}
|
||||
};
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
.poolSizeCount = ARRAY_SIZE(pool_sizes),
|
||||
.pPoolSizes = pool_sizes,
|
||||
.maxSets = ARRAY_SIZE(r->descriptor_sets),
|
||||
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||
};
|
||||
VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
|
||||
&r->descriptor_pool));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_pool(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkDestroyDescriptorPool(r->device, r->descriptor_pool, NULL);
|
||||
r->descriptor_pool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static void create_descriptor_set_layout(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES];
|
||||
|
||||
bindings[0] = (VkDescriptorSetLayoutBinding){
|
||||
.binding = VSH_UBO_BINDING,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
|
||||
};
|
||||
bindings[1] = (VkDescriptorSetLayoutBinding){
|
||||
.binding = PSH_UBO_BINDING,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
};
|
||||
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
|
||||
bindings[2 + i] = (VkDescriptorSetLayoutBinding){
|
||||
.binding = PSH_TEX_BINDING + i,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
};
|
||||
}
|
||||
VkDescriptorSetLayoutCreateInfo layout_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.bindingCount = ARRAY_SIZE(bindings),
|
||||
.pBindings = bindings,
|
||||
};
|
||||
VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
|
||||
&r->descriptor_set_layout));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_set_layout(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkDestroyDescriptorSetLayout(r->device, r->descriptor_set_layout, NULL);
|
||||
r->descriptor_set_layout = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static void create_descriptor_sets(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)];
|
||||
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
|
||||
layouts[i] = r->descriptor_set_layout;
|
||||
}
|
||||
|
||||
VkDescriptorSetAllocateInfo alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||
.descriptorPool = r->descriptor_pool,
|
||||
.descriptorSetCount = ARRAY_SIZE(r->descriptor_sets),
|
||||
.pSetLayouts = layouts,
|
||||
};
|
||||
VK_CHECK(
|
||||
vkAllocateDescriptorSets(r->device, &alloc_info, r->descriptor_sets));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_sets(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkFreeDescriptorSets(r->device, r->descriptor_pool,
|
||||
ARRAY_SIZE(r->descriptor_sets), r->descriptor_sets);
|
||||
for (int i = 0; i < ARRAY_SIZE(r->descriptor_sets); i++) {
|
||||
r->descriptor_sets[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
bool need_uniform_write =
|
||||
r->uniforms_changed ||
|
||||
!r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset;
|
||||
|
||||
if (!(r->shader_bindings_changed || r->texture_bindings_changed ||
|
||||
(r->descriptor_set_index == 0) || need_uniform_write)) {
|
||||
return; // Nothing changed
|
||||
}
|
||||
|
||||
ShaderBinding *binding = r->shader_binding;
|
||||
ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
|
||||
&binding->fragment->uniforms };
|
||||
VkDeviceSize ubo_buffer_total_size = 0;
|
||||
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
|
||||
ubo_buffer_total_size += layouts[i]->total_size;
|
||||
}
|
||||
bool need_ubo_staging_buffer_reset =
|
||||
r->uniforms_changed &&
|
||||
!pgraph_vk_buffer_has_space_for(pg, BUFFER_UNIFORM_STAGING,
|
||||
ubo_buffer_total_size,
|
||||
r->device_props.limits.minUniformBufferOffsetAlignment);
|
||||
|
||||
bool need_descriptor_write_reset =
|
||||
(r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets));
|
||||
|
||||
if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) {
|
||||
pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
|
||||
need_uniform_write = true;
|
||||
}
|
||||
|
||||
VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES];
|
||||
|
||||
assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets));
|
||||
|
||||
if (need_uniform_write) {
|
||||
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
|
||||
void *data = layouts[i]->allocation;
|
||||
VkDeviceSize size = layouts[i]->total_size;
|
||||
r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer(
|
||||
pg, BUFFER_UNIFORM_STAGING, &data, &size, 1,
|
||||
r->device_props.limits.minUniformBufferOffsetAlignment);
|
||||
}
|
||||
|
||||
r->uniforms_changed = false;
|
||||
}
|
||||
|
||||
VkDescriptorBufferInfo ubo_buffer_infos[2];
|
||||
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
|
||||
ubo_buffer_infos[i] = (VkDescriptorBufferInfo){
|
||||
.buffer = r->storage_buffers[BUFFER_UNIFORM].buffer,
|
||||
.offset = r->uniform_buffer_offsets[i],
|
||||
.range = layouts[i]->total_size,
|
||||
};
|
||||
descriptor_writes[i] = (VkWriteDescriptorSet){
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.dstSet = r->descriptor_sets[r->descriptor_set_index],
|
||||
.dstBinding = i == 0 ? VSH_UBO_BINDING : PSH_UBO_BINDING,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.pBufferInfo = &ubo_buffer_infos[i],
|
||||
};
|
||||
}
|
||||
|
||||
VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES];
|
||||
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
|
||||
image_infos[i] = (VkDescriptorImageInfo){
|
||||
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
||||
.imageView = r->texture_bindings[i]->image_view,
|
||||
.sampler = r->texture_bindings[i]->sampler,
|
||||
};
|
||||
descriptor_writes[2 + i] = (VkWriteDescriptorSet){
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.dstSet = r->descriptor_sets[r->descriptor_set_index],
|
||||
.dstBinding = PSH_TEX_BINDING + i,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = 1,
|
||||
.pImageInfo = &image_infos[i],
|
||||
};
|
||||
}
|
||||
|
||||
    vkUpdateDescriptorSets(r->device, ARRAY_SIZE(descriptor_writes),
                           descriptor_writes, 0, NULL);
|
||||
|
||||
r->descriptor_set_index++;
|
||||
}
|
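/*
 * Sketch of the descriptor-set cycling contract implemented above
 * (illustrative; the helper name is hypothetical): every draw whose bindings
 * or uniforms changed consumes one pre-allocated descriptor set, and
 * exhausting the array forces a pgraph_vk_finish() so indexing can restart
 * at zero.
 */
static inline void example_prepare_draw_descriptors(PGRAPHState *pg)
{
    /* May internally call pgraph_vk_finish(pg,
     * VK_FINISH_REASON_NEED_BUFFER_SPACE) when all sets are in use. */
    pgraph_vk_update_descriptor_sets(pg);
}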
||||
|
||||
static void update_shader_constant_locations(ShaderBinding *binding)
|
||||
{
|
||||
int i, j;
|
||||
char tmp[64];
|
||||
|
||||
/* lookup fragment shader uniforms */
|
||||
for (i = 0; i < 9; i++) {
|
||||
for (j = 0; j < 2; j++) {
|
||||
snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
|
||||
binding->psh_constant_loc[i][j] =
|
||||
uniform_index(&binding->fragment->uniforms, tmp);
|
||||
}
|
||||
}
|
||||
binding->alpha_ref_loc =
|
||||
uniform_index(&binding->fragment->uniforms, "alphaRef");
|
||||
binding->fog_color_loc =
|
||||
uniform_index(&binding->fragment->uniforms, "fogColor");
|
||||
for (i = 1; i < NV2A_MAX_TEXTURES; i++) {
|
||||
snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
|
||||
binding->bump_mat_loc[i] =
|
||||
uniform_index(&binding->fragment->uniforms, tmp);
|
||||
snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
|
||||
binding->bump_scale_loc[i] =
|
||||
uniform_index(&binding->fragment->uniforms, tmp);
|
||||
snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
|
||||
binding->bump_offset_loc[i] =
|
||||
uniform_index(&binding->fragment->uniforms, tmp);
|
||||
}
|
||||
|
||||
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
|
||||
snprintf(tmp, sizeof(tmp), "texScale%d", i);
|
||||
binding->tex_scale_loc[i] =
|
||||
uniform_index(&binding->fragment->uniforms, tmp);
|
||||
}
|
||||
|
||||
/* lookup vertex shader uniforms */
|
||||
binding->vsh_constant_loc = uniform_index(&binding->vertex->uniforms, "c");
|
||||
binding->surface_size_loc =
|
||||
uniform_index(&binding->vertex->uniforms, "surfaceSize");
|
||||
binding->clip_range_loc =
|
||||
uniform_index(&binding->vertex->uniforms, "clipRange");
|
||||
binding->fog_param_loc =
|
||||
uniform_index(&binding->vertex->uniforms, "fogParam");
|
||||
|
||||
binding->inv_viewport_loc =
|
||||
uniform_index(&binding->vertex->uniforms, "invViewport");
|
||||
binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa");
|
||||
binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb");
|
||||
binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1");
|
||||
|
||||
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
|
||||
snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
|
||||
binding->light_infinite_half_vector_loc[i] =
|
||||
uniform_index(&binding->vertex->uniforms, tmp);
|
||||
snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
|
||||
binding->light_infinite_direction_loc[i] =
|
||||
uniform_index(&binding->vertex->uniforms, tmp);
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
|
||||
binding->light_local_position_loc[i] =
|
||||
uniform_index(&binding->vertex->uniforms, tmp);
|
||||
snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
|
||||
binding->light_local_attenuation_loc[i] =
|
||||
uniform_index(&binding->vertex->uniforms, tmp);
|
||||
}
|
||||
|
||||
binding->clip_region_loc =
|
||||
uniform_index(&binding->fragment->uniforms, "clipRegion");
|
||||
|
||||
binding->material_alpha_loc =
|
||||
uniform_index(&binding->vertex->uniforms, "material_alpha");
|
||||
}
|
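/*
 * Illustrative helper (hypothetical name): uniform_index() resolves a name
 * against the reflected uniform layout of a generated shader and returns -1
 * when the uniform is not present, which is why every location cached above
 * is checked against -1 before use.
 */
static inline bool example_has_uniform(ShaderUniformLayout *layout,
                                       const char *name)
{
    return uniform_index(layout, name) != -1;
}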
||||
|
||||
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
|
||||
{
|
||||
ShaderBinding *snode = container_of(node, ShaderBinding, node);
|
||||
memcpy(&snode->state, state, sizeof(ShaderState));
|
||||
}
|
||||
|
||||
static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
|
||||
{
|
||||
PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache);
|
||||
ShaderBinding *snode = container_of(node, ShaderBinding, node);
|
||||
|
||||
ShaderModuleInfo *modules[] = {
|
||||
snode->geometry,
|
||||
snode->vertex,
|
||||
snode->fragment,
|
||||
};
|
||||
for (int i = 0; i < ARRAY_SIZE(modules); i++) {
|
||||
if (modules[i]) {
|
||||
pgraph_vk_destroy_shader_module(r, modules[i]);
|
||||
}
|
||||
}
|
||||
|
||||
memset(&snode->state, 0, sizeof(ShaderState));
|
||||
}
|
||||
|
||||
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
|
||||
{
|
||||
ShaderBinding *snode = container_of(node, ShaderBinding, node);
|
||||
return memcmp(&snode->state, key, sizeof(ShaderState));
|
||||
}
|
||||
|
||||
static void shader_cache_init(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
const size_t shader_cache_size = 1024;
|
||||
lru_init(&r->shader_cache);
|
||||
r->shader_cache_entries = g_malloc_n(shader_cache_size, sizeof(ShaderBinding));
|
||||
assert(r->shader_cache_entries != NULL);
|
||||
for (int i = 0; i < shader_cache_size; i++) {
|
||||
lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node);
|
||||
}
|
||||
r->shader_cache.init_node = shader_cache_entry_init;
|
||||
r->shader_cache.compare_nodes = shader_cache_entry_compare;
|
||||
r->shader_cache.post_node_evict = shader_cache_entry_post_evict;
|
||||
}
|
||||
|
||||
static void shader_cache_finalize(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
lru_flush(&r->shader_cache);
|
||||
g_free(r->shader_cache_entries);
|
||||
r->shader_cache_entries = NULL;
|
||||
}
|
||||
|
||||
static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
uint64_t hash = fast_hash((void *)state, sizeof(*state));
|
||||
LruNode *node = lru_lookup(&r->shader_cache, hash, state);
|
||||
ShaderBinding *snode = container_of(node, ShaderBinding, node);
|
||||
|
||||
NV2A_VK_DPRINTF("shader state hash: %016lx, %p", hash, snode);
|
||||
|
||||
if (!snode->fragment) {
|
||||
NV2A_VK_DPRINTF("cache miss");
|
||||
nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
|
||||
|
||||
char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
|
||||
if (previous_numeric_locale) {
|
||||
previous_numeric_locale = g_strdup(previous_numeric_locale);
|
||||
}
|
||||
|
||||
/* Ensure numeric values are printed with '.' radix, no grouping */
|
||||
setlocale(LC_NUMERIC, "C");
|
||||
|
||||
MString *geometry_shader_code = pgraph_gen_geom_glsl(
|
||||
state->polygon_front_mode, state->polygon_back_mode,
|
||||
state->primitive_mode, state->smooth_shading, true);
|
||||
if (geometry_shader_code) {
|
||||
NV2A_VK_DPRINTF("geometry shader: \n%s",
|
||||
mstring_get_str(geometry_shader_code));
|
||||
snode->geometry = pgraph_vk_create_shader_module_from_glsl(
|
||||
r, VK_SHADER_STAGE_GEOMETRY_BIT,
|
||||
mstring_get_str(geometry_shader_code));
|
||||
mstring_unref(geometry_shader_code);
|
||||
} else {
|
||||
            snode->geometry = NULL;
|
||||
}
|
||||
|
||||
MString *vertex_shader_code =
|
||||
pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL);
|
||||
NV2A_VK_DPRINTF("vertex shader: \n%s",
|
||||
mstring_get_str(vertex_shader_code));
|
||||
snode->vertex = pgraph_vk_create_shader_module_from_glsl(
|
||||
r, VK_SHADER_STAGE_VERTEX_BIT,
|
||||
mstring_get_str(vertex_shader_code));
|
||||
mstring_unref(vertex_shader_code);
|
||||
|
||||
MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh);
|
||||
NV2A_VK_DPRINTF("fragment shader: \n%s",
|
||||
mstring_get_str(fragment_shader_code));
|
||||
snode->fragment = pgraph_vk_create_shader_module_from_glsl(
|
||||
r, VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
mstring_get_str(fragment_shader_code));
|
||||
mstring_unref(fragment_shader_code);
|
||||
|
||||
if (previous_numeric_locale) {
|
||||
setlocale(LC_NUMERIC, previous_numeric_locale);
|
||||
g_free(previous_numeric_locale);
|
||||
}
|
||||
|
||||
update_shader_constant_locations(snode);
|
||||
}
|
||||
|
||||
return snode;
|
||||
}
|
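/*
 * Why the LC_NUMERIC save/restore in gen_shaders() matters (illustrative
 * sketch, assumes <stdio.h>): in a locale such as de_DE, "%f" prints a comma
 * as the radix character, which is not valid GLSL. Forcing the "C" locale
 * keeps generated float literals in "1.500000" form.
 */
static inline void example_locale_pitfall(char *buf, size_t len)
{
    setlocale(LC_NUMERIC, "C");
    snprintf(buf, len, "%f", 1.5); /* "1.500000" under the "C" locale */
}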
||||
|
||||
// FIXME: Move to common
|
||||
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
|
||||
bool binding_changed, bool vertex_program,
|
||||
bool fixed_function)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
/* update combiner constants */
|
||||
for (i = 0; i < 9; i++) {
|
||||
uint32_t constant[2];
|
||||
if (i == 8) {
|
||||
/* final combiner */
|
||||
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
|
||||
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
|
||||
} else {
|
||||
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
|
||||
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
|
||||
}
|
||||
|
||||
for (j = 0; j < 2; j++) {
|
||||
GLint loc = binding->psh_constant_loc[i][j];
|
||||
if (loc != -1) {
|
||||
float value[4];
|
||||
pgraph_argb_pack32_to_rgba_float(constant[j], value);
|
||||
uniform1fv(&binding->fragment->uniforms, loc, 4, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (binding->alpha_ref_loc != -1) {
|
||||
float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
|
||||
NV_PGRAPH_CONTROL_0_ALPHAREF) /
|
||||
255.0;
|
||||
uniform1f(&binding->fragment->uniforms, binding->alpha_ref_loc,
|
||||
alpha_ref);
|
||||
}
|
||||
|
||||
|
||||
/* For each texture stage */
|
||||
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
|
||||
int loc;
|
||||
|
||||
/* Bump luminance only during stages 1 - 3 */
|
||||
if (i > 0) {
|
||||
loc = binding->bump_mat_loc[i];
|
||||
if (loc != -1) {
|
||||
uint32_t m_u32[4];
|
||||
m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
|
||||
m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
|
||||
m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
|
||||
m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
|
||||
float m[4];
|
||||
m[0] = *(float*)&m_u32[0];
|
||||
m[1] = *(float*)&m_u32[1];
|
||||
m[2] = *(float*)&m_u32[2];
|
||||
m[3] = *(float*)&m_u32[3];
|
||||
uniformMatrix2fv(&binding->fragment->uniforms, loc, m);
|
||||
}
|
||||
loc = binding->bump_scale_loc[i];
|
||||
if (loc != -1) {
|
||||
uint32_t v =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
|
||||
uniform1f(&binding->fragment->uniforms, loc,
|
||||
*(float *)&v);
|
||||
}
|
||||
loc = binding->bump_offset_loc[i];
|
||||
if (loc != -1) {
|
||||
uint32_t v =
|
||||
pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
|
||||
uniform1f(&binding->fragment->uniforms, loc,
|
||||
*(float *)&v);
|
||||
}
|
||||
}
|
||||
|
||||
loc = binding->tex_scale_loc[i];
|
||||
if (loc != -1) {
|
||||
assert(pg->vk_renderer_state->texture_bindings[i] != NULL);
|
||||
float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale;
|
||||
BasicColorFormatInfo f_basic = kelvin_color_format_info_map[pg->vk_renderer_state->texture_bindings[i]->key.state.color_format];
|
||||
if (!f_basic.linear) {
|
||||
scale = 1.0;
|
||||
}
|
||||
uniform1f(&binding->fragment->uniforms, loc, scale);
|
||||
}
|
||||
}
|
||||
|
||||
if (binding->fog_color_loc != -1) {
|
||||
uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
|
||||
uniform4f(&binding->fragment->uniforms, binding->fog_color_loc,
|
||||
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
|
||||
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
|
||||
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
|
||||
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
|
||||
}
|
||||
if (binding->fog_param_loc != -1) {
|
||||
uint32_t v[2];
|
||||
v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
|
||||
v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
|
||||
uniform2f(&binding->vertex->uniforms,
|
||||
binding->fog_param_loc, *(float *)&v[0],
|
||||
*(float *)&v[1]);
|
||||
}
|
||||
|
||||
float zmax;
|
||||
switch (pg->surface_shape.zeta_format) {
|
||||
case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
|
||||
zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
|
||||
break;
|
||||
case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
|
||||
zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
if (fixed_function) {
|
||||
/* update lighting constants */
|
||||
struct {
|
||||
uint32_t *v;
|
||||
int locs;
|
||||
size_t len;
|
||||
} lighting_arrays[] = {
|
||||
{ &pg->ltctxa[0][0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT },
|
||||
{ &pg->ltctxb[0][0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT },
|
||||
{ &pg->ltc1[0][0], binding->ltc1_loc, NV2A_LTC1_COUNT },
|
||||
};
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
|
||||
uniform1iv(
|
||||
&binding->vertex->uniforms, lighting_arrays[i].locs,
|
||||
lighting_arrays[i].len * 4, (void *)lighting_arrays[i].v);
|
||||
}
|
||||
|
||||
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
|
||||
int loc = binding->light_infinite_half_vector_loc[i];
|
||||
if (loc != -1) {
|
||||
uniform1fv(&binding->vertex->uniforms, loc, 3,
|
||||
pg->light_infinite_half_vector[i]);
|
||||
}
|
||||
loc = binding->light_infinite_direction_loc[i];
|
||||
if (loc != -1) {
|
||||
uniform1fv(&binding->vertex->uniforms, loc, 3,
|
||||
pg->light_infinite_direction[i]);
|
||||
}
|
||||
|
||||
loc = binding->light_local_position_loc[i];
|
||||
if (loc != -1) {
|
||||
uniform1fv(&binding->vertex->uniforms, loc, 3,
|
||||
pg->light_local_position[i]);
|
||||
}
|
||||
loc = binding->light_local_attenuation_loc[i];
|
||||
if (loc != -1) {
|
||||
uniform1fv(&binding->vertex->uniforms, loc, 3,
|
||||
pg->light_local_attenuation[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* estimate the viewport by assuming it matches the surface ... */
|
||||
unsigned int aa_width = 1, aa_height = 1;
|
||||
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
|
||||
|
||||
float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
|
||||
float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
|
||||
float m33 = zmax;
|
||||
float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
|
||||
float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
|
||||
|
||||
float invViewport[16] = {
|
||||
1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0,
|
||||
0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22,
|
||||
0, 1.0
|
||||
};
|
||||
|
||||
if (binding->inv_viewport_loc != -1) {
|
||||
uniformMatrix4fv(&binding->vertex->uniforms,
|
||||
binding->inv_viewport_loc, &invViewport[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/* update vertex program constants */
|
||||
uniform1iv(&binding->vertex->uniforms, binding->vsh_constant_loc,
|
||||
NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants);
|
||||
|
||||
if (binding->surface_size_loc != -1) {
|
||||
unsigned int aa_width = 1, aa_height = 1;
|
||||
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
|
||||
uniform2f(&binding->vertex->uniforms, binding->surface_size_loc,
|
||||
pg->surface_binding_dim.width / aa_width,
|
||||
pg->surface_binding_dim.height / aa_height);
|
||||
}
|
||||
|
||||
if (binding->clip_range_loc != -1) {
|
||||
uint32_t v[2];
|
||||
v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
|
||||
v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
|
||||
float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0;
|
||||
float zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0;
|
||||
uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0,
|
||||
zmax, zclip_min, zclip_max);
|
||||
}
|
||||
|
||||
/* Clipping regions */
|
||||
unsigned int max_gl_width = pg->surface_binding_dim.width;
|
||||
unsigned int max_gl_height = pg->surface_binding_dim.height;
|
||||
pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
|
||||
|
||||
uint32_t clip_regions[8][4];
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
|
||||
unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
|
||||
unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
|
||||
uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
|
||||
unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
|
||||
unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
|
||||
pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
|
||||
pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
|
||||
|
||||
pgraph_apply_scaling_factor(pg, &x_min, &y_min);
|
||||
pgraph_apply_scaling_factor(pg, &x_max, &y_max);
|
||||
|
||||
clip_regions[i][0] = x_min;
|
||||
clip_regions[i][1] = y_min;
|
||||
clip_regions[i][2] = x_max;
|
||||
clip_regions[i][3] = y_max;
|
||||
}
|
||||
uniform1iv(&binding->fragment->uniforms, binding->clip_region_loc,
|
||||
8 * 4, (void *)clip_regions);
|
||||
|
||||
if (binding->material_alpha_loc != -1) {
|
||||
uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc,
|
||||
pg->material_alpha);
|
||||
}
|
||||
}
|
||||
|
||||
// Quickly check PGRAPH state to see if any registers have changed that
|
||||
// necessitate a full shader state inspection.
|
||||
static bool check_shaders_dirty(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
if (!r->shader_binding) {
|
||||
return true;
|
||||
}
|
||||
if (pg->program_data_dirty) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
|
||||
for (int i = 0; i < num_stages; i++) {
|
||||
if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4) ||
|
||||
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4) ||
|
||||
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4) ||
|
||||
pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
unsigned int regs[] = {
|
||||
NV_PGRAPH_COMBINECTL,
|
||||
NV_PGRAPH_COMBINESPECFOG0,
|
||||
NV_PGRAPH_COMBINESPECFOG1,
|
||||
NV_PGRAPH_CSV0_C,
|
||||
NV_PGRAPH_CSV0_D,
|
||||
NV_PGRAPH_CSV1_A,
|
||||
NV_PGRAPH_CSV1_B,
|
||||
NV_PGRAPH_POINTSIZE,
|
||||
NV_PGRAPH_SHADERCLIPMODE,
|
||||
NV_PGRAPH_SHADERCTL,
|
||||
NV_PGRAPH_SHADERPROG,
|
||||
NV_PGRAPH_SHADOWCTL,
|
||||
};
|
||||
for (int i = 0; i < ARRAY_SIZE(regs); i++) {
|
||||
if (pgraph_is_reg_dirty(pg, regs[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
ShaderState *state = &r->shader_binding->state;
|
||||
if (pg->uniform_attrs != state->uniform_attrs ||
|
||||
pg->swizzle_attrs != state->swizzle_attrs ||
|
||||
pg->compressed_attrs != state->compressed_attrs ||
|
||||
pg->primitive_mode != state->primitive_mode ||
|
||||
pg->surface_scale_factor != state->surface_scale_factor) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Textures
|
||||
for (int i = 0; i < 4; i++) {
|
||||
        if (pg->texture_matrix_enable[i] != state->texture_matrix_enable[i] ||
|
||||
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + i * 4) ||
|
||||
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + i * 4) ||
|
||||
pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + i * 4)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void pgraph_vk_bind_shaders(PGRAPHState *pg)
|
||||
{
|
||||
NV2A_VK_DGROUP_BEGIN("%s", __func__);
|
||||
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
r->shader_bindings_changed = false;
|
||||
|
||||
if (check_shaders_dirty(pg)) {
|
||||
ShaderState new_state;
|
||||
memset(&new_state, 0, sizeof(ShaderState));
|
||||
new_state = pgraph_get_shader_state(pg);
|
||||
if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) {
|
||||
r->shader_binding = gen_shaders(pg, &new_state);
|
||||
r->shader_bindings_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME: Use dirty bits
|
||||
pgraph_vk_update_shader_uniforms(pg);
|
||||
|
||||
NV2A_VK_DGROUP_END();
|
||||
}
|
||||
|
||||
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
NV2A_VK_DGROUP_BEGIN("%s", __func__);
|
||||
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);
|
||||
|
||||
assert(r->shader_binding);
|
||||
ShaderBinding *binding = r->shader_binding;
|
||||
ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
|
||||
&binding->fragment->uniforms };
|
||||
shader_update_constants(pg, r->shader_binding, true,
|
||||
r->shader_binding->state.vertex_program,
|
||||
r->shader_binding->state.fixed_function);
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
|
||||
uint64_t hash = fast_hash(layouts[i]->allocation, layouts[i]->total_size);
|
||||
r->uniforms_changed |= (hash != r->uniform_buffer_hashes[i]);
|
||||
r->uniform_buffer_hashes[i] = hash;
|
||||
}
|
||||
|
||||
nv2a_profile_inc_counter(r->uniforms_changed ?
|
||||
NV2A_PROF_SHADER_UBO_DIRTY :
|
||||
NV2A_PROF_SHADER_UBO_NOTDIRTY);
|
||||
|
||||
NV2A_VK_DGROUP_END();
|
||||
}
|
||||
|
||||
void pgraph_vk_init_shaders(PGRAPHState *pg)
|
||||
{
|
||||
pgraph_vk_init_glsl_compiler();
|
||||
create_descriptor_pool(pg);
|
||||
create_descriptor_set_layout(pg);
|
||||
create_descriptor_sets(pg);
|
||||
shader_cache_init(pg);
|
||||
}
|
||||
|
||||
void pgraph_vk_finalize_shaders(PGRAPHState *pg)
|
||||
{
|
||||
shader_cache_finalize(pg);
|
||||
destroy_descriptor_sets(pg);
|
||||
destroy_descriptor_set_layout(pg);
|
||||
destroy_descriptor_pool(pg);
|
||||
pgraph_vk_finalize_glsl_compiler();
|
||||
}
|
|
@@ -0,0 +1,473 @@
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "hw/xbox/nv2a/pgraph/pgraph.h"
|
||||
#include "renderer.h"
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
// TODO: Swizzle/Unswizzle
|
||||
// TODO: Float depth format (low priority, but would be better for accuracy)
|
||||
|
||||
// FIXME: Below pipeline creation assumes identical 3 buffer setup. For
|
||||
// swizzle shader we will need more flexibility.
|
||||
|
||||
const char *pack_d24_unorm_s8_uint_to_z24s8_glsl =
|
||||
"#version 450\n"
|
||||
"layout(local_size_x = 256) in;\n"
|
||||
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
|
||||
"layout(binding = 0) buffer DepthIn { uint depth_in[]; };\n"
|
||||
"layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
|
||||
"layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
|
||||
"uint get_input_idx(uint idx_out) {\n"
|
||||
" uint scale = width_in / width_out;"
|
||||
" uint y = (idx_out / width_out) * scale;\n"
|
||||
" uint x = (idx_out % width_out) * scale;\n"
|
||||
" return y * width_in + x;\n"
|
||||
"}\n"
|
||||
"void main() {\n"
|
||||
" uint idx_out = gl_GlobalInvocationID.x;\n"
|
||||
" uint idx_in = get_input_idx(idx_out);\n"
|
||||
" uint depth_value = depth_in[idx_in];\n"
|
||||
" uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
|
||||
" depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
|
||||
"}\n";
|
||||
|
||||
const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl =
|
||||
"#version 450\n"
|
||||
"layout(local_size_x = 256) in;\n"
|
||||
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
|
||||
"layout(binding = 0) buffer DepthOut { uint depth_out[]; };\n"
|
||||
"layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
|
||||
"layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
|
||||
"uint get_input_idx(uint idx_out) {\n"
|
||||
" uint scale = width_out / width_in;"
|
||||
" uint y = (idx_out / width_out) / scale;\n"
|
||||
" uint x = (idx_out % width_out) / scale;\n"
|
||||
" return y * width_in + x;\n"
|
||||
"}\n"
|
||||
"void main() {\n"
|
||||
" uint idx_out = gl_GlobalInvocationID.x;\n"
|
||||
" uint idx_in = get_input_idx(idx_out);\n"
|
||||
" depth_out[idx_out] = depth_stencil_in[idx_in] >> 8;\n"
|
||||
" if (idx_out % 4 == 0) {\n"
|
||||
" uint stencil_value = 0;\n"
|
||||
" for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
|
||||
" uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
|
||||
" stencil_value |= v << (i * 8);\n"
|
||||
" }\n"
|
||||
" stencil_out[idx_out / 4] = stencil_value;\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
|
||||
const char *pack_d32_sfloat_s8_uint_to_z24s8_glsl =
|
||||
"#version 450\n"
|
||||
"layout(local_size_x = 256) in;\n"
|
||||
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
|
||||
"layout(binding = 0) buffer DepthIn { float depth_in[]; };\n"
|
||||
"layout(binding = 1) buffer StencilIn { uint stencil_in[]; };\n"
|
||||
"layout(binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n"
|
||||
"uint get_input_idx(uint idx_out) {\n"
|
||||
" uint y = idx_out / width_out;\n"
|
||||
" uint x = idx_out % width_out;\n"
|
||||
" return (y * width_in + x) * (width_in / width_out);\n"
|
||||
"}\n"
|
||||
"void main() {\n"
|
||||
" uint idx_out = gl_GlobalInvocationID.x;\n"
|
||||
" uint idx_in = get_input_idx(idx_out);\n"
|
||||
" uint depth_value = int(depth_in[idx_in] * float(0xffffff));\n"
|
||||
" uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n"
|
||||
" depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n"
|
||||
"}\n";
|
||||
|
||||
const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl =
|
||||
"#version 450\n"
|
||||
"layout(local_size_x = 256) in;\n"
|
||||
"layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n"
|
||||
"layout(binding = 0) buffer DepthOut { float depth_out[]; };\n"
|
||||
"layout(binding = 1) buffer StencilOut { uint stencil_out[]; };\n"
|
||||
"layout(binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n"
|
||||
"uint get_input_idx(uint idx_out) {\n"
|
||||
" uint scale = width_out / width_in;"
|
||||
" uint y = (idx_out / width_out) / scale;\n"
|
||||
" uint x = (idx_out % width_out) / scale;\n"
|
||||
" return y * width_in + x;\n"
|
||||
"}\n"
|
||||
"void main() {\n"
|
||||
" uint idx_out = gl_GlobalInvocationID.x;\n"
|
||||
" uint idx_in = get_input_idx(idx_out);\n"
|
||||
" depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n"
|
||||
" if (idx_out % 4 == 0) {\n"
|
||||
" uint stencil_value = 0;\n"
|
||||
" for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels
|
||||
" uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n"
|
||||
" stencil_value |= v << (i * 8);\n"
|
||||
" }\n"
|
||||
" stencil_out[idx_out / 4] = stencil_value;\n"
|
||||
" }\n"
|
||||
"}\n";
|
||||
|
||||
static void create_descriptor_pool(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkDescriptorPoolSize pool_sizes[] = {
|
||||
{
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 3,
|
||||
},
|
||||
};
|
||||
|
||||
VkDescriptorPoolCreateInfo pool_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
|
||||
.poolSizeCount = ARRAY_SIZE(pool_sizes),
|
||||
.pPoolSizes = pool_sizes,
|
||||
.maxSets = ARRAY_SIZE(r->compute.descriptor_sets),
|
||||
.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
|
||||
};
|
||||
VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
|
||||
&r->compute.descriptor_pool));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_pool(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkDestroyDescriptorPool(r->device, r->compute.descriptor_pool, NULL);
|
||||
r->compute.descriptor_pool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static void create_descriptor_set_layout(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
const int num_buffers = 3;
|
||||
|
||||
VkDescriptorSetLayoutBinding bindings[num_buffers];
|
||||
for (int i = 0; i < num_buffers; i++) {
|
||||
bindings[i] = (VkDescriptorSetLayoutBinding){
|
||||
.binding = i,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
};
|
||||
}
|
||||
VkDescriptorSetLayoutCreateInfo layout_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.bindingCount = ARRAY_SIZE(bindings),
|
||||
.pBindings = bindings,
|
||||
};
|
||||
VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL,
|
||||
&r->compute.descriptor_set_layout));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_set_layout(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkDestroyDescriptorSetLayout(r->device, r->compute.descriptor_set_layout,
|
||||
NULL);
|
||||
r->compute.descriptor_set_layout = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static void create_descriptor_sets(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
    VkDescriptorSetLayout layouts[ARRAY_SIZE(r->compute.descriptor_sets)];
|
||||
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
|
||||
layouts[i] = r->compute.descriptor_set_layout;
|
||||
}
|
||||
VkDescriptorSetAllocateInfo alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
|
||||
.descriptorPool = r->compute.descriptor_pool,
|
||||
.descriptorSetCount = ARRAY_SIZE(r->compute.descriptor_sets),
|
||||
.pSetLayouts = layouts,
|
||||
};
|
||||
VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info,
|
||||
r->compute.descriptor_sets));
|
||||
}
|
||||
|
||||
static void destroy_descriptor_sets(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
vkFreeDescriptorSets(r->device, r->compute.descriptor_pool,
|
||||
ARRAY_SIZE(r->compute.descriptor_sets),
|
||||
r->compute.descriptor_sets);
|
||||
for (int i = 0; i < ARRAY_SIZE(r->compute.descriptor_sets); i++) {
|
||||
r->compute.descriptor_sets[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
static void create_compute_pipeline_layout(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkPushConstantRange push_constant_range = {
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.size = 2 * sizeof(uint32_t),
|
||||
};
|
||||
VkPipelineLayoutCreateInfo pipeline_layout_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &r->compute.descriptor_set_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &push_constant_range,
|
||||
};
|
||||
VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
|
||||
&r->compute.pipeline_layout));
|
||||
}
|
||||
|
||||
static VkPipeline create_compute_pipeline(PGRAPHState *pg, const char *glsl)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
ShaderModuleInfo *module = pgraph_vk_create_shader_module_from_glsl(
|
||||
r, VK_SHADER_STAGE_COMPUTE_BIT, glsl);
|
||||
|
||||
VkComputePipelineCreateInfo pipeline_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.layout = r->compute.pipeline_layout,
|
||||
.stage =
|
||||
(VkPipelineShaderStageCreateInfo){
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pName = "main",
|
||||
.module = module->module,
|
||||
},
|
||||
};
|
||||
VkPipeline pipeline;
|
||||
VK_CHECK(vkCreateComputePipelines(r->device, r->vk_pipeline_cache, 1,
|
||||
&pipeline_info, NULL,
|
||||
&pipeline));
|
||||
|
||||
pgraph_vk_destroy_shader_module(r, module);
|
||||
|
||||
return pipeline;
|
||||
}
|
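/*
 * Host-side mirror (illustrative) of the push-constant block consumed by the
 * pack/unpack compute shaders above: two 32-bit words, { width_in, width_out },
 * matching the "layout(push_constant)" declaration in the GLSL.
 */
typedef struct ExamplePackPushConstants {
    uint32_t width_in;
    uint32_t width_out;
} ExamplePackPushConstants;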
||||
|
||||
static void update_descriptor_sets(PGRAPHState *pg,
|
||||
VkDescriptorBufferInfo *buffers, int count)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
assert(count == 3);
|
||||
VkWriteDescriptorSet descriptor_writes[3];
|
||||
const int descriptor_set_index = 0;
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
descriptor_writes[i] = (VkWriteDescriptorSet){
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.dstSet = r->compute.descriptor_sets[descriptor_set_index],
|
||||
.dstBinding = i,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.descriptorCount = 1,
|
||||
.pBufferInfo = &buffers[i],
|
||||
};
|
||||
}
|
||||
vkUpdateDescriptorSets(r->device, count, descriptor_writes, 0, NULL);
|
||||
}
|
||||
|
||||
//
|
||||
// Pack depth+stencil into NV097_SET_SURFACE_FORMAT_ZETA_Z24S8
|
||||
// formatted buffer with depth in bits 31-8 and stencil in bits 7-0.
|
||||
//
|
||||
void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
|
||||
VkCommandBuffer cmd, VkBuffer src,
|
||||
VkBuffer dst, bool downscale)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
unsigned int input_width = surface->width, input_height = surface->height;
|
||||
pgraph_apply_scaling_factor(pg, &input_width, &input_height);
|
||||
|
||||
unsigned int output_width = surface->width, output_height = surface->height;
|
||||
if (!downscale) {
|
||||
pgraph_apply_scaling_factor(pg, &output_width, &output_height);
|
||||
}
|
||||
|
||||
size_t depth_bytes_per_pixel = 4;
|
||||
size_t depth_size = input_width * input_height * depth_bytes_per_pixel;
|
||||
|
||||
size_t stencil_bytes_per_pixel = 1;
|
||||
size_t stencil_size = input_width * input_height * stencil_bytes_per_pixel;
|
||||
|
||||
size_t output_bytes_per_pixel = 4;
|
||||
size_t output_size = output_width * output_height * output_bytes_per_pixel;
|
||||
|
||||
VkDescriptorBufferInfo buffers[] = {
|
||||
{
|
||||
.buffer = src,
|
||||
.offset = 0,
|
||||
.range = depth_size,
|
||||
},
|
||||
{
|
||||
.buffer = src,
|
||||
.offset = depth_size,
|
||||
.range = stencil_size,
|
||||
},
|
||||
{
|
||||
.buffer = dst,
|
||||
.offset = 0,
|
||||
.range = output_size,
|
||||
},
|
||||
};
|
||||
update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers));
|
||||
|
||||
if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
r->compute.pipeline_pack_d24s8);
|
||||
} else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
r->compute.pipeline_pack_f32s8);
|
||||
} else {
|
||||
assert(!"Unsupported pack format");
|
||||
}
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
r->compute.pipeline_layout, 0, 1,
|
||||
&r->compute.descriptor_sets[0], 0, NULL);
|
||||
|
||||
uint32_t push_constants[2] = { input_width, output_width };
|
||||
assert(sizeof(push_constants) == 8);
|
||||
vkCmdPushConstants(cmd, r->compute.pipeline_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
|
||||
push_constants);
|
||||
|
||||
size_t workgroup_size_in_units = 256;
|
||||
size_t output_size_in_units = output_width * output_height;
|
||||
assert(output_size_in_units % workgroup_size_in_units == 0);
|
||||
size_t group_count = output_size_in_units / workgroup_size_in_units;
|
||||
|
||||
// FIXME: Check max group count
|
||||
|
||||
vkCmdDispatch(cmd, group_count, 1, 1);
|
||||
}
|
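/*
 * Worked example (illustrative): the Z24S8 layout produced above packs depth
 * into bits 31..8 and stencil into bits 7..0. The helpers below are a
 * host-side equivalent of a single shader invocation.
 */
static inline uint32_t example_pack_z24s8(uint32_t depth24, uint8_t stencil)
{
    return (depth24 << 8) | stencil;
}

static inline void example_unpack_z24s8(uint32_t zs, uint32_t *depth24,
                                        uint8_t *stencil)
{
    *depth24 = zs >> 8;
    *stencil = zs & 0xff;
}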
||||
|
||||
void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
|
||||
VkCommandBuffer cmd, VkBuffer src,
|
||||
VkBuffer dst)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
unsigned int input_width = surface->width, input_height = surface->height;
|
||||
|
||||
unsigned int output_width = surface->width, output_height = surface->height;
|
||||
pgraph_apply_scaling_factor(pg, &output_width, &output_height);
|
||||
|
||||
size_t depth_bytes_per_pixel = 4;
|
||||
size_t depth_size = output_width * output_height * depth_bytes_per_pixel;
|
||||
|
||||
size_t stencil_bytes_per_pixel = 1;
|
||||
size_t stencil_size = output_width * output_height * stencil_bytes_per_pixel;
|
||||
|
||||
size_t input_bytes_per_pixel = 4;
|
||||
size_t input_size = input_width * input_height * input_bytes_per_pixel;
|
||||
|
||||
VkDescriptorBufferInfo buffers[] = {
|
||||
{
|
||||
.buffer = dst,
|
||||
.offset = 0,
|
||||
.range = depth_size,
|
||||
},
|
||||
{
|
||||
.buffer = dst,
|
||||
.offset = depth_size,
|
||||
.range = stencil_size,
|
||||
},
|
||||
{
|
||||
.buffer = src,
|
||||
.offset = 0,
|
||||
.range = input_size,
|
||||
},
|
||||
};
|
||||
update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers));
|
||||
|
||||
if (surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
r->compute.pipeline_unpack_d24s8);
|
||||
} else if (surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
r->compute.pipeline_unpack_f32s8);
|
||||
} else {
|
||||
assert(!"Unsupported pack format");
|
||||
}
|
||||
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
r->compute.pipeline_layout, 0, 1,
|
||||
&r->compute.descriptor_sets[0], 0, NULL);
|
||||
|
||||
assert(output_width >= input_width);
|
||||
uint32_t push_constants[2] = { input_width, output_width };
|
||||
assert(sizeof(push_constants) == 8);
|
||||
vkCmdPushConstants(cmd, r->compute.pipeline_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
|
||||
push_constants);
|
||||
|
||||
size_t workgroup_size_in_units = 256;
|
||||
size_t output_size_in_units = output_width * output_height;
|
||||
assert(output_size_in_units % workgroup_size_in_units == 0);
|
||||
size_t group_count = output_size_in_units / workgroup_size_in_units;
|
||||
|
||||
// FIXME: Check max group count
|
||||
|
||||
vkCmdDispatch(cmd, group_count, 1, 1);
|
||||
}
|
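/*
 * Illustration of the dispatch sizing used above: one invocation per output
 * texel and 256 invocations per workgroup, so the texel count must divide
 * evenly (e.g. 640 * 480 = 307,200 texels -> 1,200 workgroups).
 */
static inline uint32_t example_group_count(uint32_t width, uint32_t height)
{
    assert((width * height) % 256 == 0);
    return (width * height) / 256;
}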
||||
|
||||
void pgraph_vk_init_compute(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
create_descriptor_pool(pg);
|
||||
create_descriptor_set_layout(pg);
|
||||
create_descriptor_sets(pg);
|
||||
create_compute_pipeline_layout(pg);
|
||||
|
||||
r->compute.pipeline_pack_d24s8 =
|
||||
create_compute_pipeline(pg, pack_d24_unorm_s8_uint_to_z24s8_glsl);
|
||||
r->compute.pipeline_unpack_d24s8 =
|
||||
create_compute_pipeline(pg, unpack_z24s8_to_d24_unorm_s8_uint_glsl);
|
||||
r->compute.pipeline_pack_f32s8 =
|
||||
create_compute_pipeline(pg, pack_d32_sfloat_s8_uint_to_z24s8_glsl);
|
||||
r->compute.pipeline_unpack_f32s8 =
|
||||
create_compute_pipeline(pg, unpack_z24s8_to_d32_sfloat_s8_uint_glsl);
|
||||
}
|
||||
|
||||
void pgraph_vk_finalize_compute(PGRAPHState *pg)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
VkPipeline *pipelines[] = {
|
||||
&r->compute.pipeline_pack_d24s8,
|
||||
&r->compute.pipeline_unpack_d24s8,
|
||||
&r->compute.pipeline_pack_f32s8,
|
||||
&r->compute.pipeline_unpack_f32s8,
|
||||
};
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(pipelines); i++) {
|
||||
vkDestroyPipeline(r->device, *pipelines[i], NULL);
|
||||
        *pipelines[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL);
|
||||
r->compute.pipeline_layout = VK_NULL_HANDLE;
|
||||
|
||||
destroy_descriptor_sets(pg);
|
||||
destroy_descriptor_set_layout(pg);
|
||||
destroy_descriptor_pool(pg);
|
||||
}
|
File diff suppressed because it is too large
File diff suppressed because it is too large
|
@@ -0,0 +1,312 @@
/*
|
||||
* Geforce NV2A PGRAPH Vulkan Renderer
|
||||
*
|
||||
* Copyright (c) 2024 Matt Borgerson
|
||||
*
|
||||
* Based on GL implementation:
|
||||
*
|
||||
* Copyright (c) 2012 espes
|
||||
* Copyright (c) 2015 Jannik Vogel
|
||||
* Copyright (c) 2018-2024 Matt Borgerson
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "renderer.h"
|
||||
|
||||
VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data,
|
||||
VkDeviceSize size)
|
||||
{
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
|
||||
return pgraph_vk_append_to_buffer(pg, BUFFER_INDEX_STAGING, &data, &size, 1,
|
||||
1);
|
||||
}
|
||||
|
||||
VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data,
|
||||
VkDeviceSize *sizes,
|
||||
size_t count)
|
||||
{
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3);
|
||||
return pgraph_vk_append_to_buffer(pg, BUFFER_VERTEX_INLINE_STAGING, data,
|
||||
sizes, count, 1);
|
||||
}
|
||||
|
||||
void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset,
|
||||
void *data, VkDeviceSize size)
|
||||
{
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
size_t offset_bit = offset / 4096;
|
||||
size_t nbits = size / 4096;
|
||||
if (find_next_bit(r->uploaded_bitmap, nbits, offset_bit) < nbits) {
|
||||
// Vertex data changed while building the draw list. Finish drawing
|
||||
// before updating RAM buffer.
|
||||
pgraph_vk_finish(pg, VK_FINISH_REASON_VERTEX_BUFFER_DIRTY);
|
||||
}
|
||||
|
||||
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1);
|
||||
memcpy(r->storage_buffers[BUFFER_VERTEX_RAM].mapped + offset, data, size);
|
||||
|
||||
bitmap_set(r->uploaded_bitmap, offset_bit, nbits);
|
||||
}
|
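/*
 * Sketch of the page-granular upload tracking above (illustrative; assumes
 * page-aligned offset and size): the vertex RAM shadow buffer is tracked in
 * 4 KiB pages, and the current draw list is finished early only if a page it
 * already uploaded is written again.
 */
static inline void example_mark_pages_uploaded(unsigned long *bitmap,
                                               hwaddr offset, hwaddr size)
{
    size_t first_page = offset / 4096;
    size_t num_pages = size / 4096;
    bitmap_set(bitmap, first_page, num_pages);
}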
||||
|
||||
static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
assert(r->num_vertex_ram_buffer_syncs <
|
||||
ARRAY_SIZE(r->vertex_ram_buffer_syncs));
|
||||
r->vertex_ram_buffer_syncs[r->num_vertex_ram_buffer_syncs++] =
|
||||
(MemorySyncRequirement){ .addr = addr, .size = size };
|
||||
}
|
||||
|
||||
static const VkFormat float_to_count[] = {
|
||||
VK_FORMAT_R32_SFLOAT,
|
||||
VK_FORMAT_R32G32_SFLOAT,
|
||||
VK_FORMAT_R32G32B32_SFLOAT,
|
||||
VK_FORMAT_R32G32B32A32_SFLOAT,
|
||||
};
|
||||
|
||||
static const VkFormat ub_to_count[] = {
|
||||
VK_FORMAT_R8_UNORM,
|
||||
VK_FORMAT_R8G8_UNORM,
|
||||
VK_FORMAT_R8G8B8_UNORM,
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
};
|
||||
|
||||
static const VkFormat s1_to_count[] = {
|
||||
VK_FORMAT_R16_SNORM,
|
||||
VK_FORMAT_R16G16_SNORM,
|
||||
VK_FORMAT_R16G16B16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
};
|
||||
|
||||
static const VkFormat s32k_to_count[] = {
|
||||
VK_FORMAT_R16_SSCALED,
|
||||
VK_FORMAT_R16G16_SSCALED,
|
||||
VK_FORMAT_R16G16B16_SSCALED,
|
||||
VK_FORMAT_R16G16B16A16_SSCALED,
|
||||
};
|
||||
|
||||
static char const * const vertex_data_array_format_to_str[] = {
|
||||
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D] = "UB_D3D",
|
||||
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL] = "UB_OGL",
|
||||
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1] = "S1",
|
||||
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F] = "F",
|
||||
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K] = "S32K",
|
||||
[NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP] = "CMP",
|
||||
};
|
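/*
 * Illustrative lookup (hypothetical helper): the per-type tables above are
 * indexed by the 1-based component count of a vertex attribute, e.g. a
 * 3-component float attribute maps to VK_FORMAT_R32G32B32_SFLOAT.
 */
static inline VkFormat example_float_attr_format(unsigned int count)
{
    assert(count >= 1 && count <= ARRAY_SIZE(float_to_count));
    return float_to_count[count - 1];
}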
||||
|
||||
void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element,
|
||||
unsigned int max_element,
|
||||
bool inline_data,
|
||||
unsigned int inline_stride,
|
||||
unsigned int provoking_element)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
unsigned int num_elements = max_element - min_element + 1;
|
||||
|
||||
if (inline_data) {
|
||||
NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
|
||||
__func__, num_elements, inline_stride);
|
||||
} else {
|
||||
NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
|
||||
}
|
||||
|
||||
pg->compressed_attrs = 0;
|
||||
pg->uniform_attrs = 0;
|
||||
pg->swizzle_attrs = 0;
|
||||
|
||||
r->num_active_vertex_attribute_descriptions = 0;
|
||||
r->num_active_vertex_binding_descriptions = 0;
|
||||
|
||||
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
|
||||
VertexAttribute *attr = &pg->vertex_attributes[i];
|
||||
NV2A_VK_DGROUP_BEGIN("[attr %02d] format=%s, count=%d, stride=%d", i,
|
||||
vertex_data_array_format_to_str[attr->format],
|
||||
attr->count, attr->stride);
|
||||
r->vertex_attribute_to_description_location[i] = -1;
|
||||
if (!attr->count) {
|
||||
pg->uniform_attrs |= 1 << i;
|
||||
NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}",
|
||||
attr->inline_value[0], attr->inline_value[1],
|
||||
attr->inline_value[2], attr->inline_value[3]);
|
||||
NV2A_VK_DGROUP_END();
|
||||
continue;
|
||||
}
|
||||
|
||||
VkFormat vk_format;
|
||||
bool needs_conversion = false;
|
||||
bool d3d_swizzle = false;
|
||||
|
||||
switch (attr->format) {
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
|
||||
assert(attr->count == 4);
|
||||
d3d_swizzle = true;
|
||||
/* fallthru */
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
|
||||
assert(attr->count <= ARRAY_SIZE(ub_to_count));
|
||||
vk_format = ub_to_count[attr->count - 1];
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
|
||||
assert(attr->count <= ARRAY_SIZE(s1_to_count));
|
||||
vk_format = s1_to_count[attr->count - 1];
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
|
||||
assert(attr->count <= ARRAY_SIZE(float_to_count));
|
||||
vk_format = float_to_count[attr->count - 1];
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
|
||||
assert(attr->count <= ARRAY_SIZE(s32k_to_count));
|
||||
vk_format = s32k_to_count[attr->count - 1];
|
||||
break;
|
||||
case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
|
||||
vk_format =
|
||||
VK_FORMAT_R32_SINT; // VK_FORMAT_B10G11R11_UFLOAT_PACK32 ??
|
||||
/* 3 signed, normalized components packed in 32-bits. (11,11,10) */
|
||||
assert(attr->count == 1);
|
||||
needs_conversion = true;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format);
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
|
||||
nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND);
|
||||
hwaddr attrib_data_addr;
|
||||
size_t stride;
|
||||
|
||||
if (needs_conversion) {
|
||||
pg->compressed_attrs |= (1 << i);
|
||||
}
|
||||
if (d3d_swizzle) {
|
||||
pg->swizzle_attrs |= (1 << i);
|
||||
}
|
||||
|
||||
hwaddr start = 0;
|
||||
if (inline_data) {
|
||||
attrib_data_addr = attr->inline_array_offset;
|
||||
stride = inline_stride;
|
||||
} else {
|
||||
hwaddr dma_len;
|
||||
uint8_t *attr_data = (uint8_t *)nv_dma_map(
|
||||
d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a,
|
||||
&dma_len);
|
||||
assert(attr->offset < dma_len);
|
||||
attrib_data_addr = attr_data + attr->offset - d->vram_ptr;
|
||||
stride = attr->stride;
|
||||
start = attrib_data_addr + min_element * stride;
|
||||
update_memory_buffer(d, start, num_elements * stride);
|
||||
}
|
||||
|
||||
uint32_t provoking_element_index = provoking_element - min_element;
|
||||
size_t element_size = attr->size * attr->count;
|
||||
assert(element_size <= sizeof(attr->inline_value));
|
||||
const uint8_t *last_entry;
|
||||
|
||||
if (inline_data) {
|
||||
last_entry =
|
||||
(uint8_t *)pg->inline_array + attr->inline_array_offset;
|
||||
} else {
|
||||
last_entry = d->vram_ptr + start;
|
||||
}
|
||||
if (!stride) {
|
||||
// Stride of 0 indicates that only the first element should be
|
||||
// used.
|
||||
pg->uniform_attrs |= 1 << i;
|
||||
pgraph_update_inline_value(attr, last_entry);
|
||||
NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}",
|
||||
attr->inline_value[0], attr->inline_value[1],
|
||||
attr->inline_value[2], attr->inline_value[3]);
|
||||
NV2A_VK_DGROUP_END();
|
||||
continue;
|
||||
}
|
||||
|
||||
NV2A_VK_DPRINTF("offset = %08" HWADDR_PRIx, attrib_data_addr);
|
||||
last_entry += stride * provoking_element_index;
|
||||
pgraph_update_inline_value(attr, last_entry);
|
||||
|
||||
r->vertex_attribute_to_description_location[i] =
|
||||
r->num_active_vertex_binding_descriptions;
|
||||
|
||||
r->vertex_binding_descriptions
|
||||
[r->num_active_vertex_binding_descriptions++] =
|
||||
(VkVertexInputBindingDescription){
|
||||
.binding = r->vertex_attribute_to_description_location[i],
|
||||
.stride = stride,
|
||||
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
|
||||
};
|
||||
|
||||
r->vertex_attribute_descriptions
|
||||
[r->num_active_vertex_attribute_descriptions++] =
|
||||
(VkVertexInputAttributeDescription){
|
||||
.binding = r->vertex_attribute_to_description_location[i],
|
||||
.location = i,
|
||||
.format = vk_format,
|
||||
};
|
||||
|
||||
r->vertex_attribute_offsets[i] = attrib_data_addr;
|
||||
|
||||
NV2A_VK_DGROUP_END();
|
||||
}
|
||||
|
||||
NV2A_VK_DGROUP_END();
|
||||
}
|
||||
|
||||
void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d)
|
||||
{
|
||||
PGRAPHState *pg = &d->pgraph;
|
||||
PGRAPHVkState *r = pg->vk_renderer_state;
|
||||
|
||||
pg->compressed_attrs = 0;
|
||||
pg->uniform_attrs = 0;
|
||||
pg->swizzle_attrs = 0;
|
||||
|
||||
r->num_active_vertex_attribute_descriptions = 0;
|
||||
r->num_active_vertex_binding_descriptions = 0;
|
||||
|
||||
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
|
||||
VertexAttribute *attr = &pg->vertex_attributes[i];
|
||||
if (attr->inline_buffer_populated) {
|
||||
r->vertex_attribute_to_description_location[i] =
|
||||
r->num_active_vertex_binding_descriptions;
|
||||
r->vertex_binding_descriptions
|
||||
[r->num_active_vertex_binding_descriptions++] =
|
||||
(VkVertexInputBindingDescription){
|
||||
.binding =
|
||||
r->vertex_attribute_to_description_location[i],
|
||||
.stride = 4 * sizeof(float),
|
||||
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX,
|
||||
};
|
||||
r->vertex_attribute_descriptions
|
||||
[r->num_active_vertex_attribute_descriptions++] =
|
||||
(VkVertexInputAttributeDescription){
|
||||
.binding =
|
||||
r->vertex_attribute_to_description_location[i],
|
||||
.location = i,
|
||||
.format = VK_FORMAT_R32G32B32A32_SFLOAT,
|
||||
};
|
||||
memcpy(attr->inline_value,
|
||||
attr->inline_buffer + (pg->inline_buffer_length - 1) * 4,
|
||||
sizeof(attr->inline_value));
|
||||
} else {
|
||||
r->vertex_attribute_to_description_location[i] = -1;
|
||||
pg->uniform_attrs |= 1 << i;
|
||||
}
|
||||
}
|
||||
}
|
|
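
For context: the binding and attribute description arrays populated above are the standard inputs to Vulkan pipeline creation. A minimal illustrative sketch (not part of this commit; the helper name is made up) of how arrays shaped like these would typically feed VkPipelineVertexInputStateCreateInfo:

    // Illustrative sketch only: consuming binding/attribute description
    // arrays like r->vertex_binding_descriptions and
    // r->vertex_attribute_descriptions when building a graphics pipeline.
    #include <vulkan/vulkan.h>

    static VkPipelineVertexInputStateCreateInfo
    make_vertex_input_state_sketch(const VkVertexInputBindingDescription *bindings,
                                   uint32_t num_bindings,
                                   const VkVertexInputAttributeDescription *attrs,
                                   uint32_t num_attrs)
    {
        // Per-binding stride/input-rate plus per-attribute location/format is
        // all Vulkan needs here; buffer offsets are supplied separately when
        // the vertex buffers are bound (vkCmdBindVertexBuffers).
        return (VkPipelineVertexInputStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .vertexBindingDescriptionCount = num_bindings,
            .pVertexBindingDescriptions = bindings,
            .vertexAttributeDescriptionCount = num_attrs,
            .pVertexAttributeDescriptions = attrs,
        };
    }

The NV2A-side offsets recorded in vertex_attribute_offsets above would presumably be applied at bind time rather than in the pipeline state.
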
@@ -21,7 +21,7 @@
#define HW_NV2A_VSH_H

#include <stdbool.h>
#include "shaders_common.h"
#include "qemu/mstring.h"

enum VshLight {
    LIGHT_OFF,

@@ -130,11 +130,4 @@ typedef enum {

uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name);

void vsh_translate(uint16_t version,
                   const uint32_t *tokens,
                   unsigned int length,
                   bool z_perspective,
                   MString *header, MString *body);


#endif
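
The vsh_translate() prototype above, together with the MString helpers this commit moves into qemu/mstring.h, suggests the following usage pattern. This is an illustrative sketch only; the version value and token stream are assumed to come from the guest shader state and are not defined here:

    // Sketch: translate a vertex-shader token stream into shader text and
    // dump it. Assumes "vsh.h" and "qemu/mstring.h" are included.
    #include <stdio.h>

    static void dump_translated_vsh(uint16_t version, const uint32_t *tokens,
                                    unsigned int num_tokens)
    {
        MString *header = mstring_new();
        MString *body = mstring_new();

        vsh_translate(version, tokens, num_tokens,
                      /* z_perspective = */ false, header, body);

        printf("%s%s\n", mstring_get_str(header), mstring_get_str(body));

        mstring_unref(header);
        mstring_unref(body);
    }
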
File diff suppressed because it is too large
@@ -1,125 +0,0 @@
/*
 * QEMU Geforce NV2A shader common definitions
 *
 * Copyright (c) 2015 espes
 * Copyright (c) 2015 Jannik Vogel
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef HW_NV2A_SHADERS_COMMON_H
#define HW_NV2A_SHADERS_COMMON_H

#include "debug.h"

#define DEF_VERTEX_DATA(qualifier, in_out, prefix, suffix) \
    "noperspective " in_out " float " prefix "vtx_inv_w" suffix ";\n" \
    "flat " in_out " float " prefix "vtx_inv_w_flat" suffix ";\n" \
    qualifier " " in_out " vec4 " prefix "vtxD0" suffix ";\n" \
    qualifier " " in_out " vec4 " prefix "vtxD1" suffix ";\n" \
    qualifier " " in_out " vec4 " prefix "vtxB0" suffix ";\n" \
    qualifier " " in_out " vec4 " prefix "vtxB1" suffix ";\n" \
    "noperspective " in_out " float " prefix "vtxFog" suffix ";\n" \
    "noperspective " in_out " vec4 " prefix "vtxT0" suffix ";\n" \
    "noperspective " in_out " vec4 " prefix "vtxT1" suffix ";\n" \
    "noperspective " in_out " vec4 " prefix "vtxT2" suffix ";\n" \
    "noperspective " in_out " vec4 " prefix "vtxT3" suffix ";\n"

#define STRUCT_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "", "")
#define STRUCT_VERTEX_DATA_IN_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "", "")
#define STRUCT_V_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "v_", "")
#define STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "v_", "[]")

#define STRUCT_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "", "")
#define STRUCT_VERTEX_DATA_IN_FLAT DEF_VERTEX_DATA("flat", "in", "", "")
#define STRUCT_V_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "v_", "")
#define STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT DEF_VERTEX_DATA("flat", "in", "v_", "[]")

typedef struct {
    int ref;
    gchar *string;
} MString;

void mstring_append_fmt(MString *mstring, const char *fmt, ...);
MString *mstring_from_fmt(const char *fmt, ...);
void mstring_append_va(MString *mstring, const char *fmt, va_list va);

static inline
void mstring_ref(MString *mstr)
{
    mstr->ref++;
}

static inline
void mstring_unref(MString *mstr)
{
    mstr->ref--;
    if (!mstr->ref) {
        g_free(mstr->string);
        g_free(mstr);
    }
}

static inline
void mstring_append(MString *mstr, const char *str)
{
    gchar *n = g_strconcat(mstr->string, str, NULL);
    g_free(mstr->string);
    mstr->string = n;
}

static inline
void mstring_append_chr(MString *mstr, char chr)
{
    mstring_append_fmt(mstr, "%c", chr);
}

static inline
void mstring_append_int(MString *mstr, int val)
{
    mstring_append_fmt(mstr, "%" PRId64, val);
}

static inline
MString *mstring_new(void)
{
    MString *mstr = g_malloc(sizeof(MString));
    mstr->ref = 1;
    mstr->string = g_strdup("");
    return mstr;
}

static inline
MString *mstring_from_str(const char *str)
{
    MString *mstr = g_malloc(sizeof(MString));
    mstr->ref = 1;
    mstr->string = g_strdup(str);
    return mstr;
}

static inline
const gchar *mstring_get_str(MString *mstr)
{
    return mstr->string;
}

static inline
size_t mstring_get_length(MString *mstr)
{
    return strlen(mstr->string);
}


#endif
@@ -1,7 +1,7 @@
/*
 * LRU object list
 *
 * Copyright (c) 2021 Matt Borgerson
 * Copyright (c) 2021-2024 Matt Borgerson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

@@ -42,6 +42,8 @@ typedef struct Lru Lru;
struct Lru {
    QTAILQ_HEAD(, LruNode) global;
    QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS];
    int num_used;
    int num_free;

    /* Initialize a node. */
    void (*init_node)(Lru *lru, LruNode *node, void *key);

@@ -67,6 +69,8 @@ void lru_init(Lru *lru)
    lru->compare_nodes = NULL;
    lru->pre_node_evict = NULL;
    lru->post_node_evict = NULL;
    lru->num_free = 0;
    lru->num_used = 0;
}

static inline

@@ -74,6 +78,7 @@ void lru_add_free(Lru *lru, LruNode *node)
{
    node->next_bin.tqe_circ.tql_prev = NULL;
    QTAILQ_INSERT_TAIL(&lru->global, node, next_global);
    lru->num_free += 1;
}

static inline

@@ -106,29 +111,51 @@ void lru_evict_node(Lru *lru, LruNode *node)
    if (lru->post_node_evict) {
        lru->post_node_evict(lru, node);
    }

    lru->num_used -= 1;
    lru->num_free += 1;
}

static inline
LruNode *lru_try_evict_one(Lru *lru)
{
    LruNode *found;

    QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
        if (lru_is_node_in_use(lru, found)
            && (!lru->pre_node_evict || lru->pre_node_evict(lru, found))) {
            lru_evict_node(lru, found);
            return found;
        }
    }

    return NULL;
}

static inline
LruNode *lru_evict_one(Lru *lru)
{
    LruNode *found;

    QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
        bool can_evict = true;
        if (lru_is_node_in_use(lru, found) && lru->pre_node_evict) {
            can_evict = lru->pre_node_evict(lru, found);
        }
        if (can_evict) {
            break;
        }
    }
    LruNode *found = lru_try_evict_one(lru);

    assert(found != NULL); /* No evictable node! */

    lru_evict_node(lru, found);
    return found;
}

static inline
LruNode *lru_get_one_free(Lru *lru)
{
    LruNode *found;

    QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) {
        if (!lru_is_node_in_use(lru, found)) {
            return found;
        }
    }

    return lru_evict_one(lru);
}

static inline
bool lru_contains_hash(Lru *lru, uint64_t hash)
{

@@ -160,12 +187,15 @@ LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key)
    if (found) {
        QTAILQ_REMOVE(&lru->bins[bin], found, next_bin);
    } else {
        found = lru_evict_one(lru);
        found = lru_get_one_free(lru);
        found->hash = hash;
        if (lru->init_node) {
            lru->init_node(lru, found, key);
        }
        assert(found->hash == hash);

        lru->num_used += 1;
        lru->num_free -= 1;
    }

    QTAILQ_REMOVE(&lru->global, found, next_global);
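
For orientation, lru_lookup() above is meant to be used with LruNode embedded in a larger cache entry; the free-node bookkeeping and lru_get_one_free()/lru_try_evict_one() path added here is how a node is obtained on a miss. A rough sketch under that assumption (DemoCacheEntry, its fields, and the callback wiring are illustrative, not from this commit):

    // Sketch: an LRU-managed cache entry. Only the Lru/LruNode API shown in
    // the diff above is real; the rest is illustrative.
    typedef struct DemoCacheEntry {
        LruNode node;      /* embedded; lru_lookup() returns &entry->node */
        void *payload;
    } DemoCacheEntry;

    static void demo_init_node(Lru *lru, LruNode *node, void *key)
    {
        /* Called for a fresh or recycled node on a cache miss. */
        DemoCacheEntry *entry = container_of(node, DemoCacheEntry, node);
        entry->payload = NULL;
    }

    static void demo_cache_init(Lru *lru)
    {
        lru_init(lru);
        lru->init_node = demo_init_node;
    }

    static DemoCacheEntry *demo_lookup(Lru *lru, uint64_t hash, void *key)
    {
        /* Takes a free node if one exists, otherwise evicts via
         * lru_get_one_free() -> lru_evict_one() as shown above. */
        LruNode *node = lru_lookup(lru, hash, key);
        return container_of(node, DemoCacheEntry, node);
    }
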
@@ -0,0 +1,82 @@
#ifndef MSTRING_H
#define MSTRING_H

#include "qemu/osdep.h"
#include <string.h>

typedef struct {
    int ref;
    gchar *string;
} MString;

void mstring_append_fmt(MString *mstring, const char *fmt, ...);
MString *mstring_from_fmt(const char *fmt, ...);
void mstring_append_va(MString *mstring, const char *fmt, va_list va);

static inline
void mstring_ref(MString *mstr)
{
    mstr->ref++;
}

static inline
void mstring_unref(MString *mstr)
{
    mstr->ref--;
    if (!mstr->ref) {
        g_free(mstr->string);
        g_free(mstr);
    }
}

static inline
void mstring_append(MString *mstr, const char *str)
{
    gchar *n = g_strconcat(mstr->string, str, NULL);
    g_free(mstr->string);
    mstr->string = n;
}

static inline
void mstring_append_chr(MString *mstr, char chr)
{
    mstring_append_fmt(mstr, "%c", chr);
}

static inline
void mstring_append_int(MString *mstr, int val)
{
    /* Cast so the argument matches the 64-bit conversion specifier. */
    mstring_append_fmt(mstr, "%" PRId64, (int64_t)val);
}

static inline
MString *mstring_new(void)
{
    MString *mstr = g_malloc(sizeof(MString));
    mstr->ref = 1;
    mstr->string = g_strdup("");
    return mstr;
}

static inline
MString *mstring_from_str(const char *str)
{
    MString *mstr = g_malloc(sizeof(MString));
    mstr->ref = 1;
    mstr->string = g_strdup(str);
    return mstr;
}

static inline
const gchar *mstring_get_str(MString *mstr)
{
    return mstr->string;
}

static inline
size_t mstring_get_length(MString *mstr)
{
    return strlen(mstr->string);
}

#endif
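
A brief usage sketch of the reference-counted string helper above; the shader-prologue scenario is hypothetical, but every call shown is from the header:

    // Sketch: building a small text snippet with the MString API.
    static char *build_prologue_example(int num_attributes)
    {
        MString *s = mstring_from_str("#version 450\n");
        mstring_append_fmt(s, "// %d active attributes\n", num_attributes);
        mstring_append(s, "void main() {\n");
        mstring_append_chr(s, '}');

        char *result = g_strdup(mstring_get_str(s));
        mstring_unref(s);   /* drops the last reference and frees the buffer */
        return result;
    }
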
@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.

"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.

"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.

"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).

"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.

"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."

"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.

2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.

4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:

(a) You must give any other recipients of the Work or
    Derivative Works a copy of this License; and

(b) You must cause any modified files to carry prominent notices
    stating that You changed the files; and

(c) You must retain, in the Source form of any Derivative Works
    that You distribute, all copyright, patent, trademark, and
    attribution notices from the Source form of the Work,
    excluding those notices that do not pertain to any part of
    the Derivative Works; and

(d) If the Work includes a "NOTICE" text file as part of its
    distribution, then any Derivative Works that You distribute must
    include a readable copy of the attribution notices contained
    within such NOTICE file, excluding those notices that do not
    pertain to any part of the Derivative Works, in at least one
    of the following places: within a NOTICE text file distributed
    as part of the Derivative Works; within the Source form or
    documentation, if provided along with the Derivative Works; or,
    within a display generated by the Derivative Works, if and
    wherever such third-party notices normally appear. The contents
    of the NOTICE file are for informational purposes only and
    do not modify the License. You may add Your own attribution
    notices within Derivative Works that You distribute, alongside
    or as an addendum to the NOTICE text from the Work, provided
    that such additional attribution notices cannot be construed
    as modifying the License.

You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.

5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.

6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.

7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
@@ -0,0 +1,19 @@
Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -0,0 +1,19 @@
Copyright (c) 2018-2024 Arseny Kapoulkine

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
meson.build
@@ -1180,6 +1180,34 @@ if not get_option('opengl').auto() or have_system or have_vhost_user_gpu
                     link_args: config_host['EPOXY_LIBS'].split() + opengl_libs)
endif

vulkan = not_found
if targetos == 'windows'
  vulkan = declare_dependency(
    compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR', '-DVK_NO_PROTOTYPES'],
  )
  libglslang = declare_dependency(link_args: [
    '-lglslang',
    '-lMachineIndependent',
    '-lGenericCodeGen',
    '-lSPIRV',
    '-lSPIRV-Tools',
    '-lSPIRV-Tools-opt'
  ])
elif targetos == 'linux'
  vulkan = dependency('vulkan')
  libglslang = declare_dependency(link_args: [
    '-lglslang',
    '-lMachineIndependent',
    '-lGenericCodeGen',
    '-lSPIRV',
    '-lSPIRV-Tools',
    '-lSPIRV-Tools-opt'
  ])
endif

subdir('thirdparty')


gbm = not_found
if (have_system or have_tools) and (virgl.found() or opengl.found())
  gbm = dependency('gbm', method: 'pkg-config', required: false,

@@ -1931,6 +1959,7 @@ config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found())
config_host_data.set('CONFIG_LIBPMEM', libpmem.found())
config_host_data.set('CONFIG_NUMA', numa.found())
config_host_data.set('CONFIG_OPENGL', opengl.found())
config_host_data.set('CONFIG_VULKAN', vulkan.found())
config_host_data.set('CONFIG_PROFILER', get_option('profiler'))
config_host_data.set('CONFIG_RBD', rbd.found())
config_host_data.set('CONFIG_RDMA', rdma.found())

@@ -4054,6 +4083,7 @@ summary_info += {'U2F support': u2f}
summary_info += {'libusb': libusb}
summary_info += {'usb net redir': usbredir}
summary_info += {'OpenGL support (epoxy)': opengl}
summary_info += {'Vulkan support': vulkan}
summary_info += {'GBM': gbm}
summary_info += {'libiscsi support': libiscsi}
summary_info += {'libnfs support': libnfs}
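
The CONFIG_VULKAN flag set above becomes a compile-time switch the rest of the tree can test. An illustrative sketch of such a guard (the function and message strings are placeholders, not the commit's actual code):

    // Illustrative only: a compile-time guard driven by CONFIG_VULKAN as
    // produced by the meson logic above.
    #include <stdio.h>

    static void report_renderer_support(void)
    {
    #ifdef CONFIG_VULKAN
        printf("built with Vulkan renderer support\n");
    #else
        printf("built without Vulkan renderer support (OpenGL only)\n");
    #endif
    }
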
|
@ -28,8 +28,12 @@ sub_file="${sub_tdir}/submodule.tar"
|
|||
# different to the host OS.
|
||||
submodules="dtc meson ui/keycodemapdb"
|
||||
submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3"
|
||||
submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" # xemu extras
|
||||
|
||||
# xemu extras
|
||||
submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig"
|
||||
submodules="$submodules hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"
|
||||
submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect"
|
||||
|
||||
sub_deinit=""
|
||||
|
||||
function cleanup() {
|
||||
|
|
|
@@ -228,7 +228,25 @@ Lib('fpng', 'https://github.com/richgel999/fpng',
Lib('nv2a_vsh_cpu', 'https://github.com/abaire/nv2a_vsh_cpu',
    unlicense, 'https://raw.githubusercontent.com/abaire/nv2a_vsh_cpu/main/LICENSE',
    ships_static=all_platforms,
    submodule=Submodule('hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu')
    submodule=Submodule('hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu')
    ),

Lib('volk', 'https://github.com/zeux/volk',
    mit, 'https://raw.githubusercontent.com/zeux/volk/master/LICENSE.md',
    ships_static=all_platforms,
    submodule=Submodule('thirdparty/volk')
    ),

Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator',
    mit, 'https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/master/LICENSE.txt',
    ships_static=all_platforms,
    submodule=Submodule('thirdparty/VulkanMemoryAllocator')
    ),

Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect',
    apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Reflect/main/LICENSE',
    ships_static=all_platforms,
    submodule=Submodule('thirdparty/SPIRV-Reflect')
    ),

#

@@ -344,6 +362,17 @@ Lib('miniz', 'https://github.com/richgel999/miniz',
    ships_static={windows}, platform={windows},
    version='2.1.0'
    ),

Lib('glslang', 'https://github.com/KhronosGroup/glslang',
    bsd_3clause, 'https://raw.githubusercontent.com/KhronosGroup/glslang/main/LICENSE.txt',
    ships_static={windows}, platform={windows},
    ),

Lib('SPIRV-Tools', 'https://github.com/KhronosGroup/SPIRV-Tools',
    apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Tools/main/LICENSE',
    ships_static={windows}, platform={windows},
    ),

]

def gen_license():
@@ -0,0 +1 @@
Subproject commit 1d674a82d7e102ed0c02e64e036827db9e8b1a71
Some files were not shown because too many files have changed in this diff.