Compare commits

..

No commits in common. "master" and "v0.8.46" have entirely different histories.

137 changed files with 6128 additions and 8427 deletions

View File

@ -24,7 +24,7 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
- name: Extract image metadata (tags, labels)
id: meta
uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
labels: |
@ -35,7 +35,7 @@ jobs:
type=ref,event=branch
type=sha
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3
uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3
- name: Login to GitHub Container Registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
@ -44,7 +44,7 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push image
uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v5
uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v5
with:
context: ubuntu-win64-cross
push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}

View File

@ -83,7 +83,7 @@ jobs:
steps:
- name: Download source package
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
name: src.tar.gz
- name: Extract source package
@ -140,7 +140,7 @@ jobs:
arch: aarch64
steps:
- name: Download artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
name: ${{ matrix.artifact_name }}
path: ${{ matrix.artifact_name }}
@ -174,25 +174,25 @@ jobs:
build_param: --debug
artifact_name: xemu-ubuntu-x86_64-debug
artifact_filename: xemu-ubuntu-x86_64-debug.tgz
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
- arch: x86_64
configuration: Release
build_param:
artifact_name: xemu-ubuntu-x86_64-release
artifact_filename: xemu-ubuntu-x86_64-release.tgz
runs-on: ubuntu-22.04
runs-on: ubuntu-24.04
- arch: aarch64
configuration: Debug
build_param: --debug
artifact_name: xemu-ubuntu-aarch64-debug
artifact_filename: xemu-ubuntu-aarch64-debug.tgz
runs-on: ubuntu-22.04-arm
runs-on: ubuntu-24.04-arm
- arch: aarch64
configuration: Release
build_param:
artifact_name: xemu-ubuntu-aarch64-release
artifact_filename: xemu-ubuntu-aarch64-release.tgz
runs-on: ubuntu-22.04-arm
runs-on: ubuntu-24.04-arm
steps:
- name: Initialize compiler cache
id: cache
@ -202,7 +202,7 @@ jobs:
key: cache-${{ runner.os }}-${{ matrix.arch }}-${{ matrix.configuration }}-${{ github.sha }}
restore-keys: cache-${{ runner.os }}-${{ matrix.arch }}-${{ matrix.configuration }}-
- name: Download source package
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
name: src.tar.gz
- name: Extract source package
@ -305,12 +305,12 @@ jobs:
artifact_filename: xemu-macos-arm64-release.zip
steps:
- name: Download source package
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
name: src.tar.gz
- name: Extract source package
run: tar xf src.tar.gz
- uses: actions/setup-python@v5.6.0
- uses: actions/setup-python@v5.5.0
with:
python-version: '3.12'
- name: Install dependencies
@ -358,12 +358,12 @@ jobs:
configuration: ["debug", "release"]
steps:
- name: Download x86_64 build
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
name: xemu-macos-x86_64-${{ matrix.configuration }}
path: xemu-macos-x86_64-${{ matrix.configuration }}
- name: Download arm64 build
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
name: xemu-macos-arm64-${{ matrix.configuration }}
path: xemu-macos-arm64-${{ matrix.configuration }}
@ -398,7 +398,7 @@ jobs:
needs: [Ubuntu, macOSUniversal, Windows, WindowsPdb]
steps:
- name: Download artifacts
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
path: dist
- name: Extract source package
@ -420,7 +420,7 @@ jobs:
run: |
cp dist/xemu-win-x86_64-release-pdb/xemu-win-x86_64-release.zip dist/xemu-win-x86_64-release-pdb/xemu-win-release.zip
- name: Publish release
uses: softprops/action-gh-release@72f2c25fcb47643c292f7107632f7a47c1df5cd8 # v2.3.2
uses: softprops/action-gh-release@c95fe1489396fe8a9eb87c0abf8aa5b2ef267fda # v2.2.1
with:
tag_name: v${{ env.XEMU_VERSION }}
name: v${{ env.XEMU_VERSION }}
@ -462,7 +462,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download source package
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4
with:
name: src.tar.gz
- name: Extract source package
@ -471,7 +471,7 @@ jobs:
tar -C src -xf src.tar.gz
# Ensure subprojects are uploaded
find src/subprojects -name "*.gitignore" -exec rm {} \;
rm src/subprojects/.gitignore
- name: Integrate Debian packaging
run: |
pushd src

View File

@ -1,74 +0,0 @@
# Maintenance workflow: runs scripts/bump-subproject-wraps.py over the listed
# Meson wrap files and opens one pull request per entry the script reports.
name: Bump Meson subprojects
on:
# Allow manual runs from the Actions UI in addition to the schedule.
workflow_dispatch:
schedule:
# Minute 0, hour 6, every Monday.
- cron: '0 6 * * 1'
# Write access is required to push the bump branches and to open PRs.
permissions:
contents: write
pull-requests: write
jobs:
bump_wraps:
name: "Bump Meson subprojects"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@e92bafb6253dcd438e0484186d7669ea7a8ca1cc # v6
with:
enable-cache: false
# Runs the bump script and stores its JSON output in updated.json, which the
# next step consumes.
- name: Check for updates
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
uv run -s scripts/bump-subproject-wraps.py -m \
subprojects/curl.wrap \
subprojects/genconfig.wrap \
subprojects/glslang.wrap \
subprojects/imgui.wrap \
subprojects/implot.wrap \
subprojects/json.wrap \
subprojects/nv2a_vsh_cpu.wrap \
subprojects/SPIRV-Reflect.wrap \
subprojects/tomlplusplus.wrap \
subprojects/volk.wrap \
subprojects/VulkanMemoryAllocator.wrap \
subprojects/xxhash.wrap \
> updated.json
# For each entry of updated.json: create a branch from origin/master, commit
# the entry's wrap file, push the branch and open a PR against master.
# Uses the robot token rather than GITHUB_TOKEN for the gh CLI.
- name: Create PRs for updates
env:
GH_TOKEN: ${{ secrets.XEMU_ROBOT_TOKEN }}
run: |
set -euo pipefail
git config user.name "xemu-robot"
git config user.email "robot@xemu.app"
jq -c '.[]' updated.json | while read -r item; do
path=$(echo "$item" | jq -r '.path')
file_basename=$(basename "$path")
name="${file_basename%%.*}"
owner=$(echo "$item" | jq -r '.owner')
repo=$(echo "$item" | jq -r '.repo')
old_rev=$(echo "$item" | jq -r '.old_rev')
new_rev=$(echo "$item" | jq -r '.new_rev')
new_tag=$(echo "$item" | jq -r '.new_tag')
echo "➤ Processing $name"
branch="sync/bump-${name//\//-}-${GITHUB_RUN_ID}"
git switch --quiet -c "$branch" origin/master
git add "$path"
git commit -m "meson: Bump ${name} to ${new_tag}"
git push -u origin "$branch"
gh pr create \
--title "meson: Bump ${name} to ${new_tag}" \
--body "Automatic bump of \`${name}\` to [${new_tag}](https://github.com/${owner}/${repo}/compare/${old_rev}..${new_rev})." \
--base master
done

View File

@ -66,14 +66,8 @@ package_macos() {
cp Info.plist dist/xemu.app/Contents/
if [[ -e "${project_source_dir}/XEMU_VERSION" ]]; then
xemu_version="$(cat ${project_source_dir}/XEMU_VERSION | cut -f1 -d-)"
else
xemu_version="0.0.0"
fi
plutil -replace CFBundleShortVersionString -string "${xemu_version}" dist/xemu.app/Contents/Info.plist
plutil -replace CFBundleVersion -string "${xemu_version}" dist/xemu.app/Contents/Info.plist
plutil -replace CFBundleShortVersionString -string $(cat ${project_source_dir}/XEMU_VERSION | cut -f1 -d-) dist/xemu.app/Contents/Info.plist
plutil -replace CFBundleVersion -string $(cat ${project_source_dir}/XEMU_VERSION | cut -f1 -d-) dist/xemu.app/Contents/Info.plist
codesign --force --deep --preserve-metadata=entitlements,requirements,flags,runtime --sign - "${exe_path}"
python3 ./scripts/gen-license.py --version-file=macos-libs/$target_arch/INSTALLED > dist/LICENSE.txt

View File

@ -54,9 +54,6 @@ input:
auto_bind:
type: bool
default: true
allow_vibration:
type: bool
default: true
background_input_capture: bool
keyboard_controller_scancode_map:
# Scancode reference : https://github.com/libsdl-org/SDL/blob/main/include/SDL_scancode.h
@ -212,19 +209,9 @@ display:
advanced_tree_state:
type: bool
default: false
setup_nvidia_profile:
type: bool
default: true
audio:
vp:
num_workers:
type: integer
default: 0 # 0 = auto
use_dsp: bool
hrtf:
type: bool
default: true
volume_limit:
type: number
default: 1

2
debian/control vendored
View File

@ -6,8 +6,6 @@ Build-Depends: debhelper (>= 11),
cmake,
git,
python3:any,
python3-pip,
python3-tomli,
python3-yaml,
python3-venv,
ninja-build,

File diff suppressed because it is too large Load Diff

View File

@ -1,564 +0,0 @@
/*
* QEMU MCPX Audio Processing Unit implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2018-2019 Jannik Vogel
* Copyright (c) 2019-2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "apu_int.h"
// Global pointer to the APU device instance; assigned in mcpx_apu_init().
MCPXAPUState *g_state; // Used via debug handlers
/*
 * Recompute the APU interrupt line from the current register state.
 *
 * If FECTL reports a trapped method, the FETINTSTS status bit is latched
 * first. The PCI interrupt is then asserted (and GINTSTS set in ISTS) when
 * the GINTSTS bit is set in IEN and at least one other status bit in ISTS is
 * also enabled in IEN; otherwise GINTSTS is cleared and the interrupt is
 * deasserted.
 */
static void update_irq(MCPXAPUState *d)
{
if (d->regs[NV_PAPU_FECTL] & NV_PAPU_FECTL_FEMETHMODE_TRAPPED) {
qatomic_or(&d->regs[NV_PAPU_ISTS], NV_PAPU_ISTS_FETINTSTS);
}
/* GINTSTS itself is masked out of ISTS so it cannot keep the line raised. */
if ((d->regs[NV_PAPU_IEN] & NV_PAPU_ISTS_GINTSTS) &&
((d->regs[NV_PAPU_ISTS] & ~NV_PAPU_ISTS_GINTSTS) &
d->regs[NV_PAPU_IEN])) {
qatomic_or(&d->regs[NV_PAPU_ISTS], NV_PAPU_ISTS_GINTSTS);
// fprintf(stderr, "mcpx irq raise ien=%08x ists=%08x\n",
// d->regs[NV_PAPU_IEN], d->regs[NV_PAPU_ISTS]);
pci_irq_assert(PCI_DEVICE(d));
} else {
qatomic_and(&d->regs[NV_PAPU_ISTS], ~NV_PAPU_ISTS_GINTSTS);
// fprintf(stderr, "mcpx irq lower ien=%08x ists=%08x\n",
// d->regs[NV_PAPU_IEN], d->regs[NV_PAPU_ISTS]);
pci_irq_deassert(PCI_DEVICE(d));
}
}
/*
 * MMIO read handler for the base APU register window.
 *
 * XGSCNT is synthesised from the virtual clock (nanoseconds / 100); any other
 * offset below 0x20000 returns the stored register value. Offsets outside
 * that range read as zero. Every access is reported to the trace point.
 */
static uint64_t mcpx_apu_read(void *opaque, hwaddr addr, unsigned int size)
{
    MCPXAPUState *d = opaque;
    uint64_t value;

    if (addr == NV_PAPU_XGSCNT) {
        value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / 100; //???
    } else if (addr < 0x20000) {
        value = qatomic_read(&d->regs[addr]);
    } else {
        value = 0;
    }

    trace_mcpx_apu_reg_read(addr, size, value);
    return value;
}
/*
 * MMIO write handler for the base APU register window.
 *
 * Every access is traced first. ISTS is write-one-to-clear and triggers an
 * interrupt re-evaluation; FECTL/SECTL are stored as-is; both cases wake any
 * thread waiting on d->cond. FEMEMDATA additionally performs the "magic
 * write" to guest memory. Other offsets below 0x20000 are simply stored;
 * anything else is ignored.
 */
static void mcpx_apu_write(void *opaque, hwaddr addr, uint64_t val,
unsigned int size)
{
MCPXAPUState *d = opaque;
trace_mcpx_apu_reg_write(addr, size, val);
switch (addr) {
case NV_PAPU_ISTS:
/* the bits of the interrupts to clear are written */
qatomic_and(&d->regs[NV_PAPU_ISTS], ~val);
update_irq(d);
qemu_cond_broadcast(&d->cond);
break;
case NV_PAPU_FECTL:
case NV_PAPU_SECTL:
/* Control changes may unblock the frame thread, so wake it. */
qatomic_set(&d->regs[addr], val);
qemu_cond_broadcast(&d->cond);
break;
case NV_PAPU_FEMEMDATA:
/* 'magic write'
* This value is expected to be written to FEMEMADDR on completion of
* something to do with notifies. Just do it now :/ */
stl_le_phys(&address_space_memory, d->regs[NV_PAPU_FEMEMADDR], val);
// fprintf(stderr, "MAGIC WRITE\n");
qatomic_set(&d->regs[addr], val);
break;
default:
/* regs[] has 0x20000 entries and is indexed by the raw offset. */
if (addr < 0x20000) {
qatomic_set(&d->regs[addr], val);
}
break;
}
}
/* Access callbacks for the top-level "mcpx-apu-mmio" region (see realize). */
static const MemoryRegionOps mcpx_apu_mmio_ops = {
.read = mcpx_apu_read,
.write = mcpx_apu_write,
};
/*
 * Process one APU frame.
 *
 * Called from the frame thread with d->lock held (the lock is handed to
 * qemu_cond_wait below). If the monitor FIFO cannot take a full frame buffer,
 * the function waits on d->cond, accounts the time slept, and returns without
 * producing audio. Otherwise it runs the VP and DSP stages into a fresh set
 * of mixbins and, on every 8th call, pushes the accumulated monitor frame
 * buffer into the FIFO and clears it.
 */
static void se_frame(MCPXAPUState *d)
{
mcpx_apu_update_dsp_preference(d);
mcpx_debug_begin_frame();
g_dbg.gp_realtime = d->gp.realtime;
g_dbg.ep_realtime = d->ep.realtime;
qemu_spin_lock(&d->monitor.fifo_lock);
int num_bytes_free = fifo8_num_free(&d->monitor.fifo);
qemu_spin_unlock(&d->monitor.fifo_lock);
/* A rudimentary calculation to determine approximately how taxed the APU
* thread is, by measuring how much time we spend waiting for FIFO to drain
* versus working on building frames.
* =1: thread is not sleeping and likely falling behind realtime
* <1: thread is able to complete work on time
*/
if (num_bytes_free < sizeof(d->monitor.frame_buf)) {
/* No room for a frame: sleep until woken; sleep_acc is in microseconds. */
int64_t sleep_start = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
qemu_cond_wait(&d->cond, &d->lock);
int64_t sleep_end = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
d->sleep_acc += (sleep_end - sleep_start);
return;
}
/* Publish frame count and utilization roughly once per second. */
int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
if (now - d->frame_count_time >= 1000) {
g_dbg.frames_processed = d->frame_count;
/* Elapsed ms is scaled by 1000 to match sleep_acc's microseconds. */
float t = 1.0f - ((double)d->sleep_acc /
(double)((now - d->frame_count_time) * 1000));
g_dbg.utilization = t;
d->frame_count_time = now;
d->frame_count = 0;
d->sleep_acc = 0;
}
d->frame_count++;
/* Buffer for all mixbins for this frame */
float mixbins[NUM_MIXBINS][NUM_SAMPLES_PER_FRAME] = { 0 };
mcpx_apu_vp_frame(d, mixbins);
mcpx_apu_dsp_frame(d, mixbins);
/* Flush the monitor buffer on every 8th frame. */
if ((d->ep_frame_div + 1) % 8 == 0) {
#if 0
FILE *fd = fopen("ep.pcm", "a+");
assert(fd != NULL);
fwrite(d->apu_fifo_output, sizeof(d->apu_fifo_output), 1, fd);
fclose(fd);
#endif
/* Attenuate only when the configured limit is in [0, 1). */
if (0 <= g_config.audio.volume_limit && g_config.audio.volume_limit < 1) {
float f = pow(g_config.audio.volume_limit, M_E);
for (int i = 0; i < 256; i++) {
d->monitor.frame_buf[i][0] *= f;
d->monitor.frame_buf[i][1] *= f;
}
}
qemu_spin_lock(&d->monitor.fifo_lock);
num_bytes_free = fifo8_num_free(&d->monitor.fifo);
assert(num_bytes_free >= sizeof(d->monitor.frame_buf));
fifo8_push_all(&d->monitor.fifo, (uint8_t *)d->monitor.frame_buf,
sizeof(d->monitor.frame_buf));
qemu_spin_unlock(&d->monitor.fifo_lock);
memset(d->monitor.frame_buf, 0, sizeof(d->monitor.frame_buf));
}
d->ep_frame_div++;
mcpx_debug_end_frame();
}
/* Note: only supports millisecond resolution on Windows */
static void sleep_ns(int64_t ns)
{
#ifndef _WIN32
struct timespec sleep_delay, rem_delay;
sleep_delay.tv_sec = ns / 1000000000LL;
sleep_delay.tv_nsec = ns % 1000000000LL;
nanosleep(&sleep_delay, &rem_delay);
#else
Sleep(ns / SCALE_MS);
#endif
}
/*
 * Audio callback registered with SDL in monitor_init(): fill `stream` with
 * `free_b` bytes taken from the monitor FIFO.
 *
 * While the VM is not running the buffer is filled with zeros instead.
 * Otherwise the callback polls (1 ms sleeps, waking waiters on s->cond each
 * time) until the FIFO holds at least `free_b` bytes, then pops them.
 */
static void monitor_sink_cb(void *opaque, uint8_t *stream, int free_b)
{
MCPXAPUState *s = MCPX_APU_DEVICE(opaque);
if (!runstate_is_running()) {
memset(stream, 0, free_b);
return;
}
int avail = 0;
while (avail < free_b) {
qemu_spin_lock(&s->monitor.fifo_lock);
avail = fifo8_num_used(&s->monitor.fifo);
qemu_spin_unlock(&s->monitor.fifo_lock);
if (avail < free_b) {
/* Not enough data yet: back off briefly and wake waiters on cond. */
sleep_ns(1000000);
qemu_cond_broadcast(&s->cond);
}
/* Bail out with silence if the VM stopped while we were waiting. */
if (!runstate_is_running()) {
memset(stream, 0, free_b);
return;
}
}
int to_copy = MIN(free_b, avail);
/* fifo8_pop_buf may return fewer bytes than requested, hence the loop. */
while (to_copy > 0) {
uint32_t chunk_len = 0;
qemu_spin_lock(&s->monitor.fifo_lock);
chunk_len = fifo8_pop_buf(&s->monitor.fifo, stream, to_copy);
assert(chunk_len <= to_copy);
qemu_spin_unlock(&s->monitor.fifo_lock);
stream += chunk_len;
to_copy -= chunk_len;
}
/* Space was freed in the FIFO: wake waiters on cond. */
qemu_cond_broadcast(&s->cond);
}
/*
 * Set up the monitor output path: create the FIFO (and its spinlock) that
 * se_frame() fills, bring up SDL audio, open the default playback device
 * with monitor_sink_cb() as its callback, and start playback.
 *
 * Exits the process if SDL audio cannot be initialised or opened.
 */
static void monitor_init(MCPXAPUState *d)
{
    SDL_AudioDeviceID device_id;
    struct SDL_AudioSpec desired = { 0 };

    qemu_spin_init(&d->monitor.fifo_lock);
    fifo8_create(&d->monitor.fifo, 3 * (256 * 2 * 2));

    if (SDL_Init(SDL_INIT_AUDIO) < 0) {
        fprintf(stderr, "Failed to initialize SDL audio subsystem: %s\n", SDL_GetError());
        exit(1);
    }

    /* 48 kHz, signed 16-bit little-endian, stereo. */
    desired.freq = 48000;
    desired.format = AUDIO_S16LSB;
    desired.channels = 2;
    desired.samples = 512;
    desired.callback = monitor_sink_cb;
    desired.userdata = d;

    device_id = SDL_OpenAudioDevice(NULL, 0, &desired, NULL, 0);
    if (device_id == 0) {
        fprintf(stderr, "SDL_OpenAudioDevice failed: %s\n", SDL_GetError());
        assert(!"SDL_OpenAudioDevice failed");
        exit(1);
    }

    /* Unpause the device so the callback starts being invoked. */
    SDL_PauseAudioDevice(device_id, 0);
}
/*
 * PCI realize hook: set the interrupt pin and build the MMIO layout.
 *
 * A 0x80000-byte container region handles the base registers; three
 * 0x10000-byte subregions are mapped into it for the VP (0x20000), GP
 * (0x30000) and EP (0x50000) units. The container is exposed as BAR 0.
 */
static void mcpx_apu_realize(PCIDevice *dev, Error **errp)
{
MCPXAPUState *d = MCPX_APU_DEVICE(dev);
dev->config[PCI_INTERRUPT_PIN] = 0x01;
memory_region_init_io(&d->mmio, OBJECT(dev), &mcpx_apu_mmio_ops, d,
"mcpx-apu-mmio", 0x80000);
memory_region_init_io(&d->vp.mmio, OBJECT(dev), &vp_ops, d,
"mcpx-apu-vp", 0x10000);
memory_region_add_subregion(&d->mmio, 0x20000, &d->vp.mmio);
memory_region_init_io(&d->gp.mmio, OBJECT(dev), &gp_ops, d,
"mcpx-apu-gp", 0x10000);
memory_region_add_subregion(&d->mmio, 0x30000, &d->gp.mmio);
memory_region_init_io(&d->ep.mmio, OBJECT(dev), &ep_ops, d,
"mcpx-apu-ep", 0x10000);
memory_region_add_subregion(&d->mmio, 0x50000, &d->ep.mmio);
pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
}
/*
 * PCI exit hook: ask the frame thread to stop, wake it in case it is waiting
 * on d->cond, join it, and then finalize the voice processor.
 */
static void mcpx_apu_exitfn(PCIDevice *dev)
{
MCPXAPUState *d = MCPX_APU_DEVICE(dev);
d->exiting = true;
qemu_cond_broadcast(&d->cond);
qemu_thread_join(&d->apu_thread);
mcpx_apu_vp_finalize(d);
}
/*
 * Reset device state under d->lock: clear the base register file, reset the
 * voice processor, clear the GP/EP DSP opcode caches and drop any pending
 * IRQ-update request, then signal d->cond.
 */
static void mcpx_apu_reset(MCPXAPUState *d)
{
qemu_mutex_lock(&d->lock); // FIXME: Can fail if thread is pegged, add flag
memset(d->regs, 0, sizeof(d->regs));
mcpx_apu_vp_reset(d);
// FIXME: Reset DSP state
memset(d->gp.dsp->core.pram_opcache, 0,
sizeof(d->gp.dsp->core.pram_opcache));
memset(d->ep.dsp->core.pram_opcache, 0,
sizeof(d->ep.dsp->core.pram_opcache));
d->set_irq = false;
qemu_cond_signal(&d->cond);
qemu_mutex_unlock(&d->lock);
}
// Note: This is handled as a VM state change and not as a `pre_save` callback
// because we want to halt the FIFO before any VM state is saved/restored to
// avoid corruption.
//
// On RUN_STATE_SAVE_VM this takes d->lock and returns with it still held;
// mcpx_apu_post_save() releases it. Other state changes are ignored.
static void mcpx_apu_vm_state_change(void *opaque, bool running, RunState state)
{
MCPXAPUState *d = opaque;
if (state == RUN_STATE_SAVE_VM) {
qemu_mutex_lock(&d->lock);
}
}
/*
 * VMState post_save hook: signal d->cond and release d->lock, which was
 * taken by mcpx_apu_vm_state_change() on RUN_STATE_SAVE_VM. Always returns 0.
 */
static int mcpx_apu_post_save(void *opaque)
{
MCPXAPUState *d = opaque;
qemu_cond_signal(&d->cond);
qemu_mutex_unlock(&d->lock);
return 0;
}
/*
 * VMState pre_load hook: reset the device, then take d->lock and keep it
 * held; mcpx_apu_post_load() releases it. Always returns 0.
 */
static int mcpx_apu_pre_load(void *opaque)
{
MCPXAPUState *d = opaque;
mcpx_apu_reset(d);
qemu_mutex_lock(&d->lock);
return 0;
}
/*
 * VMState post_load hook: signal d->cond and release the d->lock taken in
 * mcpx_apu_pre_load(). `version_id` is unused. Always returns 0.
 */
static int mcpx_apu_post_load(void *opaque, int version_id)
{
MCPXAPUState *d = opaque;
qemu_cond_signal(&d->cond);
qemu_mutex_unlock(&d->lock);
return 0;
}
/* Resettable "hold" phase handler: forwards to mcpx_apu_reset(); `type` is unused. */
static void mcpx_apu_reset_hold(Object *obj, ResetType type)
{
MCPXAPUState *d = MCPX_APU_DEVICE(obj);
mcpx_apu_reset(d);
}
/* Migration description for DSPDMAState; embedded by vmstate_vp_dsp_state. */
const VMStateDescription vmstate_vp_dsp_dma_state = {
.name = "mcpx-apu/dsp-state/dma",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32(configuration, DSPDMAState),
VMSTATE_UINT32(control, DSPDMAState),
VMSTATE_UINT32(start_block, DSPDMAState),
VMSTATE_UINT32(next_block, DSPDMAState),
VMSTATE_BOOL(error, DSPDMAState),
VMSTATE_BOOL(eol, DSPDMAState),
VMSTATE_END_OF_LIST()
}
};
/*
 * Migration description for dsp_core_t; embedded by vmstate_vp_dsp_state.
 * The field order below defines the saved stream layout, so entries must not
 * be reordered or removed without a version bump.
 */
const VMStateDescription vmstate_vp_dsp_core_state = {
.name = "mcpx-apu/dsp-state/core",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
// FIXME: Remove unnecessary fields
VMSTATE_UINT16(instr_cycle, dsp_core_t),
VMSTATE_UINT32(pc, dsp_core_t),
VMSTATE_UINT32_ARRAY(registers, dsp_core_t, DSP_REG_MAX),
VMSTATE_UINT32_2DARRAY(stack, dsp_core_t, 2, 16),
VMSTATE_UINT32_ARRAY(xram, dsp_core_t, DSP_XRAM_SIZE),
VMSTATE_UINT32_ARRAY(yram, dsp_core_t, DSP_YRAM_SIZE),
VMSTATE_UINT32_ARRAY(pram, dsp_core_t, DSP_PRAM_SIZE),
VMSTATE_UINT32_ARRAY(mixbuffer, dsp_core_t, DSP_MIXBUFFER_SIZE),
VMSTATE_UINT32_ARRAY(periph, dsp_core_t, DSP_PERIPH_SIZE),
VMSTATE_UINT32(loop_rep, dsp_core_t),
VMSTATE_UINT32(pc_on_rep, dsp_core_t),
VMSTATE_UINT16(interrupt_state, dsp_core_t),
VMSTATE_UINT16(interrupt_instr_fetch, dsp_core_t),
VMSTATE_UINT16(interrupt_save_pc, dsp_core_t),
VMSTATE_UINT16(interrupt_counter, dsp_core_t),
VMSTATE_UINT16(interrupt_ipl_to_raise, dsp_core_t),
VMSTATE_UINT16(interrupt_pipeline_count, dsp_core_t),
VMSTATE_INT16_ARRAY(interrupt_ipl, dsp_core_t, 12),
VMSTATE_UINT16_ARRAY(interrupt_is_pending, dsp_core_t, 12),
VMSTATE_UINT32(num_inst, dsp_core_t),
VMSTATE_UINT32(cur_inst_len, dsp_core_t),
VMSTATE_UINT32(cur_inst, dsp_core_t),
// One placeholder byte in the stream that is not mapped to any field.
VMSTATE_UNUSED(1),
VMSTATE_UINT32(disasm_memory_ptr, dsp_core_t),
VMSTATE_BOOL(exception_debugging, dsp_core_t),
VMSTATE_UINT32(disasm_prev_inst_pc, dsp_core_t),
VMSTATE_BOOL(disasm_is_looping, dsp_core_t),
VMSTATE_UINT32(disasm_cur_inst, dsp_core_t),
VMSTATE_UINT16(disasm_cur_inst_len, dsp_core_t),
VMSTATE_UINT32_ARRAY(disasm_registers_save, dsp_core_t, 64),
// #ifdef DSP_DISASM_REG_PC
// VMSTATE_UINT32(pc_save, dsp_core_t),
// #endif
VMSTATE_END_OF_LIST()
}
};
/*
 * Migration description for DSPState: the embedded core and DMA state plus
 * two scalar fields. Used for both gp.dsp and ep.dsp in vmstate_mcpx_apu.
 */
const VMStateDescription vmstate_vp_dsp_state = {
.name = "mcpx-apu/dsp-state",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_STRUCT(core, DSPState, 1, vmstate_vp_dsp_core_state, dsp_core_t),
VMSTATE_STRUCT(dma, DSPState, 1, vmstate_vp_dsp_dma_state, DSPDMAState),
VMSTATE_INT32(save_cycles, DSPState),
VMSTATE_UINT32(interrupts, DSPState),
VMSTATE_END_OF_LIST()
}
};
/* Migration description for MCPXAPUVPSSLData; one entry is saved per element of vp.ssl. */
const VMStateDescription vmstate_vp_ssl_data = {
.name = "mcpx_apu_voice_data",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT32_ARRAY(base, MCPXAPUVPSSLData, MCPX_HW_SSLS_PER_VOICE),
VMSTATE_UINT8_ARRAY(count, MCPXAPUVPSSLData, MCPX_HW_SSLS_PER_VOICE),
VMSTATE_INT32(ssl_index, MCPXAPUVPSSLData),
VMSTATE_INT32(ssl_seg, MCPXAPUVPSSLData),
VMSTATE_END_OF_LIST()
}
};
/*
 * Top-level migration description for the APU device.
 *
 * The post_save / pre_load / post_load hooks release or take d->lock around
 * save and load (see the hook functions and mcpx_apu_vm_state_change()).
 */
static const VMStateDescription vmstate_mcpx_apu = {
.name = "mcpx-apu",
.version_id = 1,
.minimum_version_id = 1,
.post_save = mcpx_apu_post_save,
.pre_load = mcpx_apu_pre_load,
.post_load = mcpx_apu_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, MCPXAPUState),
// GP and EP each save their DSP state followed by their register file.
VMSTATE_STRUCT_POINTER(gp.dsp, MCPXAPUState, vmstate_vp_dsp_state,
DSPState),
VMSTATE_UINT32_ARRAY(gp.regs, MCPXAPUState, 0x10000),
VMSTATE_STRUCT_POINTER(ep.dsp, MCPXAPUState, vmstate_vp_dsp_state,
DSPState),
VMSTATE_UINT32_ARRAY(ep.regs, MCPXAPUState, 0x10000),
// Base register file (same size as MCPXAPUState.regs).
VMSTATE_UINT32_ARRAY(regs, MCPXAPUState, 0x20000),
VMSTATE_UINT32(vp.inbuf_sge_handle, MCPXAPUState),
VMSTATE_UINT32(vp.outbuf_sge_handle, MCPXAPUState),
VMSTATE_STRUCT_ARRAY(vp.ssl, MCPXAPUState, MCPX_HW_MAX_VOICES, 1,
vmstate_vp_ssl_data, MCPXAPUVPSSLData),
VMSTATE_INT32(vp.ssl_base_page, MCPXAPUState),
VMSTATE_UINT8_ARRAY(vp.hrtf_submix, MCPXAPUState, 4),
VMSTATE_UINT8(vp.hrtf_headroom, MCPXAPUState),
VMSTATE_UINT8_ARRAY(vp.submix_headroom, MCPXAPUState, NUM_MIXBINS),
VMSTATE_UINT64_ARRAY(vp.voice_locked, MCPXAPUState, 4),
VMSTATE_END_OF_LIST()
},
};
/*
 * QOM class initialiser: fills in the PCI identity (vendor, device, revision,
 * class), the realize/exit hooks, the reset "hold" handler, the description
 * string and the migration description. `data` is unused.
 */
static void mcpx_apu_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
ResettableClass *rc = RESETTABLE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->vendor_id = PCI_VENDOR_ID_NVIDIA;
k->device_id = PCI_DEVICE_ID_NVIDIA_MCPX_APU;
k->revision = 177;
k->class_id = PCI_CLASS_MULTIMEDIA_AUDIO;
k->realize = mcpx_apu_realize;
k->exit = mcpx_apu_exitfn;
rc->phases.hold = mcpx_apu_reset_hold;
dc->desc = "MCPX Audio Processing Unit";
dc->vmsd = &vmstate_mcpx_apu;
}
/*
 * QOM type record for "mcpx-apu": a PCI device whose instance is an
 * MCPXAPUState and which implements the conventional-PCI interface.
 */
static const TypeInfo mcpx_apu_info = {
.name = "mcpx-apu",
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(MCPXAPUState),
.class_init = mcpx_apu_class_init,
.interfaces =
(InterfaceInfo[]){
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{},
},
};
/* Registers the "mcpx-apu" type; hooked in through type_init() below. */
static void mcpx_apu_register(void)
{
type_register_static(&mcpx_apu_info);
}
type_init(mcpx_apu_register);
/*
 * Body of the APU worker thread created in mcpx_apu_init().
 *
 * Runs with d->lock held until d->exiting is set. Each iteration:
 *  1. If the counter mode is OFF or the front end is trapped/halted, request
 *     an interrupt update.
 *  2. If an update is requested, temporarily drop d->lock, take the BQL and
 *     call update_irq().
 *  3. Re-read the control registers; if processing is still stopped, wait on
 *     d->cond, otherwise produce one frame with se_frame().
 *
 * Always returns NULL.
 */
static void *mcpx_apu_frame_thread(void *arg)
{
MCPXAPUState *d = MCPX_APU_DEVICE(arg);
qemu_mutex_lock(&d->lock);
while (!qatomic_read(&d->exiting)) {
int xcntmode = GET_MASK(qatomic_read(&d->regs[NV_PAPU_SECTL]),
NV_PAPU_SECTL_XCNTMODE);
uint32_t fectl = qatomic_read(&d->regs[NV_PAPU_FECTL]);
if (xcntmode == NV_PAPU_SECTL_XCNTMODE_OFF ||
(fectl & NV_PAPU_FECTL_FEMETHMODE_TRAPPED) ||
(fectl & NV_PAPU_FECTL_FEMETHMODE_HALTED)) {
d->set_irq = true;
}
if (d->set_irq) {
/* d->lock is released before the BQL is taken and re-acquired after. */
qemu_mutex_unlock(&d->lock);
bql_lock();
update_irq(d);
bql_unlock();
qemu_mutex_lock(&d->lock);
d->set_irq = false;
}
/* Registers may have changed while the lock was dropped; sample again. */
xcntmode = GET_MASK(qatomic_read(&d->regs[NV_PAPU_SECTL]),
NV_PAPU_SECTL_XCNTMODE);
fectl = qatomic_read(&d->regs[NV_PAPU_FECTL]);
if (xcntmode == NV_PAPU_SECTL_XCNTMODE_OFF ||
(fectl & NV_PAPU_FECTL_FEMETHMODE_TRAPPED) ||
(fectl & NV_PAPU_FECTL_FEMETHMODE_HALTED)) {
qemu_cond_wait(&d->cond, &d->lock);
continue;
}
se_frame((void *)d);
}
qemu_mutex_unlock(&d->lock);
return NULL;
}
/*
 * Create the "mcpx-apu" PCI device on `bus` at `devfn` and start it.
 *
 * Records the device in g_state, attaches guest RAM, initialises the DSP and
 * VP units, sets up the lock/condition pair and the VM state-change handler,
 * starts the frame thread, and finally opens the audio output.
 */
void mcpx_apu_init(PCIBus *bus, int devfn, MemoryRegion *ram)
{
PCIDevice *dev = pci_create_simple(bus, devfn, "mcpx-apu");
MCPXAPUState *d = MCPX_APU_DEVICE(dev);
g_state = d;
d->ram = ram;
d->ram_ptr = memory_region_get_ram_ptr(d->ram);
mcpx_apu_dsp_init(d);
d->set_irq = false;
d->exiting = false;
/* Lock and condition must exist before the frame thread is started. */
qemu_mutex_init(&d->lock);
qemu_cond_init(&d->cond);
qemu_add_vm_change_state_handler(mcpx_apu_vm_state_change, d);
mcpx_apu_vp_init(d);
qemu_thread_create(&d->apu_thread, "mcpx.apu_thread", mcpx_apu_frame_thread,
d, QEMU_THREAD_JOINABLE);
monitor_init(d);
}

View File

@ -1,117 +0,0 @@
/*
* QEMU MCPX Audio Processing Unit implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2018-2019 Jannik Vogel
* Copyright (c) 2019-2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_MCPX_APU_INT_H
#define HW_XBOX_MCPX_APU_INT_H
#include "qemu/osdep.h"
#include <math.h>
#include <SDL.h>
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_device.h"
#include "cpu.h"
#include "migration/vmstate.h"
#include "qemu/main-loop.h"
#include "qemu/thread.h"
#include "sysemu/runstate.h"
#include "audio/audio.h"
#include "qemu/fifo8.h"
#include "ui/xemu-settings.h"
#include "trace.h"
#include "apu.h"
#include "apu_regs.h"
#include "apu_debug.h"
#include "fpconv.h"
#include "vp/vp.h"
#include "dsp/gp_ep.h"
/* Extract the field selected by `mask` from `v`, shifted down to bit 0. */
#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask))
/* Replace the field selected by `mask` in lvalue `v` with `val`. */
#define SET_MASK(v, mask, val) \
do { \
(v) &= ~(mask); \
(v) |= ((val) << ctz32(mask)) & (mask); \
} while (0)
/*
 * Expands to four case labels: v, v+step, v+2*step and v+3*step. The last
 * label has no trailing colon, so the use site supplies it.
 */
#define CASE_4(v, step) \
case (v): \
case (v)+(step): \
case (v)+(step)*2: \
case (v)+(step)*3
/* Uncomment to route DPRINTF() to stderr; otherwise it expands to nothing. */
// #define DEBUG_MCPX
#ifdef DEBUG_MCPX
#define DPRINTF(fmt, ...) \
do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
do { } while (0)
#endif
/* Checked cast from a QOM object pointer to MCPXAPUState. */
#define MCPX_APU_DEVICE(obj) \
OBJECT_CHECK(MCPXAPUState, (obj), "mcpx-apu")
/* Device state for the "mcpx-apu" PCI device. */
typedef struct MCPXAPUState {
/*< private >*/
PCIDevice parent_obj;
/*< public >*/
// Set on device exit to make the frame thread leave its loop.
bool exiting;
// Request for the frame thread to re-evaluate the interrupt line.
bool set_irq;
QemuThread apu_thread;
// Lock/condition pair used by the frame thread, reset and save/load paths.
QemuMutex lock;
QemuCond cond;
// Guest RAM region and its host pointer.
MemoryRegion *ram;
uint8_t *ram_ptr;
// Top-level MMIO container; the VP/GP/EP regions are mapped inside it.
MemoryRegion mmio;
MCPXAPUVPState vp;
MCPXAPUGPState gp;
MCPXAPUEPState ep;
// Base register file, indexed by raw MMIO offset (offsets below 0x20000).
uint32_t regs[0x20000];
// Frame counter; the monitor buffer is flushed on every 8th frame.
int ep_frame_div;
// Microseconds spent waiting for FIFO space since the last stats update.
int sleep_acc;
// Frames processed since the last stats update.
int frame_count;
// Realtime clock value (ms) of the last stats update.
int64_t frame_count_time;
struct {
McpxApuDebugMonitorPoint point;
int16_t frame_buf[256][2]; // 1 EP frame (0x400 bytes), 8 buffered
// Spinlock guarding `fifo`.
QemuSpin fifo_lock;
Fifo8 fifo;
} monitor;
} MCPXAPUState;
extern MCPXAPUState *g_state; // Used via debug handlers
// Working debug record and the cached copy taken at the end of each frame.
extern struct McpxApuDebug g_dbg, g_dbg_cache;
// Voice selected for isolation, or -1 when none is selected.
extern int g_dbg_voice_monitor;
// Mute bitmap: one bit per voice, 64 voices per word.
extern uint64_t g_dbg_muted_voices[4];
// Frame bracket hooks called at the start and end of se_frame().
void mcpx_debug_begin_frame(void);
void mcpx_debug_end_frame(void);
#endif

View File

@ -1,86 +0,0 @@
/*
* QEMU MCPX Audio Processing Unit implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2018-2019 Jannik Vogel
* Copyright (c) 2019-2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "apu_int.h"
// g_dbg is filled in while a frame is processed; g_dbg_cache is the copy
// taken by mcpx_debug_end_frame() and returned to readers.
struct McpxApuDebug g_dbg, g_dbg_cache;
// Voice selected via mcpx_apu_debug_isolate_voice(); -1 means none.
int g_dbg_voice_monitor = -1;
// Mute bitmap: bit (v % 64) of word (v / 64) is set when voice v is muted.
uint64_t g_dbg_muted_voices[4];
/* Returns a pointer to the cached debug record (the copy made at end of frame). */
const struct McpxApuDebug *mcpx_apu_get_debug_info(void)
{
return &g_dbg_cache;
}
/*
 * Start-of-frame hook: mark every voice slot in the working debug record as
 * inactive and reset its multipass destination to 0xFFFF.
 */
void mcpx_debug_begin_frame(void)
{
    int voice = 0;

    while (voice < MCPX_HW_MAX_VOICES) {
        g_dbg.vp.v[voice].active = false;
        g_dbg.vp.v[voice].multipass_dst_voice = 0xFFFF;
        voice++;
    }
}
/* End-of-frame hook: snapshot the working debug record into the cache. */
void mcpx_debug_end_frame(void)
{
g_dbg_cache = g_dbg;
}
/* Sets the GP unit's `realtime` flag on the global device instance. */
void mcpx_apu_debug_set_gp_realtime_enabled(bool run)
{
g_state->gp.realtime = run;
}
/* Sets the EP unit's `realtime` flag on the global device instance. */
void mcpx_apu_debug_set_ep_realtime_enabled(bool run)
{
g_state->ep.realtime = run;
}
/* Returns the currently selected monitor point of the global device instance. */
McpxApuDebugMonitorPoint mcpx_apu_debug_get_monitor(void)
{
return g_state->monitor.point;
}
/* Selects the monitor point on the global device instance. */
void mcpx_apu_debug_set_monitor(McpxApuDebugMonitorPoint monitor)
{
g_state->monitor.point = monitor;
}
/* Records voice `v` as the isolated voice (stored in g_dbg_voice_monitor). */
void mcpx_apu_debug_isolate_voice(uint16_t v)
{
g_dbg_voice_monitor = v;
}
/* Clears any voice isolation by resetting g_dbg_voice_monitor to -1. */
void mcpx_apu_debug_clear_isolations(void)
{
g_dbg_voice_monitor = -1;
}
/*
 * Report whether voice `v` is marked as muted in the debug mute bitmap.
 *
 * `v` must be below MCPX_HW_MAX_VOICES (asserted). Returns true when the
 * voice's bit is set.
 */
bool mcpx_apu_debug_is_muted(uint16_t v)
{
    assert(v < MCPX_HW_MAX_VOICES);
    /*
     * The mask is built from an unsigned constant: shifting a signed 1LL by
     * 63 (v % 64 == 63) would overflow into the sign bit, which is undefined.
     */
    return (g_dbg_muted_voices[v / 64] & (1ULL << (v % 64))) != 0;
}
/*
 * Flip the mute bit of voice `v` in the debug mute bitmap.
 *
 * `v` must be below MCPX_HW_MAX_VOICES (asserted).
 */
void mcpx_apu_debug_toggle_mute(uint16_t v)
{
    assert(v < MCPX_HW_MAX_VOICES);
    /*
     * Unsigned constant on purpose: 1LL << 63 (for v % 64 == 63) shifts into
     * the sign bit of a signed type, which is undefined behaviour.
     */
    g_dbg_muted_voices[v / 64] ^= (1ULL << (v % 64));
}

View File

@ -1,526 +0,0 @@
/*
* QEMU MCPX Audio Processing Unit implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2018-2019 Jannik Vogel
* Copyright (c) 2019-2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/mcpx/apu/apu_int.h"
/* All-zero buffer of 256 stereo 16-bit samples. */
static const int16_t ep_silence[256][2] = { 0 };
/*
 * Apply the user's `audio.use_dsp` setting to the device.
 *
 * The last applied value is remembered in a function-local static, so the
 * device is only touched when the setting changes (or on the first call).
 * With the DSP enabled, the monitor taps GP/EP output and both DSPs run in
 * realtime mode; otherwise the monitor taps the VP and realtime is disabled.
 */
void mcpx_apu_update_dsp_preference(MCPXAPUState *d)
{
    static int applied_preference = -1;
    const int want_dsp = (int)g_config.audio.use_dsp;

    if (applied_preference == want_dsp) {
        return;
    }

    d->monitor.point = want_dsp ? MCPX_APU_DEBUG_MON_GP_OR_EP
                                : MCPX_APU_DEBUG_MON_VP;
    d->gp.realtime = want_dsp ? true : false;
    d->ep.realtime = want_dsp ? true : false;

    applied_preference = want_dsp;
}
/*
 * Copy `len` bytes between the host buffer `ptr` and guest RAM, translating
 * the linear offset `addr` through a scatter-gather table in guest memory.
 *
 * @d:        device state (provides the RAM region and its host pointer)
 * @sge_base: guest-physical address of the table; entries are 8 bytes and
 *            the first 32-bit little-endian word is the page address
 * @max_sge:  highest table index that may be used (asserted)
 * @ptr:      host buffer to read from or write to
 * @addr:     byte offset into the scatter-gather mapped area
 * @len:      number of bytes to transfer
 * @dir:      true copies from `ptr` into guest RAM (and marks it dirty),
 *            false copies from guest RAM into `ptr`
 *
 * The transfer is split at TARGET_PAGE_SIZE boundaries, one table entry per
 * page.
 */
static void scatter_gather_rw(MCPXAPUState *d, hwaddr sge_base,
                              unsigned int max_sge, uint8_t *ptr, uint32_t addr,
                              size_t len, bool dir)
{
    unsigned int page_entry = addr / TARGET_PAGE_SIZE;
    unsigned int offset_in_page = addr % TARGET_PAGE_SIZE;
    unsigned int bytes_to_copy = TARGET_PAGE_SIZE - offset_in_page;

    while (len > 0) {
        assert(page_entry <= max_sge);

        uint32_t prd_address = ldl_le_phys(&address_space_memory,
                                           sge_base + page_entry * 8 + 0);
        // uint32_t prd_control = ldl_le_phys(&address_space_memory,
        //                                    sge_base + page_entry * 8 + 4);

        hwaddr paddr = prd_address + offset_in_page;

        if (bytes_to_copy > len) {
            bytes_to_copy = len;
        }

        /*
         * The last byte touched is paddr + bytes_to_copy - 1, so a transfer
         * that ends exactly at the end of RAM is valid (<=, not <).
         */
        assert(paddr + bytes_to_copy <= memory_region_size(d->ram));

        if (dir) {
            memcpy(&d->ram_ptr[paddr], ptr, bytes_to_copy);
            memory_region_set_dirty(d->ram, paddr, bytes_to_copy);
        } else {
            memcpy(ptr, &d->ram_ptr[paddr], bytes_to_copy);
        }

        ptr += bytes_to_copy;
        len -= bytes_to_copy;

        /* After the first iteration, we are page aligned */
        page_entry += 1;
        bytes_to_copy = TARGET_PAGE_SIZE;
        offset_in_page = 0;
    }
}
static void gp_scratch_rw(void *opaque, uint8_t *ptr, uint32_t addr, size_t len,
bool dir)
{
MCPXAPUState *d = opaque;
// fprintf(stderr, "GP %s scratch 0x%x bytes (0x%x words) at %x (0x%x words)\n", dir ? "writing to" : "reading from", len, len/4, addr, addr/4);
scatter_gather_rw(d, d->regs[NV_PAPU_GPSADDR], d->regs[NV_PAPU_GPSMAXSGE],
ptr, addr, len, dir);
}
static void ep_scratch_rw(void *opaque, uint8_t *ptr, uint32_t addr, size_t len,
bool dir)
{
MCPXAPUState *d = opaque;
// fprintf(stderr, "EP %s scratch 0x%x bytes (0x%x words) at %x (0x%x words)\n", dir ? "writing to" : "reading from", len, len/4, addr, addr/4);
scatter_gather_rw(d, d->regs[NV_PAPU_EPSADDR], d->regs[NV_PAPU_EPSMAXSGE],
ptr, addr, len, dir);
}
/*
 * Transfer `len` bytes through a scatter-gather table while treating the
 * offset range [base, end) as a circular buffer.
 *
 * Starting at `cur`, data is moved in chunks that never cross `end`; when the
 * position reaches `end` it wraps back to `base`.
 *
 * d, sge_base, max_sge, ptr, dir: forwarded to scatter_gather_rw()
 * base: first offset of the circular region (inclusive)
 * end:  end offset of the circular region (exclusive)
 * cur:  starting offset; must satisfy base <= cur <= end (asserted)
 * len:  total number of bytes to transfer
 *
 * Returns the position following the last byte transferred (already wrapped
 * to `base` if the transfer ended exactly at `end`).
 */
static uint32_t circular_scatter_gather_rw(MCPXAPUState *d, hwaddr sge_base,
                                           unsigned int max_sge, uint8_t *ptr,
                                           uint32_t base, uint32_t end,
                                           uint32_t cur, size_t len, bool dir)
{
    while (len > 0) {
        unsigned int bytes_to_copy = end - cur;

        if (bytes_to_copy > len) {
            bytes_to_copy = len;
        }

        /* `len` is a size_t, so it must be printed with the `z` modifier */
        DPRINTF("circular scatter gather %s in range 0x%x - 0x%x at 0x%x of "
                "length 0x%x / 0x%zx bytes\n",
                dir ? "write" : "read", base, end, cur, bytes_to_copy, len);

        assert((cur >= base) && ((cur + bytes_to_copy) <= end));
        scatter_gather_rw(d, sge_base, max_sge, ptr, cur, bytes_to_copy, dir);

        ptr += bytes_to_copy;
        len -= bytes_to_copy;

        /* After the first iteration we might have to wrap */
        cur += bytes_to_copy;
        if (cur >= end) {
            assert(cur == end);
            cur = base;
        }
    }

    return cur;
}
/*
 * FIFO access callback handed to dsp_init() for the GP.
 *
 * `dir` selects the FIFO bank: true uses output FIFO `index`, false uses
 * input FIFO `index`. The FIFO's BASE/END/CUR registers (spaced 0x10 apart
 * per FIFO) describe a circular region inside the GP FIFO scatter-gather
 * space; `len` bytes are transferred at the current position and the CUR
 * register is updated with the resulting position.
 */
static void gp_fifo_rw(void *opaque, uint8_t *ptr, unsigned int index,
                       size_t len, bool dir)
{
    MCPXAPUState *d = opaque;
    const unsigned int fifo_offset = 0x10 * index;
    hwaddr base_reg;
    hwaddr end_reg;
    hwaddr cur_reg;

    if (dir) {
        assert(index < GP_OUTPUT_FIFO_COUNT);
        base_reg = NV_PAPU_GPOFBASE0 + fifo_offset;
        end_reg = NV_PAPU_GPOFEND0 + fifo_offset;
        cur_reg = NV_PAPU_GPOFCUR0 + fifo_offset;
    } else {
        assert(index < GP_INPUT_FIFO_COUNT);
        base_reg = NV_PAPU_GPIFBASE0 + fifo_offset;
        end_reg = NV_PAPU_GPIFEND0 + fifo_offset;
        cur_reg = NV_PAPU_GPIFCUR0 + fifo_offset;
    }

    /* The output-FIFO field masks are used for both banks */
    const uint32_t base = GET_MASK(d->regs[base_reg], NV_PAPU_GPOFBASE0_VALUE);
    const uint32_t end = GET_MASK(d->regs[end_reg], NV_PAPU_GPOFEND0_VALUE);
    uint32_t pos = GET_MASK(d->regs[cur_reg], NV_PAPU_GPOFCUR0_VALUE);

    /* DSP hangs if current >= end; but forces current >= base */
    assert(pos < end);
    if (pos < base) {
        pos = base;
    }

    pos = circular_scatter_gather_rw(d, d->regs[NV_PAPU_GPFADDR],
                                     d->regs[NV_PAPU_GPFMAXSGE], ptr, base,
                                     end, pos, len, dir);

    SET_MASK(d->regs[cur_reg], NV_PAPU_GPOFCUR0_VALUE, pos);
}
/*
 * Decide whether EP output samples are consumed ("sunk") by the monitor.
 *
 * Returns false only when the monitor point is AC97, in which case the
 * caller passes the samples through untouched. For the EP and GP_OR_EP
 * monitor points the samples are first copied into d->monitor.frame_buf
 * (`len` must equal that buffer's size); for every other point nothing is
 * copied. In all non-AC97 cases the function returns true.
 */
static bool ep_sink_samples(MCPXAPUState *d, uint8_t *ptr, size_t len)
{
    switch (d->monitor.point) {
    case MCPX_APU_DEBUG_MON_AC97:
        return false;

    case MCPX_APU_DEBUG_MON_EP:
    case MCPX_APU_DEBUG_MON_GP_OR_EP:
        assert(len == sizeof(d->monitor.frame_buf));
        memcpy(d->monitor.frame_buf, ptr, len);
        return true;

    default:
        return true;
    }
}
/*
 * FIFO access callback handed to dsp_init() for the EP.
 *
 * `dir` selects the FIFO bank: true uses output FIFO `index`, false uses
 * input FIFO `index`. Writes to output FIFO 0 are first offered to
 * ep_sink_samples(); when it sinks them, silence is pushed into the FIFO
 * instead of the DSP's data. The transfer itself is a circular
 * scatter-gather access described by the FIFO's BASE/END/CUR registers, and
 * CUR is updated with the resulting position.
 */
static void ep_fifo_rw(void *opaque, uint8_t *ptr, unsigned int index,
                       size_t len, bool dir)
{
    MCPXAPUState *d = opaque;
    const unsigned int fifo_offset = 0x10 * index;
    hwaddr base_reg;
    hwaddr end_reg;
    hwaddr cur_reg;

    if (dir) {
        assert(index < EP_OUTPUT_FIFO_COUNT);
        base_reg = NV_PAPU_EPOFBASE0 + fifo_offset;
        end_reg = NV_PAPU_EPOFEND0 + fifo_offset;
        cur_reg = NV_PAPU_EPOFCUR0 + fifo_offset;
    } else {
        assert(index < EP_INPUT_FIFO_COUNT);
        base_reg = NV_PAPU_EPIFBASE0 + fifo_offset;
        end_reg = NV_PAPU_EPIFEND0 + fifo_offset;
        cur_reg = NV_PAPU_EPIFCUR0 + fifo_offset;
    }

    /* The GP output-FIFO field masks are reused for the EP registers */
    const uint32_t base = GET_MASK(d->regs[base_reg], NV_PAPU_GPOFBASE0_VALUE);
    const uint32_t end = GET_MASK(d->regs[end_reg], NV_PAPU_GPOFEND0_VALUE);
    uint32_t pos = GET_MASK(d->regs[cur_reg], NV_PAPU_GPOFCUR0_VALUE);

    const bool is_main_output = dir && index == 0;
    if (is_main_output && ep_sink_samples(d, ptr, len)) {
        /* Since we are sinking, push silence out */
        assert(len <= sizeof(ep_silence));
        ptr = (uint8_t *)ep_silence;
    }

    /* DSP hangs if current >= end; but forces current >= base */
    if (pos >= end) {
        pos %= end - base;
    }
    if (pos < base) {
        pos = base;
    }

    pos = circular_scatter_gather_rw(d, d->regs[NV_PAPU_EPFADDR],
                                     d->regs[NV_PAPU_EPFMAXSGE], ptr, base,
                                     end, pos, len, dir);

    SET_MASK(d->regs[cur_reg], NV_PAPU_GPOFCUR0_VALUE, pos);
}
/*
 * React to a write of a processor reset register (shared by GP and EP).
 *
 * If either NV_PAPU_GPRST_GPRST or NV_PAPU_GPRST_GPDSPRST is clear in the
 * new value, the DSP is reset. If both are set in the new value but at least
 * one was clear in the old value, the DSP is bootstrapped. Otherwise nothing
 * happens.
 */
static void proc_rst_write(DSPState *dsp, uint32_t oldval, uint32_t val)
{
    const bool was_released = (oldval & NV_PAPU_GPRST_GPRST) &&
                              (oldval & NV_PAPU_GPRST_GPDSPRST);
    const bool now_released = (val & NV_PAPU_GPRST_GPRST) &&
                              (val & NV_PAPU_GPRST_GPDSPRST);

    if (!now_released) {
        dsp_reset(dsp);
        return;
    }

    if (!was_released) {
        dsp_bootstrap(dsp);
    }
}
/* Global Processor - programmable DSP */
/*
 * MMIO read handler for the GP register window.
 *
 * Only aligned 32-bit accesses are accepted (asserted). Offsets inside the
 * XMEM, MIXBUF, YMEM and PMEM apertures are converted to word indices and
 * read from the GP DSP's X/Y/P memories (MIXBUF maps to X memory starting at
 * GP_DSP_MIXBUF_BASE); every other offset is served from d->gp.regs, which
 * is indexed by the raw byte offset.
 */
static uint64_t gp_read(void *opaque, hwaddr addr, unsigned int size)
{
    MCPXAPUState *d = opaque;

    assert(size == 4);
    assert(addr % 4 == 0);

    uint64_t r = 0;
    switch (addr) {
    case NV_PAPU_GPXMEM ... NV_PAPU_GPXMEM + 0x1000 * 4 - 1: {
        uint32_t xaddr = (addr - NV_PAPU_GPXMEM) / 4;
        r = dsp_read_memory(d->gp.dsp, 'X', xaddr);
        break;
    }
    case NV_PAPU_GPMIXBUF ... NV_PAPU_GPMIXBUF + 0x400 * 4 - 1: {
        uint32_t xaddr = (addr - NV_PAPU_GPMIXBUF) / 4;
        r = dsp_read_memory(d->gp.dsp, 'X', GP_DSP_MIXBUF_BASE + xaddr);
        break;
    }
    case NV_PAPU_GPYMEM ... NV_PAPU_GPYMEM + 0x800 * 4 - 1: {
        uint32_t yaddr = (addr - NV_PAPU_GPYMEM) / 4;
        r = dsp_read_memory(d->gp.dsp, 'Y', yaddr);
        break;
    }
    case NV_PAPU_GPPMEM ... NV_PAPU_GPPMEM + 0x1000 * 4 - 1: {
        uint32_t paddr = (addr - NV_PAPU_GPPMEM) / 4;
        r = dsp_read_memory(d->gp.dsp, 'P', paddr);
        break;
    }
    default:
        r = d->gp.regs[addr];
        break;
    }

    /* `r` is a uint64_t: PRIx64 is correct on every host, "%lx" is not */
    DPRINTF("mcpx apu GP: read [0x%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", addr,
            r);
    return r;
}
/*
 * MMIO write handler for the GP register window. Runs with d->lock held.
 *
 * Only aligned 32-bit accesses are accepted (asserted). Offsets inside the
 * XMEM, MIXBUF, YMEM and PMEM apertures are converted to word indices and
 * written to the GP DSP's X/Y/P memories (MIXBUF maps to X memory starting
 * at GP_DSP_MIXBUF_BASE). A write to NV_PAPU_GPRST is passed through
 * proc_rst_write() together with the previous value before being stored.
 * Every other offset is stored in d->gp.regs, indexed by the raw byte
 * offset.
 */
static void gp_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
    MCPXAPUState *d = opaque;

    qemu_mutex_lock(&d->lock);

    assert(size == 4);
    assert(addr % 4 == 0);

    /* `val` is a uint64_t: PRIx64 is correct on every host, "%lx" is not */
    DPRINTF("mcpx apu GP: [0x%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", addr, val);

    switch (addr) {
    case NV_PAPU_GPXMEM ... NV_PAPU_GPXMEM + 0x1000 * 4 - 1: {
        uint32_t xaddr = (addr - NV_PAPU_GPXMEM) / 4;
        dsp_write_memory(d->gp.dsp, 'X', xaddr, val);
        break;
    }
    case NV_PAPU_GPMIXBUF ... NV_PAPU_GPMIXBUF + 0x400 * 4 - 1: {
        uint32_t xaddr = (addr - NV_PAPU_GPMIXBUF) / 4;
        dsp_write_memory(d->gp.dsp, 'X', GP_DSP_MIXBUF_BASE + xaddr, val);
        break;
    }
    case NV_PAPU_GPYMEM ... NV_PAPU_GPYMEM + 0x800 * 4 - 1: {
        uint32_t yaddr = (addr - NV_PAPU_GPYMEM) / 4;
        dsp_write_memory(d->gp.dsp, 'Y', yaddr, val);
        break;
    }
    case NV_PAPU_GPPMEM ... NV_PAPU_GPPMEM + 0x1000 * 4 - 1: {
        uint32_t paddr = (addr - NV_PAPU_GPPMEM) / 4;
        dsp_write_memory(d->gp.dsp, 'P', paddr, val);
        break;
    }
    case NV_PAPU_GPRST:
        /* Needs the old value to detect the reset -> running transition */
        proc_rst_write(d->gp.dsp, d->gp.regs[NV_PAPU_GPRST], val);
        d->gp.regs[NV_PAPU_GPRST] = val;
        break;
    default:
        d->gp.regs[addr] = val;
        break;
    }

    qemu_mutex_unlock(&d->lock);
}
/* MMIO callbacks for the GP register window: gp_read() / gp_write(). */
const MemoryRegionOps gp_ops = {
.read = gp_read,
.write = gp_write,
};
/* Encode Processor - encoding DSP */
/*
 * MMIO read handler for the EP register window.
 *
 * Only aligned 32-bit accesses are accepted (asserted). Offsets inside the
 * XMEM, YMEM and PMEM apertures are converted to word indices and read from
 * the EP DSP's X/Y/P memories; every other offset is served from
 * d->ep.regs, which is indexed by the raw byte offset.
 */
static uint64_t ep_read(void *opaque, hwaddr addr, unsigned int size)
{
    MCPXAPUState *d = opaque;

    assert(size == 4);
    assert(addr % 4 == 0);

    uint64_t r = 0;
    switch (addr) {
    case NV_PAPU_EPXMEM ... NV_PAPU_EPXMEM + 0xC00 * 4 - 1: {
        uint32_t xaddr = (addr - NV_PAPU_EPXMEM) / 4;
        r = dsp_read_memory(d->ep.dsp, 'X', xaddr);
        break;
    }
    case NV_PAPU_EPYMEM ... NV_PAPU_EPYMEM + 0x100 * 4 - 1: {
        uint32_t yaddr = (addr - NV_PAPU_EPYMEM) / 4;
        r = dsp_read_memory(d->ep.dsp, 'Y', yaddr);
        break;
    }
    case NV_PAPU_EPPMEM ... NV_PAPU_EPPMEM + 0x1000 * 4 - 1: {
        uint32_t paddr = (addr - NV_PAPU_EPPMEM) / 4;
        r = dsp_read_memory(d->ep.dsp, 'P', paddr);
        break;
    }
    default:
        r = d->ep.regs[addr];
        break;
    }

    /* `r` is a uint64_t: PRIx64 is correct on every host, "%lx" is not */
    DPRINTF("mcpx apu EP: read [0x%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", addr,
            r);
    return r;
}
/*
 * MMIO write handler for the EP register window. Runs with d->lock held.
 *
 * Only aligned 32-bit accesses are accepted (asserted). Offsets inside the
 * XMEM, YMEM and PMEM apertures are converted to word indices and written to
 * the EP DSP's X/Y/P memories. A write to NV_PAPU_EPRST is passed through
 * proc_rst_write() together with the previous value, stored, and also
 * resets d->ep_frame_div to 0. Every other offset is stored in d->ep.regs,
 * indexed by the raw byte offset.
 */
static void ep_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
{
    MCPXAPUState *d = opaque;

    qemu_mutex_lock(&d->lock);

    assert(size == 4);
    assert(addr % 4 == 0);

    /* `val` is a uint64_t: PRIx64 is correct on every host, "%lx" is not */
    DPRINTF("mcpx apu EP: [0x%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", addr, val);

    switch (addr) {
    case NV_PAPU_EPXMEM ... NV_PAPU_EPXMEM + 0xC00 * 4 - 1: {
        uint32_t xaddr = (addr - NV_PAPU_EPXMEM) / 4;
        dsp_write_memory(d->ep.dsp, 'X', xaddr, val);
        break;
    }
    case NV_PAPU_EPYMEM ... NV_PAPU_EPYMEM + 0x100 * 4 - 1: {
        uint32_t yaddr = (addr - NV_PAPU_EPYMEM) / 4;
        dsp_write_memory(d->ep.dsp, 'Y', yaddr, val);
        break;
    }
    case NV_PAPU_EPPMEM ... NV_PAPU_EPPMEM + 0x1000 * 4 - 1: {
        uint32_t paddr = (addr - NV_PAPU_EPPMEM) / 4;
        dsp_write_memory(d->ep.dsp, 'P', paddr, val);
        break;
    }
    case NV_PAPU_EPRST:
        /* Needs the old value to detect the reset -> running transition */
        proc_rst_write(d->ep.dsp, d->ep.regs[NV_PAPU_EPRST], val);
        d->ep.regs[NV_PAPU_EPRST] = val;
        d->ep_frame_div = 0; /* FIXME: Still unsure about frame sync */
        break;
    default:
        d->ep.regs[addr] = val;
        break;
    }

    qemu_mutex_unlock(&d->lock);
}
/* MMIO callbacks for the EP register window: ep_read() / ep_write(). */
const MemoryRegionOps ep_ops = {
.read = ep_read,
.write = ep_write,
};
/*
 * Run one audio frame through the DSP stage.
 *
 * 1. The per-mixbin float samples in `mixbins` are converted with
 *    float_to_24b() and written into the GP's X memory at
 *    GP_DSP_MIXBUF_BASE.
 * 2. If both reset bits are set in NV_PAPU_GPRST, the GP is run in slices of
 *    1000 until it reports idle (only a single slice when d->gp.realtime is
 *    false). When the monitor taps the GP (or GP_OR_EP with the EP not
 *    enabled), two 32-sample runs are read from X memory at 0x1400 and
 *    0x1420 and stored, shifted right by 8, as the two channels of
 *    d->monitor.frame_buf.
 * 3. If both reset bits are set in NV_PAPU_EPRST and d->ep_frame_div is a
 *    multiple of 8, the EP is run the same way.
 *
 * Cycle counts of each run are published in g_dbg.
 */
void mcpx_apu_dsp_frame(MCPXAPUState *d, float mixbins[NUM_MIXBINS][NUM_SAMPLES_PER_FRAME])
{
    /* Write VP results to the GP DSP MIXBUF */
    for (int bin = 0; bin < NUM_MIXBINS; bin++) {
        const float *samples = mixbins[bin];
        uint32_t dst = GP_DSP_MIXBUF_BASE + bin * NUM_SAMPLES_PER_FRAME;

        for (int n = 0; n < NUM_SAMPLES_PER_FRAME; n++) {
            dsp_write_memory(d->gp.dsp, 'X', dst + n, float_to_24b(samples[n]));
        }
    }

    /* Sampled before the GP runs; decides where GP_OR_EP monitoring taps */
    bool ep_enabled = (d->ep.regs[NV_PAPU_EPRST] & NV_PAPU_GPRST_GPRST) &&
                      (d->ep.regs[NV_PAPU_EPRST] & NV_PAPU_GPRST_GPDSPRST);

    /* Run GP */
    bool gp_enabled = (d->gp.regs[NV_PAPU_GPRST] & NV_PAPU_GPRST_GPRST) &&
                      (d->gp.regs[NV_PAPU_GPRST] & NV_PAPU_GPRST_GPDSPRST);
    if (gp_enabled) {
        dsp_start_frame(d->gp.dsp);
        d->gp.dsp->core.is_idle = false;
        d->gp.dsp->core.cycle_count = 0;

        for (;;) {
            dsp_run(d->gp.dsp, 1000);
            if (d->gp.dsp->core.is_idle || !d->gp.realtime) {
                break;
            }
        }
        g_dbg.gp.cycles = d->gp.dsp->core.cycle_count;

        bool tap_gp = (d->monitor.point == MCPX_APU_DEBUG_MON_GP) ||
                      (d->monitor.point == MCPX_APU_DEBUG_MON_GP_OR_EP &&
                       !ep_enabled);
        if (tap_gp) {
            int first = (d->ep_frame_div % 8) * NUM_SAMPLES_PER_FRAME;

            for (int n = 0; n < NUM_SAMPLES_PER_FRAME; n++) {
                uint32_t left = dsp_read_memory(d->gp.dsp, 'X', 0x1400 + n);
                uint32_t right =
                    dsp_read_memory(d->gp.dsp, 'X', 0x1400 + 0x20 + n);

                d->monitor.frame_buf[first + n][0] = left >> 8;
                d->monitor.frame_buf[first + n][1] = right >> 8;
            }
        }
    }

    /* Run EP (reset register is read again, after the GP has run) */
    bool ep_runnable = (d->ep.regs[NV_PAPU_EPRST] & NV_PAPU_GPRST_GPRST) &&
                       (d->ep.regs[NV_PAPU_EPRST] & NV_PAPU_GPRST_GPDSPRST);
    if (ep_runnable && d->ep_frame_div % 8 == 0) {
        dsp_start_frame(d->ep.dsp);
        d->ep.dsp->core.is_idle = false;
        d->ep.dsp->core.cycle_count = 0;

        for (;;) {
            dsp_run(d->ep.dsp, 1000);
            if (d->ep.dsp->core.is_idle || !d->ep.realtime) {
                break;
            }
        }
        g_dbg.ep.cycles = d->ep.dsp->core.cycle_count;
    }
}
/*
 * Create and initialize the GP and EP DSP instances.
 *
 * Each DSP is created with dsp_init() using its own scratch and FIFO
 * callbacks. Program RAM is filled with the pattern 0xCACACACA and the
 * opcode cache is zeroed; for the EP, X and Y RAM are filled with the same
 * pattern as well. Both cores start non-idle with a zero cycle count.
 * Finally the user's DSP preference is applied.
 */
void mcpx_apu_dsp_init(MCPXAPUState *d)
{
    DSPState *gp = dsp_init(d, gp_scratch_rw, gp_fifo_rw);
    d->gp.dsp = gp;

    for (int word = 0; word < DSP_PRAM_SIZE; word++) {
        gp->core.pram[word] = 0xCACACACA;
    }
    memset(gp->core.pram_opcache, 0, sizeof(gp->core.pram_opcache));
    gp->is_gp = true;
    gp->core.is_gp = true;
    gp->core.is_idle = false;
    gp->core.cycle_count = 0;

    DSPState *ep = dsp_init(d, ep_scratch_rw, ep_fifo_rw);
    d->ep.dsp = ep;

    for (int word = 0; word < DSP_PRAM_SIZE; word++) {
        ep->core.pram[word] = 0xCACACACA;
    }
    memset(ep->core.pram_opcache, 0, sizeof(ep->core.pram_opcache));
    for (int word = 0; word < DSP_XRAM_SIZE; word++) {
        ep->core.xram[word] = 0xCACACACA;
    }
    for (int word = 0; word < DSP_YRAM_SIZE; word++) {
        ep->core.yram[word] = 0xCACACACA;
    }
    ep->is_gp = false;
    ep->core.is_gp = false;
    ep->core.is_idle = false;
    ep->core.cycle_count = 0;

    /*
     * Until the DSP is more performant, the user setting decides whether the
     * full audio pipeline is used.
     */
    mcpx_apu_update_dsp_preference(d);
}

View File

@ -1,57 +0,0 @@
/*
* QEMU MCPX Audio Processing Unit implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2018-2019 Jannik Vogel
* Copyright (c) 2019-2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_MCPX_APU_GP_EP_H
#define HW_XBOX_MCPX_APU_GP_EP_H
#include "qemu/osdep.h"
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/xbox/mcpx/apu/apu_regs.h"
#include "dsp.h"
#include "dsp_dma.h"
#include "dsp_cpu.h"
#include "dsp_state.h"
typedef struct MCPXAPUState MCPXAPUState;
/* State of the Global Processor (GP), the APU's programmable DSP. */
typedef struct MCPXAPUGPState {
/*
 * Set from the user's DSP preference by mcpx_apu_update_dsp_preference().
 * When true, mcpx_apu_dsp_frame() keeps running the DSP until it reports
 * idle; when false it runs a single slice per frame.
 */
bool realtime;
/* MMIO region for the GP register window (presumably backed by gp_ops). */
MemoryRegion mmio;
/* DSP core instance, created by mcpx_apu_dsp_init(). */
DSPState *dsp;
/* Register file; gp_read()/gp_write() index it by raw MMIO byte offset. */
uint32_t regs[0x10000];
} MCPXAPUGPState;
/* State of the Encode Processor (EP), the APU's encoding DSP. */
typedef struct MCPXAPUEPState {
/*
 * Set from the user's DSP preference by mcpx_apu_update_dsp_preference().
 * When true, mcpx_apu_dsp_frame() keeps running the DSP until it reports
 * idle; when false it runs a single slice per EP frame.
 */
bool realtime;
/* MMIO region for the EP register window (presumably backed by ep_ops). */
MemoryRegion mmio;
/* DSP core instance, created by mcpx_apu_dsp_init(). */
DSPState *dsp;
/* Register file; ep_read()/ep_write() index it by raw MMIO byte offset. */
uint32_t regs[0x10000];
} MCPXAPUEPState;
/* MMIO read/write callbacks for the GP and EP register windows. */
extern const MemoryRegionOps gp_ops;
extern const MemoryRegionOps ep_ops;
/* Create both DSP instances and apply the current DSP preference. */
void mcpx_apu_dsp_init(MCPXAPUState *d);
/* Re-apply the user's "use DSP" setting; does nothing if it is unchanged. */
void mcpx_apu_update_dsp_preference(MCPXAPUState *d);
/* Feed one frame of mixbin samples to the GP, then run the GP and the EP. */
void mcpx_apu_dsp_frame(MCPXAPUState *d, float mixbins[NUM_MIXBINS][NUM_SAMPLES_PER_FRAME]);
#endif

View File

@ -1 +0,0 @@
#include "trace/trace-hw_xbox_mcpx_apu_dsp.h"

View File

@ -1,7 +0,0 @@
# APU core sources; added to the MCPX source set together with the sdl
# dependency.
mcpx_ss.add(sdl, files(
'apu.c',
'debug.c',
))
# Descend into the voice processor (vp) and DSP (dsp) subdirectories.
subdir('vp')
subdir('dsp')

View File

@ -1 +0,0 @@
#include "trace/trace-hw_xbox_mcpx_apu.h"

View File

@ -1,137 +0,0 @@
/*
* HRTF Filter
*
* Copyright (c) 2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_MCPX_HRTF_H
#define HW_XBOX_MCPX_HRTF_H
#include <string.h>
#include <stddef.h>
#include <math.h>
#include "hw/xbox/mcpx/apu/apu_regs.h"
#define HRTF_SAMPLES_PER_FRAME NUM_SAMPLES_PER_FRAME
#define HRTF_NUM_TAPS 31
#define HRTF_MAX_DELAY_SAMPLES 42
#define HRTF_BUFLEN (HRTF_NUM_TAPS + HRTF_MAX_DELAY_SAMPLES)
#define HRTF_PARAM_SMOOTH_ALPHA 0.01f
/*
 * State of one stereo HRTF filter: a per-channel FIR (HRIR) with an
 * interaural time delay, whose parameters glide from their current values
 * ("cur") towards target values ("tar").
 */
typedef struct {
/* Write index into ch[].buf; shared by both channels, advanced per sample. */
int buf_pos;
struct {
/* Circular history of input samples (taps plus maximum delay). */
float buf[HRTF_BUFLEN];
/* Coefficients currently used by the convolution. */
float hrir_coeff_cur[HRTF_NUM_TAPS];
/* Coefficients being approached; set by hrtf_filter_set_target_params(). */
float hrir_coeff_tar[HRTF_NUM_TAPS];
} ch[2];
/* Current delay in samples: positive delays ch[0], negative delays ch[1]. */
float itd_cur;
/* Target delay, clamped to +/- HRTF_MAX_DELAY_SAMPLES when set. */
float itd_tar;
} HrtfFilter;
/*
 * Reset a filter to its initial state: all history, coefficients, delays and
 * the buffer position become zero.
 */
static inline void hrtf_filter_init(HrtfFilter *f)
{
    memset(f, 0, sizeof(HrtfFilter));
}
/*
 * Set the parameters the filter should glide towards.
 *
 * f:          filter to update
 * hrir_coeff: per-channel impulse response, HRTF_NUM_TAPS taps each
 * itd:        interaural time delay in samples; clamped to
 *             +/- HRTF_MAX_DELAY_SAMPLES
 *
 * Each channel's coefficients are copied and then scaled so that the sum of
 * their absolute values is 1. A channel whose sum is exactly 0 (nothing to
 * scale) or exactly 1 (already normalized) is left as copied. Channels are
 * handled independently: skipping normalization for one channel must not
 * prevent the other channel from being copied and normalized.
 */
static inline void
hrtf_filter_set_target_params(HrtfFilter *f, float hrir_coeff[2][HRTF_NUM_TAPS],
                              float itd)
{
    f->itd_tar =
        fmaxf(-HRTF_MAX_DELAY_SAMPLES, fminf(itd, HRTF_MAX_DELAY_SAMPLES));

    for (int ch = 0; ch < 2; ch++) {
        float *coeff = f->ch[ch].hrir_coeff_tar;
        memcpy(coeff, hrir_coeff[ch], sizeof(f->ch[ch].hrir_coeff_tar));

        // Normalize coefficients for unity filter gain
        float s = 0.0f;
        for (int k = 0; k < HRTF_NUM_TAPS; k++) {
            s += fabsf(coeff[k]);
        }

        if (s == 0.0f || s == 1.0f) {
            // Nothing to scale for this channel; move on to the next one
            continue;
        }

        for (int k = 0; k < HRTF_NUM_TAPS; k++) {
            coeff[k] /= s;
        }
    }
}
/*
 * Move `cur` one smoothing step towards `tar`: the remaining distance is
 * scaled by HRTF_PARAM_SMOOTH_ALPHA and added to `cur`.
 */
static inline float hrtf_filter_smooth_param(float cur, float tar)
{
    // FIXME: Match hardware parameter transition
    const float remaining = tar - cur;

    return cur + HRTF_PARAM_SMOOTH_ALPHA * remaining;
}
/*
 * Advance every smoothed parameter of the filter by one step: all HRIR
 * coefficients of both channels, followed by the interaural time delay.
 */
static inline void hrtf_filter_step_parameters(HrtfFilter *f)
{
    for (int ch = 0; ch < 2; ch++) {
        for (int tap = 0; tap < HRTF_NUM_TAPS; tap++) {
            f->ch[ch].hrir_coeff_cur[tap] =
                hrtf_filter_smooth_param(f->ch[ch].hrir_coeff_cur[tap],
                                         f->ch[ch].hrir_coeff_tar[tap]);
        }
    }

    f->itd_cur = hrtf_filter_smooth_param(f->itd_cur, f->itd_tar);
}
/*
 * Filter one frame of stereo samples.
 *
 * For every sample the smoothed parameters are stepped once, the new input
 * is stored in each channel's circular history, and the output is the
 * convolution of that history with the channel's current coefficients. The
 * history is read with a delay derived from the interaural time delay:
 * channel 0 is delayed for positive values, channel 1 for negative values,
 * and the fractional part of the delay is handled by linear interpolation
 * between neighbouring history samples.
 */
static inline void hrtf_filter_process(HrtfFilter *f,
                                       float in[HRTF_SAMPLES_PER_FRAME][2],
                                       float out[HRTF_SAMPLES_PER_FRAME][2])
{
    for (int n = 0; n < HRTF_SAMPLES_PER_FRAME; n++) {
        hrtf_filter_step_parameters(f);

        const int head = f->buf_pos;

        for (int ch = 0; ch < 2; ch++) {
            float *history = f->ch[ch].buf;
            const float *taps = f->ch[ch].hrir_coeff_cur;

            // Push new sample
            history[head] = in[n][ch];

            // Interaural time difference (channel delay)
            const float side = (ch == 0) ? +1.0f : -1.0f;
            float delay = f->itd_cur * side;
            if (delay < 0.0f) {
                delay = 0.0f;
            }
            const int delay_whole = delay;
            const float delay_frac = delay - delay_whole;

            // HRIR Convolution
            float sum = 0.0f;
            for (int k = 0; k < HRTF_NUM_TAPS; k++) {
                const int at =
                    (head - delay_whole - k + HRTF_BUFLEN) % HRTF_BUFLEN;
                float sample = history[at];

                // Linear interpolation for fractional part
                if (delay_frac > 0.0f) {
                    const int before = (at - 1 + HRTF_BUFLEN) % HRTF_BUFLEN;
                    sample = sample * (1 - delay_frac) +
                             history[before] * delay_frac;
                }

                sum += taps[k] * sample;
            }

            out[n][ch] = sum;
        }

        f->buf_pos = (head + 1) % HRTF_BUFLEN;
    }
}
#endif

View File

@ -1,3 +0,0 @@
# Voice processor source; added to the MCPX source set together with the
# libsamplerate dependency.
mcpx_ss.add(libsamplerate, files(
'vp.c'
))

View File

@ -1,112 +0,0 @@
/*
* QEMU MCPX Audio Processing Unit implementation
*
* Copyright (c) 2012 espes
* Copyright (c) 2018-2019 Jannik Vogel
* Copyright (c) 2019-2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_MCPX_VP_H
#define HW_XBOX_MCPX_VP_H
#include <samplerate.h>
#include "qemu/osdep.h"
#include "qemu/thread.h"
#include "hw/hw.h"
#include "hw/pci/pci.h"
#include "hw/xbox/mcpx/apu/apu_regs.h"
#include "svf.h"
#include "hrtf.h"
typedef struct MCPXAPUState MCPXAPUState;
/*
 * Per-voice SSL bookkeeping, one base/count pair per SSL slot of a voice.
 * NOTE(review): field meanings below are inferred from names and the
 * NV1BA0_PIO_SET_VOICE_SSL_* register fields - confirm against vp.c.
 */
typedef struct MCPXAPUVPSSLData {
/* Presumably the BASE field of each SSL slot. */
uint32_t base[MCPX_HW_SSLS_PER_VOICE];
/* Presumably the COUNT field (8 bits) of each SSL slot. */
uint8_t count[MCPX_HW_SSLS_PER_VOICE];
/* Presumably the SSL slot currently in use. */
int ssl_index;
/* Presumably the segment currently in use within that slot. */
int ssl_seg;
} MCPXAPUVPSSLData;
/*
 * Per-voice signal-processing state: a resampler, two state-variable
 * filters and one HRTF filter.
 */
typedef struct MCPXAPUVoiceFilter {
/* Voice number this state is associated with - TODO confirm usage. */
uint16_t voice;
/* Resampler buffer sized for one frame of two-channel float samples. */
float resample_buf[NUM_SAMPLES_PER_FRAME * 2];
/* libsamplerate converter handle. */
SRC_STATE *resampler;
/* Two state-variable filters (presumably one per channel). */
sv_filter svf[2];
/* HRTF filter state (see hrtf.h). */
HrtfFilter hrtf;
} MCPXAPUVoiceFilter;
/* One queued unit of voice work: a voice number and a list identifier. */
typedef struct VoiceWorkItem {
int voice;
/* NOTE(review): presumably identifies the voice list - confirm. */
int list;
} VoiceWorkItem;
/* State owned by one voice worker thread. */
typedef struct VoiceWorker {
QemuThread thread;
/* Worker-private mixbin buffer, same shape as the shared frame mixbins. */
float mixbins[NUM_MIXBINS][NUM_SAMPLES_PER_FRAME];
/* Worker-private buffer for one frame of two-channel samples. */
float sample_buf[NUM_SAMPLES_PER_FRAME][2];
/* Work items assigned to this worker; queue_len entries are valid. */
VoiceWorkItem queue[MCPX_HW_MAX_VOICES];
int queue_len;
} VoiceWorker;
/*
 * Shared state used to hand voice work to a pool of VoiceWorker threads.
 * NOTE(review): the synchronization protocol is not visible in this header;
 * the per-field notes are inferred from names - confirm against vp.c.
 */
typedef struct VoiceWorkDispatch {
/* Presumably guards the fields below. */
QemuMutex lock;
/* Number of entries in `workers`. */
int num_workers;
VoiceWorker *workers;
/* Presumably set to ask the worker threads to terminate. */
bool workers_should_exit;
/* Presumably signalled when new work is available. */
QemuCond work_pending;
/* Presumably a bitmask of workers that have not finished yet. */
uint64_t workers_pending;
/* Presumably signalled when workers complete their work. */
QemuCond work_finished;
/* Mixbin buffer for one frame, same shape as the frame-level mixbins. */
float mixbins[NUM_MIXBINS][NUM_SAMPLES_PER_FRAME];
/* Work items collected for dispatch; queue_len entries are valid. */
VoiceWorkItem queue[MCPX_HW_MAX_VOICES];
int queue_len;
} VoiceWorkDispatch;
/* State of the Voice Processor (VP). */
typedef struct {
/* MMIO region for the VP register window (presumably backed by vp_ops). */
MemoryRegion mmio;
/* Worker-thread dispatch state for voice processing. */
VoiceWorkDispatch voice_work_dispatch;
/* Per-voice filter state, one entry per hardware voice. */
MCPXAPUVoiceFilter filters[MCPX_HW_MAX_VOICES];
// FIXME: Where are these stored?
int ssl_base_page;
MCPXAPUVPSSLData ssl[MCPX_HW_MAX_VOICES];
uint8_t hrtf_headroom;
uint8_t hrtf_submix[4];
/* One headroom value per mixbin. */
uint8_t submix_headroom[NUM_MIXBINS];
/* Buffer for one frame of two-channel float samples. */
float sample_buf[NUM_SAMPLES_PER_FRAME][2];
/* 4 x 64 bits; presumably one lock flag per voice - TODO confirm. */
uint64_t voice_locked[4];
/* One spinlock per hardware voice. */
QemuSpin voice_spinlocks[MCPX_HW_MAX_VOICES];
/* Table of HRTF entries: per-channel impulse response plus a delay value. */
struct {
/* Presumably the entry selected for subsequent HRIR writes - confirm. */
int current_entry;
// FIXME: Stored in RAM
struct {
float hrir[2][HRTF_NUM_TAPS];
float itd;
} entries[HRTF_ENTRY_COUNT];
} hrtf;
uint32_t inbuf_sge_handle; //FIXME: Where is this stored?
uint32_t outbuf_sge_handle; //FIXME: Where is this stored?
} MCPXAPUVPState;
/* MMIO callbacks for the VP register window. */
extern const MemoryRegionOps vp_ops;
/*
 * VP entry points (implemented outside this header).
 * NOTE(review): the descriptions are inferred from the names - confirm.
 * init/finalize presumably set up and tear down the VP state, reset
 * presumably returns it to its power-on state.
 */
void mcpx_apu_vp_init(MCPXAPUState *d);
void mcpx_apu_vp_finalize(MCPXAPUState *d);
/* Takes a mixbins array of the same shape mcpx_apu_dsp_frame() consumes. */
void mcpx_apu_vp_frame(MCPXAPUState *d, float mixbins[NUM_MIXBINS][NUM_SAMPLES_PER_FRAME]);
void mcpx_apu_vp_reset(MCPXAPUState *d);
#endif

View File

@ -23,15 +23,13 @@
#include <stdbool.h>
#include <stdint.h>
#define MAX_VOICE_WORKERS 16
typedef enum McpxApuDebugMonitorPoint {
enum McpxApuDebugMon {
MCPX_APU_DEBUG_MON_AC97,
MCPX_APU_DEBUG_MON_VP,
MCPX_APU_DEBUG_MON_GP,
MCPX_APU_DEBUG_MON_EP,
MCPX_APU_DEBUG_MON_GP_OR_EP
} McpxApuDebugMonitorPoint;
};
struct McpxApuDebugVoice
{
@ -57,12 +55,6 @@ struct McpxApuDebugVoice
struct McpxApuDebugVp
{
struct McpxApuDebugVoice v[256];
int num_workers;
struct {
int num_voices;
int time_us;
} workers[MAX_VOICE_WORKERS];
int total_worker_time_us;
};
struct McpxApuDebugDsp
@ -84,8 +76,8 @@ extern "C" {
#endif
const struct McpxApuDebug *mcpx_apu_get_debug_info(void);
McpxApuDebugMonitorPoint mcpx_apu_debug_get_monitor(void);
void mcpx_apu_debug_set_monitor(McpxApuDebugMonitorPoint monitor);
int mcpx_apu_debug_get_monitor(void);
void mcpx_apu_debug_set_monitor(int mon);
void mcpx_apu_debug_isolate_voice(uint16_t v);
void mcpx_apu_debug_clear_isolations(void);
void mcpx_apu_debug_toggle_mute(uint16_t v);

View File

@ -145,8 +145,6 @@
#define NV1BA0_PIO_VOICE_PAUSE 0x00000140
# define NV1BA0_PIO_VOICE_PAUSE_HANDLE 0x0000FFFF
# define NV1BA0_PIO_VOICE_PAUSE_ACTION (1 << 18)
#define NV1BA0_PIO_SET_CURRENT_HRTF_ENTRY 0x00000160
# define NV1BA0_PIO_SET_CURRENT_HRTF_ENTRY_HANDLE 0x0000FFFF
#define NV1BA0_PIO_SET_CONTEXT_DMA_NOTIFY 0x00000180
#define NV1BA0_PIO_SET_CURRENT_SSL_CONTEXT_DMA 0x0000018C
#define NV1BA0_PIO_SET_CURRENT_SSL 0x00000190
@ -167,8 +165,6 @@
#define NV1BA0_PIO_SET_VOICE_CFG_ENV1 0x00000310
#define NV1BA0_PIO_SET_VOICE_CFG_ENVF 0x00000314
#define NV1BA0_PIO_SET_VOICE_CFG_MISC 0x00000318
#define NV1BA0_PIO_SET_VOICE_TAR_HRTF 0x0000031C
# define NV1BA0_PIO_SET_VOICE_TAR_HRTF_HANDLE 0x0000FFFF
#define NV1BA0_PIO_SET_VOICE_SSL_A 0x00000320
# define NV1BA0_PIO_SET_VOICE_SSL_A_COUNT 0x000000FF
# define NV1BA0_PIO_SET_VOICE_SSL_A_BASE 0xFFFFFF00
@ -189,15 +185,6 @@
# define NV1BA0_PIO_SET_VOICE_BUF_CBO_OFFSET 0x00FFFFFF
#define NV1BA0_PIO_SET_VOICE_CFG_BUF_EBO 0x000003DC
# define NV1BA0_PIO_SET_VOICE_CFG_BUF_EBO_OFFSET 0x00FFFFFF
#define NV1BA0_PIO_SET_HRIR 0x00000400
# define NV1BA0_PIO_SET_HRIR_LEFT0 0x000000FF
# define NV1BA0_PIO_SET_HRIR_RIGHT0 0x0000FF00
# define NV1BA0_PIO_SET_HRIR_LEFT1 0x00FF0000
# define NV1BA0_PIO_SET_HRIR_RIGHT1 0xFF000000
#define NV1BA0_PIO_SET_HRIR_X 0x0000043C
# define NV1BA0_PIO_SET_HRIR_X_LEFT30 0x000000FF
# define NV1BA0_PIO_SET_HRIR_X_RIGHT30 0x0000FF00
# define NV1BA0_PIO_SET_HRIR_X_ITD 0xFFFF0000
#define NV1BA0_PIO_SET_SSL_SEGMENT_OFFSET 0x00000600
#define NV1BA0_PIO_SET_SSL_SEGMENT_LENGTH 0x00000604
#define NV1BA0_PIO_SET_CURRENT_INBUF_SGE 0x00000804
@ -262,8 +249,6 @@
#define NV_PAVS_VOICE_CFG_MISC 0x00000018
# define NV_PAVS_VOICE_CFG_MISC_EF_RELEASERATE (0xFFF << 0)
# define NV_PAVS_VOICE_CFG_MISC_FMODE (3 << 16)
#define NV_PAVS_VOICE_CFG_HRTF_TARGET 0x0000001C
# define NV_PAVS_VOICE_CFG_HRTF_TARGET_HANDLE 0x0000FFFF
#define NV_PAVS_VOICE_CUR_PSL_START 0x00000020
# define NV_PAVS_VOICE_CUR_PSL_START_BA 0x00FFFFFF
#define NV_PAVS_VOICE_CUR_PSH_SAMPLE 0x00000024
@ -328,7 +313,6 @@
#define EP_INPUT_FIFO_COUNT 2
#define MCPX_HW_MAX_VOICES 256
#define MCPX_HW_MAX_3D_VOICES 64
#define NUM_SAMPLES_PER_FRAME 32
#define NUM_MIXBINS 32
@ -352,12 +336,6 @@ enum MCPX_HW_NOTIFIER {
#define NV1BA0_NOTIFICATION_STATUS_DONE_SUCCESS 0x01
#define NV1BA0_NOTIFICATION_STATUS_IN_PROGRESS 0x80
#define HRTF_NULL_HANDLE 0xFFFF
#define HRTF_ENTRY_COUNT 128
#define MULTIPASS_BIN 31
#define MULTIPASS_BIN_MASK (1 << MULTIPASS_BIN)
// clang-format on
#endif

View File

@ -1,4 +1,2 @@
libdsp = static_library('dsp', files(['debug.c', 'dsp.c', 'dsp_cpu.c', 'dsp_dma.c']) + genh)
dsp = declare_dependency(objects: libdsp.extract_all_objects(recursive: false))
mcpx_ss.add(dsp, files('gp_ep.c'))

1
hw/xbox/mcpx/dsp/trace.h Normal file
View File

@ -0,0 +1 @@
#include "trace/trace-hw_xbox_mcpx_dsp.h"

View File

@ -1,7 +1,7 @@
/*
* Helper FP conversions
*
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2021 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -21,39 +21,27 @@
#ifndef FLOATCONV_H
#define FLOATCONV_H
#include <stdint.h>
static inline float int8_to_float(int8_t x)
{
return x / 128.0f;
}
static inline float uint8_to_float(uint8_t value)
static float uint8_to_float(uint8_t value)
{
return ((int)value - 0x80) / (1.0 * 0x80);
}
static inline float int16_to_float(int16_t value)
static float int16_to_float(int16_t value)
{
return value / (1.0 * 0x8000);
}
static inline float s6p9_to_float(int16_t value)
{
return value / 512.0f;
}
static inline float int32_to_float(int32_t value)
static float int32_to_float(int32_t value)
{
return value / (1.0 * 0x80000000);
}
static inline float int24_to_float(int32_t value)
static float int24_to_float(int32_t value)
{
return int32_to_float((uint32_t)value << 8);
}
static inline uint32_t float_to_24b(float value)
static uint32_t float_to_24b(float value)
{
double scaled_value = value * (8.0 * 0x100000);
int int24;

View File

@ -1,8 +1,9 @@
subdir('dsp')
mcpx_ss = ss.source_set()
mcpx_ss.add(files('aci.c'))
subdir('apu')
subdir('nvnet')
mcpx_ss.add(sdl, libsamplerate, dsp, files(
'apu.c',
'aci.c',
))
specific_ss.add_all(mcpx_ss)

View File

@ -1 +0,0 @@
mcpx_ss.add(files('nvnet.c'))

File diff suppressed because it is too large Load Diff

View File

@ -1,267 +0,0 @@
/*
* QEMU nForce Ethernet Controller register definitions
*
* Copyright (c) 2013 espes
* Copyright (c) 2015-2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
* --
*
* Most definitions are based on forcedeth.c, taken from cromwell project.
* Original forcedeth.c license follows:
*
* --
* forcedeth.c -- Etherboot device driver for the NVIDIA nForce
* media access controllers.
*
* Note: This driver is based on the Linux driver that was based on
* a cleanroom reimplementation which was based on reverse
* engineered documentation written by Carl-Daniel Hailfinger
* and Andrew de Quincey. It's neither supported nor endorsed
* by NVIDIA Corp. Use at your own risk.
*
* Written 2004 by Timothy Legge <tlegge@rogers.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Portions of this code based on:
* forcedeth: Ethernet driver for NVIDIA nForce media access controllers:
*
* (C) 2003 Manfred Spraul
* See Linux Driver for full information
*
* Linux Driver Version 0.22, 19 Jan 2004
*
*
* REVISION HISTORY:
* ================
* v1.0 01-31-2004 timlegge Initial port of Linux driver
* v1.1 02-03-2004 timlegge Large Clean up, first release
*
* Indent Options: indent -kr -i8
***************************************************************************/
#ifndef HW_NVNET_REGS_H
#define HW_NVNET_REGS_H
// clang-format off
#define NVNET_IRQ_STATUS 0x000
# define NVNET_IRQ_STATUS_RX 0x00000002
# define NVNET_IRQ_STATUS_RX_NOBUF 0x00000004
# define NVNET_IRQ_STATUS_TX_ERR 0x00000008
# define NVNET_IRQ_STATUS_TX 0x00000010
# define NVNET_IRQ_STATUS_TIMER 0x00000020
# define NVNET_IRQ_STATUS_MIIEVENT 0x00000040
#define NVNET_IRQ_MASK 0x004
#define NVNET_UNKNOWN_SETUP_REG6 0x008
# define NVNET_UNKNOWN_SETUP_REG6_VAL 3
/*
* NVNET_POLLING_INTERVAL_DEFAULT is the interval length of the timer source on the nic
* NVNET_POLLING_INTERVAL_DEFAULT=97 would result in an interval length of 1 ms
*/
#define NVNET_POLLING_INTERVAL 0x00C
# define NVNET_POLLING_INTERVAL_DEFAULT 970
#define NVNET_MISC1 0x080
# define NVNET_MISC1_HD 0x00000002
# define NVNET_MISC1_FORCE 0x003B0F3C
#define NVNET_TRANSMITTER_CONTROL 0x084
# define NVNET_TRANSMITTER_CONTROL_START 0x00000001
#define NVNET_TRANSMITTER_STATUS 0x088
# define NVNET_TRANSMITTER_STATUS_BUSY 0x00000001
#define NVNET_PACKET_FILTER 0x08C
# define NVNET_PACKET_FILTER_ALWAYS 0x007F0008
# define NVNET_PACKET_FILTER_PROMISC 0x00000080
# define NVNET_PACKET_FILTER_MYADDR 0x00000020
#define NVNET_OFFLOAD 0x090
# define NVNET_OFFLOAD_HOMEPHY 0x00000601
# define NVNET_OFFLOAD_NORMAL 0x000005EE
#define NVNET_RECEIVER_CONTROL 0x094
# define NVNET_RECEIVER_CONTROL_START 0x00000001
#define NVNET_RECEIVER_STATUS 0x098
# define NVNET_RECEIVER_STATUS_BUSY 0x00000001
#define NVNET_RANDOM_SEED 0x09C
# define NVNET_RANDOM_SEED_MASK 0x000000FF
# define NVNET_RANDOM_SEED_FORCE 0x00007F00
#define NVNET_UNKNOWN_SETUP_REG1 0x0A0
# define NVNET_UNKNOWN_SETUP_REG1_VAL 0x0016070F
#define NVNET_UNKNOWN_SETUP_REG2 0x0A4
# define NVNET_UNKNOWN_SETUP_REG2_VAL 0x00000016
#define NVNET_MAC_ADDR_A 0x0A8
#define NVNET_MAC_ADDR_B 0x0AC
#define NVNET_MULTICAST_ADDR_A 0x0B0
# define NVNET_MULTICAST_ADDR_A_FORCE 0x00000001
#define NVNET_MULTICAST_ADDR_B 0x0B4
#define NVNET_MULTICAST_MASK_A 0x0B8
#define NVNET_MULTICAST_MASK_B 0x0BC
#define NVNET_TX_RING_PHYS_ADDR 0x100
#define NVNET_RX_RING_PHYS_ADDR 0x104
#define NVNET_RING_SIZE 0x108
# define NVNET_RING_SIZE_TX 0x0000FFFF
# define NVNET_RING_SIZE_RX 0xFFFF0000
#define NVNET_UNKNOWN_TRANSMITTER_REG 0x10C
#define NVNET_LINKSPEED 0x110
# define NVNET_LINKSPEED_FORCE 0x00010000
# define NVNET_LINKSPEED_10 10
# define NVNET_LINKSPEED_100 100
# define NVNET_LINKSPEED_1000 1000
#define NVNET_TX_RING_CURRENT_DESC_PHYS_ADDR 0x11C
#define NVNET_RX_RING_CURRENT_DESC_PHYS_ADDR 0x120
#define NVNET_TX_CURRENT_BUFFER_PHYS_ADDR 0x124
#define NVNET_RX_CURRENT_BUFFER_PHYS_ADDR 0x12C
#define NVNET_UNKNOWN_SETUP_REG5 0x130
# define NVNET_UNKNOWN_SETUP_REG5_BIT31 (1 << 31)
#define NVNET_TX_RING_NEXT_DESC_PHYS_ADDR 0x134
#define NVNET_RX_RING_NEXT_DESC_PHYS_ADDR 0x138
#define NVNET_UNKNOWN_SETUP_REG8 0x13C
# define NVNET_UNKNOWN_SETUP_REG8_VAL1 0x00300010
#define NVNET_UNKNOWN_SETUP_REG7 0x140
# define NVNET_UNKNOWN_SETUP_REG7_VAL 0x00300010
#define NVNET_TX_RX_CONTROL 0x144
# define NVNET_TX_RX_CONTROL_KICK 0x00000001
# define NVNET_TX_RX_CONTROL_BIT1 0x00000002
# define NVNET_TX_RX_CONTROL_BIT2 0x00000004
# define NVNET_TX_RX_CONTROL_IDLE 0x00000008
# define NVNET_TX_RX_CONTROL_RESET 0x00000010
#define NVNET_MII_STATUS 0x180
# define NVNET_MII_STATUS_ERROR 0x00000001
# define NVNET_MII_STATUS_LINKCHANGE 0x00000008
#define NVNET_UNKNOWN_SETUP_REG4 0x184
# define NVNET_UNKNOWN_SETUP_REG4_VAL 8
#define NVNET_ADAPTER_CONTROL 0x188
# define NVNET_ADAPTER_CONTROL_START 0x00000002
# define NVNET_ADAPTER_CONTROL_LINKUP 0x00000004
# define NVNET_ADAPTER_CONTROL_PHYVALID 0x00004000
# define NVNET_ADAPTER_CONTROL_RUNNING 0x00100000
# define NVNET_ADAPTER_CONTROL_PHYSHIFT 24
#define NVNET_MII_SPEED 0x18C
# define NVNET_MII_SPEED_BIT8 (1 << 8)
# define NVNET_MII_SPEED_DELAY 5
#define NVNET_MDIO_ADDR 0x190
# define NVNET_MDIO_ADDR_INUSE 0x00008000
# define NVNET_MDIO_ADDR_WRITE 0x00000400
# define NVNET_MDIO_ADDR_PHYADDR 0x000003E0
# define NVNET_MDIO_ADDR_PHYREG 0x0000001F
#define NVNET_MDIO_DATA 0x194
#define NVNET_WAKEUPFLAGS 0x200
# define NVNET_WAKEUPFLAGS_VAL 0x00007770
# define NVNET_WAKEUPFLAGS_BUSYSHIFT 24
# define NVNET_WAKEUPFLAGS_ENABLESHIFT 16
# define NVNET_WAKEUPFLAGS_D3SHIFT 12
# define NVNET_WAKEUPFLAGS_D2SHIFT 8
# define NVNET_WAKEUPFLAGS_D1SHIFT 4
# define NVNET_WAKEUPFLAGS_D0SHIFT 0
# define NVNET_WAKEUPFLAGS_ACCEPT_MAGPAT 0x00000001
# define NVNET_WAKEUPFLAGS_ACCEPT_WAKEUPPAT 0x00000002
# define NVNET_WAKEUPFLAGS_ACCEPT_LINKCHANGE 0x00000004
#define NVNET_PATTERN_CRC 0x204
#define NVNET_PATTERN_MASK 0x208
#define NVNET_POWERCAP 0x268
# define NVNET_POWERCAP_D3SUPP (1 << 30)
# define NVNET_POWERCAP_D2SUPP (1 << 26)
# define NVNET_POWERCAP_D1SUPP (1 << 25)
#define NVNET_POWERSTATE 0x26C
# define NVNET_POWERSTATE_POWEREDUP 0x00008000
# define NVNET_POWERSTATE_VALID 0x00000100
# define NVNET_POWERSTATE_MASK 0x00000003
# define NVNET_POWERSTATE_D0 0x00000000
# define NVNET_POWERSTATE_D1 0x00000001
# define NVNET_POWERSTATE_D2 0x00000002
# define NVNET_POWERSTATE_D3 0x00000003
#define NV_TX_LASTPACKET (1 << 0)
#define NV_TX_RETRYERROR (1 << 3)
#define NV_TX_LASTPACKET1 (1 << 8)
#define NV_TX_DEFERRED (1 << 10)
#define NV_TX_CARRIERLOST (1 << 11)
#define NV_TX_LATECOLLISION (1 << 12)
#define NV_TX_UNDERFLOW (1 << 13)
#define NV_TX_ERROR (1 << 14)
#define NV_TX_VALID (1 << 15)
#define NV_RX_DESCRIPTORVALID (1 << 0)
#define NV_RX_MISSEDFRAME (1 << 1)
#define NV_RX_SUBSTRACT1 (1 << 3)
#define NV_RX_BIT4 (1 << 4)
#define NV_RX_ERROR1 (1 << 7)
#define NV_RX_ERROR2 (1 << 8)
#define NV_RX_ERROR3 (1 << 9)
#define NV_RX_ERROR4 (1 << 10)
#define NV_RX_CRCERR (1 << 11)
#define NV_RX_OVERFLOW (1 << 12)
#define NV_RX_FRAMINGERR (1 << 13)
#define NV_RX_ERROR (1 << 14)
#define NV_RX_AVAIL (1 << 15)
/* Miscellaneous hardware related defines: */
#define NV_PCI_REGSZ 0x270
/* various timeout delays: all in usec */
#define NV_TXRX_RESET_DELAY 4
#define NV_TXSTOP_DELAY1 10
#define NV_TXSTOP_DELAY1MAX 500000
#define NV_TXSTOP_DELAY2 100
#define NV_RXSTOP_DELAY1 10
#define NV_RXSTOP_DELAY1MAX 500000
#define NV_RXSTOP_DELAY2 100
#define NV_SETUP5_DELAY 5
#define NV_SETUP5_DELAYMAX 50000
#define NV_POWERUP_DELAY 5
#define NV_POWERUP_DELAYMAX 5000
#define NV_MIIBUSY_DELAY 50
#define NV_MIIPHY_DELAY 10
#define NV_MIIPHY_DELAYMAX 10000
#define NV_WAKEUPPATTERNS 5
#define NV_WAKEUPMASKENTRIES 4
/* General driver defaults */
#define NV_WATCHDOG_TIMEO (2 * HZ)
#define DEFAULT_MTU 1500
#define RX_RING 4
#define TX_RING 2
/* limited to 1 packet until we understand NV_TX_LASTPACKET */
#define TX_LIMIT_STOP 10
#define TX_LIMIT_START 5
/* rx / tx mac addr + type + vlan + align + slack */
#define RX_NIC_BUFSIZE (DEFAULT_MTU + 64)
/* even more slack */
#define RX_ALLOC_BUFSIZE (DEFAULT_MTU + 128)
#define TX_ALLOC_BUFSIZE (DEFAULT_MTU + 128)
#define OOM_REFILL (1 + HZ / 20)
#define POLL_WAIT (1 + HZ / 100)
/* Link partner ability register. */
#define LPA_SLCT 0x001F /* Same as advertise selector */
#define LPA_RESV 0x1C00 /* Unused... */
#define LPA_RFAULT 0x2000 /* Link partner faulted */
#define LPA_NPAGE 0x8000 /* Next page bit */
// clang-format on
#endif /* HW_NVNET_REGS_H */

View File

@ -1 +0,0 @@
#include "trace/trace-hw_xbox_mcpx_nvnet.h"

1
hw/xbox/mcpx/trace.h Normal file
View File

@ -0,0 +1 @@
#include "trace/trace-hw_xbox_mcpx.h"

View File

@ -5,6 +5,7 @@ specific_ss.add(files(
# 'chihiro.c',
'eeprom_generation.c',
'lpc47m157.c',
'nvnet.c',
'smbus_adm1032.c',
'smbus_cx25871.c',
'smbus_fs454.c',

View File

@ -155,9 +155,8 @@ static inline void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt)
void nv2a_dbg_renderdoc_init(void);
void *nv2a_dbg_renderdoc_get_api(void);
bool nv2a_dbg_renderdoc_available(void);
void nv2a_dbg_renderdoc_capture_frames(int num_frames, bool trace);
void nv2a_dbg_renderdoc_capture_frames(int num_frames);
extern int renderdoc_capture_frames;
extern bool renderdoc_trace_frames;
#endif
#ifdef __cplusplus

View File

@ -423,7 +423,7 @@ const VMStateDescription vmstate_nv2a_pgraph_vertex_attributes = {
static const VMStateDescription vmstate_nv2a = {
.name = "nv2a",
.version_id = 3,
.version_id = 2,
.minimum_version_id = 1,
.post_save = nv2a_post_save,
.post_load = nv2a_post_load,
@ -507,11 +507,9 @@ static const VMStateDescription vmstate_nv2a = {
VMSTATE_BOOL_ARRAY(pgraph.ltc1_dirty, NV2AState, NV2A_LTC1_COUNT),
VMSTATE_STRUCT_ARRAY(pgraph.vertex_attributes, NV2AState, NV2A_VERTEXSHADER_ATTRIBUTES, 1, vmstate_nv2a_pgraph_vertex_attributes, VertexAttribute),
VMSTATE_UINT32(pgraph.inline_array_length, NV2AState),
VMSTATE_UINT32_SUB_ARRAY(pgraph.inline_array, NV2AState, 0, NV2A_MAX_BATCH_LENGTH_V2),
VMSTATE_UINT32_SUB_ARRAY_V(pgraph.inline_array, NV2AState, NV2A_MAX_BATCH_LENGTH_V2, NV2A_MAX_BATCH_LENGTH - NV2A_MAX_BATCH_LENGTH_V2, 3),
VMSTATE_UINT32_ARRAY(pgraph.inline_array, NV2AState, NV2A_MAX_BATCH_LENGTH),
VMSTATE_UINT32(pgraph.inline_elements_length, NV2AState), // fixme
VMSTATE_UINT32_SUB_ARRAY(pgraph.inline_elements, NV2AState, 0, NV2A_MAX_BATCH_LENGTH_V2),
VMSTATE_UINT32_SUB_ARRAY_V(pgraph.inline_elements, NV2AState, NV2A_MAX_BATCH_LENGTH_V2, NV2A_MAX_BATCH_LENGTH - NV2A_MAX_BATCH_LENGTH_V2, 3),
VMSTATE_UINT32_ARRAY(pgraph.inline_elements, NV2AState, NV2A_MAX_BATCH_LENGTH),
VMSTATE_UINT32(pgraph.inline_buffer_length, NV2AState), // fixme
VMSTATE_UINT32(pgraph.draw_arrays_length, NV2AState),
VMSTATE_UINT32(pgraph.draw_arrays_max_count, NV2AState),

View File

@ -315,14 +315,11 @@
#define NV_PGRAPH_CSV0_C 0x00000FB8
# define NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START 0x0000FF00
# define NV_PGRAPH_CSV0_C_SPECULAR_ENABLE (1 << 16)
# define NV_PGRAPH_CSV0_C_ALPHA_FROM_MATERIAL_SPECULAR (1 << 17)
# define NV_PGRAPH_CSV0_C_SEPARATE_SPECULAR (1 << 18)
# define NV_PGRAPH_CSV0_C_SPECULAR (3 << 19)
# define NV_PGRAPH_CSV0_C_DIFFUSE (3 << 21)
# define NV_PGRAPH_CSV0_C_AMBIENT (3 << 23)
# define NV_PGRAPH_CSV0_C_EMISSION (3 << 25)
# define NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE (1 << 27)
# define NV_PGRAPH_CSV0_C_LOCALEYE (1 << 30)
# define NV_PGRAPH_CSV0_C_LIGHTING (1 << 31)
#define NV_PGRAPH_CSV1_B 0x00000FBC
#define NV_PGRAPH_CSV1_A 0x00000FC0
@ -409,10 +406,6 @@
# define NV_PGRAPH_CLEARRECTY_YMIN 0x00000FFF
# define NV_PGRAPH_CLEARRECTY_YMAX 0x0FFF0000
#define NV_PGRAPH_COLORCLEARVALUE 0x0000186C
#define NV_PGRAPH_COLORKEYCOLOR0 0x00001870
#define NV_PGRAPH_COLORKEYCOLOR1 0x00001874
#define NV_PGRAPH_COLORKEYCOLOR2 0x00001878
#define NV_PGRAPH_COLORKEYCOLOR3 0x0000187C
#define NV_PGRAPH_COMBINEFACTOR0 0x00001880
#define NV_PGRAPH_COMBINEFACTOR1 0x000018A0
#define NV_PGRAPH_COMBINEALPHAI0 0x000018C0
@ -537,7 +530,6 @@
#define NV_PGRAPH_TEXADDRESS2 0x000019C4
#define NV_PGRAPH_TEXADDRESS3 0x000019C8
#define NV_PGRAPH_TEXCTL0_0 0x000019CC
# define NV_PGRAPH_TEXCTL0_0_COLORKEYMODE 0x03
# define NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN (1 << 2)
# define NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP 0x0003FFC0
# define NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP 0x3FFC0000
@ -890,10 +882,6 @@
# define NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE (1 << 0)
# define NV097_SET_CONTROL0_Z_FORMAT (1 << 12)
# define NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE (1 << 16)
# define NV097_SET_LIGHT_CONTROL 0x00000294
# define NV097_SET_LIGHT_CONTROL_SEPARATE_SPECULAR 1
# define NV097_SET_LIGHT_CONTROL_LOCALEYE (1 << 16)
# define NV097_SET_LIGHT_CONTROL_ALPHA_FROM_MATERIAL_SPECULAR (1 << 17)
# define NV097_SET_COLOR_MATERIAL 0x00000298
# define NV097_SET_FOG_MODE 0x0000029C
# define NV097_SET_FOG_MODE_V_LINEAR 0x2601
@ -1061,7 +1049,6 @@
# define NV097_SET_TEXGEN_VIEW_MODEL_LOCAL_VIEWER 0
# define NV097_SET_TEXGEN_VIEW_MODEL_INFINITE_VIEWER 1
# define NV097_SET_FOG_PLANE 0x000009D0
# define NV097_SET_SPECULAR_PARAMS 0x000009E0
# define NV097_SET_SCENE_AMBIENT_COLOR 0x00000A10
# define NV097_SET_VIEWPORT_OFFSET 0x00000A20
# define NV097_SET_POINT_PARAMS 0x00000A30
@ -1070,7 +1057,6 @@
# define NV097_SET_COMBINER_FACTOR1 0x00000A80
# define NV097_SET_COMBINER_ALPHA_OCW 0x00000AA0
# define NV097_SET_COMBINER_COLOR_ICW 0x00000AC0
# define NV097_SET_COLOR_KEY_COLOR 0x00000AE0
# define NV097_SET_VIEWPORT_SCALE 0x00000AF0
# define NV097_SET_TRANSFORM_PROGRAM 0x00000B00
# define NV097_SET_TRANSFORM_CONSTANT 0x00000B80
@ -1113,11 +1099,6 @@
# define NV097_SET_TEXCOORD3_4F 0x00001620
# define NV097_SET_TEXCOORD3_2S 0x00001610
# define NV097_SET_TEXCOORD3_4S 0x00001630
# define NV097_SET_FOG_COORD 0x00001698
# define NV097_SET_WEIGHT1F 0x0000169C
# define NV097_SET_WEIGHT2F 0x000016A0
# define NV097_SET_WEIGHT3F 0x000016B0
# define NV097_SET_WEIGHT4F 0x000016C0
# define NV097_SET_VERTEX_DATA_ARRAY_OFFSET 0x00001720
# define NV097_SET_VERTEX_DATA_ARRAY_FORMAT 0x00001760
# define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE 0x0000000F
@ -1271,7 +1252,6 @@
# define NV097_SET_CLEAR_RECT_HORIZONTAL 0x00001D98
# define NV097_SET_CLEAR_RECT_VERTICAL 0x00001D9C
# define NV097_SET_SPECULAR_FOG_FACTOR 0x00001E20
# define NV097_SET_SPECULAR_PARAMS_BACK 0x00001E28
# define NV097_SET_COMBINER_COLOR_OCW 0x00001E40
# define NV097_SET_COMBINER_CONTROL 0x00001E60
# define NV097_SET_SHADOW_ZSLOPE_THRESHOLD 0x00001E68
@ -1473,22 +1453,7 @@
#define NV2A_NUM_SUBCHANNELS 8
#define NV2A_CACHE1_SIZE 128
/* This is a multi-use limit. Testing on an Xbox 1.0, it is possible to send
* arrays of at least 0x0FFFFF elements without issue, however sending
* NV097_DRAW_ARRAYS with a start value > 0xFFFF raises an exception implying
* that there may be a vertex limit. Since xemu uses batch length for vertex
* elements in NV097_INLINE_ARRAY the size should ideally be high enough to
* accommodate 0xFFFF vertices with maximum attributes specified.
*
* Retail games are known to send at least 0x410FA elements in a single draw, so
* a somewhat larger value is selected to balance memory use with real-world
* limits.
*
* NV2A_MAX_BATCH_LENGTH_V2 is the previous limit, for migration.
* FIXME: Remove NV2A_MAX_BATCH_LENGTH_V2 at some point in the future.
*/
#define NV2A_MAX_BATCH_LENGTH 0x07FFFF
#define NV2A_MAX_BATCH_LENGTH_V2 0x1FFFF
#define NV2A_MAX_BATCH_LENGTH 0x1FFFF
#define NV2A_VERTEXSHADER_ATTRIBUTES 16
#define NV2A_MAX_TEXTURES 4

View File

@ -36,7 +36,6 @@
static RENDERDOC_API_1_6_0 *rdoc_api = NULL;
int renderdoc_capture_frames = 0;
bool renderdoc_trace_frames = false;
void nv2a_dbg_renderdoc_init(void)
{
@ -90,8 +89,7 @@ bool nv2a_dbg_renderdoc_available(void)
return rdoc_api != NULL;
}
void nv2a_dbg_renderdoc_capture_frames(int num_frames, bool trace)
void nv2a_dbg_renderdoc_capture_frames(int num_frames)
{
renderdoc_capture_frames += num_frames;
renderdoc_trace_frames = trace;
}

View File

@ -298,7 +298,7 @@ static const SurfaceFormatInfo kelvin_surface_color_format_gl_map[] = {
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] =
{1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0},
[NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] =
{2, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0},
{2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0},
};
static const SurfaceFormatInfo kelvin_surface_zeta_float_format_gl_map[] = {

View File

@ -29,8 +29,6 @@
#include <assert.h>
#ifdef CONFIG_RENDERDOC
#include "trace/control.h"
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"
#endif
@ -156,8 +154,7 @@ void gl_debug_frame_terminator(void)
RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api();
if (rdoc_api->IsTargetControlConnected()) {
bool capturing = rdoc_api->IsFrameCapturing();
if (capturing && renderdoc_capture_frames == 0) {
if (rdoc_api->IsFrameCapturing()) {
rdoc_api->EndFrameCapture(NULL, NULL);
GLenum error = glGetError();
if (error != GL_NO_ERROR) {
@ -165,16 +162,8 @@ void gl_debug_frame_terminator(void)
"Renderdoc EndFrameCapture triggered GL error 0x%X - ignoring\n",
error);
}
if (renderdoc_trace_frames) {
trace_enable_events("-nv2a_pgraph_*");
renderdoc_trace_frames = false;
}
}
if (renderdoc_capture_frames > 0) {
if (!capturing) {
if (renderdoc_trace_frames) {
trace_enable_events("nv2a_pgraph_*");
}
rdoc_api->StartFrameCapture(NULL, NULL);
GLenum error = glGetError();
if (error != GL_NO_ERROR) {
@ -182,7 +171,6 @@ void gl_debug_frame_terminator(void)
"Renderdoc StartFrameCapture triggered GL error 0x%X - ignoring\n",
error);
}
}
--renderdoc_capture_frames;
}
}

View File

@ -32,10 +32,10 @@
#include "config-host.h"
void gl_debug_initialize(void);
void gl_debug_message(bool cc, const char *fmt, ...) __attribute__ ((format (printf, 2, 3)));
void gl_debug_group_begin(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
void gl_debug_message(bool cc, const char *fmt, ...);
void gl_debug_group_begin(const char *fmt, ...);
void gl_debug_group_end(void);
void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
void gl_debug_frame_terminator(void);
# define NV2A_GL_DPRINTF(cc, format, ...) \

View File

@ -68,7 +68,7 @@ void pgraph_gl_init_display(NV2AState *d)
"{\n"
" vec2 texCoord = gl_FragCoord.xy/display_size;\n"
" float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
" texCoord.y = rel*(1.0f - texCoord.y);"
" texCoord.y = 1 + rel*(texCoord.y - 1);"
" out_Color.rgba = texture(tex, texCoord);\n"
" if (pvideo_enable) {\n"
" vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"

View File

@ -92,6 +92,7 @@ void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter)
scissor_height = ymax - ymin + 1;
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin,
xmin + scissor_width - 1, ymin + scissor_height - 1);
@ -203,10 +204,9 @@ void pgraph_gl_draw_begin(NV2AState *d)
}
/* Front-face select */
/* Winding is reverse here because clip-space y-coordinates are inverted */
glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER)
& NV_PGRAPH_SETUPRASTER_FRONTFACE
? GL_CW : GL_CCW);
? GL_CCW : GL_CW);
/* Polygon offset */
/* FIXME: GL implementation-specific, maybe do this in VS? */
@ -340,6 +340,7 @@ void pgraph_gl_draw_begin(NV2AState *d)
pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin);
pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
ymin = pg->surface_binding_dim.height - (ymin + scissor_height);
pgraph_apply_scaling_factor(pg, &xmin, &ymin);
pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height);

View File

@ -33,7 +33,7 @@
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "hw/xbox/nv2a/pgraph/surface.h"
#include "hw/xbox/nv2a/pgraph/texture.h"
#include "hw/xbox/nv2a/pgraph/glsl/shaders.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "gloffscreen.h"
#include "constants.h"
@ -82,30 +82,6 @@ typedef struct TextureBinding {
GLuint gl_texture;
} TextureBinding;
typedef struct ShaderModuleCacheKey {
GLenum kind;
union {
struct {
VshState state;
GenVshGlslOptions glsl_opts;
} vsh;
struct {
GeomState state;
GenGeomGlslOptions glsl_opts;
} geom;
struct {
PshState state;
GenPshGlslOptions glsl_opts;
} psh;
};
} ShaderModuleCacheKey;
typedef struct ShaderModuleCacheEntry {
LruNode node;
ShaderModuleCacheKey key;
GLuint gl_shader;
} ShaderModuleCacheEntry;
typedef struct ShaderBinding {
LruNode node;
bool initialized;
@ -120,10 +96,36 @@ typedef struct ShaderBinding {
GLuint gl_program;
GLenum gl_primitive_mode;
struct {
PshUniformLocs psh;
VshUniformLocs vsh;
} uniform_locs;
GLint psh_constant_loc[9][2];
GLint alpha_ref_loc;
GLint bump_mat_loc[NV2A_MAX_TEXTURES];
GLint bump_scale_loc[NV2A_MAX_TEXTURES];
GLint bump_offset_loc[NV2A_MAX_TEXTURES];
GLint tex_scale_loc[NV2A_MAX_TEXTURES];
GLint surface_size_loc;
GLint clip_range_loc;
GLint depth_offset_loc;
GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS];
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
GLint inv_viewport_loc;
GLint ltctxa_loc[NV2A_LTCTXA_COUNT];
GLint ltctxb_loc[NV2A_LTCTXB_COUNT];
GLint ltc1_loc[NV2A_LTC1_COUNT];
GLint fog_color_loc;
GLint fog_param_loc;
GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS];
GLint light_local_position_loc[NV2A_MAX_LIGHTS];
GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS];
GLint clip_region_loc[8];
GLint material_alpha_loc;
} ShaderBinding;
typedef struct VertexKey {
@ -199,9 +201,6 @@ typedef struct PGRAPHGLState {
QemuMutex shader_cache_lock;
QemuThread shader_disk_thread;
Lru shader_module_cache;
ShaderModuleCacheEntry *shader_module_cache_entries;
unsigned int zpass_pixel_count_result;
unsigned int gl_zpass_pixel_count_query_count;
GLuint *gl_zpass_pixel_count_queries;

View File

@ -22,9 +22,14 @@
#include "qemu/osdep.h"
#include "qemu/fast-hash.h"
#include "qemu/mstring.h"
#include <locale.h>
#include "xemu-version.h"
#include "ui/xemu-settings.h"
#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "hw/xbox/nv2a/pgraph/util.h"
#include "debug.h"
#include "renderer.h"
@ -95,131 +100,154 @@ static GLuint create_gl_shader(GLenum gl_shader_type,
return shader;
}
static void set_texture_sampler_uniforms(ShaderBinding *binding)
static void update_shader_constant_locations(ShaderBinding *binding)
{
char tmp[64];
/* set texture samplers */
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
char samplerName[16];
snprintf(samplerName, sizeof(samplerName), "texSamp%d", i);
GLint texSampLoc =
glGetUniformLocation(binding->gl_program, samplerName);
GLint texSampLoc = glGetUniformLocation(binding->gl_program, samplerName);
if (texSampLoc >= 0) {
glUniform1i(texSampLoc, i);
}
}
}
static void update_shader_uniform_locs(ShaderBinding *binding)
{
char tmp[64];
for (int i = 0; i < ARRAY_SIZE(binding->uniform_locs.vsh); i++) {
const char *name = VshUniformInfo[i].name;
if (VshUniformInfo[i].count > 1) {
snprintf(tmp, sizeof(tmp), "%s[0]", name);
name = tmp;
}
binding->uniform_locs.vsh[i] = glGetUniformLocation(binding->gl_program, name);
/* validate the program */
glValidateProgram(binding->gl_program);
GLint valid = 0;
glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid);
if (!valid) {
GLchar log[1024];
glGetProgramInfoLog(binding->gl_program, 1024, NULL, log);
fprintf(stderr, "nv2a: shader validation failed: %s\n", log);
abort();
}
for (int i = 0; i < ARRAY_SIZE(binding->uniform_locs.psh); i++) {
const char *name = PshUniformInfo[i].name;
if (PshUniformInfo[i].count > 1) {
snprintf(tmp, sizeof(tmp), "%s[0]", name);
name = tmp;
/* lookup fragment shader uniforms */
for (int i = 0; i < 9; i++) {
for (int j = 0; j < 2; j++) {
snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp);
}
binding->uniform_locs.psh[i] = glGetUniformLocation(binding->gl_program, name);
}
binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef");
for (int i = 1; i < NV2A_MAX_TEXTURES; i++) {
snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
}
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
snprintf(tmp, sizeof(tmp), "texScale%d", i);
binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
}
/* lookup vertex shader uniforms */
for (int i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) {
snprintf(tmp, sizeof(tmp), "c[%d]", i);
binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
}
binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize");
binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange");
binding->depth_offset_loc = glGetUniformLocation(binding->gl_program, "depthOffset");
binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor");
binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam");
binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport");
for (int i = 0; i < NV2A_LTCTXA_COUNT; i++) {
snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i);
binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
}
for (int i = 0; i < NV2A_LTCTXB_COUNT; i++) {
snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i);
binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
}
for (int i = 0; i < NV2A_LTC1_COUNT; i++) {
snprintf(tmp, sizeof(tmp), "ltc1[%d]", i);
binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
}
for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
binding->light_infinite_half_vector_loc[i] =
glGetUniformLocation(binding->gl_program, tmp);
snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
binding->light_infinite_direction_loc[i] =
glGetUniformLocation(binding->gl_program, tmp);
snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
binding->light_local_attenuation_loc[i] =
glGetUniformLocation(binding->gl_program, tmp);
}
for (int i = 0; i < 8; i++) {
snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i);
binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp);
}
if (binding->state.fixed_function) {
binding->material_alpha_loc =
glGetUniformLocation(binding->gl_program, "material_alpha");
} else {
binding->material_alpha_loc = -1;
}
}
static void shader_module_cache_entry_init(Lru *lru, LruNode *node,
const void *key)
static void generate_shaders(ShaderBinding *binding)
{
ShaderModuleCacheEntry *module =
container_of(node, ShaderModuleCacheEntry, node);
memcpy(&module->key, key, sizeof(ShaderModuleCacheKey));
const char *kind_str;
MString *code;
switch (module->key.kind) {
case GL_VERTEX_SHADER:
kind_str = "vertex shader";
code = pgraph_glsl_gen_vsh(&module->key.vsh.state,
module->key.vsh.glsl_opts);
break;
case GL_GEOMETRY_SHADER:
kind_str = "geometry shader";
code = pgraph_glsl_gen_geom(&module->key.geom.state,
module->key.geom.glsl_opts);
break;
case GL_FRAGMENT_SHADER:
kind_str = "fragment shader";
code = pgraph_glsl_gen_psh(&module->key.psh.state,
module->key.psh.glsl_opts);
break;
default:
assert(!"Invalid shader module kind");
kind_str = "unknown";
code = NULL;
char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
if (previous_numeric_locale) {
previous_numeric_locale = g_strdup(previous_numeric_locale);
}
module->gl_shader =
create_gl_shader(module->key.kind, mstring_get_str(code), kind_str);
mstring_unref(code);
}
static void shader_module_cache_entry_post_evict(Lru *lru, LruNode *node)
{
ShaderModuleCacheEntry *module =
container_of(node, ShaderModuleCacheEntry, node);
glDeleteShader(module->gl_shader);
}
static bool shader_module_cache_entry_compare(Lru *lru, LruNode *node,
const void *key)
{
ShaderModuleCacheEntry *module =
container_of(node, ShaderModuleCacheEntry, node);
return memcmp(&module->key, key, sizeof(ShaderModuleCacheKey));
}
static GLuint get_shader_module_for_key(PGRAPHGLState *r,
const ShaderModuleCacheKey *key)
{
uint64_t hash = fast_hash((void *)key, sizeof(ShaderModuleCacheKey));
LruNode *node = lru_lookup(&r->shader_module_cache, hash, key);
ShaderModuleCacheEntry *module =
container_of(node, ShaderModuleCacheEntry, node);
return module->gl_shader;
}
static void generate_shaders(PGRAPHGLState *r, ShaderBinding *binding)
{
/* Ensure numeric values are printed with '.' radix, no grouping */
setlocale(LC_NUMERIC, "C");
GLuint program = glCreateProgram();
ShaderState *state = &binding->state;
ShaderModuleCacheKey key;
bool need_geometry_shader = pgraph_glsl_need_geom(&state->geom);
if (need_geometry_shader) {
memset(&key, 0, sizeof(key));
key.kind = GL_GEOMETRY_SHADER;
key.geom.state = state->geom;
glAttachShader(program, get_shader_module_for_key(r, &key));
/* Create an optional geometry shader and find primitive type */
GLenum gl_primitive_mode =
get_gl_primitive_mode(state->polygon_front_mode, state->primitive_mode);
MString* geometry_shader_code =
pgraph_gen_geom_glsl(state->polygon_front_mode,
state->polygon_back_mode,
state->primitive_mode,
state->smooth_shading,
false);
if (geometry_shader_code) {
const char* geometry_shader_code_str =
mstring_get_str(geometry_shader_code);
GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER,
geometry_shader_code_str,
"geometry shader");
glAttachShader(program, geometry_shader);
mstring_unref(geometry_shader_code);
}
/* create the vertex shader */
memset(&key, 0, sizeof(key));
key.kind = GL_VERTEX_SHADER;
key.vsh.state = state->vsh;
key.vsh.glsl_opts.prefix_outputs = need_geometry_shader;
glAttachShader(program, get_shader_module_for_key(r, &key));
MString *vertex_shader_code =
pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL);
GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER,
mstring_get_str(vertex_shader_code),
"vertex shader");
glAttachShader(program, vertex_shader);
mstring_unref(vertex_shader_code);
/* generate a fragment shader from register combiners */
memset(&key, 0, sizeof(key));
key.kind = GL_FRAGMENT_SHADER;
key.psh.state = state->psh;
glAttachShader(program, get_shader_module_for_key(r, &key));
MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh);
const char *fragment_shader_code_str =
mstring_get_str(fragment_shader_code);
GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER,
fragment_shader_code_str,
"fragment shader");
glAttachShader(program, fragment_shader);
mstring_unref(fragment_shader_code);
/* link the program */
glLinkProgram(program);
@ -234,25 +262,15 @@ static void generate_shaders(PGRAPHGLState *r, ShaderBinding *binding)
glUseProgram(program);
binding->gl_program = program;
binding->gl_primitive_mode = get_gl_primitive_mode(
state->geom.polygon_front_mode, state->geom.primitive_mode);
binding->initialized = true;
binding->gl_program = program;
binding->gl_primitive_mode = gl_primitive_mode;
update_shader_constant_locations(binding);
set_texture_sampler_uniforms(binding);
/* validate the program */
GLint valid = 0;
glValidateProgram(program);
glGetProgramiv(program, GL_VALIDATE_STATUS, &valid);
if (!valid) {
GLchar log[1024];
glGetProgramInfoLog(program, 1024, NULL, log);
fprintf(stderr, "nv2a: shader validation failed: %s\n", log);
abort();
if (previous_numeric_locale) {
setlocale(LC_NUMERIC, previous_numeric_locale);
g_free(previous_numeric_locale);
}
update_shader_uniform_locs(binding);
}
static const char *shader_gl_vendor = NULL;
@ -328,19 +346,6 @@ bool pgraph_gl_shader_load_from_memory(ShaderBinding *binding)
return false;
}
glUseProgram(gl_program);
g_free(binding->program);
binding->program = NULL;
binding->gl_program = gl_program;
binding->gl_primitive_mode =
get_gl_primitive_mode(binding->state.geom.polygon_front_mode,
binding->state.geom.primitive_mode);
binding->initialized = true;
set_texture_sampler_uniforms(binding);
glValidateProgram(gl_program);
GLint valid = 0;
glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid);
@ -352,7 +357,17 @@ bool pgraph_gl_shader_load_from_memory(ShaderBinding *binding)
return false;
}
update_shader_uniform_locs(binding);
glUseProgram(gl_program);
binding->gl_program = gl_program;
binding->gl_primitive_mode = get_gl_primitive_mode(
binding->state.polygon_front_mode, binding->state.primitive_mode);
binding->initialized = true;
g_free(binding->program);
binding->program = NULL;
update_shader_constant_locations(binding);
return true;
}
@ -492,7 +507,7 @@ static void *shader_reload_lru_from_disk(void *arg)
return NULL;
}
static void shader_cache_entry_init(Lru *lru, LruNode *node, const void *state)
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
ShaderBinding *binding = container_of(node, ShaderBinding, node);
memcpy(&binding->state, state, sizeof(ShaderState));
@ -522,7 +537,7 @@ static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
memset(&binding->state, 0, sizeof(ShaderState));
}
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, const void *key)
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
ShaderBinding *binding = container_of(node, ShaderBinding, node);
return memcmp(&binding->state, key, sizeof(ShaderState));
@ -556,20 +571,6 @@ void pgraph_gl_init_shaders(PGRAPHState *pg)
qemu_thread_create(&r->shader_disk_thread, "pgraph.renderer_state->shader_cache",
shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE);
/* FIXME: Make this configurable */
const size_t shader_module_cache_size = 50*1024;
lru_init(&r->shader_module_cache);
r->shader_module_cache_entries =
g_malloc_n(shader_module_cache_size, sizeof(ShaderModuleCacheEntry));
assert(r->shader_module_cache_entries != NULL);
for (int i = 0; i < shader_module_cache_size; i++) {
lru_add_free(&r->shader_module_cache, &r->shader_module_cache_entries[i].node);
}
r->shader_module_cache.init_node = shader_module_cache_entry_init;
r->shader_module_cache.compare_nodes = shader_module_cache_entry_compare;
r->shader_module_cache.post_node_evict = shader_module_cache_entry_post_evict;
}
void pgraph_gl_finalize_shaders(PGRAPHState *pg)
@ -581,10 +582,6 @@ void pgraph_gl_finalize_shaders(PGRAPHState *pg)
free(r->shader_cache_entries);
r->shader_cache_entries = NULL;
lru_flush(&r->shader_module_cache);
g_free(r->shader_module_cache_entries);
r->shader_module_cache_entries = NULL;
qemu_mutex_destroy(&r->shader_cache_lock);
}
@ -688,72 +685,341 @@ void pgraph_gl_shader_cache_to_disk(ShaderBinding *binding)
qemu_thread_create(binding->save_thread, name, shader_write_to_disk, binding, QEMU_THREAD_JOINABLE);
}
static void apply_uniform_updates(const UniformInfo *info, int *locs,
void *values, size_t count)
{
for (int i = 0; i < count; i++) {
if (locs[i] == -1) {
continue;
}
void *value = (char*)values + info[i].val_offs;
switch (info[i].type) {
case UniformElementType_uint:
glUniform1uiv(locs[i], info[i].count, value);
break;
case UniformElementType_int:
glUniform1iv(locs[i], info[i].count, value);
break;
case UniformElementType_ivec4:
glUniform4iv(locs[i], info[i].count, value);
break;
case UniformElementType_float:
glUniform1fv(locs[i], info[i].count, value);
break;
case UniformElementType_vec2:
glUniform2fv(locs[i], info[i].count, value);
break;
case UniformElementType_vec3:
glUniform3fv(locs[i], info[i].count, value);
break;
case UniformElementType_vec4:
glUniform4fv(locs[i], info[i].count, value);
break;
case UniformElementType_mat2:
glUniformMatrix2fv(locs[i], info[i].count, GL_FALSE, value);
break;
default:
g_assert_not_reached();
}
}
assert(glGetError() == GL_NO_ERROR);
}
// FIXME: Dirty tracking
// FIXME: Consider UBO to align with VK renderer
static void update_shader_uniforms(PGRAPHState *pg, ShaderBinding *binding)
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
bool binding_changed)
{
PGRAPHGLState *r = pg->gl_renderer_state;
int i, j;
VshUniformValues vsh_values;
pgraph_glsl_set_vsh_uniform_values(pg, &binding->state.vsh,
binding->uniform_locs.vsh, &vsh_values);
apply_uniform_updates(VshUniformInfo, binding->uniform_locs.vsh,
&vsh_values, VshUniform__COUNT);
/* update combiner constants */
for (i = 0; i < 9; i++) {
uint32_t constant[2];
if (i == 8) {
/* final combiner */
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
} else {
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
}
PshUniformValues psh_values;
pgraph_glsl_set_psh_uniform_values(pg, binding->uniform_locs.psh, &psh_values);
for (int i = 0; i < 4; i++) {
if (r->texture_binding[i] != NULL) {
float scale = r->texture_binding[i]->scale;
psh_values.texScale[i] = scale;
for (j = 0; j < 2; j++) {
GLint loc = binding->psh_constant_loc[i][j];
if (loc != -1) {
float value[4];
pgraph_argb_pack32_to_rgba_float(constant[j], value);
glUniform4fv(loc, 1, value);
}
}
apply_uniform_updates(PshUniformInfo, binding->uniform_locs.psh,
&psh_values, PshUniform__COUNT);
}
if (binding->alpha_ref_loc != -1) {
int alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
NV_PGRAPH_CONTROL_0_ALPHAREF);
glUniform1i(binding->alpha_ref_loc, alpha_ref);
}
/* For each texture stage */
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
GLint loc;
/* Bump luminance only during stages 1 - 3 */
if (i > 0) {
loc = binding->bump_mat_loc[i];
if (loc != -1) {
uint32_t m_u32[4];
m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
float m[4];
m[0] = *(float*)&m_u32[0];
m[1] = *(float*)&m_u32[1];
m[2] = *(float*)&m_u32[2];
m[3] = *(float*)&m_u32[3];
glUniformMatrix2fv(loc, 1, GL_FALSE, m);
}
loc = binding->bump_scale_loc[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
glUniform1f(loc, *(float*)&v);
}
loc = binding->bump_offset_loc[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
glUniform1f(loc, *(float*)&v);
}
}
loc = r->shader_binding->tex_scale_loc[i];
if (loc != -1) {
assert(r->texture_binding[i] != NULL);
glUniform1f(loc, (float)r->texture_binding[i]->scale);
}
}
if (binding->fog_color_loc != -1) {
uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
glUniform4f(binding->fog_color_loc,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
}
if (binding->fog_param_loc != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
glUniform2f(binding->fog_param_loc, *(float *)&v[0], *(float *)&v[1]);
}
float zmax;
switch (pg->surface_shape.zeta_format) {
case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
break;
case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
break;
default:
assert(0);
}
if (binding->state.fixed_function) {
/* update lighting constants */
struct {
uint32_t* v;
bool* dirty;
GLint* locs;
size_t len;
} lighting_arrays[] = {
{&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT},
{&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT},
{&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT},
};
for (i=0; i<ARRAY_SIZE(lighting_arrays); i++) {
uint32_t *lighting_v = lighting_arrays[i].v;
bool *lighting_dirty = lighting_arrays[i].dirty;
GLint *lighting_locs = lighting_arrays[i].locs;
size_t lighting_len = lighting_arrays[i].len;
for (j=0; j<lighting_len; j++) {
if (!lighting_dirty[j] && !binding_changed) continue;
GLint loc = lighting_locs[j];
if (loc != -1) {
glUniform4fv(loc, 1, (const GLfloat*)&lighting_v[j*4]);
}
lighting_dirty[j] = false;
}
}
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
GLint loc;
loc = binding->light_infinite_half_vector_loc[i];
if (loc != -1) {
glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]);
}
loc = binding->light_infinite_direction_loc[i];
if (loc != -1) {
glUniform3fv(loc, 1, pg->light_infinite_direction[i]);
}
loc = binding->light_local_position_loc[i];
if (loc != -1) {
glUniform3fv(loc, 1, pg->light_local_position[i]);
}
loc = binding->light_local_attenuation_loc[i];
if (loc != -1) {
glUniform3fv(loc, 1, pg->light_local_attenuation[i]);
}
}
/* estimate the viewport by assuming it matches the surface ... */
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width);
float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height);
float m33 = zmax;
float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
float invViewport[16] = {
1.0/m11, 0, 0, 0,
0, 1.0/m22, 0, 0,
0, 0, 1.0/m33, 0,
-1.0+m41/m11, 1.0+m42/m22, 0, 1.0
};
if (binding->inv_viewport_loc != -1) {
glUniformMatrix4fv(binding->inv_viewport_loc,
1, GL_FALSE, &invViewport[0]);
}
}
/* update vertex program constants */
for (i=0; i<NV2A_VERTEXSHADER_CONSTANTS; i++) {
if (!pg->vsh_constants_dirty[i] && !binding_changed) continue;
GLint loc = binding->vsh_constant_loc[i];
if ((loc != -1) &&
memcmp(binding->vsh_constants[i], pg->vsh_constants[i],
sizeof(pg->vsh_constants[1]))) {
glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]);
memcpy(binding->vsh_constants[i], pg->vsh_constants[i],
sizeof(pg->vsh_constants[i]));
}
pg->vsh_constants_dirty[i] = false;
}
if (binding->surface_size_loc != -1) {
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
glUniform2f(binding->surface_size_loc,
pg->surface_binding_dim.width / aa_width,
pg->surface_binding_dim.height / aa_height);
}
if (binding->clip_range_loc != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
float zclip_min = *(float *)&v[0];
float zclip_max = *(float *)&v[1];
glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max);
}
if (binding->depth_offset_loc != -1) {
float zbias = 0.0f;
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
(NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
zbias = *(float *)&zbias_u32;
if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 &&
(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) {
/* TODO: emulate zfactor when z_perspective true, i.e.
* w-buffering. Perhaps calculate an additional offset based on
* triangle orientation in geometry shader and pass the result
* to fragment shader and add it to gl_FragDepth as well.
*/
NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering");
}
}
glUniform1f(binding->depth_offset_loc, zbias);
}
/* Clipping regions */
unsigned int max_gl_width = pg->surface_binding_dim.width;
unsigned int max_gl_height = pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
for (i = 0; i < 8; i++) {
uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
pgraph_apply_scaling_factor(pg, &x_min, &y_min);
pgraph_apply_scaling_factor(pg, &x_max, &y_max);
/* Translate for the GL viewport origin */
int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0);
int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height);
glUniform4i(r->shader_binding->clip_region_loc[i],
x_min, y_min_xlat, x_max, y_max_xlat);
}
if (binding->material_alpha_loc != -1) {
glUniform1f(binding->material_alpha_loc, pg->material_alpha);
}
}
/*
 * Report whether any shader-relevant state changed since the previous call.
 *
 * A function-local static shadow copy is kept for every register and
 * PGRAPHState field listed in DIRTY_REGS. If every current value equals its
 * shadow copy the function returns false. On the first mismatch it jumps to
 * `dirty`, refreshes all shadow copies from the current state and returns
 * true. The copies are `static`, so they persist across calls and are shared
 * by every caller regardless of which `pg` is passed.
 */
static bool test_shaders_dirty(PGRAPHState *pg)
{
/* Entry helpers: CR_n(reg) names n consecutive 32-bit registers starting at
 * `reg`; CF names a single field, CFA every element of an array field. */
#define CR_1(reg) CR_x(reg, 1)
#define CR_4(reg) CR_x(reg, 4)
#define CR_8(reg) CR_x(reg, 8)
#define CF(src, name) CF_x(typeof(src), (&src), name, 1)
#define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src))
/* Name of the static shadow array belonging to an entry. */
#define CNAME(name) reg_check__ ## name
/* Pass 1 bodies: declare the static shadow array for an entry. */
#define CX_x__define(type, name, x) static type CNAME(name)[x];
#define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x)
#define CF_x__define(type, src, name, x) CX_x__define(type, name, x)
/* Pass 2 bodies: compare the current value with the shadow copy and bail out
 * to `dirty` on the first difference. Registers are read at reg + i*4. */
#define CR_x__check(reg, x) \
for (int i = 0; i < x; i++) { if (pgraph_reg_r(pg, reg+i*4) != CNAME(reg)[i]) goto dirty; }
#define CF_x__check(type, src, name, x) \
for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; }
/* Pass 3 bodies: overwrite the shadow copy with the current value. */
#define CR_x__update(reg, x) \
for (int i = 0; i < x; i++) { CNAME(reg)[i] = pgraph_reg_r(pg, reg+i*4); }
#define CF_x__update(type, src, name, x) \
for (int i = 0; i < x; i++) { CNAME(name)[i] = src[i]; }
/* The tracked state. This list is expanded three times below, each time with
 * CR_x / CF_x bound to a different pass body. */
#define DIRTY_REGS \
CR_1(NV_PGRAPH_COMBINECTL) \
CR_1(NV_PGRAPH_SHADERCTL) \
CR_1(NV_PGRAPH_SHADOWCTL) \
CR_1(NV_PGRAPH_COMBINESPECFOG0) \
CR_1(NV_PGRAPH_COMBINESPECFOG1) \
CR_1(NV_PGRAPH_CONTROL_0) \
CR_1(NV_PGRAPH_CONTROL_3) \
CR_1(NV_PGRAPH_CSV0_C) \
CR_1(NV_PGRAPH_CSV0_D) \
CR_1(NV_PGRAPH_CSV1_A) \
CR_1(NV_PGRAPH_CSV1_B) \
CR_1(NV_PGRAPH_SETUPRASTER) \
CR_1(NV_PGRAPH_SHADERPROG) \
CR_1(NV_PGRAPH_ZCOMPRESSOCCLUDE) \
CR_8(NV_PGRAPH_COMBINECOLORI0) \
CR_8(NV_PGRAPH_COMBINECOLORO0) \
CR_8(NV_PGRAPH_COMBINEALPHAI0) \
CR_8(NV_PGRAPH_COMBINEALPHAO0) \
CR_8(NV_PGRAPH_COMBINEFACTOR0) \
CR_8(NV_PGRAPH_COMBINEFACTOR1) \
CR_1(NV_PGRAPH_SHADERCLIPMODE) \
CR_4(NV_PGRAPH_TEXCTL0_0) \
CR_4(NV_PGRAPH_TEXFMT0) \
CR_4(NV_PGRAPH_TEXFILTER0) \
CR_8(NV_PGRAPH_WINDOWCLIPX0) \
CR_8(NV_PGRAPH_WINDOWCLIPY0) \
CF(pg->primitive_mode, primitive_mode) \
CF(pg->surface_scale_factor, surface_scale_factor) \
CF(pg->compressed_attrs, compressed_attrs) \
CFA(pg->texture_matrix_enable, texture_matrix_enable)
/* Pass 1: declare the static shadow copies. */
#define CR_x(reg, x) CR_x__define(reg, x)
#define CF_x(type, src, name, x) CF_x__define(type, src, name, x)
DIRTY_REGS
#undef CR_x
#undef CF_x
/* Pass 2: compare; any mismatch jumps to the `dirty` label. */
#define CR_x(reg, x) CR_x__check(reg, x)
#define CF_x(type, src, name, x) CF_x__check(type, src, name, x)
DIRTY_REGS
#undef CR_x
#undef CF_x
/* Every tracked value matched its shadow copy. */
return false;
dirty:
/* Pass 3: refresh every shadow copy, not only the entry that differed. */
#define CR_x(reg, x) CR_x__update(reg, x)
#define CF_x(type, src, name, x) CF_x__update(type, src, name, x)
DIRTY_REGS
#undef CR_x
#undef CF_x
return true;
}
void pgraph_gl_bind_shaders(PGRAPHState *pg)
@ -761,17 +1027,18 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg)
PGRAPHGLState *r = pg->gl_renderer_state;
bool binding_changed = false;
if (r->shader_binding &&
!pgraph_glsl_check_shader_state_dirty(pg, &r->shader_binding->state)) {
if (r->shader_binding && !test_shaders_dirty(pg) && !pg->program_data_dirty) {
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
goto update_uniforms;
goto update_constants;
}
ShaderBinding *old_binding = r->shader_binding;
ShaderState state = pgraph_glsl_get_shader_state(pg);
ShaderState state = pgraph_get_shader_state(pg);
assert(!state.vulkan);
NV2A_GL_DGROUP_BEGIN("%s (%s)", __func__,
state.vsh.is_fixed_function ? "FF" : "PROG");
NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__,
state.vertex_program ? "yes" : "no",
state.fixed_function ? "yes" : "no");
qemu_mutex_lock(&r->shader_cache_lock);
@ -783,7 +1050,7 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg)
if (!binding->initialized && !pgraph_gl_shader_load_from_memory(binding)) {
nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
generate_shaders(r, binding);
generate_shaders(binding);
if (g_config.perf.cache_shaders) {
pgraph_gl_shader_cache_to_disk(binding);
}
@ -802,10 +1069,10 @@ void pgraph_gl_bind_shaders(PGRAPHState *pg)
NV2A_GL_DGROUP_END();
update_uniforms:
update_constants:
assert(r->shader_binding);
assert(r->shader_binding->initialized);
update_shader_uniforms(pg, r->shader_binding);
shader_update_constants(pg, r->shader_binding, binding_changed);
}
GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src)

View File

@ -137,7 +137,11 @@ static void init_render_to_texture(PGRAPHState *pg)
"layout(location = 0) out vec4 out_Color;\n"
"void main()\n"
"{\n"
" vec2 texCoord = gl_FragCoord.xy / textureSize(tex, 0).xy;\n"
" vec2 texCoord;\n"
" texCoord.x = gl_FragCoord.x;\n"
" texCoord.y = (surface_size.y - gl_FragCoord.y)\n"
" + (textureSize(tex,0).y - surface_size.y);\n"
" texCoord /= textureSize(tex,0).xy;\n"
" out_Color.rgba = texture(tex, texCoord);\n"
"}\n";
@ -294,7 +298,7 @@ static void render_surface_to_texture_slow(NV2AState *d,
size_t bufsize = width * height * surface->fmt.bytes_per_pixel;
uint8_t *buf = g_malloc(bufsize);
surface_download_to_buffer(d, surface, false, false, false, buf);
surface_download_to_buffer(d, surface, false, true, false, buf);
width = texture_shape->width;
height = texture_shape->height;
@ -413,52 +417,16 @@ bool pgraph_gl_check_surface_to_texture_compatibility(
return false;
}
static bool check_surface_overlaps_range(const SurfaceBinding *surface,
hwaddr range_start, hwaddr range_len)
static void wait_for_surface_download(SurfaceBinding *e)
{
hwaddr surface_end = surface->vram_addr + surface->size;
hwaddr range_end = range_start + range_len;
return !(surface->vram_addr >= range_end || range_start >= surface_end);
}
NV2AState *d = g_nv2a;
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
hwaddr len, bool write)
{
NV2AState *d = (NV2AState *)opaque;
qemu_mutex_lock(&d->pgraph.lock);
PGRAPHGLState *r = d->pgraph.gl_renderer_state;
bool wait_for_downloads = false;
SurfaceBinding *surface;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
if (!check_surface_overlaps_range(surface, addr, len)) {
continue;
}
hwaddr offset = addr - surface->vram_addr;
if (write) {
trace_nv2a_pgraph_surface_cpu_write(surface->vram_addr, offset);
} else {
trace_nv2a_pgraph_surface_cpu_read(surface->vram_addr, offset);
}
if (surface->draw_dirty) {
surface->download_pending = true;
wait_for_downloads = true;
}
if (write) {
surface->upload_pending = true;
}
}
qemu_mutex_unlock(&d->pgraph.lock);
if (wait_for_downloads) {
if (qatomic_read(&e->draw_dirty)) {
qemu_mutex_lock(&d->pfifo.lock);
qemu_event_reset(&r->downloads_complete);
qatomic_set(&e->download_pending, true);
qatomic_set(&r->downloads_pending, true);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
@ -466,44 +434,22 @@ static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
}
}
static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface)
static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
hwaddr len, bool write)
{
if (tcg_enabled()) {
surface->access_cb = mem_access_callback_insert(
qemu_get_cpu(0), d->vram, surface->vram_addr, surface->size,
&surface_access_callback, d);
SurfaceBinding *e = opaque;
assert(addr >= e->vram_addr);
hwaddr offset = addr - e->vram_addr;
assert(offset < e->size);
if (qatomic_read(&e->draw_dirty)) {
trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
wait_for_surface_download(e);
}
}
/*
 * Drop the CPU memory-access callback referenced by surface->access_cb.
 * Does nothing unless tcg_enabled() reports true. `d` is not used here.
 */
static void unregister_cpu_access_callback(NV2AState *d,
                                           SurfaceBinding const *surface)
{
    if (!tcg_enabled()) {
        return;
    }

    mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
}
static bool check_surfaces_overlap(const SurfaceBinding *surface,
const SurfaceBinding *other_surface)
{
return check_surface_overlaps_range(surface, other_surface->vram_addr,
other_surface->size);
}
static void invalidate_overlapping_surfaces(NV2AState *d, SurfaceBinding *surface)
{
PGRAPHState *pg = &d->pgraph;
PGRAPHGLState *r = pg->gl_renderer_state;
SurfaceBinding *other_surface, *next_surface;
QTAILQ_FOREACH_SAFE(other_surface, &r->surfaces, entry, next_surface) {
if (check_surfaces_overlap(surface, other_surface)) {
trace_nv2a_pgraph_surface_evict_overlapping(
other_surface->vram_addr, other_surface->width, other_surface->height,
other_surface->pitch);
pgraph_gl_surface_download_if_dirty(d, other_surface);
pgraph_gl_surface_invalidate(d, other_surface);
}
if (write && !qatomic_read(&e->upload_pending)) {
trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
qatomic_set(&e->upload_pending, true);
}
}
@ -515,13 +461,35 @@ static SurfaceBinding *surface_put(NV2AState *d, hwaddr addr,
assert(pgraph_gl_surface_get(d, addr) == NULL);
invalidate_overlapping_surfaces(d, surface_in);
SurfaceBinding *surface, *next;
uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1;
QTAILQ_FOREACH_SAFE(surface, &r->surfaces, entry, next) {
uintptr_t s_end = surface->vram_addr + surface->size - 1;
bool overlapping = !(surface->vram_addr > e_end
|| surface_in->vram_addr > s_end);
if (overlapping) {
trace_nv2a_pgraph_surface_evict_overlapping(
surface->vram_addr, surface->width, surface->height,
surface->pitch);
pgraph_gl_surface_download_if_dirty(d, surface);
pgraph_gl_surface_invalidate(d, surface);
}
}
SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding));
assert(surface_out != NULL);
*surface_out = *surface_in;
register_cpu_access_callback(d, surface_out);
if (tcg_enabled()) {
qemu_mutex_unlock(&d->pgraph.lock);
bql_lock();
mem_access_callback_insert(qemu_get_cpu(0),
d->vram, surface_out->vram_addr, surface_out->size,
&surface_out->access_cb, &surface_access_callback,
surface_out);
bql_unlock();
qemu_mutex_lock(&d->pgraph.lock);
}
QTAILQ_INSERT_TAIL(&r->surfaces, surface_out, entry);
@ -575,7 +543,13 @@ void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *surface)
pgraph_gl_unbind_surface(d, false);
}
unregister_cpu_access_callback(d, surface);
if (tcg_enabled()) {
qemu_mutex_unlock(&d->pgraph.lock);
bql_lock();
mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
bql_unlock();
qemu_mutex_lock(&d->pgraph.lock);
}
glDeleteTextures(1, &surface->gl_buffer);
@ -764,7 +738,7 @@ static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force)
nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD);
surface_download_to_buffer(d, surface, true, false, true,
surface_download_to_buffer(d, surface, true, true, true,
d->vram_ptr + surface->vram_addr);
memory_region_set_client_dirty(d->vram, surface->vram_addr,
@ -901,26 +875,20 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
surface->fmt.bytes_per_pixel);
}
/* FIXME: Replace this scaling */
/* FIXME: Replace this flip/scaling */
// This is VRAM so we can't do this inplace!
uint8_t *optimal_buf = buf;
unsigned int optimal_pitch = surface->width * surface->fmt.bytes_per_pixel;
if (surface->pitch != optimal_pitch) {
optimal_buf = (uint8_t *)g_malloc(surface->height * optimal_pitch);
uint8_t *src = buf;
uint8_t *dst = optimal_buf;
uint8_t *flipped_buf = (uint8_t *)g_malloc(
surface->height * surface->width * surface->fmt.bytes_per_pixel);
unsigned int irow;
for (irow = 0; irow < surface->height; irow++) {
memcpy(dst, src, optimal_pitch);
src += surface->pitch;
dst += optimal_pitch;
}
memcpy(&flipped_buf[surface->width * (surface->height - irow - 1)
* surface->fmt.bytes_per_pixel],
&buf[surface->pitch * irow],
surface->width * surface->fmt.bytes_per_pixel);
}
uint8_t *gl_read_buf = optimal_buf;
uint8_t *gl_read_buf = flipped_buf;
unsigned int width = surface->width, height = surface->height;
if (pg->surface_scale_factor > 1) {
@ -928,7 +896,7 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
pg->scale_buf = (uint8_t *)g_realloc(
pg->scale_buf, width * height * surface->fmt.bytes_per_pixel);
gl_read_buf = pg->scale_buf;
uint8_t *out = gl_read_buf, *in = optimal_buf;
uint8_t *out = gl_read_buf, *in = flipped_buf;
surface_copy_expand(out, in, surface->width, surface->height,
surface->fmt.bytes_per_pixel,
d->pgraph.surface_scale_factor);
@ -947,9 +915,7 @@ void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
height, 0, surface->fmt.gl_format, surface->fmt.gl_type,
gl_read_buf);
glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment);
if (optimal_buf != buf) {
g_free(optimal_buf);
}
g_free(flipped_buf);
if (surface->swizzle) {
g_free(buf);
}

View File

@ -746,7 +746,7 @@ static void texture_binding_destroy(gpointer data)
}
/* functions for texture LRU cache */
static void texture_cache_entry_init(Lru *lru, LruNode *node, const void *key)
static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key)
{
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
memcpy(&tnode->key, key, sizeof(TextureKey));
@ -765,8 +765,7 @@ static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
}
}
static bool texture_cache_entry_compare(Lru *lru, LruNode *node,
const void *key)
static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
return memcmp(&tnode->key, key, sizeof(TextureKey));

View File

@ -223,23 +223,23 @@ unsigned int pgraph_gl_bind_inline_array(NV2AState *d)
nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2);
glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer);
GLsizeiptr buffer_size = index_count * vertex_size;
glBufferData(GL_ARRAY_BUFFER, buffer_size, NULL, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER, 0, buffer_size, pg->inline_array);
glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t),
NULL, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array);
pgraph_gl_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size,
index_count-1);
return index_count;
}
static void vertex_cache_entry_init(Lru *lru, LruNode *node, const void *key)
static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key)
{
VertexLruNode *vnode = container_of(node, VertexLruNode, node);
memcpy(&vnode->key, key, sizeof(struct VertexKey));
vnode->initialized = false;
}
static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, const void *key)
static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
VertexLruNode *vnode = container_of(node, VertexLruNode, node);
return memcmp(&vnode->key, key, sizeof(VertexKey));

View File

@ -1,7 +1,7 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2024-2025 Matt Borgerson
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -18,15 +18,8 @@
*/
#include "common.h"
#include "hw/xbox/nv2a/pgraph/pgraph.h"
#define DECL_UNIFORM_ELEMENT_NAME(type) #type,
const char *uniform_element_type_to_str[] = {
UNIFORM_ELEMENT_TYPE_X(DECL_UNIFORM_ELEMENT_NAME)
};
MString *pgraph_glsl_get_vtx_header(MString *out, bool location, bool smooth,
bool in, bool prefix, bool array)
MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array)
{
const char *smooth_s = "";
const char *flat_s = "flat ";
@ -61,26 +54,3 @@ MString *pgraph_glsl_get_vtx_header(MString *out, bool location, bool smooth,
return out;
}
/*
 * Fill `clipRange` with the values for the clip range uniform:
 *
 *   clipRange[0] = 0
 *   clipRange[1] = zmax, chosen from the surface zeta format: f16_max or
 *                  0xFFFF for Z16, f24_max or 0xFFFFFF for Z24S8, where the
 *                  first of each pair is used when surface_shape.z_format
 *                  is set
 *   clipRange[2] = NV_PGRAPH_ZCLIPMIN register, bits reinterpreted as float
 *   clipRange[3] = NV_PGRAPH_ZCLIPMAX register, bits reinterpreted as float
 *
 * Asserts on any other zeta format.
 */
void pgraph_glsl_set_clip_range_uniform_value(PGRAPHState *pg, float clipRange[4])
{
    /* Initialised so the value is defined even if the assert below is ever
     * compiled out. */
    float zmax = 0.0f;

    switch (pg->surface_shape.zeta_format) {
    case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
        zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
        break;
    case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
        zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
        break;
    default:
        assert(0);
    }

    /* Reinterpret the raw register bits as IEEE floats through a union
     * instead of a pointer cast, so the code does not depend on strict
     * aliasing being disabled. */
    union {
        uint32_t u;
        float f;
    } zclip_min, zclip_max;

    zclip_min.u = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
    zclip_max.u = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);

    clipRange[0] = 0;
    clipRange[1] = zmax;
    clipRange[2] = zclip_min.f;
    clipRange[3] = zclip_max.f;
}

View File

@ -3,7 +3,6 @@
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -22,69 +21,8 @@
#ifndef HW_NV2A_SHADERS_COMMON_H
#define HW_NV2A_SHADERS_COMMON_H
#include "qemu/osdep.h"
#include "qemu/mstring.h"
typedef int ivec4[4];
typedef float mat2[2 * 2];
typedef unsigned int uint;
typedef float vec2[2];
typedef float vec3[3];
typedef float vec4[4];
#define UNIFORM_ELEMENT_TYPE_X(DECL) \
DECL(float) \
DECL(int) \
DECL(ivec4) \
DECL(mat2) \
DECL(uint) \
DECL(vec2) \
DECL(vec3) \
DECL(vec4)
enum UniformElementType {
#define DECL_UNIFORM_ELEMENT_TYPE(type) UniformElementType_##type,
UNIFORM_ELEMENT_TYPE_X(DECL_UNIFORM_ELEMENT_TYPE)
};
extern const char *uniform_element_type_to_str[];
#define DECL_UNIFORM_ENUM_VALUE(s, name, type, count) s##_##name,
#define DECL_UNIFORM_ENUM_TYPE(name, decls) \
enum name##Indices{ \
decls(name, DECL_UNIFORM_ENUM_VALUE) name##__COUNT, \
};
#define DECL_UNIFORM_LOC_STRUCT_TYPE(name, decls) \
typedef int name##Locs[name##__COUNT];
#define DECL_UNIFORM_VAL_STRUCT_FIELD(s, name, type, count) type name[count];
#define DECL_UNIFORM_VAL_STRUCT_TYPE(name, decls) \
typedef struct name##Values { \
decls(name, DECL_UNIFORM_VAL_STRUCT_FIELD) \
} name##Values;
typedef struct UniformInfo {
const char *name;
enum UniformElementType type;
size_t size;
size_t count;
size_t val_offs;
} UniformInfo;
#define DECL_UNIFORM_INFO_ITEM(s, name, type, count) \
{ #name, UniformElementType_##type, sizeof(type), count, \
offsetof(s##Values, name) },
#define DECL_UNIFORM_INFO_ARR(name, decls) \
extern const UniformInfo name##Info[];
#define DEF_UNIFORM_INFO_ARR(name, decls) \
const UniformInfo name##Info[] = { decls(name, DECL_UNIFORM_INFO_ITEM) };
#define DECL_UNIFORM_TYPES(name, decls) \
DECL_UNIFORM_ENUM_TYPE(name, decls) \
DECL_UNIFORM_LOC_STRUCT_TYPE(name, decls) \
DECL_UNIFORM_VAL_STRUCT_TYPE(name, decls) \
DECL_UNIFORM_INFO_ARR(name, decls)
#include <stdbool.h>
#define GLSL_C(idx) "c[" stringify(idx) "]"
#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]"
@ -95,12 +33,6 @@ typedef struct UniformInfo {
#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n"
MString *pgraph_glsl_get_vtx_header(MString *out, bool location, bool smooth,
bool in, bool prefix, bool array);
typedef struct PGRAPHState PGRAPHState;
void pgraph_glsl_set_clip_range_uniform_value(PGRAPHState *pg,
float clipRange[4]);
MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array);
#endif

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -19,99 +19,19 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/pgraph.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "common.h"
#include "geom.h"
/*
 * Populate `state` from the current PGRAPH state:
 *   - primitive_mode from pg->primitive_mode
 *   - polygon_front_mode / polygon_back_mode from the FRONTFACEMODE and
 *     BACKFACEMODE fields of NV_PGRAPH_SETUPRASTER
 *   - smooth_shading is true when the SHADEMODE field of
 *     NV_PGRAPH_CONTROL_3 equals NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH
 */
void pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *state)
{
    const uint32_t setup_raster = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER);
    const uint32_t control_3 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3);

    state->primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode;

    state->polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(
        setup_raster, NV_PGRAPH_SETUPRASTER_FRONTFACEMODE);
    state->polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(
        setup_raster, NV_PGRAPH_SETUPRASTER_BACKFACEMODE);

    state->smooth_shading =
        GET_MASK(control_3, NV_PGRAPH_CONTROL_3_SHADEMODE) ==
        NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;
}
bool pgraph_glsl_need_geom(const GeomState *state)
MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
enum ShaderPolygonMode polygon_back_mode,
enum ShaderPrimitiveMode primitive_mode,
bool smooth_shading,
bool vulkan)
{
/* FIXME: Missing support for 2-sided-poly mode */
assert(state->polygon_front_mode == state->polygon_back_mode);
enum ShaderPolygonMode polygon_mode = state->polygon_front_mode;
/* POINT mode shouldn't require any special work */
if (polygon_mode == POLY_MODE_POINT) {
return false;
}
switch (state->primitive_mode) {
case PRIM_TYPE_TRIANGLES:
if (polygon_mode == POLY_MODE_FILL) {
return false;
}
return true;
case PRIM_TYPE_TRIANGLE_STRIP:
if (polygon_mode == POLY_MODE_FILL) {
return false;
}
assert(polygon_mode == POLY_MODE_LINE);
return true;
case PRIM_TYPE_TRIANGLE_FAN:
if (polygon_mode == POLY_MODE_FILL) {
return false;
}
assert(polygon_mode == POLY_MODE_LINE);
return true;
case PRIM_TYPE_QUADS:
if (polygon_mode == POLY_MODE_LINE) {
return true;
} else if (polygon_mode == POLY_MODE_FILL) {
return true;
} else {
assert(false);
return false;
}
break;
case PRIM_TYPE_QUAD_STRIP:
if (polygon_mode == POLY_MODE_LINE) {
return true;
} else if (polygon_mode == POLY_MODE_FILL) {
return true;
} else {
assert(false);
return false;
}
break;
case PRIM_TYPE_POLYGON:
if (polygon_mode == POLY_MODE_LINE) {
return false;
}
if (polygon_mode == POLY_MODE_FILL) {
if (state->smooth_shading) {
return false;
}
return true;
} else {
assert(false);
return false;
}
break;
default:
return false;
}
}
MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
{
/* FIXME: Missing support for 2-sided-poly mode */
assert(state->polygon_front_mode == state->polygon_back_mode);
enum ShaderPolygonMode polygon_mode = state->polygon_front_mode;
assert(polygon_front_mode == polygon_back_mode);
enum ShaderPolygonMode polygon_mode = polygon_front_mode;
/* POINT mode shouldn't require any special work */
if (polygon_mode == POLY_MODE_POINT) {
@ -122,7 +42,7 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
const char *layout_in = NULL;
const char *layout_out = NULL;
const char *body = NULL;
switch (state->primitive_mode) {
switch (primitive_mode) {
case PRIM_TYPE_POINTS: return NULL;
case PRIM_TYPE_LINES: return NULL;
case PRIM_TYPE_LINE_LOOP: return NULL;
@ -225,7 +145,7 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
return NULL;
}
if (polygon_mode == POLY_MODE_FILL) {
if (state->smooth_shading) {
if (smooth_shading) {
return NULL;
}
layout_in = "layout(triangles) in;\n";
@ -249,19 +169,16 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
assert(layout_in);
assert(layout_out);
assert(body);
MString *output =
mstring_from_fmt("#version %d\n\n"
"%s"
"%s"
"\n",
opts.vulkan ? 450 : 400, layout_in, layout_out);
pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading, true,
true, true);
pgraph_glsl_get_vtx_header(output, opts.vulkan, state->smooth_shading,
false, false, false);
MString *s = mstring_new();
mstring_append_fmt(s, "#version %d\n\n", vulkan ? 450 : 400);
mstring_append(s, layout_in);
mstring_append(s, layout_out);
mstring_append(s, "\n");
pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, true, true, true);
pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, false, false, false);
if (state->smooth_shading) {
mstring_append(output,
if (smooth_shading) {
mstring_append(s,
"void emit_vertex(int index, int _unused) {\n"
" gl_Position = gl_in[index].gl_Position;\n"
" gl_PointSize = gl_in[index].gl_PointSize;\n"
@ -277,7 +194,7 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
" EmitVertex();\n"
"}\n");
} else {
mstring_append(output,
mstring_append(s,
"void emit_vertex(int index, int provoking_index) {\n"
" gl_Position = gl_in[index].gl_Position;\n"
" gl_PointSize = gl_in[index].gl_PointSize;\n"
@ -294,12 +211,10 @@ MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts)
"}\n");
}
mstring_append_fmt(output,
"\n"
"void main() {\n"
"%s"
"}\n",
body);
mstring_append(s, "\n"
"void main() {\n");
mstring_append(s, body);
mstring_append(s, "}\n");
return output;
return s;
}

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -22,24 +22,13 @@
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H
#include "common.h"
#include "hw/xbox/nv2a/pgraph/vsh_regs.h"
#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
/*
 * Inputs to geometry shader generation; passed by pointer to
 * pgraph_glsl_need_geom() and pgraph_glsl_gen_geom().
 */
typedef struct {
/* Primitive type being drawn. */
enum ShaderPrimitiveMode primitive_mode;
/* Polygon modes for front and back faces. */
enum ShaderPolygonMode polygon_front_mode;
enum ShaderPolygonMode polygon_back_mode;
/* Selects which emit_vertex() variant the generator emits. */
bool smooth_shading;
} GeomState;
/* Code generation options for pgraph_glsl_gen_geom(). */
typedef struct GenGeomGlslOptions {
/* When true the generator emits "#version 450", otherwise "#version 400". */
bool vulkan;
} GenGeomGlslOptions;
void pgraph_glsl_set_geom_state(PGRAPHState *pg, GeomState *geom);
bool pgraph_glsl_need_geom(const GeomState *state);
MString *pgraph_glsl_gen_geom(const GeomState *state, GenGeomGlslOptions opts);
MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode,
enum ShaderPolygonMode polygon_back_mode,
enum ShaderPrimitiveMode primitive_mode,
bool smooth_shading,
bool vulkan);
#endif

View File

@ -2,7 +2,6 @@ specific_ss.add([files(
'common.c',
'geom.c',
'psh.c',
'shaders.c',
'vsh.c',
'vsh-ff.c',
'vsh-prog.c',

File diff suppressed because it is too large Load Diff

View File

@ -3,94 +3,39 @@
*
* Copyright (c) 2013 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
* Based on:
* Cxbx, PixelShader.cpp
* Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
* Kingofc <kingofc@freenet.de>
* Xeon, XBD3DPixelShader.cpp
* Copyright (c) 2003 _SF_
*
* This library is distributed in the hope that it will be useful,
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 or
* (at your option) version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H
#include "common.h"
#include "hw/xbox/nv2a/pgraph/psh_regs.h"
#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
typedef struct PGRAPHState PGRAPHState;
// FIXME: Move to struct
#define PSH_UBO_BINDING 1
#define PSH_TEX_BINDING 2
/*
 * Pixel shader state. Filled in by pgraph_glsl_set_psh_state() and passed to
 * pgraph_glsl_gen_psh() to generate the fragment shader source.
 */
typedef struct PshState {
/* Register combiner configuration words.
 * NOTE(review): presumably raw PGRAPH register values - confirm in psh.c. */
uint32_t combiner_control;
uint32_t shader_stage_program;
uint32_t other_stage_input;
uint32_t final_inputs_0;
uint32_t final_inputs_1;
/* NOTE(review): the 8 entries presumably map to combiner stages - confirm. */
uint32_t rgb_inputs[8], rgb_outputs[8];
uint32_t alpha_inputs[8], alpha_outputs[8];
bool point_sprite;
/* Arrays of 4 below: presumably one entry per texture stage - TODO confirm. */
bool rect_tex[4];
bool snorm_tex[4];
bool compare_mode[4][4];
bool alphakill[4];
int colorkey_mode[4];
enum ConvolutionFilter conv_tex[4];
bool tex_x8y24[4];
int dim_tex[4];
float border_logical_size[4][3];
float border_inv_real_size[4][3];
bool shadow_map[4];
enum PshShadowDepthFunc shadow_depth_func;
bool alpha_test;
enum PshAlphaFunc alpha_func;
bool window_clip_exclusive;
bool smooth_shading;
bool depth_clipping;
bool z_perspective;
} PshState;
void pgraph_glsl_set_psh_state(PGRAPHState *pg, PshState *state);
/*
 * X-macro listing the pixel shader uniforms. Each DECL() entry carries the
 * caller-supplied prefix S, the uniform name, its GLSL type and a count.
 * NOTE(review): the count is presumably the array length (1 = scalar);
 * confirm against DECL_UNIFORM_TYPES, which is defined outside this header.
 */
#define PSH_UNIFORM_DECL_X(S, DECL) \
DECL(S, alphaRef, int, 1) \
DECL(S, bumpMat, mat2, 4) \
DECL(S, bumpOffset, float, 4) \
DECL(S, bumpScale, float, 4) \
DECL(S, clipRange, vec4, 1) \
DECL(S, clipRegion, ivec4, 8) \
DECL(S, colorKey, uint, 4) \
DECL(S, colorKeyMask, uint, 4) \
DECL(S, consts, vec4, 18) \
DECL(S, depthOffset, float, 1) \
DECL(S, fogColor, vec4, 1) \
DECL(S, texScale, float, 4)
/* Expands the list above into the PshUniform* declarations
 * (PshUniformLocs and PshUniformValues are used below). */
DECL_UNIFORM_TYPES(PshUniform, PSH_UNIFORM_DECL_X)
/* Code generation options for pgraph_glsl_gen_psh(). */
typedef struct GenPshGlslOptions {
/* Target Vulkan rather than OpenGL. */
bool vulkan;
/* NOTE(review): presumably the binding indices for the uniform block and the
 * first texture (cf. PSH_UBO_BINDING / PSH_TEX_BINDING) - confirm in psh.c. */
int ubo_binding;
int tex_binding;
} GenPshGlslOptions;
MString *pgraph_glsl_gen_psh(const PshState *state, GenPshGlslOptions opts);
void pgraph_glsl_set_psh_uniform_values(PGRAPHState *pg,
const PshUniformLocs locs,
PshUniformValues *values);
MString *pgraph_gen_psh_glsl(const PshState state);
#endif

View File

@ -1,94 +0,0 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/pgraph/pgraph.h"
#include "shaders.h"
/*
 * Build a ShaderState snapshot for the vertex, geometry and pixel stages
 * from the current PGRAPH state.
 *
 * Side effect: clears pg->program_data_dirty.
 */
ShaderState pgraph_glsl_get_shader_state(PGRAPHState *pg)
{
    ShaderState snapshot;

    /* FIXME (carried over): the dirty flag is cleared unconditionally here. */
    pg->program_data_dirty = false;

    /* The snapshot gets hashed, so padding bytes must be zero as well. */
    memset(&snapshot, 0, sizeof(snapshot));

    pgraph_glsl_set_vsh_state(pg, &snapshot.vsh);
    pgraph_glsl_set_geom_state(pg, &snapshot.geom);
    pgraph_glsl_set_psh_state(pg, &snapshot.psh);

    return snapshot;
}
/*
 * Report whether the shader state captured in `state` may be stale.
 *
 * Returns true as soon as one of these holds (checked in this order):
 *   - pg->program_data_dirty is set,
 *   - one of the fixed list of shader-related registers is dirty,
 *   - a combiner input/output register of an active stage is dirty,
 *   - an attribute mask, the primitive mode or the surface scale factor in
 *     `pg` differs from the value stored in `state`,
 *   - a per-texture control/filter/format register is dirty, or a texture
 *     matrix enable differs from the stored one.
 * Returns false otherwise.
 */
bool pgraph_glsl_check_shader_state_dirty(PGRAPHState *pg,
                                          const ShaderState *state)
{
    static const unsigned int tracked_regs[] = {
        NV_PGRAPH_COMBINECTL,      NV_PGRAPH_COMBINESPECFOG0,
        NV_PGRAPH_COMBINESPECFOG1, NV_PGRAPH_CONTROL_0,
        NV_PGRAPH_CONTROL_3,       NV_PGRAPH_CSV0_C,
        NV_PGRAPH_CSV0_D,          NV_PGRAPH_CSV1_A,
        NV_PGRAPH_CSV1_B,          NV_PGRAPH_POINTSIZE,
        NV_PGRAPH_SETUPRASTER,     NV_PGRAPH_SHADERCLIPMODE,
        NV_PGRAPH_SHADERCTL,       NV_PGRAPH_SHADERPROG,
        NV_PGRAPH_SHADOWCTL,       NV_PGRAPH_ZCOMPRESSOCCLUDE,
    };

    if (pg->program_data_dirty) {
        return true;
    }

    for (size_t r = 0; r < ARRAY_SIZE(tracked_regs); r++) {
        if (pgraph_is_reg_dirty(pg, tracked_regs[r])) {
            return true;
        }
    }

    /* The stage count is taken from the low byte of COMBINECTL. */
    const int active_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
    for (int stage = 0; stage < active_stages; stage++) {
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + stage * 4)) {
            return true;
        }
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + stage * 4)) {
            return true;
        }
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + stage * 4)) {
            return true;
        }
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + stage * 4)) {
            return true;
        }
    }

    /* Values kept in PGRAPHState rather than in dirty-tracked registers. */
    if (pg->uniform_attrs != state->vsh.uniform_attrs) {
        return true;
    }
    if (pg->swizzle_attrs != state->vsh.swizzle_attrs) {
        return true;
    }
    if (pg->compressed_attrs != state->vsh.compressed_attrs) {
        return true;
    }
    if (pg->primitive_mode != state->geom.primitive_mode) {
        return true;
    }
    if (pg->surface_scale_factor != state->vsh.surface_scale_factor) {
        return true;
    }

    for (int tex = 0; tex < 4; tex++) {
        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + tex * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + tex * 4) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + tex * 4)) {
            return true;
        }
        if (pg->texture_matrix_enable[tex] !=
            state->vsh.fixed_function.texture_matrix_enable[tex]) {
            return true;
        }
    }

    return false;
}

View File

@ -1,40 +0,0 @@
/*
* Geforce NV2A PGRAPH GLSL Shader Generator
*
* Copyright (c) 2025 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_SHADERS_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_SHADERS_H
#include "vsh.h"
#include "geom.h"
#include "psh.h"
/*
 * Combined state of the vertex, geometry and pixel shader stages.
 * Obtained from pgraph_glsl_get_shader_state() and compared against the
 * current PGRAPH state by pgraph_glsl_check_shader_state_dirty().
 */
typedef struct ShaderState {
VshState vsh;
GeomState geom;
PshState psh;
} ShaderState;
typedef struct PGRAPHState PGRAPHState;
ShaderState pgraph_glsl_get_shader_state(PGRAPHState *pg);
bool pgraph_glsl_check_shader_state_dirty(PGRAPHState *pg,
const ShaderState *state);
#endif

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -20,55 +20,22 @@
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "common.h"
#include "vsh-ff.h"
/*
 * Append GLSL to `str` that declares `output` (of GLSL type `type`) and fills
 * it with `input` transformed by the `matrix` family, restricted to `swizzle`.
 *
 *   count == 0 : a single transform using "<matrix>0", no weighting.
 *   mix        : `count` matrices are blended; the first count-1 weights are
 *                read from weight.x/y/z/w and the last one is the remainder
 *                that makes the weights sum to 1 (like
 *                GL_WEIGHT_SUM_UNITY_ARB).
 *   otherwise  : `count` matrices are blended with individual weights taken
 *                from weight.x/y/z/w.
 */
static void append_skinning_code(MString *str, bool mix, unsigned int count,
                                 const char *type, const char *output,
                                 const char *input, const char *matrix,
                                 const char *swizzle)
{
    static const char components[] = "xyzw";

    if (count == 0) {
        mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n",
                           type, output, input, matrix, swizzle);
        return;
    }

    mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type);

    if (!mix) {
        /* Individual weights */
        for (unsigned int n = 0; n < count; n++) {
            mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n",
                               output, input, matrix, (int)n, swizzle,
                               components[n]);
        }
        return;
    }

    /* Generated final weight */
    mstring_append(str, "{\n"
                        " float weight_i;\n"
                        " float weight_n = 1.0;\n");

    const unsigned int last = count - 1;
    for (unsigned int n = 0; n < count; n++) {
        if (n < last) {
            mstring_append_fmt(str, " weight_i = weight.%c;\n"
                                    " weight_n -= weight_i;\n",
                               components[n]);
        } else {
            mstring_append(str, " weight_i = weight_n;\n");
        }
        mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n",
                           output, input, matrix, (int)n, swizzle);
    }

    mstring_append(str, "}\n");
}
static void append_skinning_code(MString* str, bool mix,
unsigned int count, const char* type,
const char* output, const char* input,
const char* matrix, const char* swizzle);
void pgraph_glsl_gen_vsh_ff(const VshState *state, MString *header,
MString *body)
void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
MString *body, MString *uniforms)
{
int i, j;
const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
/* generate vertex shader mimicking fixed function */
mstring_append(header,
"#define position v0\n"
"#define weight v1\n"
@ -87,6 +54,11 @@ void pgraph_glsl_gen_vsh_ff(const VshState *state, MString *header,
"#define reserved2 v14\n"
"#define reserved3 v15\n"
"\n");
mstring_append_fmt(uniforms,
"%svec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n"
"%svec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n"
"%svec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n", u, u, u
);
mstring_append(header,
"\n"
GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0))
@ -143,10 +115,13 @@ GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz")
"\n"
);
mstring_append_fmt(uniforms,
"%smat4 invViewport;\n", u);
/* Skinning */
unsigned int count;
bool mix;
switch (state->fixed_function.skinning) {
switch (state->skinning) {
case SKINNING_OFF:
mix = false; count = 0; break;
case SKINNING_1WEIGHTS:
@ -166,7 +141,7 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
break;
}
mstring_append_fmt(body, "/* Skinning mode %d */\n",
state->fixed_function.skinning);
state->skinning);
append_skinning_code(body, mix, count, "vec4",
"tPosition", "position",
@ -175,10 +150,12 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
"tNormal", "vec4(normal, 0.0)",
"invModelViewMat", "xyz");
if (state->fixed_function.normalization) {
/* Normalization */
if (state->normalization) {
mstring_append(body, "tNormal = normalize(tNormal);\n");
}
/* Texgen */
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
mstring_append_fmt(body, "/* Texgen for stage %d */\n",
i);
@ -188,7 +165,7 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
/* TODO: TexGen View Model missing! */
char c = "xyzw"[j];
char cSuffix = "STRQ"[j];
switch (state->fixed_function.texgen[i][j]) {
switch (state->texgen[i][j]) {
case TEXGEN_DISABLE:
mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n",
i, c, i, c);
@ -243,104 +220,104 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
}
}
/* Apply texture matrices */
for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
if (state->fixed_function.texture_matrix_enable[i]) {
if (state->texture_matrix_enable[i]) {
mstring_append_fmt(body,
"oT%d = oT%d * texMat%d;\n",
i, i, i);
}
}
if (!state->fixed_function.lighting) {
mstring_append(body, " oD0 = diffuse;\n");
mstring_append(body, " oD1 = specular;\n");
mstring_append(body, " oB0 = backDiffuse;\n");
mstring_append(body, " oB1 = backSpecular;\n");
} else {
/* Lighting */
if (state->lighting) {
//FIXME: Do 2 passes if we want 2 sided-lighting?
static char alpha_source_diffuse[] = "diffuse.a";
static char alpha_source_specular[] = "specular.a";
static char alpha_source_material[] = "material_alpha";
const char *alpha_source = alpha_source_diffuse;
if (state->fixed_function.diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) {
if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) {
mstring_append_fmt(uniforms, "%sfloat material_alpha;\n", u);
alpha_source = alpha_source_material;
} else if (state->fixed_function.diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) {
} else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) {
alpha_source = alpha_source_specular;
}
if (state->fixed_function.ambient_src == MATERIAL_COLOR_SRC_MATERIAL) {
if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) {
mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source);
} else if (state->fixed_function.ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) {
} else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) {
mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source);
} else if (state->fixed_function.ambient_src == MATERIAL_COLOR_SRC_SPECULAR) {
} else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) {
mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source);
}
mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n");
if (state->fixed_function.emission_src == MATERIAL_COLOR_SRC_MATERIAL) {
if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) {
mstring_append(body, "oD0.rgb += sceneAmbientColor;\n");
} else if (state->fixed_function.emission_src == MATERIAL_COLOR_SRC_DIFFUSE) {
} else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) {
mstring_append(body, "oD0.rgb += diffuse.rgb;\n");
} else if (state->fixed_function.emission_src == MATERIAL_COLOR_SRC_SPECULAR) {
} else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) {
mstring_append(body, "oD0.rgb += specular.rgb;\n");
}
mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n");
if (state->fixed_function.local_eye) {
mstring_append(body,
"vec3 VPeye = normalize(eyePosition.xyz / eyePosition.w - tPosition.xyz / tPosition.w);\n"
);
}
for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
if (state->fixed_function.light[i] == LIGHT_OFF) {
if (state->light[i] == LIGHT_OFF) {
continue;
}
/* FIXME: It seems that we only have to handle the surface colors if
* they are not part of the material [= vertex colors].
* If they are material the cpu will premultiply light
* colors
*/
mstring_append_fmt(body, "/* Light %d */ {\n", i);
if (state->fixed_function.light[i] == LIGHT_LOCAL
|| state->fixed_function.light[i] == LIGHT_SPOT) {
if (state->light[i] == LIGHT_LOCAL
|| state->light[i] == LIGHT_SPOT) {
mstring_append_fmt(uniforms,
"%svec3 lightLocalPosition%d;\n"
"%svec3 lightLocalAttenuation%d;\n",
u, i, u, i);
mstring_append_fmt(body,
" vec3 tPos = tPosition.xyz/tPosition.w;\n"
" vec3 VP = lightLocalPosition[%d] - tPos;\n"
" vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n"
" float d = length(VP);\n"
" if (d <= lightLocalRange(%d)) {\n" /* FIXME: Double check that range is inclusive */
//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights?
" VP = normalize(VP);\n"
" float attenuation = 1.0 / (lightLocalAttenuation[%d].x\n"
" + lightLocalAttenuation[%d].y * d\n"
" + lightLocalAttenuation[%d].z * d * d);\n"
" vec3 halfVector = normalize(VP + %s);\n"
" float attenuation = 1.0 / (lightLocalAttenuation%d.x\n"
" + lightLocalAttenuation%d.y * d\n"
" + lightLocalAttenuation%d.z * d * d);\n"
" vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */
" float nDotVP = max(0.0, dot(tNormal, VP));\n"
" float nDotHV = max(0.0, dot(tNormal, halfVector));\n",
i, i, i, i, i,
state->fixed_function.local_eye ? "VPeye" : "vec3(0.0, 0.0, 0.0)"
);
i, i, i, i);
}
switch(state->fixed_function.light[i]) {
switch(state->light[i]) {
case LIGHT_INFINITE:
/* lightLocalRange will be 1e+30 here */
mstring_append_fmt(uniforms,
"%svec3 lightInfiniteHalfVector%d;\n"
"%svec3 lightInfiniteDirection%d;\n",
u, i, u, i);
mstring_append_fmt(body,
" {\n"
" float attenuation = 1.0;\n"
" vec3 lightDirection = normalize(lightInfiniteDirection[%d]);\n"
" float nDotVP = max(0.0, dot(tNormal, lightDirection));\n",
i);
if (state->fixed_function.local_eye) {
mstring_append(body,
" float nDotHV = max(0.0, dot(tNormal, normalize(lightDirection + VPeye)));\n"
);
} else {
mstring_append_fmt(body,
" float nDotHV = max(0.0, dot(tNormal, lightInfiniteHalfVector[%d]));\n",
i
);
}
" float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n"
" float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n",
i, i);
/* FIXME: Do specular */
/* FIXME: tBackDiffuse */
break;
case LIGHT_LOCAL:
/* Everything done already */
@ -369,20 +346,20 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
mstring_append_fmt(body,
" float pf;\n"
" if (nDotVP == 0.0 || nDotHV == 0.0) {\n"
" if (nDotVP == 0.0) {\n"
" pf = 0.0;\n"
" } else {\n"
" pf = pow(nDotHV, specularPower);\n"
" pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n"
" }\n"
" vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n"
" vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n"
" vec3 lightSpecular = lightSpecularColor(%d) * attenuation * pf;\n",
" vec3 lightSpecular = lightSpecularColor(%d) * pf;\n",
i, i, i);
mstring_append(body,
" oD0.xyz += lightAmbient;\n");
switch (state->fixed_function.diffuse_src) {
switch (state->diffuse_src) {
case MATERIAL_COLOR_SRC_MATERIAL:
mstring_append(body,
" oD0.xyz += lightDiffuse;\n");
@ -397,57 +374,28 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
break;
}
switch (state->fixed_function.specular_src) {
case MATERIAL_COLOR_SRC_MATERIAL:
mstring_append(body,
" oD1.xyz += lightSpecular;\n");
break;
case MATERIAL_COLOR_SRC_DIFFUSE:
mstring_append(body,
" oD1.xyz += diffuse.xyz * lightSpecular;\n");
break;
case MATERIAL_COLOR_SRC_SPECULAR:
mstring_append(body,
" oD1.xyz += specular.xyz * lightSpecular;\n");
break;
}
mstring_append(body, " }\n"
"}\n");
mstring_append(body, "}\n");
}
/* TODO: Implement two-sided lighting */
mstring_append(body, " oB0 = backDiffuse;\n");
mstring_append(body, " oB1 = backSpecular;\n");
} else {
mstring_append(body, " oD0 = diffuse;\n");
mstring_append(body, " oD1 = specular;\n");
}
if (!state->specular_enable) {
mstring_append(body, " oD1 = vec4(0.0, 0.0, 0.0, 1.0);\n");
mstring_append(body, " oB1 = vec4(0.0, 0.0, 0.0, 1.0);\n");
} else {
if (!state->separate_specular) {
if (state->fixed_function.lighting) {
mstring_append(body,
" oD0.xyz += oD1.xyz;\n"
" oB0.xyz += oB1.xyz;\n"
);
}
mstring_append(body,
" oD1 = specular;\n"
" oB1 = backSpecular;\n"
);
}
if (state->ignore_specular_alpha) {
mstring_append(body,
" oD1.a = 1.0;\n"
" oB1.a = 1.0;\n"
);
}
}
mstring_append(body, " oB0 = backDiffuse;\n");
mstring_append(body, " oB1 = backSpecular;\n");
/* Fog */
if (state->fog_enable) {
/* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */
switch(state->fixed_function.foggen) {
switch(state->foggen) {
case FOGGEN_SPEC_ALPHA:
/* FIXME: Do we have to clamp here? */
mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n");
@ -458,7 +406,7 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
case FOGGEN_PLANAR:
case FOGGEN_ABS_PLANAR:
mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n");
if (state->fixed_function.foggen == FOGGEN_ABS_PLANAR) {
if (state->foggen == FOGGEN_ABS_PLANAR) {
mstring_append(body, " fogDistance = abs(fogDistance);\n");
}
break;
@ -466,38 +414,81 @@ GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz
mstring_append(body, " float fogDistance = fogCoord;\n");
break;
default:
assert(!"Invalid foggen mode");
assert(false);
break;
}
}
/* If skinning is off the composite matrix already includes the MV matrix */
if (state->fixed_function.skinning == SKINNING_OFF) {
if (state->skinning == SKINNING_OFF) {
mstring_append(body, " tPosition = position;\n");
}
mstring_append(body,
" oPos = tPosition * compositeMat;\n"
" oPos.z = oPos.z / clipRange.y;\n"
" oPos.w = clampAwayZeroInf(oPos.w);\n"
" oPos.xy /= oPos.w;\n"
" oPos.xy += c[" stringify(NV_IGRAPH_XF_XFCTX_VPOFF) "].xy;\n"
" oPos.xy = roundScreenCoords(oPos.xy);\n"
" oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n"
" oPos.xy *= oPos.w;\n"
" oPos = invViewport * oPos;\n"
);
if (state->vulkan) {
mstring_append(body, " oPos.y *= -1;\n");
}
/* FIXME: Testing */
if (state->point_params_enable) {
mstring_append(
mstring_append_fmt(
body,
" float d_e = length(position * modelViewMat0);\n"
" oPts.x = 1/sqrt(pointParams[0] + pointParams[1] * d_e + pointParams[2] * d_e * d_e) + pointParams[6];\n");
mstring_append_fmt(body, " oPts.x = min(oPts.x * pointParams[3] + pointParams[7], 64.0) * %d;\n",
" oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n",
state->point_params[0], state->point_params[1], state->point_params[2],
state->point_params[6]);
mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n",
state->point_params[3], state->point_params[7],
state->surface_scale_factor);
} else {
mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size,
state->surface_scale_factor);
}
}
/*
 * Emit the GLSL skinning snippet into `str`: declare `output` with GLSL type
 * `type` and accumulate `input` multiplied by the numbered `matrix` uniforms,
 * keeping only the `swizzle` components.
 *
 * With count == 0 a single unweighted transform by "<matrix>0" is emitted.
 * With `mix` set, the last weight is derived so that all weights sum to one
 * (like GL_WEIGHT_SUM_UNITY_ARB); otherwise each matrix uses its own
 * weight.x/y/z/w component.
 */
static void append_skinning_code(MString* str, bool mix,
                                 unsigned int count, const char* type,
                                 const char* output, const char* input,
                                 const char* matrix, const char* swizzle)
{
    static const char lanes[] = "xyzw";
    unsigned int k;

    if (count == 0) {
        mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n",
                           type, output, input, matrix, swizzle);
        return;
    }

    mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type);

    if (mix) {
        /* Generated final weight */
        mstring_append(str, "{\n"
                            " float weight_i;\n"
                            " float weight_n = 1.0;\n");
        for (k = 0; k < count; k++) {
            const bool is_last = !(k < (count - 1));
            if (is_last) {
                mstring_append(str, " weight_i = weight_n;\n");
            } else {
                mstring_append_fmt(str, " weight_i = weight.%c;\n"
                                        " weight_n -= weight_i;\n",
                                   lanes[k]);
            }
            mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n",
                               output, input, matrix, (int)k, swizzle);
        }
        mstring_append(str, "}\n");
        return;
    }

    /* Individual weights */
    for (k = 0; k < count; k++) {
        mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n",
                           output, input, matrix, (int)k, swizzle, lanes[k]);
    }
}

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -23,9 +23,9 @@
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H
#include "qemu/mstring.h"
#include "vsh.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
void pgraph_glsl_gen_vsh_ff(const VshState *state, MString *header,
MString *body);
void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header,
MString *body, MString *uniforms);
#endif

View File

@ -3,7 +3,6 @@
*
* Copyright (c) 2014 Jannik Vogel
* Copyright (c) 2012 espes
* Copyright (c) 2025 Matt Borgerson
*
* Based on:
* Cxbx, VertexShader.cpp
@ -33,10 +32,66 @@
#include <stdbool.h>
#include <assert.h>
#include "hw/xbox/nv2a/pgraph/vsh.h"
#include "common.h"
#include "vsh.h"
#include "vsh-prog.h"
#define VSH_D3DSCM_CORRECTION 96
/*
 * Kind of register an instruction input refers to; passed to
 * decode_opcode_input().
 * NOTE(review): R/V/C presumably mean temporary, vertex input and constant
 * registers - confirm against the decoder.
 */
typedef enum {
PARAM_UNKNOWN = 0,
PARAM_R,
PARAM_V,
PARAM_C
} VshParameterType;
/* Output register class.
 * NOTE(review): presumably constant (C) vs. output (O) - confirm. */
typedef enum {
OUTPUT_C = 0,
OUTPUT_O
} VshOutputType;
/* Selects which unit's result (MAC or ILU) is written out; passed to
 * decode_opcode(). */
typedef enum {
OMUX_MAC = 0,
OMUX_ILU
} VshOutputMux;
/*
 * ILU opcodes.
 * NOTE(review): the numeric values presumably match the instruction token
 * encoding, so the order must not change - confirm.
 */
typedef enum {
ILU_NOP = 0,
ILU_MOV,
ILU_RCP,
ILU_RCC,
ILU_RSQ,
ILU_EXP,
ILU_LOG,
ILU_LIT
} VshILU;
/*
 * MAC opcodes.
 * NOTE(review): as with VshILU, the order presumably mirrors the token
 * encoding - confirm before reordering.
 */
typedef enum {
MAC_NOP,
MAC_MOV,
MAC_MUL,
MAC_ADD,
MAC_MAD,
MAC_DP3,
MAC_DPH,
MAC_DP4,
MAC_DST,
MAC_MIN,
MAC_MAX,
MAC_SLT,
MAC_SGE,
MAC_ARL
} VshMAC;
/* Component selector values; X..W are 0..3. */
typedef enum {
SWIZZLE_X = 0,
SWIZZLE_Y,
SWIZZLE_Z,
SWIZZLE_W
} VshSwizzle;
typedef struct VshFieldMapping {
VshFieldName field_name;
uint8_t subtoken;
@ -88,6 +143,7 @@ static const VshFieldMapping field_mapping[] = {
{ FLD_FINAL, 3, 0, 1 }
};
typedef struct VshOpcodeParams {
bool A;
bool B;
@ -230,6 +286,8 @@ static const char* out_reg_name[] = {
"A0.x",
};
// Retrieves a number of bits in the instruction token
static int vsh_get_from_token(const uint32_t *shader_token,
uint8_t subtoken,
@ -248,6 +306,7 @@ uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name)
field_mapping[field_name].bit_length));
}
// Converts the C register address to disassembly format
static int16_t convert_c_register(const int16_t c_reg)
{
@ -256,7 +315,9 @@ static int16_t convert_c_register(const int16_t c_reg)
return r; //FIXME: = c_reg?!
}
static MString *decode_swizzle(const uint32_t *shader_token,
static MString* decode_swizzle(const uint32_t *shader_token,
VshFieldName swizzle_field)
{
const char* swizzle_str = "xyzw";
@ -294,9 +355,10 @@ static MString *decode_swizzle(const uint32_t *shader_token,
}
}
static MString *decode_opcode_input(const uint32_t *shader_token,
static MString* decode_opcode_input(const uint32_t *shader_token,
VshParameterType param,
VshFieldName neg_field, int reg_num)
VshFieldName neg_field,
int reg_num)
{
/* This function decodes a vertex shader opcode parameter into a string.
* Input A, B or C is controlled via the Param and NEG fieldnames,
@ -346,10 +408,13 @@ static MString *decode_opcode_input(const uint32_t *shader_token,
return ret_str;
}
static MString *decode_opcode(const uint32_t *shader_token,
VshOutputMux out_mux, uint32_t mask,
const char *opcode, const char *inputs,
MString **suffix)
static MString* decode_opcode(const uint32_t *shader_token,
VshOutputMux out_mux,
uint32_t mask,
const char *opcode,
const char *inputs,
MString** suffix)
{
MString *ret = mstring_new();
int reg_num = vsh_get_field(shader_token, FLD_OUT_R);
@ -431,7 +496,8 @@ static MString *decode_opcode(const uint32_t *shader_token,
return ret;
}
static MString *decode_token(const uint32_t *shader_token)
static MString* decode_token(const uint32_t *shader_token)
{
MString *ret;
@ -573,7 +639,7 @@ static const char* vsh_header =
// Unfortunately mix() falls victim to the same handling of exceptional
// (inf/NaN) handling as a multiply, so per-component comparisons are used
// to guarantee HW behavior (anything * 0 must == 0).
" vec4 zero_components = sign(NaNToOne(src0)) * sign(NaNToOne(src1));\n"
" vec4 zero_components = sign(src0) * sign(src1);\n"
" vec4 ret = src0 * src1;\n"
" if (zero_components.x == 0.0) { ret.x = 0.0; }\n"
" if (zero_components.y == 0.0) { ret.y = 0.0; }\n"
@ -723,9 +789,11 @@ static const char* vsh_header =
" return t;\n"
"}\n";
void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens,
unsigned int length, MString *header,
MString *body)
void pgraph_gen_vsh_prog_glsl(uint16_t version,
const uint32_t *tokens,
unsigned int length,
bool vulkan,
MString *header, MString *body)
{
mstring_append(header, vsh_header);
@ -737,10 +805,12 @@ void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens,
const uint32_t* cur_token = &tokens[slot * VSH_TOKEN_SIZE];
MString *token_str = decode_token(cur_token);
mstring_append_fmt(body,
" /* Slot %d: 0x%08X 0x%08X 0x%08X 0x%08X */\n"
" %s\n",
slot, cur_token[0], cur_token[1], cur_token[2],
cur_token[3], mstring_get_str(token_str));
" /* Slot %d: 0x%08X 0x%08X 0x%08X 0x%08X */",
slot,
cur_token[0],cur_token[1],cur_token[2],cur_token[3]);
mstring_append(body, "\n");
mstring_append(body, mstring_get_str(token_str));
mstring_append(body, "\n");
mstring_unref(token_str);
if (vsh_get_field(cur_token, FLD_FINAL)) {
@ -751,12 +821,22 @@ void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens,
assert(has_final);
mstring_append(body,
/* The shaders leave the result in screen space, while OpenGL expects it
* in clip space.
/* the shaders leave the result in screen space, while
* opengl expects it in clip space.
* TODO: the pixel-center co-ordinate differences should be handled
*/
" oPos.xy = roundScreenCoords(oPos.xy);\n"
" oPos.xy = (2.0f * oPos.xy - surfaceSize) / surfaceSize;\n"
" oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n"
);
if (vulkan) {
mstring_append(body,
" oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n");
} else {
mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) "
"/ surfaceSize.y;\n");
}
mstring_append(body,
" oPos.z = oPos.z / clipRange.y;\n"
" oPos.w = clampAwayZeroInf(oPos.w);\n"

View File

@ -3,7 +3,13 @@
*
* Copyright (c) 2014 Jannik Vogel
* Copyright (c) 2012 espes
* Copyright (c) 2025 Matt Borgerson
*
* Based on:
* Cxbx, VertexShader.cpp
* Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
* Kingofc <kingofc@freenet.de>
* Dxbx, uPushBuffer.pas
* Copyright (c) 2007 Shadow_tj, PatrickvL
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@ -22,10 +28,8 @@
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H
#include "qemu/mstring.h"
void pgraph_glsl_gen_vsh_prog(uint16_t version, const uint32_t *tokens,
unsigned int length, MString *header,
MString *body);
void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens,
unsigned int length,
bool vulkan, MString *header, MString *body);
#endif

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -20,176 +20,39 @@
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/pgraph.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "common.h"
#include "vsh.h"
#include "vsh-ff.h"
#include "vsh-prog.h"
#include <stdbool.h>
DEF_UNIFORM_INFO_ARR(VshUniform, VSH_UNIFORM_DECL_X)
static void set_fixed_function_vsh_state(PGRAPHState *pg,
FixedFunctionVshState *state)
MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs)
{
state->skinning = (enum VshSkinning)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_SKIN);
state->normalization = pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) &
NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE;
state->local_eye =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LOCALEYE);
int i;
MString *output = mstring_new();
mstring_append_fmt(output, "#version %d\n\n", state->vulkan ? 450 : 400);
state->emission_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION);
state->ambient_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT);
state->diffuse_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE);
state->specular_src = (enum MaterialColorSource)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR);
MString *header = mstring_from_str("");
MString *uniforms = mstring_from_str("");
for (int i = 0; i < 4; i++) {
state->texture_matrix_enable[i] = pg->texture_matrix_enable[i];
}
const char *u = state->vulkan ? "" : "uniform "; // FIXME: Remove
for (int i = 0; i < 4; i++) {
unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B;
for (int j = 0; j < 4; j++) {
unsigned int masks[] = {
(i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S,
(i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T,
(i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R,
(i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q
};
state->texgen[i][j] =
(enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]);
}
}
mstring_append_fmt(uniforms,
"%svec4 clipRange;\n"
"%svec2 surfaceSize;\n"
"%svec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n"
"%svec2 fogParam;\n",
u, u, u, u
);
state->lighting =
GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING);
if (state->lighting) {
for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
state->light[i] =
(enum VshLight)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2));
}
}
if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE) {
state->foggen = (enum VshFoggen)GET_MASK(
pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_FOGGENMODE);
}
}
/*
 * Snapshot the currently selected vertex program into `prog`.
 *
 * Tokens are copied out of pg->program_data starting at the slot given by the
 * CHEOPS_PROGRAM_START field of NV_PGRAPH_CSV0_C. Copying stops right after
 * the first token whose FLD_FINAL field is set, or when the end of program
 * memory is reached. prog->program_length receives the number of tokens
 * copied.
 */
static void set_programmable_vsh_state(PGRAPHState *pg,
                                       ProgrammableVshState *prog)
{
    const size_t token_bytes = VSH_TOKEN_SIZE * sizeof(uint32_t);
    int slot = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
                        NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START);
    int copied = 0;

    while (slot < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH) {
        uint32_t *src = (uint32_t *)&pg->program_data[slot++];

        memcpy(&prog->program_data[copied++], src, token_bytes);

        /* The final token is part of the program, so test it after copying. */
        if (vsh_get_field(src, FLD_FINAL)) {
            break;
        }
    }

    prog->program_length = copied;
}
/*
 * Capture the vertex-shader-relevant PGRAPH state into `vsh`.
 *
 * The fields shared by both pipelines are filled first. Then either the
 * fixed-function or the programmable sub-state is filled, selected by the
 * MODE field of NV_PGRAPH_CSV0_D (0 = fixed function, 2 = vertex program).
 * Any other mode trips the assertion.
 *
 * fog_mode is written only while fog is enabled and point_params only while
 * point parameters are enabled; otherwise those fields are left untouched.
 */
void pgraph_glsl_set_vsh_state(PGRAPHState *pg, VshState *vsh)
{
    const bool is_programmable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                          NV_PGRAPH_CSV0_D_MODE) == 2;
    const bool is_fixed = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                   NV_PGRAPH_CSV0_D_MODE) == 0;
    assert(is_programmable || is_fixed);

    /* Values cached directly on the PGRAPH state. */
    vsh->surface_scale_factor = pg->surface_scale_factor; // FIXME
    vsh->compressed_attrs = pg->compressed_attrs;
    vsh->uniform_attrs = pg->uniform_attrs;
    vsh->swizzle_attrs = pg->swizzle_attrs;

    /* Specular configuration (NV_PGRAPH_CSV0_C). */
    vsh->specular_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
                                    NV_PGRAPH_CSV0_C_SPECULAR_ENABLE);
    vsh->separate_specular = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
                                      NV_PGRAPH_CSV0_C_SEPARATE_SPECULAR);
    /* Note the inversion: the register flag means "take alpha from material". */
    vsh->ignore_specular_alpha =
        !GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
                  NV_PGRAPH_CSV0_C_ALPHA_FROM_MATERIAL_SPECULAR);
    vsh->specular_power = pg->specular_power;
    vsh->specular_power_back = pg->specular_power_back;

    vsh->z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
                         NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;

    /* Point size and parameters; the raw size field is divided by 8. */
    vsh->point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                        NV_PGRAPH_CSV0_D_POINTPARAMSENABLE);
    vsh->point_size = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE),
                               NV097_SET_POINT_SIZE_V) / 8.0f;
    if (vsh->point_params_enable) {
        memcpy(vsh->point_params, pg->point_params,
               sizeof(vsh->point_params));
    }

    vsh->smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
                                   NV_PGRAPH_CONTROL_3_SHADEMODE) ==
                          NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;

    /* Fog (NV_PGRAPH_CONTROL_3). */
    vsh->fog_enable =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE;
    if (vsh->fog_enable) {
        /*FIXME: Use CSV0_D? */
        vsh->fog_mode =
            (enum VshFogMode)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
                                      NV_PGRAPH_CONTROL_3_FOG_MODE);
    }

    /* Pipeline-specific state. */
    vsh->is_fixed_function = is_fixed;
    if (!is_fixed) {
        set_programmable_vsh_state(pg, &vsh->programmable);
    } else {
        set_fixed_function_vsh_state(pg, &vsh->fixed_function);
    }
}
MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
{
MString *uniforms = mstring_new();
const char *u = opts.vulkan ? "" : "uniform ";
for (int i = 0; i < ARRAY_SIZE(VshUniformInfo); i++) {
const UniformInfo *info = &VshUniformInfo[i];
const char *type_str = uniform_element_type_to_str[info->type];
if (i == VshUniform_inlineValue &&
(!state->uniform_attrs ||
opts.use_push_constants_for_uniform_attrs)) {
continue;
}
if (info->count == 1) {
mstring_append_fmt(uniforms, "%s%s %s;\n", u, type_str,
info->name);
} else {
mstring_append_fmt(uniforms, "%s%s %s[%zd];\n", u, type_str,
info->name, info->count);
}
}
MString *header = mstring_from_str(
mstring_append(header,
GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG))
GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT))
GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT))
GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT))
GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT))
"\n"
"#define FLOAT_MAX uintBitsToFloat(0x7F7FFFFFu)\n"
"\n"
"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n"
"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n"
@ -218,23 +81,12 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
" t = clamp(t, uintBitsToFloat(0xDF800000), uintBitsToFloat(0x9F800000));\n"
" }\n"
" return t;\n"
"}\n"
"\n"
"vec4 NaNToOne(vec4 src) {\n"
" return mix(src, vec4(1.0), isnan(src));\n"
"}\n"
"\n"
// Xbox NV2A rasterizer appears to have 4 bit precision fixed-point
// fractional part and to convert floating-point coordinates by
// truncating (not flooring).
"vec2 roundScreenCoords(vec2 pos) {\n"
" return trunc(pos * 16.0f) / 16.0f;\n"
"}\n");
pgraph_glsl_get_vtx_header(header, opts.vulkan, state->smooth_shading,
false, opts.prefix_outputs, false);
pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading,
false, prefix_outputs, false);
if (opts.prefix_outputs) {
if (prefix_outputs) {
mstring_append(header,
"#define vtxD0 v_vtxD0\n"
"#define vtxD1 v_vtxD1\n"
@ -251,7 +103,7 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
int num_uniform_attrs = 0;
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
bool is_uniform = state->uniform_attrs & (1 << i);
bool is_swizzled = state->swizzle_attrs & (1 << i);
bool is_compressed = state->compressed_attrs & (1 << i);
@ -276,12 +128,11 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
}
}
}
mstring_append(header, "\n");
MString *body = mstring_from_str("void main() {\n");
for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
if (state->compressed_attrs & (1 << i)) {
mstring_append_fmt(
body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i);
@ -293,19 +144,23 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
}
if (state->is_fixed_function) {
pgraph_glsl_gen_vsh_ff(state, header, body);
if (state->fixed_function) {
pgraph_gen_vsh_ff_glsl(state, header, body, uniforms);
} else if (state->vertex_program) {
pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS,
(uint32_t *)state->program_data,
state->program_length,
state->vulkan, header, body);
} else {
pgraph_glsl_gen_vsh_prog(
VSH_VERSION_XVS, (uint32_t *)state->programmable.program_data,
state->programmable.program_length, header, body);
assert(false);
}
if (!state->fog_enable) {
/* FIXME: Is the fog still calculated / passed somehow?! */
mstring_append(body, " oFog = vec4(1.0);\n");
} else {
if (!state->is_fixed_function) {
/* Fog */
if (state->fog_enable) {
if (state->vertex_program) {
/* FIXME: Does foggen do something here? Let's do some tracking..
*
* "RollerCoaster Tycoon" has
@ -366,6 +221,7 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
assert(false);
break;
}
/* Calculate absolute for the modes which need it */
switch (state->fog_mode) {
case FOG_MODE_LINEAR_ABS:
case FOG_MODE_EXP_ABS:
@ -376,18 +232,17 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
break;
}
/* Fog is clamped to min/max normal float values here to match HW
* interpolation. It is then clamped to [0,1] in the pixel shader.
mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n");
} else {
/* FIXME: Is the fog still calculated / passed somehow?!
*/
// clang-format off
mstring_append(body,
" oFog = clamp(NaNToOne(vec4(fogFactor)), -FLOAT_MAX, FLOAT_MAX);\n");
// clang-format on
mstring_append(body, " oFog.xyzw = vec4(1.0);\n");
}
/* Set outputs */
mstring_append(body, "\n"
" vtxD0 = clamp(NaNToOne(oD0), 0.0, 1.0);\n"
" vtxB0 = clamp(NaNToOne(oB0), 0.0, 1.0);\n"
" vtxD0 = clamp(oD0, 0.0, 1.0);\n"
" vtxB0 = clamp(oB0, 0.0, 1.0);\n"
" vtxFog = oFog.x;\n"
" vtxT0 = oT0;\n"
" vtxT1 = oT1;\n"
@ -398,16 +253,9 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
if (state->specular_enable) {
mstring_append(body,
" vtxD1 = clamp(NaNToOne(oD1), 0.0, 1.0);\n"
" vtxB1 = clamp(NaNToOne(oB1), 0.0, 1.0);\n"
" vtxD1 = clamp(oD1, 0.0, 1.0);\n"
" vtxB1 = clamp(oB1, 0.0, 1.0);\n"
);
if (state->ignore_specular_alpha) {
mstring_append(body,
" vtxD1.w = 1.0;\n"
" vtxB1.w = 1.0;\n"
);
}
} else {
mstring_append(body,
" vtxD1 = vec4(0.0, 0.0, 0.0, 1.0);\n"
@ -415,7 +263,7 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
);
}
if (opts.vulkan) {
if (state->vulkan) {
mstring_append(body,
" gl_Position = oPos;\n"
);
@ -428,25 +276,25 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
mstring_append(body, "}\n");
/* Return combined header + source */
MString *output =
mstring_from_fmt("#version %d\n\n", opts.vulkan ? 450 : 400);
if (opts.vulkan) {
if (state->vulkan) {
// FIXME: Optimize uniforms
if (num_uniform_attrs > 0 &&
opts.use_push_constants_for_uniform_attrs) {
if (num_uniform_attrs > 0) {
if (state->use_push_constants_for_uniform_attrs) {
mstring_append_fmt(output,
"layout(push_constant) uniform PushConstants {\n"
" vec4 inlineValue[%d];\n"
"};\n\n",
"};\n\n", num_uniform_attrs);
} else {
mstring_append_fmt(uniforms, " vec4 inlineValue[%d];\n",
num_uniform_attrs);
}
}
mstring_append_fmt(
output,
"layout(binding = %d, std140) uniform VshUniforms {\n"
"%s"
"};\n\n",
opts.ubo_binding, mstring_get_str(uniforms));
VSH_UBO_BINDING, mstring_get_str(uniforms));
} else {
mstring_append(
output, mstring_get_str(uniforms));
@ -457,110 +305,5 @@ MString *pgraph_glsl_gen_vsh(const VshState *state, GenVshGlslOptions opts)
mstring_append(output, mstring_get_str(body));
mstring_unref(body);
return output;
}
/*
 * Copy the whole array `src` into values-><uniform>, but only when that
 * uniform has a location (its locs entry is not -1). A build-time check
 * rejects any size mismatch between the two arrays.
 */
#define COPY_UNIFORM_ARRAY(uniform, src)                                   \
    do {                                                                   \
        if (locs[VshUniform_##uniform] != -1) {                            \
            QEMU_BUILD_BUG_MSG(sizeof(values->uniform) != sizeof(src),     \
                               "Uniform value size inconsistency");        \
            memcpy(values->uniform, src, sizeof(src));                     \
        }                                                                  \
    } while (0)

/*
 * Fill `values` with the current contents of every vertex shader uniform
 * that has a location in `locs`. Uniforms whose location is -1 are skipped.
 *
 * The lighting-related uniforms at the end are only considered when `state`
 * describes a fixed-function shader.
 */
void pgraph_glsl_set_vsh_uniform_values(PGRAPHState *pg, const VshState *state,
                                        const VshUniformLocs locs,
                                        VshUniformValues *values)
{
    COPY_UNIFORM_ARRAY(c, pg->vsh_constants);

    if (locs[VshUniform_clipRange] != -1) {
        pgraph_glsl_set_clip_range_uniform_value(pg, values->clipRange[0]);
    }

    if (locs[VshUniform_fogParam] != -1) {
        /* The registers hold raw float bit patterns; reinterpret, don't convert. */
        uint32_t fog_bits_0 = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
        uint32_t fog_bits_1 = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
        values->fogParam[0][0] = *(float *)&fog_bits_0;
        values->fogParam[0][1] = *(float *)&fog_bits_1;
    }

    COPY_UNIFORM_ARRAY(pointParams, pg->point_params);

    if (locs[VshUniform_material_alpha] != -1) {
        values->material_alpha[0] = pg->material_alpha;
    }

    if (locs[VshUniform_inlineValue] != -1) {
        pgraph_get_inline_values(pg, state->uniform_attrs, values->inlineValue,
                                 NULL);
    }

    if (locs[VshUniform_surfaceSize] != -1) {
        /* Surface dimensions divided by the anti-aliasing factors. */
        unsigned int aa_w = 1, aa_h = 1;
        pgraph_apply_anti_aliasing_factor(pg, &aa_w, &aa_h);
        values->surfaceSize[0][0] =
            (float)pg->surface_binding_dim.width / aa_w;
        values->surfaceSize[0][1] =
            (float)pg->surface_binding_dim.height / aa_h;
    }

    /* Everything below applies to fixed-function shaders only. */
    if (!state->is_fixed_function) {
        return;
    }

    COPY_UNIFORM_ARRAY(ltctxa, pg->ltctxa);
    COPY_UNIFORM_ARRAY(ltctxb, pg->ltctxb);
    COPY_UNIFORM_ARRAY(ltc1, pg->ltc1);
    COPY_UNIFORM_ARRAY(lightInfiniteHalfVector,
                       pg->light_infinite_half_vector);
    COPY_UNIFORM_ARRAY(lightInfiniteDirection, pg->light_infinite_direction);
    COPY_UNIFORM_ARRAY(lightLocalPosition, pg->light_local_position);
    COPY_UNIFORM_ARRAY(lightLocalAttenuation, pg->light_local_attenuation);

    if (locs[VshUniform_specularPower] != -1) {
        values->specularPower[0] = pg->specular_power;
    }
}

#undef COPY_UNIFORM_ARRAY

View File

@ -3,7 +3,7 @@
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2025 Matt Borgerson
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -22,92 +22,12 @@
#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H
#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H
#include "common.h"
#include "hw/xbox/nv2a/pgraph/vsh_regs.h"
#include "qemu/mstring.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
typedef struct PGRAPHState PGRAPHState;
// FIXME: Move to struct
#define VSH_UBO_BINDING 0
/*
 * Fixed-function vertex pipeline state captured from PGRAPH registers by
 * set_fixed_function_vsh_state().
 */
typedef struct FixedFunctionVshState {
/* NV_PGRAPH_CSV0_C normalization-enable bit. */
bool normalization;
/* Copied per texture stage from pg->texture_matrix_enable. */
bool texture_matrix_enable[4];
/* Texgen mode indexed [stage][component], components in S, T, R, Q order. */
enum VshTexgen texgen[4][4];
/* Fog generation mode; only written while fog is enabled. */
enum VshFoggen foggen;
/* NV_PGRAPH_CSV0_D SKIN field. */
enum VshSkinning skinning;
/* NV_PGRAPH_CSV0_C LIGHTING field. */
bool lighting;
/* Per-light mode from NV_PGRAPH_CSV0_D; only written while lighting is on. */
enum VshLight light[NV2A_MAX_LIGHTS];
/* Material colour sources from the matching NV_PGRAPH_CSV0_C fields. */
enum MaterialColorSource emission_src;
enum MaterialColorSource ambient_src;
enum MaterialColorSource diffuse_src;
enum MaterialColorSource specular_src;
/* NV_PGRAPH_CSV0_C LOCALEYE field. */
bool local_eye;
} FixedFunctionVshState;
/*
 * Copy of the active vertex program, filled by set_programmable_vsh_state().
 */
typedef struct ProgrammableVshState {
/* Program tokens, VSH_TOKEN_SIZE 32-bit words each, stored from index 0. */
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
/* Number of valid tokens in program_data (includes the FINAL token). */
int program_length;
} ProgrammableVshState;
/*
 * Vertex shader state captured by pgraph_glsl_set_vsh_state() and consumed by
 * the GLSL generator and by pgraph_glsl_set_vsh_uniform_values().
 */
typedef struct {
unsigned int surface_scale_factor; // FIXME: Remove
/* Per-vertex-attribute bitmasks; bit i refers to attribute i. */
uint16_t compressed_attrs;
uint16_t uniform_attrs;
uint16_t swizzle_attrs;
/* NV_PGRAPH_CONTROL_3 fog-enable bit. */
bool fog_enable;
/* Only written while fog_enable is set. */
enum VshFogMode fog_mode;
/* Specular configuration from NV_PGRAPH_CSV0_C. */
bool specular_enable;
bool separate_specular;
/* Inverse of the ALPHA_FROM_MATERIAL_SPECULAR register flag. */
bool ignore_specular_alpha;
/* Copied from pg->specular_power and pg->specular_power_back. */
float specular_power;
float specular_power_back;
/* NV_PGRAPH_CSV0_D POINTPARAMSENABLE field. */
bool point_params_enable;
/* Raw point size register field divided by 8. */
float point_size;
/* Only written while point_params_enable is set. */
float point_params[8];
/* True when the CONTROL_3 shade mode is SMOOTH. */
bool smooth_shading;
/* NV_PGRAPH_CONTROL_0 Z-perspective-enable bit. */
bool z_perspective;
/* Selects which of the two sub-states below was filled in. */
bool is_fixed_function;
FixedFunctionVshState fixed_function;
ProgrammableVshState programmable;
} VshState;
void pgraph_glsl_set_vsh_state(PGRAPHState *pg, VshState *state);
/*
 * X-macro listing every vertex shader uniform as
 * DECL(S, name, element type, element count).
 *
 * NOTE(review): DECL_UNIFORM_TYPES is defined elsewhere; it presumably expands
 * this list into the VshUniform_* indices and the VshUniformLocs /
 * VshUniformValues types used by the vertex shader code - confirm in common.h.
 */
#define VSH_UNIFORM_DECL_X(S, DECL) \
DECL(S, c, vec4, NV2A_VERTEXSHADER_CONSTANTS) \
DECL(S, clipRange, vec4, 1) \
DECL(S, fogParam, vec2, 1) \
DECL(S, inlineValue, vec4, NV2A_VERTEXSHADER_ATTRIBUTES) \
DECL(S, lightInfiniteDirection, vec3, NV2A_MAX_LIGHTS) \
DECL(S, lightInfiniteHalfVector, vec3, NV2A_MAX_LIGHTS) \
DECL(S, lightLocalAttenuation, vec3, NV2A_MAX_LIGHTS) \
DECL(S, lightLocalPosition, vec3, NV2A_MAX_LIGHTS) \
DECL(S, ltc1, vec4, NV2A_LTC1_COUNT) \
DECL(S, ltctxa, vec4, NV2A_LTCTXA_COUNT) \
DECL(S, ltctxb, vec4, NV2A_LTCTXB_COUNT) \
DECL(S, material_alpha, float, 1) \
DECL(S, pointParams, float, 8) \
DECL(S, specularPower, float, 1) \
DECL(S, surfaceSize, vec2, 1)
DECL_UNIFORM_TYPES(VshUniform, VSH_UNIFORM_DECL_X)
/* Backend-specific knobs for pgraph_glsl_gen_vsh(). */
typedef struct GenVshGlslOptions {
/* Emit "#version 450" and a uniform block instead of "#version 400" and
 * loose "uniform" declarations. */
bool vulkan;
/* Alias the vtx* outputs to v_vtx* names via #define. */
bool prefix_outputs;
/* Declare inlineValue in a push_constant block instead of the uniform list. */
bool use_push_constants_for_uniform_attrs;
/* Binding index used for the VshUniforms block when vulkan is set. */
int ubo_binding;
} GenVshGlslOptions;
MString *pgraph_glsl_gen_vsh(const VshState *state,
GenVshGlslOptions glsl_opts);
void pgraph_glsl_set_vsh_uniform_values(PGRAPHState *pg, const VshState *state,
const VshUniformLocs locs,
VshUniformValues *values);
MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs);
#endif

View File

@ -3,6 +3,7 @@ specific_ss.add(files(
'profile.c',
'rdi.c',
's3tc.c',
'shaders.c',
'swizzle.c',
'texture.c',
'vertex.c',

View File

@ -27,7 +27,6 @@ DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0)
DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1)
DEF_METHOD_CASE_4(NV097, SET_TEXTURE_ADDRESS, 64)
DEF_METHOD(NV097, SET_CONTROL0)
DEF_METHOD(NV097, SET_LIGHT_CONTROL)
DEF_METHOD(NV097, SET_COLOR_MATERIAL)
DEF_METHOD(NV097, SET_FOG_MODE)
DEF_METHOD(NV097, SET_FOG_GEN_MODE)
@ -96,7 +95,6 @@ DEF_METHOD_RANGE(NV097, SET_FOG_PARAMS, 3)
DEF_METHOD_RANGE(NV097, SET_TEXGEN_PLANE_S, 4*4*4)
DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL)
DEF_METHOD_RANGE(NV097, SET_FOG_PLANE, 4)
DEF_METHOD_RANGE(NV097, SET_SPECULAR_PARAMS, 6)
DEF_METHOD_RANGE(NV097, SET_SCENE_AMBIENT_COLOR, 3)
DEF_METHOD_RANGE(NV097, SET_VIEWPORT_OFFSET, 4)
DEF_METHOD_RANGE(NV097, SET_POINT_PARAMS, 8)
@ -105,7 +103,6 @@ DEF_METHOD_RANGE(NV097, SET_COMBINER_FACTOR0, 8)
DEF_METHOD_RANGE(NV097, SET_COMBINER_FACTOR1, 8)
DEF_METHOD_RANGE(NV097, SET_COMBINER_ALPHA_OCW, 8)
DEF_METHOD_RANGE(NV097, SET_COMBINER_COLOR_ICW, 8)
DEF_METHOD_RANGE(NV097, SET_COLOR_KEY_COLOR, 4)
DEF_METHOD_RANGE(NV097, SET_VIEWPORT_SCALE, 4)
DEF_METHOD_RANGE(NV097, SET_TRANSFORM_PROGRAM, 32)
DEF_METHOD_RANGE(NV097, SET_TRANSFORM_CONSTANT, 32)
@ -137,11 +134,6 @@ DEF_METHOD_RANGE(NV097, SET_TEXCOORD3_2F, 2)
DEF_METHOD_RANGE(NV097, SET_TEXCOORD3_4F, 4)
DEF_METHOD_RANGE(NV097, SET_TEXCOORD3_2S, 1)
DEF_METHOD_RANGE(NV097, SET_TEXCOORD3_4S, 2)
DEF_METHOD(NV097, SET_FOG_COORD)
DEF_METHOD(NV097, SET_WEIGHT1F)
DEF_METHOD_RANGE(NV097, SET_WEIGHT2F, 2)
DEF_METHOD_RANGE(NV097, SET_WEIGHT3F, 3)
DEF_METHOD_RANGE(NV097, SET_WEIGHT4F, 4)
DEF_METHOD_RANGE(NV097, SET_VERTEX_DATA_ARRAY_FORMAT, 16)
DEF_METHOD_RANGE(NV097, SET_VERTEX_DATA_ARRAY_OFFSET, 16)
DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE)
@ -185,7 +177,6 @@ DEF_METHOD(NV097, CLEAR_SURFACE)
DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL)
DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL)
DEF_METHOD_RANGE(NV097, SET_SPECULAR_FOG_FACTOR, 2)
DEF_METHOD_RANGE(NV097, SET_SPECULAR_PARAMS_BACK, 6)
DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE)
DEF_METHOD_RANGE(NV097, SET_COMBINER_COLOR_OCW, 8)
DEF_METHOD(NV097, SET_COMBINER_CONTROL)

View File

@ -19,8 +19,6 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <math.h>
#include "hw/xbox/nv2a/nv2a_int.h"
#include "ui/xemu-notifications.h"
#include "ui/xemu-settings.h"
@ -225,8 +223,6 @@ void pgraph_init(NV2AState *d)
qemu_event_init(&pg->sync_complete, false);
qemu_event_init(&pg->flush_complete, false);
qemu_cond_init(&pg->framebuffer_released);
qemu_event_init(&pg->renderer_switch_complete, false);
pg->renderer_switch_phase = PGRAPH_RENDERER_SWITCH_PHASE_IDLE;
pg->frame_time = 0;
pg->draw_time = 0;
@ -1079,18 +1075,6 @@ DEF_METHOD(NV097, SET_CONTROL0)
z_perspective);
}
/*
 * NV097_SET_LIGHT_CONTROL: mirror three light-control flags from the method
 * parameter into NV_PGRAPH_CSV0_C. Each flag is reduced to 0/1 with `!= 0`
 * before being written into its register field.
 */
DEF_METHOD(NV097, SET_LIGHT_CONTROL)
{
/* Separate-specular flag. */
PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_SEPARATE_SPECULAR,
(parameter & NV097_SET_LIGHT_CONTROL_SEPARATE_SPECULAR) != 0);
/* Local-eye flag. */
PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_LOCALEYE,
(parameter & NV097_SET_LIGHT_CONTROL_LOCALEYE) != 0);
/* Alpha-from-material-specular flag. */
PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_ALPHA_FROM_MATERIAL_SPECULAR,
(parameter & NV097_SET_LIGHT_CONTROL_ALPHA_FROM_MATERIAL_SPECULAR) != 0);
}
DEF_METHOD(NV097, SET_COLOR_MATERIAL)
{
PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_EMISSION,
@ -1413,10 +1397,8 @@ DEF_METHOD(NV097, SET_BLEND_EQUATION)
DEF_METHOD(NV097, SET_DEPTH_FUNC)
{
if (parameter >= 0x200 && parameter <= 0x207) {
PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZFUNC,
parameter & 0xF);
}
}
DEF_METHOD(NV097, SET_COLOR_MASK)
@ -1809,113 +1791,6 @@ DEF_METHOD_INC(NV097, SET_FOG_PLANE)
pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true;
}
/* Coefficients of one fitted curve segment; see the reconstruct_* helpers. */
struct CurveCoefficients {
    float a;
    float b;
    float c;
};

/* One coefficient set per segment, in the same order as the step points. */
static const struct CurveCoefficients curve_coefficients[] = {
    { 1.000108475163, -9.838607076280, 54.829089549713 },
    { 1.199164441703, -3.292603784852, 7.799987995214 },
    { 8.653441252033, 29.189473787191, 43.586027561823 },
    { -531.307758450301, 117.398468683934, 113.155490738338 },
    { -4.662713151292, 1.221108944572, 1.217360986939 },
    { -124.435242105211, 35.401219563514, 35.408114377045 },
    { 10672560.259502287954, 21565843.555823743343, 10894794.336297152564 },
    { -51973801.463933646679, -104199997.554352939129, -52225454.356278456748 },
    { 972270.324080004124, 2025882.096547174733, 1054898.052467488218 },
};

/*
 * Exclusive lower bound of each segment, in descending order: a coefficient
 * belongs to the first segment whose step point it exceeds.
 */
static const float kCoefficient0StepPoints[] = {
    -0.022553957999, // power = 1.25
    -0.421539008617, // power = 4.00
    -0.678715527058, // power = 9.00
    -0.838916420937, // power = 20.00
    -0.961754500866, // power = 90.00
    -0.990773200989, // power = 375.00
    -0.994858562946, // power = 650.00
    -0.996561050415, // power = 1000.00
    -0.999547004700, // power = 1250.00
};

/* Evaluate a + b*x + c*x^2. */
static float reconstruct_quadratic(float c0,
                                   const struct CurveCoefficients *coefficients)
{
    return coefficients->a + coefficients->b * c0 + coefficients->c * c0 * c0;
}

/* Evaluate (a*x) / (b + c*x). */
static float
reconstruct_saturation_growth_rate(float c0,
                                   const struct CurveCoefficients *coefficients)
{
    return (coefficients->a * c0) / (coefficients->b + coefficients->c * c0);
}

/* Curve family used for each segment, parallel to the two tables above. */
static float (* const reconstruct_func_map[])(float,
                                              const struct CurveCoefficients *) = {
    reconstruct_quadratic,              // 1.0..1.25 max error 0.01 %
    reconstruct_quadratic,              // 1.25..4.0 max error 2.2 %
    reconstruct_quadratic,              // 4.0..9.0 max error 2.3 %
    reconstruct_saturation_growth_rate, // 9.0..20.0 max error 1.4 %
    reconstruct_saturation_growth_rate, // 20.0..90.0 max error 2.1 %
    reconstruct_saturation_growth_rate, // 90.0..375.0 max error 2.8%
    reconstruct_quadratic,              // 375..650 max error 1.0 %
    reconstruct_quadratic,              // 650..1000 max error 1.7%
    reconstruct_quadratic,              // 1000..1250 max error 1.0%
};

/*
 * Evaluate the fitted curve for one coefficient. Returns 0 when the
 * coefficient does not exceed any step point, i.e. no segment applies.
 */
static float reconstruct_power_from_coefficient(float coefficient)
{
    const size_t segment_count =
        sizeof(kCoefficient0StepPoints) / sizeof(kCoefficient0StepPoints[0]);

    for (size_t segment = 0; segment < segment_count; ++segment) {
        if (coefficient > kCoefficient0StepPoints[segment]) {
            return reconstruct_func_map[segment](coefficient,
                                                 &curve_coefficients[segment]);
        }
    }

    return 0.f;
}

/*
 * Recover an approximate specular power from the specular parameter set.
 * Only params[0..3] are read.
 */
static float reconstruct_specular_power(const float *params)
{
    // See https://github.com/dracc/xgu/blob/db3172d8c983629f0dc971092981846da22438ae/xgux.h#L279

    // Values < 1.0 will result in a positive c1 and (c2 - c0 * 2) will be very
    // close to the original value.
    if (params[1] > 0.0f && params[2] < 1.0f) {
        return params[2] - (params[0] * 2.0f);
    }

    const float c0 = params[0];
    const float c3 = params[3];

    // FIXME: This handling is not correct, but is distinct without crashing.
    // It does not appear possible for a DirectX-generated value to be positive,
    // so while this differs from hardware behavior, it may be irrelevant in
    // practice.
    if (c0 > 0.0f || c3 > 0.0f) {
        return 0.0001f;
    }

    const float full_power = reconstruct_power_from_coefficient(c0);
    const float half_power = reconstruct_power_from_coefficient(c3);

    // The range can be extended beyond 1250 by using the half power params.
    // This will only work for DirectX generated values, arbitrary params could
    // erroneously trigger this.
    //
    // There are some very low power (~1) values that have inverted powers, but
    // they are easily identified by comparatively high c0 parameters.
    const bool use_half_power =
        full_power == 0.f || (half_power > full_power && c0 < -0.1f);

    return use_half_power ? half_power * 2.f : full_power;
}
/*
 * NV097_SET_SPECULAR_PARAMS: store one of the six specular parameters.
 *
 * The slot is the method's 32-bit word offset from the first parameter
 * method. Once the last slot (5) is written, the cached specular power is
 * re-derived from the whole parameter set.
 */
DEF_METHOD_INC(NV097, SET_SPECULAR_PARAMS)
{
int slot = (method - NV097_SET_SPECULAR_PARAMS) / 4;
/* Reinterpret the raw parameter bits as a float; no numeric conversion. */
pg->specular_params[slot] = *(float *)&parameter;
if (slot == 5) {
pg->specular_power = reconstruct_specular_power(pg->specular_params);
}
}
DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR)
{
int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4;
@ -1968,12 +1843,6 @@ DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW)
pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORI0 + slot*4, parameter);
}
/*
 * NV097_SET_COLOR_KEY_COLOR: write the parameter unchanged into the colour
 * key register selected by the method's word offset from the first one.
 */
DEF_METHOD_INC(NV097, SET_COLOR_KEY_COLOR)
{
int slot = (method - NV097_SET_COLOR_KEY_COLOR) / 4;
pgraph_reg_w(pg, NV_PGRAPH_COLORKEYCOLOR0 + slot * 4, parameter);
}
DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE)
{
int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4;
@ -2141,26 +2010,6 @@ DEF_METHOD_INC(NV097, SET_VERTEX4F)
}
}
/*
 * NV097_SET_FOG_COORD: set the inline value of the fog vertex attribute.
 * The parameter bits are reinterpreted as a float and replicated into all
 * four components.
 */
DEF_METHOD(NV097, SET_FOG_COORD)
{
VertexAttribute *attribute = &pg->vertex_attributes[NV2A_VERTEX_ATTR_FOG];
/* NOTE(review): presumably makes sure inline storage exists for this
 * attribute before it is written - confirm against the helper. */
pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_FOG);
attribute->inline_value[0] = *(float*)&parameter;
attribute->inline_value[1] = attribute->inline_value[0];
attribute->inline_value[2] = attribute->inline_value[0];
attribute->inline_value[3] = attribute->inline_value[0];
}
/*
 * NV097_SET_WEIGHT1F: set the inline value of the weight vertex attribute
 * from a single float. The result is (value, 0, 0, 1).
 */
DEF_METHOD(NV097, SET_WEIGHT1F)
{
VertexAttribute *attribute = &pg->vertex_attributes[NV2A_VERTEX_ATTR_WEIGHT];
pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_WEIGHT);
/* Reinterpret the raw parameter bits as a float; no numeric conversion. */
attribute->inline_value[0] = *(float*)&parameter;
attribute->inline_value[1] = 0.f;
attribute->inline_value[2] = 0.f;
attribute->inline_value[3] = 1.f;
}
DEF_METHOD_INC(NV097, SET_NORMAL3S)
{
int slot = (method - NV097_SET_NORMAL3S) / 4;
@ -2295,6 +2144,7 @@ DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F)
SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1);
}
DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F)
{
SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2);
@ -2305,34 +2155,8 @@ DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F)
SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3);
}
/*
 * NV097_SET_WEIGHT4F: update the weight vertex attribute through the
 * SET_VERTEX_ATTRIBUTE_F helper macro, the same way the four-float texture
 * coordinate methods do for their attributes.
 */
DEF_METHOD_INC(NV097, SET_WEIGHT4F)
{
SET_VERTEX_ATTRIBUTE_F(NV097_SET_WEIGHT4F, NV2A_VERTEX_ATTR_WEIGHT);
}
#undef SET_VERTEX_ATTRIBUTE_F
/*
 * NV097_SET_WEIGHT2F: write one component of a two-float weight.
 *
 * The component index is the method's word offset from the first method.
 * Components 2 and 3 are reset to 0 and 1 on every write.
 */
DEF_METHOD_INC(NV097, SET_WEIGHT2F)
{
int slot = (method - NV097_SET_WEIGHT2F) / 4;
VertexAttribute *attribute =
&pg->vertex_attributes[NV2A_VERTEX_ATTR_WEIGHT];
pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_WEIGHT);
/* Reinterpret the raw parameter bits as a float; no numeric conversion. */
attribute->inline_value[slot] = *(float*)&parameter;
attribute->inline_value[2] = 0.0f;
attribute->inline_value[3] = 1.0f;
}
/*
 * NV097_SET_WEIGHT3F: write one component of a three-float weight.
 *
 * The component index is the method's word offset from the first method.
 * Component 3 is reset to 1 on every write.
 */
DEF_METHOD_INC(NV097, SET_WEIGHT3F)
{
int slot = (method - NV097_SET_WEIGHT3F) / 4;
VertexAttribute *attribute =
&pg->vertex_attributes[NV2A_VERTEX_ATTR_WEIGHT];
pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_WEIGHT);
/* Reinterpret the raw parameter bits as a float; no numeric conversion. */
attribute->inline_value[slot] = *(float*)&parameter;
attribute->inline_value[3] = 1.0f;
}
#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \
do { \
int slot = (method - (command)) / 4; \
@ -2702,11 +2526,7 @@ DEF_METHOD(NV097, DRAW_ARRAYS)
int32_t count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT) + 1;
if (pg->inline_elements_length) {
/* FIXME: HW throws an exception if the start index is > 0xFFFF. This
* would prevent this assert from firing for any reasonable choice of
* NV2A_MAX_BATCH_LENGTH (which must be larger to accommodate
* NV097_INLINE_ARRAY anyway)
*/
/* FIXME: Determine HW behavior for overflow case. */
assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH);
assert(!pg->draw_arrays_prevent_connect);
@ -2908,15 +2728,6 @@ DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR)
pgraph_reg_w(pg, NV_PGRAPH_SPECFOGFACTOR0 + slot*4, parameter);
}
/*
 * NV097_SET_SPECULAR_PARAMS_BACK: store one of the six back-face specular
 * parameters.
 *
 * The slot is the method's 32-bit word offset from the first parameter
 * method. Once the last slot (5) is written, the cached back-face specular
 * power is re-derived from the whole parameter set.
 */
DEF_METHOD_INC(NV097, SET_SPECULAR_PARAMS_BACK)
{
int slot = (method - NV097_SET_SPECULAR_PARAMS_BACK) / 4;
/* Reinterpret the raw parameter bits as a float; no numeric conversion. */
pg->specular_params_back[slot] = *(float *)&parameter;
if (slot == 5) {
pg->specular_power_back = reconstruct_specular_power(pg->specular_params_back);
}
}
DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE)
{
pgraph_reg_w(pg, NV_PGRAPH_SHADERCLIPMODE, parameter);
@ -3147,31 +2958,12 @@ void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter,
NV2A_DPRINTF("Report result %d @%" HWADDR_PRIx, result, offset);
}
/*
 * CPU work item for a renderer switch; pgraph_process_pending() queues it
 * with async_safe_run_on_cpu().
 *
 * Under the PFIFO lock it moves the switch to the CPU_WAITING phase and kicks
 * PFIFO. It then blocks the caller until renderer_switch_complete is set.
 * data.host_ptr carries the NV2AState.
 */
static void do_wait_for_renderer_switch(CPUState *cpu, run_on_cpu_data data)
{
NV2AState *d = (NV2AState *)data.host_ptr;
qemu_mutex_lock(&d->pfifo.lock);
d->pgraph.renderer_switch_phase = PGRAPH_RENDERER_SWITCH_PHASE_CPU_WAITING;
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
/* Wait outside the PFIFO lock so the switch itself can take it. */
qemu_event_wait(&d->pgraph.renderer_switch_complete);
}
void pgraph_process_pending(NV2AState *d)
{
PGRAPHState *pg = &d->pgraph;
pg->renderer->ops.process_pending(d);
if (g_config.display.renderer != pg->renderer->type &&
pg->renderer_switch_phase == PGRAPH_RENDERER_SWITCH_PHASE_IDLE) {
pg->renderer_switch_phase = PGRAPH_RENDERER_SWITCH_PHASE_STARTED;
qemu_event_reset(&pg->renderer_switch_complete);
async_safe_run_on_cpu(qemu_get_cpu(0), do_wait_for_renderer_switch,
RUN_ON_CPU_HOST_PTR(d));
}
if (pg->renderer_switch_phase == PGRAPH_RENDERER_SWITCH_PHASE_CPU_WAITING) {
if (g_config.display.renderer != pg->renderer->type) {
qemu_mutex_lock(&d->pgraph.renderer_lock);
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
@ -3183,13 +2975,14 @@ void pgraph_process_pending(NV2AState *d)
qemu_mutex_lock(&d->pfifo.lock);
qemu_mutex_unlock(&d->pgraph.lock);
if (pg->renderer->ops.process_pending) {
pg->renderer->ops.process_pending(d);
}
qemu_mutex_unlock(&d->pfifo.lock);
qemu_mutex_lock(&d->pgraph.lock);
while (pg->framebuffer_in_use) {
qemu_cond_wait(&d->pgraph.framebuffer_released,
&d->pgraph.renderer_lock);
qemu_cond_wait(&d->pgraph.framebuffer_released, &d->pgraph.renderer_lock);
}
if (pg->renderer->ops.finalize) {
@ -3202,9 +2995,6 @@ void pgraph_process_pending(NV2AState *d)
qemu_mutex_unlock(&d->pgraph.renderer_lock);
qemu_mutex_unlock(&d->pgraph.lock);
qemu_mutex_lock(&d->pfifo.lock);
pg->renderer_switch_phase = PGRAPH_RENDERER_SWITCH_PHASE_IDLE;
qemu_event_set(&pg->renderer_switch_complete);
}
}

View File

@ -29,10 +29,9 @@
#include "qemu/thread.h"
#include "cpu.h"
#include "shaders.h"
#include "surface.h"
#include "texture.h"
#include "util.h"
#include "vsh_regs.h"
typedef struct NV2AState NV2AState;
typedef struct PGRAPHNullState PGRAPHNullState;
@ -198,11 +197,6 @@ typedef struct PGRAPHState {
float light_local_position[NV2A_MAX_LIGHTS][3];
float light_local_attenuation[NV2A_MAX_LIGHTS][3];
float specular_params[6];
float specular_power;
float specular_params_back[6];
float specular_power_back;
float point_params[8];
VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
@ -244,13 +238,6 @@ typedef struct PGRAPHState {
bool framebuffer_in_use;
QemuCond framebuffer_released;
enum {
PGRAPH_RENDERER_SWITCH_PHASE_IDLE,
PGRAPH_RENDERER_SWITCH_PHASE_STARTED,
PGRAPH_RENDERER_SWITCH_PHASE_CPU_WAITING,
} renderer_switch_phase;
QemuEvent renderer_switch_complete;
unsigned int surface_scale_factor;
uint8_t *scale_buf;

92
hw/xbox/nv2a/pgraph/psh.h Normal file
View File

@ -0,0 +1,92 @@
/*
* QEMU Geforce NV2A pixel shader translation
*
* Copyright (c) 2013 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_NV2A_PSH_H
#define HW_NV2A_PSH_H
#include <stdint.h>
#include <stdbool.h>
/*
 * Alpha test comparison function.
 *
 * pgraph_get_shader_state() casts the NV_PGRAPH_CONTROL_0_ALPHAFUNC register
 * field directly to this enum, so the enumerator order (NEVER = 0 through
 * ALWAYS = 7) must stay in step with that field's encoding.
 */
enum PshAlphaFunc {
ALPHA_FUNC_NEVER,
ALPHA_FUNC_LESS,
ALPHA_FUNC_EQUAL,
ALPHA_FUNC_LEQUAL,
ALPHA_FUNC_GREATER,
ALPHA_FUNC_NOTEQUAL,
ALPHA_FUNC_GEQUAL,
ALPHA_FUNC_ALWAYS,
};
/*
 * Depth comparison function used for shadow-map texture lookups.
 *
 * pgraph_get_shader_state() casts the NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC
 * register field directly to this enum, so the enumerator order (NEVER = 0
 * through ALWAYS = 7) must stay in step with that field's encoding.
 */
enum PshShadowDepthFunc {
SHADOW_DEPTH_FUNC_NEVER,
SHADOW_DEPTH_FUNC_LESS,
SHADOW_DEPTH_FUNC_EQUAL,
SHADOW_DEPTH_FUNC_LEQUAL,
SHADOW_DEPTH_FUNC_GREATER,
SHADOW_DEPTH_FUNC_NOTEQUAL,
SHADOW_DEPTH_FUNC_GEQUAL,
SHADOW_DEPTH_FUNC_ALWAYS,
};
/*
 * Convolution kernel applied when sampling a texture.
 *
 * pgraph_get_shader_state() leaves a stage at DISABLED unless its min filter
 * is NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0; in that case the
 * NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL field is cast directly to this
 * enum, so QUINCUNX and GAUSSIAN must keep the numeric values of the
 * corresponding ..._KERNEL_QUINCUNX / ..._KERNEL_GAUSSIAN_3 constants.
 */
enum ConvolutionFilter {
CONVOLUTION_FILTER_DISABLED,
CONVOLUTION_FILTER_QUINCUNX,
CONVOLUTION_FILTER_GAUSSIAN,
};
/*
 * Pixel (fragment) shader state: everything the pixel shader generator needs
 * to know about the register combiners and the four texture stages.
 *
 * Embedded in ShaderState, which pgraph_get_shader_state() zero-fills before
 * populating because the whole structure is hashed. Per-texture-stage arrays
 * are indexed by stage (0..3); entries for inactive/disabled stages keep
 * their zeroed value (except compare_mode, which is always filled in).
 */
typedef struct PshState {
/* NOTE(review): not written by pgraph_get_shader_state(); presumably set by
 * the Vulkan renderer before shader generation - confirm. */
bool vulkan;
/* fragment shader - register combiner stuff */
/* Raw NV_PGRAPH_COMBINECTL; the low byte is the active combiner stage count */
uint32_t combiner_control;
/* Raw NV_PGRAPH_SHADERPROG */
uint32_t shader_stage_program;
/* Raw NV_PGRAPH_SHADERCTL */
uint32_t other_stage_input;
/* Raw NV_PGRAPH_COMBINESPECFOG0 / NV_PGRAPH_COMBINESPECFOG1 */
uint32_t final_inputs_0;
uint32_t final_inputs_1;
/* Raw per-stage NV_PGRAPH_COMBINECOLORI0+ / COMBINECOLORO0+ registers; only
 * the first <stage count> entries are filled, the rest stay zero */
uint32_t rgb_inputs[8], rgb_outputs[8];
/* Raw per-stage NV_PGRAPH_COMBINEALPHAI0+ / COMBINEALPHAO0+ registers */
uint32_t alpha_inputs[8], alpha_outputs[8];
/* NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE bit */
bool point_sprite;
/* Texture colour format is flagged "linear" in the colour format table */
bool rect_tex[4];
/* Currently never set: the code that would fill it is under #if 0 */
bool snorm_tex[4];
/* compare_mode[i][j] is bit (4 * i + j) of NV_PGRAPH_SHADERCLIPMODE */
bool compare_mode[4][4];
/* NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN bit of the stage's TEXCTL0 register */
bool alphakill[4];
/* Convolution kernel selected through the stage's min filter */
enum ConvolutionFilter conv_tex[4];
/* Colour format is one of the LU_IMAGE_DEPTH_X8_Y24 (fixed/float) formats */
bool tex_x8y24[4];
/* NV_PGRAPH_TEXFMT0_DIMENSIONALITY field */
int dim_tex[4];
/* Reported (1 << BASE_SIZE_U/V/P) texture size; non-zero only for
 * non-linear, non-cubemap textures whose border source is not "color" */
float border_logical_size[4][3];
/* Reciprocal of the real (bordered) size: 1/16 for reported sizes below 8,
 * otherwise 1 / (2 * reported size) */
float border_inv_real_size[4][3];
/* Texture colour format is flagged as a depth format */
bool shadow_map[4];
enum PshShadowDepthFunc shadow_depth_func;
/* NV_PGRAPH_CONTROL_0_ALPHATESTENABLE bit and ALPHAFUNC field */
bool alpha_test;
enum PshAlphaFunc alpha_func;
/* NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE bit */
bool window_clip_exclusive;
/* NV_PGRAPH_CONTROL_3_SHADEMODE == ..._SHADEMODE_SMOOTH */
bool smooth_shading;
/* NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN == ..._ZCLAMP_EN_CULL */
bool depth_clipping;
/* NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE bit (mirrors ShaderState) */
bool z_perspective;
} PshState;
#endif

View File

@ -1,190 +0,0 @@
/*
* QEMU Geforce NV2A pixel shader translation
*
* Copyright (c) 2013 espes
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_NV2A_PSH_H
#define HW_NV2A_PSH_H
#include <stdint.h>
#include <stdbool.h>
/*
* For some background, see the OpenGL extension:
* https://www.opengl.org/registry/specs/NV/register_combiners.txt
*/
/*
 * Texture addressing mode of a pixel shader texture stage.
 *
 * The trailing columns on each line correspond to texture stages 0-3; going
 * by the "valid in stage" header, '*' marks a stage in which the mode may be
 * used and '-' one in which it may not.
 */
enum PS_TEXTUREMODES
{ // valid in stage 0 1 2 3
PS_TEXTUREMODES_NONE= 0x00L, // * * * *
PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * *
PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * *
PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * *
PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * *
PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * *
PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * *
PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * *
PS_TEXTUREMODES_BRDF= 0x08L, // - - * *
PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * *
PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * *
PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * -
PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - *
PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - *
PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - *
PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * *
PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * *
PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * -
PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - *
// 0x13-0x1f reserved
};
/*
 * Mapping applied to a register value before it enters a combiner.
 *
 * Each line gives the formula applied to the input x and whether the mapping
 * may be used in the final combiner. The values are multiples of 0x20, so
 * they can be OR-ed with a PS_REGISTER value (see PS_REGISTER_ONE and
 * friends).
 */
enum PS_INPUTMAPPING
{
PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) OK for final combiner
PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) OK for final combiner
PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2*max(0,x) - 1 invalid for final combiner
PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2*max(0,x) invalid for final combiner
PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 invalid for final combiner
PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) invalid for final combiner
PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x invalid for final combiner
PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x invalid for final combiner
};
/*
 * Register selectors for combiner inputs and outputs.
 *
 * The per-line tags state whether a selector may be read (r), written (w) or
 * both (r/w). Selector 0x00 is shared: as a source it is ZERO, as a
 * destination it is DISCARD.
 *
 * The last four entries are not separate registers but ZERO combined with an
 * input mapping, which yields a constant: UNSIGNED_INVERT gives 1 - 0 = 1,
 * EXPAND_NORMAL gives 2*0 - 1 = -1, HALFBIAS_NEGATE gives 1/2 - 0 = 1/2 and
 * HALFBIAS_NORMAL gives 0 - 1/2 = -1/2.
 */
enum PS_REGISTER
{
PS_REGISTER_ZERO= 0x00L, // r
PS_REGISTER_DISCARD= 0x00L, // w
PS_REGISTER_C0= 0x01L, // r
PS_REGISTER_C1= 0x02L, // r
PS_REGISTER_FOG= 0x03L, // r
PS_REGISTER_V0= 0x04L, // r/w
PS_REGISTER_V1= 0x05L, // r/w
PS_REGISTER_T0= 0x08L, // r/w
PS_REGISTER_T1= 0x09L, // r/w
PS_REGISTER_T2= 0x0aL, // r/w
PS_REGISTER_T3= 0x0bL, // r/w
PS_REGISTER_R0= 0x0cL, // r/w
PS_REGISTER_R1= 0x0dL, // r/w
PS_REGISTER_V1R0_SUM= 0x0eL, // r
PS_REGISTER_EF_PROD= 0x0fL, // r
PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // OK for final combiner
PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // invalid for final combiner
PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // invalid for final combiner
PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // invalid for final combiner
};
/*
 * Flag bits that accompany the combiner stage count.
 *
 * Three independent choices, each with a zero "default" and a single set bit:
 * which bit of r0.a drives the mux (bit 0x0001), and whether the C0 / C1
 * constants are shared by all stages or unique per stage (bits 0x0010 and
 * 0x0100).
 */
enum PS_COMBINERCOUNTFLAGS
{
PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb
PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb
PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage
PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage
PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage
PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage
};
/*
 * Output control flags of a combiner stage.
 *
 * Several independent groups share this enum: the output scale/bias applied
 * to the result (IDENTITY .. SHIFTRIGHT_1), the blue-to-alpha replication
 * flags, whether the AB and CD terms are products or dot products, and
 * whether the third output is the sum AB+CD or a mux between them. Entries
 * tagged "RGB only" do not apply to the alpha combiner.
 */
enum PS_COMBINEROUTPUT
{
PS_COMBINEROUTPUT_IDENTITY= 0x00L, // y = x
PS_COMBINEROUTPUT_BIAS= 0x08L, // y = x - 0.5
PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // y = x*2
PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5)*2
PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // y = x*4
PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // y = x/2
PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA= 0x80L, // RGB only
PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA= 0x40L, // RGB only
PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L,
PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only
PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L,
PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only
PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD
PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a
};
/*
 * Channel selector for a combiner input.
 *
 * Value 0x00 has two names because its meaning depends on the consumer: it
 * selects RGB when feeding the RGB combiner and BLUE when feeding the alpha
 * combiner. 0x10 selects the alpha channel in either case.
 */
enum PS_CHANNEL
{
PS_CHANNEL_RGB= 0x00, // used as RGB source
PS_CHANNEL_BLUE= 0x00, // used as ALPHA source
PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source
};
/*
 * Flag bits controlling how the final combiner forms its V1+R0 sum: optional
 * clamping of the sum and optional unsigned-invert of either operand.
 */
enum PS_FINALCOMBINERSETTING
{
PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1]
PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping
PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping
};
/*
 * Mapping applied to a texture stage's result before it is used as a
 * dot-product input.
 *
 * As in PS_TEXTUREMODES, the trailing columns correspond to texture stages
 * 0-3; going by the header, every mapping is unavailable in stage 0.
 */
enum PS_DOTMAPPING
{ // valid in stage 0 1 2 3
PS_DOTMAPPING_ZERO_TO_ONE= 0x00L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1_D3D= 0x01L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1_GL= 0x02L, // - * * *
PS_DOTMAPPING_MINUS1_TO_1= 0x03L, // - * * *
PS_DOTMAPPING_HILO_1= 0x04L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE_D3D= 0x05L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE_GL= 0x06L, // - * * *
PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * *
};
/*
 * Color-key handling mode for a texture stage.
 * NOTE(review): going by the names, these select what happens to a texel that
 * matches the color key (nothing, zero alpha, zero color and alpha, or
 * discard) - confirm against the code that consumes this enum.
 */
enum PS_COLORKEYMODE {
COLOR_KEY_NONE = 0,
COLOR_KEY_KILL_ALPHA = 1,
COLOR_KEY_KILL_COLOR_AND_ALPHA = 2,
COLOR_KEY_DISCARD = 3,
};
/*
 * Alpha test comparison function, enumerated from NEVER (0) to ALWAYS (7).
 * NOTE(review): presumably mirrors the hardware alpha-function field encoding
 * so register values can be cast to it - confirm against the users.
 */
enum PshAlphaFunc {
ALPHA_FUNC_NEVER,
ALPHA_FUNC_LESS,
ALPHA_FUNC_EQUAL,
ALPHA_FUNC_LEQUAL,
ALPHA_FUNC_GREATER,
ALPHA_FUNC_NOTEQUAL,
ALPHA_FUNC_GEQUAL,
ALPHA_FUNC_ALWAYS,
};
/*
 * Depth comparison function for shadow-map lookups, enumerated from
 * NEVER (0) to ALWAYS (7).
 * NOTE(review): presumably mirrors the hardware shadow Z-function field
 * encoding so register values can be cast to it - confirm against the users.
 */
enum PshShadowDepthFunc {
SHADOW_DEPTH_FUNC_NEVER,
SHADOW_DEPTH_FUNC_LESS,
SHADOW_DEPTH_FUNC_EQUAL,
SHADOW_DEPTH_FUNC_LEQUAL,
SHADOW_DEPTH_FUNC_GREATER,
SHADOW_DEPTH_FUNC_NOTEQUAL,
SHADOW_DEPTH_FUNC_GEQUAL,
SHADOW_DEPTH_FUNC_ALWAYS,
};
/*
 * Convolution kernel applied when sampling a texture; DISABLED (0) means no
 * convolution.
 */
enum ConvolutionFilter {
CONVOLUTION_FILTER_DISABLED,
CONVOLUTION_FILTER_QUINCUNX,
CONVOLUTION_FILTER_GAUSSIAN,
};
#endif

View File

@ -0,0 +1,302 @@
/*
* Geforce NV2A PGRAPH OpenGL Renderer
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2020-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "hw/xbox/nv2a/debug.h"
#include "texture.h"
#include "pgraph.h"
#include "shaders.h"
/*
 * Capture every piece of PGRAPH state that influences shader generation.
 *
 * Reads the relevant PGRAPH registers and cached PGRAPHState fields and packs
 * them into a ShaderState returned by value. The structure is zero-filled
 * first because callers hash it, so padding bytes and fields that are skipped
 * here (e.g. fixed-function fields while a vertex program is active, or
 * per-texture fields of disabled stages) are guaranteed to be zero.
 *
 * Side effect: clears pg->program_data_dirty.
 *
 * The `vulkan` and `use_push_constants_for_uniform_attrs` members are not
 * touched here and therefore remain false/zero on return.
 */
ShaderState pgraph_get_shader_state(PGRAPHState *pg)
{
    bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                   NV_PGRAPH_CSV0_D_MODE) == 2;
    bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                   NV_PGRAPH_CSV0_D_MODE) == 0;
    int program_start = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
                                 NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START);

    pg->program_data_dirty = false;

    ShaderState state;

    // We will hash it, so make sure any padding is zeroed
    memset(&state, 0, sizeof(ShaderState));

    state.surface_scale_factor = pg->surface_scale_factor;

    state.compressed_attrs = pg->compressed_attrs;
    state.uniform_attrs = pg->uniform_attrs;
    state.swizzle_attrs = pg->swizzle_attrs;

    /* register combiner stuff */
    state.psh.window_clip_exclusive =
        pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE;
    state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL);
    state.psh.shader_stage_program = pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG);
    state.psh.other_stage_input = pgraph_reg_r(pg, NV_PGRAPH_SHADERCTL);
    state.psh.final_inputs_0 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG0);
    state.psh.final_inputs_1 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG1);

    state.psh.alpha_test =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
    state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ALPHAFUNC);

    state.psh.point_sprite = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
                             NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE;

    state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK(
        pgraph_reg_r(pg, NV_PGRAPH_SHADOWCTL), NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC);

    state.fixed_function = fixed_function;
    state.specular_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C),
                                     NV_PGRAPH_CSV0_C_SPECULAR_ENABLE);

    /* fixed function stuff */
    if (fixed_function) {
        state.skinning = (enum VshSkinning)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                                    NV_PGRAPH_CSV0_D_SKIN);
        state.lighting =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING);
        state.normalization =
            pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE;

        /* color material */
        state.emission_src = (enum MaterialColorSource)GET_MASK(
            pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION);
        state.ambient_src = (enum MaterialColorSource)GET_MASK(
            pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT);
        state.diffuse_src = (enum MaterialColorSource)GET_MASK(
            pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE);
        state.specular_src = (enum MaterialColorSource)GET_MASK(
            pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR);
    }

    /* vertex program stuff */
    state.vertex_program = vertex_program;
    state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
                          NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;
    state.psh.z_perspective = state.z_perspective;

    state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                         NV_PGRAPH_CSV0_D_POINTPARAMSENABLE);
    /* The register field is divided by 8 to obtain the point size */
    state.point_size =
        GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE), NV097_SET_POINT_SIZE_V) / 8.0f;
    if (state.point_params_enable) {
        for (int i = 0; i < 8; i++) {
            state.point_params[i] = pg->point_params[i];
        }
    }

    /* geometry shader stuff */
    state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode;
    state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(
        pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_FRONTFACEMODE);
    state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(
        pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_BACKFACEMODE);

    state.smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
                                    NV_PGRAPH_CONTROL_3_SHADEMODE) ==
                           NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH;
    state.psh.smooth_shading = state.smooth_shading;

    state.psh.depth_clipping = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
                                        NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
                               NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL;

    state.program_length = 0;

    if (vertex_program) {
        // copy in vertex program tokens
        /* Copy from the start slot up to and including the first token with
         * FLD_FINAL set, or to the end of program memory if none is found. */
        for (int i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH;
             i++) {
            uint32_t *cur_token = (uint32_t *)&pg->program_data[i];
            memcpy(&state.program_data[state.program_length], cur_token,
                   VSH_TOKEN_SIZE * sizeof(uint32_t));
            state.program_length++;

            if (vsh_get_field(cur_token, FLD_FINAL)) {
                break;
            }
        }
    }

    /* Texgen */
    /* Stages 0-1 live in CSV1_A and stages 2-3 in CSV1_B; within a register
     * the even stage uses the T0 field masks and the odd stage the T1 ones. */
    for (int i = 0; i < 4; i++) {
        unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B;
        for (int j = 0; j < 4; j++) {
            unsigned int masks[] = {
                (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S,
                (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T,
                (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R,
                (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q
            };
            state.texgen[i][j] =
                (enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]);
        }
    }

    /* Fog */
    state.fog_enable =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE;
    if (state.fog_enable) {
        /*FIXME: Use CSV0_D? */
        state.fog_mode = (enum VshFogMode)GET_MASK(
            pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_FOG_MODE);
        state.foggen = (enum VshFoggen)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D),
                                                NV_PGRAPH_CSV0_D_FOGGENMODE);
    } else {
        /* FIXME: Do we still pass the fogmode? */
        state.fog_mode = (enum VshFogMode)0;
        state.foggen = (enum VshFoggen)0;
    }

    /* Texture matrices */
    for (int i = 0; i < 4; i++) {
        state.texture_matrix_enable[i] = pg->texture_matrix_enable[i];
    }

    /* Lighting */
    /* state.lighting is only ever set in the fixed-function branch above */
    if (state.lighting) {
        for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
            state.light[i] = (enum VshLight)GET_MASK(
                pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2));
        }
    }

    /* Copy content of enabled combiner stages */
    /* The stage count is the low byte of a guest-controlled register, so it
     * can be as large as 255 while the destination arrays only hold 8
     * entries. Clamp it so a bogus value cannot write past the end of
     * `state`. combiner_control itself is still passed through unmodified. */
    const int max_stages =
        (int)(sizeof(state.psh.rgb_inputs) / sizeof(state.psh.rgb_inputs[0]));
    int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
    if (num_stages > max_stages) {
        num_stages = max_stages;
    }
    for (int i = 0; i < num_stages; i++) {
        state.psh.rgb_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4);
        state.psh.rgb_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4);
        state.psh.alpha_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4);
        state.psh.alpha_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4);
        // constant_0[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
        // constant_1[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
    }

    for (int i = 0; i < 4; i++) {
        /* Clip compare modes are recorded even for disabled stages */
        for (int j = 0; j < 4; j++) {
            state.psh.compare_mode[i][j] =
                (pgraph_reg_r(pg, NV_PGRAPH_SHADERCLIPMODE) >> (4 * i + j)) & 1;
        }

        uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i * 4);
        bool enabled = pgraph_is_texture_stage_active(pg, i) &&
                       (ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE);
        if (!enabled) {
            /* Everything below stays at its zeroed default for this stage */
            continue;
        }

        state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN;

        uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4);
        state.psh.dim_tex[i] = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY);

        unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR);
        BasicColorFormatInfo f = kelvin_color_format_info_map[color_format];
        state.psh.rect_tex[i] = f.linear;
        state.psh.tex_x8y24[i] = color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED ||
                                 color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT;

        uint32_t border_source =
            GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
        bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
        state.psh.border_logical_size[i][0] = 0.0f;
        state.psh.border_logical_size[i][1] = 0.0f;
        state.psh.border_logical_size[i][2] = 0.0f;
        if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) {
            if (!f.linear && !cubemap) {
                // The actual texture will be (at least) double the reported
                // size and shifted by a 4 texel border but texture coordinates
                // will still be relative to the reported size.
                unsigned int reported_width =
                    1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
                unsigned int reported_height =
                    1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
                unsigned int reported_depth =
                    1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);

                state.psh.border_logical_size[i][0] = reported_width;
                state.psh.border_logical_size[i][1] = reported_height;
                state.psh.border_logical_size[i][2] = reported_depth;

                /* Reported sizes below 8 use a fixed 1/16 reciprocal instead
                 * of 1 / (2 * size). */
                if (reported_width < 8) {
                    state.psh.border_inv_real_size[i][0] = 0.0625f;
                } else {
                    state.psh.border_inv_real_size[i][0] =
                        1.0f / (reported_width * 2.0f);
                }
                if (reported_height < 8) {
                    state.psh.border_inv_real_size[i][1] = 0.0625f;
                } else {
                    state.psh.border_inv_real_size[i][1] =
                        1.0f / (reported_height * 2.0f);
                }
                if (reported_depth < 8) {
                    state.psh.border_inv_real_size[i][2] = 0.0625f;
                } else {
                    state.psh.border_inv_real_size[i][2] =
                        1.0f / (reported_depth * 2.0f);
                }
            } else {
                NV2A_UNIMPLEMENTED(
                    "Border source texture with linear %d cubemap %d", f.linear,
                    cubemap);
            }
        }

        /* Keep track of whether texture data has been loaded as signed
         * normalized integers or not. This dictates whether or not we will need
         * to re-map in fragment shader for certain texture modes (e.g.
         * bumpenvmap).
         *
         * FIXME: When signed texture data is loaded as unsigned and remapped in
         * fragment shader, there may be interpolation artifacts. Fix this to
         * support signed textures more appropriately.
         */
#if 0 // FIXME
        state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM)
                                 || (f.gl_internal_format == GL_RG8_SNORM);
#endif
        state.psh.shadow_map[i] = f.depth;

        uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i * 4);
        unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
        enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED;
        /* FIXME: We do not distinguish between min and mag when
         * performing convolution. Just use it if specified for min (common AA
         * case).
         */
        if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) {
            int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL);
            assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX ||
                   k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3);
            /* The register encoding is used directly as the enum value */
            kernel = (enum ConvolutionFilter)k;
        }

        state.psh.conv_tex[i] = kernel;
    }

    return state;
}

View File

@ -0,0 +1,110 @@
/*
* QEMU Geforce NV2A shader generator
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef HW_XBOX_NV2A_PGRAPH_SHADERS_H
#define HW_XBOX_NV2A_PGRAPH_SHADERS_H
#include <stdint.h>
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "vsh.h"
#include "psh.h"
/*
 * Primitive type of the current draw, as seen by the shader generators.
 *
 * pgraph_get_shader_state() casts PGRAPHState::primitive_mode directly to
 * this enum, so the enumerator order must match the values stored there.
 */
enum ShaderPrimitiveMode {
PRIM_TYPE_INVALID,
PRIM_TYPE_POINTS,
PRIM_TYPE_LINES,
PRIM_TYPE_LINE_LOOP,
PRIM_TYPE_LINE_STRIP,
PRIM_TYPE_TRIANGLES,
PRIM_TYPE_TRIANGLE_STRIP,
PRIM_TYPE_TRIANGLE_FAN,
PRIM_TYPE_QUADS,
PRIM_TYPE_QUAD_STRIP,
PRIM_TYPE_POLYGON,
};
/*
 * Polygon rasterization mode.
 *
 * pgraph_get_shader_state() casts the NV_PGRAPH_SETUPRASTER_FRONTFACEMODE
 * and ..._BACKFACEMODE register fields directly to this enum, so the
 * enumerator order must match those fields' encoding.
 */
enum ShaderPolygonMode {
POLY_MODE_FILL,
POLY_MODE_POINT,
POLY_MODE_LINE,
};
/*
 * Source of a fixed-function material colour component.
 *
 * pgraph_get_shader_state() casts the NV_PGRAPH_CSV0_C EMISSION, AMBIENT,
 * DIFFUSE and SPECULAR fields directly to this enum, so the enumerator order
 * must match those fields' encoding.
 */
enum MaterialColorSource {
MATERIAL_COLOR_SRC_MATERIAL,
MATERIAL_COLOR_SRC_DIFFUSE,
MATERIAL_COLOR_SRC_SPECULAR,
};
/*
 * Complete description of the pipeline state that shader generation depends
 * on. Produced by pgraph_get_shader_state(), which zero-fills the structure
 * before populating it because the whole structure is hashed; fields that do
 * not apply to the current configuration therefore read as zero.
 */
typedef struct ShaderState {
/* Not written by pgraph_get_shader_state().
 * NOTE(review): presumably filled in by the Vulkan renderer - confirm. */
bool vulkan;
bool use_push_constants_for_uniform_attrs;
/* Copied from the PGRAPHState fields of the same name */
unsigned int surface_scale_factor;
/* Pixel shader / register combiner state */
PshState psh;
/* Copied from the PGRAPHState fields of the same name.
 * NOTE(review): presumably one bit per vertex attribute - confirm. */
uint16_t compressed_attrs;
uint16_t uniform_attrs;
uint16_t swizzle_attrs;
/* Per texture stage, copied from PGRAPHState */
bool texture_matrix_enable[4];
/* texgen[stage][coordinate], coordinate order S, T, R, Q */
enum VshTexgen texgen[4][4];
/* fog_mode and foggen are forced to 0 when fog is disabled */
bool fog_enable;
enum VshFoggen foggen;
enum VshFogMode fog_mode;
/* The following fixed-function fields are only filled in when
 * fixed_function is true */
enum VshSkinning skinning;
bool normalization;
enum MaterialColorSource emission_src;
enum MaterialColorSource ambient_src;
enum MaterialColorSource diffuse_src;
enum MaterialColorSource specular_src;
bool lighting;
/* Per-light mode; only filled in when lighting is enabled */
enum VshLight light[NV2A_MAX_LIGHTS];
/* NV_PGRAPH_CSV0_D_MODE == 0 */
bool fixed_function;
bool specular_enable;
/* vertex program */
/* NV_PGRAPH_CSV0_D_MODE == 2 */
bool vertex_program;
/* Tokens copied from the program start slot up to and including the first
 * token marked final; program_length is the number of tokens copied */
uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
int program_length;
bool z_perspective;
/* primitive format for geometry shader */
enum ShaderPolygonMode polygon_front_mode;
enum ShaderPolygonMode polygon_back_mode;
enum ShaderPrimitiveMode primitive_mode;
/* point_params is only copied when point_params_enable is set */
bool point_params_enable;
/* Register point-size field divided by 8 */
float point_size;
float point_params[8];
bool smooth_shading;
} ShaderState;
typedef struct PGRAPHState PGRAPHState;
/*
 * Build a ShaderState snapshot from the current PGRAPH registers and cached
 * state. The result is returned by value and is zero-filled before being
 * populated. Also clears pg->program_data_dirty.
 */
ShaderState pgraph_get_shader_state(PGRAPHState *pg);
#endif

View File

@ -18,6 +18,7 @@
*/
#include "renderer.h"
#include <vulkan/vulkan_core.h>
static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer)
{

View File

@ -21,7 +21,7 @@
#define HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "hw/xbox/nv2a/pgraph/vsh_regs.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include <vulkan/vulkan.h>
static const VkFilter pgraph_texture_min_filter_vk_map[] = {

View File

@ -25,8 +25,6 @@
#endif
#ifdef CONFIG_RENDERDOC
#include "trace/control.h"
#pragma GCC diagnostic ignored "-Wstrict-prototypes"
#include "thirdparty/renderdoc_app.h"
#endif
@ -48,21 +46,11 @@ void pgraph_vk_debug_frame_terminator(void)
PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state;
if (rdoc_api->IsTargetControlConnected()) {
bool capturing = rdoc_api->IsFrameCapturing();
if (capturing && renderdoc_capture_frames == 0) {
if (rdoc_api->IsFrameCapturing()) {
rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
if (renderdoc_trace_frames) {
trace_enable_events("-nv2a_pgraph_*");
renderdoc_trace_frames = false;
}
}
if (renderdoc_capture_frames > 0) {
if (!capturing) {
if (renderdoc_trace_frames) {
trace_enable_events("nv2a_pgraph_*");
}
rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0);
}
--renderdoc_capture_frames;
}
}

View File

@ -1,7 +1,7 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024-2025 Matt Borgerson
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -20,7 +20,6 @@
#include "qemu/osdep.h"
#include "qemu/fast-hash.h"
#include "renderer.h"
#include <math.h>
void pgraph_vk_draw_begin(NV2AState *d)
{
@ -51,8 +50,8 @@ static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
int polygon_mode = r->shader_binding->state.geom.polygon_front_mode;
int primitive_mode = r->shader_binding->state.geom.primitive_mode;
int polygon_mode = r->shader_binding->state.polygon_front_mode;
int primitive_mode = r->shader_binding->state.primitive_mode;
if (polygon_mode == POLY_MODE_POINT) {
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
@ -93,8 +92,7 @@ static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg)
}
}
static void pipeline_cache_entry_init(Lru *lru, LruNode *node,
const void *state)
static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
PipelineBinding *snode = container_of(node, PipelineBinding, node);
snode->layout = VK_NULL_HANDLE;
@ -118,8 +116,7 @@ static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node)
snode->layout = VK_NULL_HANDLE;
}
static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node,
const void *key)
static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
PipelineBinding *snode = container_of(node, PipelineBinding, node);
return memcmp(&snode->key, key, sizeof(PipelineKey));
@ -748,15 +745,15 @@ static void create_pipeline(PGRAPHState *pg)
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = r->shader_binding->vsh.module_info->module,
.module = r->shader_binding->vertex->module,
.pName = "main",
};
if (r->shader_binding->geom.module_info) {
if (r->shader_binding->geometry) {
shader_stages[num_active_shader_stages++] =
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_GEOMETRY_BIT,
.module = r->shader_binding->geom.module_info->module,
.module = r->shader_binding->geometry->module,
.pName = "main",
};
}
@ -764,7 +761,7 @@ static void create_pipeline(PGRAPHState *pg)
(VkPipelineShaderStageCreateInfo){
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = r->shader_binding->psh.module_info->module,
.module = r->shader_binding->fragment->module,
.pName = "main",
};
@ -818,7 +815,7 @@ static void create_pipeline(PGRAPHState *pg)
.depthClampEnable = VK_TRUE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state
.geom.polygon_front_mode],
.polygon_front_mode],
.lineWidth = 1.0f,
.frontFace = (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
NV_PGRAPH_SETUPRASTER_FRONTFACE) ?
@ -948,23 +945,12 @@ static void create_pipeline(PGRAPHState *pg)
.blendConstants[3] = blend_constant[3],
};
VkDynamicState dynamic_states[3] = { VK_DYNAMIC_STATE_VIEWPORT,
VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR };
int num_dynamic_states = 2;
snode->has_dynamic_line_width =
(r->enabled_physical_device_features.wideLines == VK_TRUE) &&
(r->shader_binding->state.geom.polygon_front_mode == POLY_MODE_LINE ||
r->shader_binding->state.geom.primitive_mode == PRIM_TYPE_LINES ||
r->shader_binding->state.geom.primitive_mode == PRIM_TYPE_LINE_LOOP ||
r->shader_binding->state.geom.primitive_mode == PRIM_TYPE_LINE_STRIP);
if (snode->has_dynamic_line_width) {
dynamic_states[num_dynamic_states++] = VK_DYNAMIC_STATE_LINE_WIDTH;
}
VkPipelineDynamicStateCreateInfo dynamic_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = num_dynamic_states,
.dynamicStateCount = ARRAY_SIZE(dynamic_states),
.pDynamicStates = dynamic_states,
};
@ -1014,9 +1000,9 @@ static void create_pipeline(PGRAPHState *pg)
};
VkPushConstantRange push_constant_range;
if (r->use_push_constants_for_uniform_attrs) {
if (r->shader_binding->state.use_push_constants_for_uniform_attrs) {
int num_uniform_attributes =
__builtin_popcount(r->shader_binding->state.vsh.uniform_attrs);
__builtin_popcount(r->shader_binding->state.uniform_attrs);
if (num_uniform_attributes) {
push_constant_range = (VkPushConstantRange){
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
@ -1069,7 +1055,7 @@ static void push_vertex_attr_values(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
if (!r->use_push_constants_for_uniform_attrs) {
if (!r->shader_binding->state.use_push_constants_for_uniform_attrs) {
return;
}
@ -1078,8 +1064,8 @@ static void push_vertex_attr_values(PGRAPHState *pg)
float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
int num_uniform_attrs = 0;
pgraph_get_inline_values(pg, r->shader_binding->state.vsh.uniform_attrs,
values, &num_uniform_attrs);
pgraph_get_inline_values(pg, r->shader_binding->state.uniform_attrs, values,
&num_uniform_attrs);
if (num_uniform_attrs > 0) {
vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout,
@ -1434,21 +1420,6 @@ static void begin_pre_draw(PGRAPHState *pg)
pgraph_vk_ensure_command_buffer(pg);
}
static float clamp_line_width_to_device_limits(PGRAPHState *pg, float width)
{
PGRAPHVkState *r = pg->vk_renderer_state;
float min_width = r->device_props.limits.lineWidthRange[0];
float max_width = r->device_props.limits.lineWidthRange[1];
float granularity = r->device_props.limits.lineWidthGranularity;
if (granularity != 0.0f) {
float steps = roundf((width - min_width) / granularity);
width = min_width + steps * granularity;
}
return fminf(fmaxf(min_width, width), max_width);
}
static void begin_draw(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
@ -1520,12 +1491,6 @@ static void begin_draw(PGRAPHState *pg)
.extent.height = scissor_height,
};
vkCmdSetScissor(r->command_buffer, 0, 1, &scissor);
if (r->pipeline_binding->has_dynamic_line_width) {
float line_width =
clamp_line_width_to_device_limits(pg, pg->surface_scale_factor);
vkCmdSetLineWidth(r->command_buffer, line_width);
}
}
if (!pg->clearing) {

View File

@ -1,7 +1,7 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024-2025 Matt Borgerson
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -269,24 +269,12 @@ static void block_to_uniforms(const SpvReflectBlockVariable *block, ShaderUnifor
assert(member->array.dims_count < 2);
int dim = 1;
for (int i = 0; i < member->array.dims_count; i++) {
dim *= member->array.dims[i];
}
int stride = MAX(member->array.stride, member->numeric.matrix.stride);
if (member->numeric.matrix.column_count) {
dim *= member->numeric.matrix.column_count;
if (member->array.stride) {
stride =
member->array.stride / member->numeric.matrix.column_count;
}
}
layout->uniforms[k] = (ShaderUniform){
.name = strdup(member->name),
.offset = member->offset,
.dim_v = MAX(1, member->numeric.vector.component_count),
.dim_a = dim,
.stride = stride,
.dim_a = MAX(member->array.dims_count ? member->array.dims[0] : 1, member->numeric.matrix.column_count),
.stride = MAX(member->array.stride, member->numeric.matrix.stride),
};
// fprintf(stderr, "<%s offset=%zd dim_v=%zd dim_a=%zd stride=%zd>\n",
@ -368,7 +356,6 @@ ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl)
{
ShaderModuleInfo *info = g_malloc0(sizeof(*info));
info->refcnt = 0;
info->glsl = strdup(glsl);
info->spirv = pgraph_vk_compile_glsl_to_spv(
vk_shader_stage_to_glslang_stage(stage), glsl);
@ -387,24 +374,8 @@ static void finalize_uniform_layout(ShaderUniformLayout *layout)
}
}
void pgraph_vk_ref_shader_module(ShaderModuleInfo *info)
{
info->refcnt++;
}
void pgraph_vk_unref_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info)
{
assert(info->refcnt >= 1);
info->refcnt--;
if (info->refcnt == 0) {
pgraph_vk_destroy_shader_module(r, info);
}
}
void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info)
{
assert(info->refcnt == 0);
if (info->glsl) {
free(info->glsl);
}

View File

@ -1,7 +1,7 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024-2025 Matt Borgerson
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -120,8 +120,8 @@ void *uniform_ptr(ShaderUniformLayout *layout, int idx)
return (char *)layout->allocation + layout->uniforms[idx - 1].offset;
}
static inline void uniform_copy(ShaderUniformLayout *layout, int idx,
void *values, size_t value_size, size_t count)
static inline
void uniform_copy(ShaderUniformLayout *layout, int idx, void *values, size_t value_size, size_t count)
{
assert(idx > 0 && "invalid uniform index");
@ -135,7 +135,7 @@ static inline void uniform_copy(ShaderUniformLayout *layout, int idx,
int index = 0;
while (bytes_remaining) {
assert((p_out + element_size) <= p_max);
assert(p_out < p_max);
assert(index < u->dim_a);
memcpy(p_out, p_in, element_size);
bytes_remaining -= element_size;
@ -202,10 +202,4 @@ void uniform4i(ShaderUniformLayout *layout, int idx, int v0, int v1, int v2, int
uniform1iv(layout, idx, 4, values);
}
/*
 * Write `count` uint32_t values from `values` into the uniform at index
 * `idx` of `layout`. Thin wrapper that forwards to uniform_copy() with an
 * element size of sizeof(uint32_t).
 */
static inline void uniform1uiv(ShaderUniformLayout *layout, int idx,
size_t count, uint32_t *values)
{
uniform_copy(layout, idx, values, sizeof(uint32_t), count);
}
#endif

View File

@ -1,7 +1,7 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024-2025 Matt Borgerson
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -522,48 +522,36 @@ static bool create_logical_device(PGRAPHState *pg, Error **errp)
.pQueuePriorities = &queuePriority,
};
// Check device features
VkPhysicalDeviceFeatures physical_device_features;
vkGetPhysicalDeviceFeatures(r->physical_device, &physical_device_features);
memset(&r->enabled_physical_device_features, 0,
sizeof(r->enabled_physical_device_features));
// Ensure device supports required features
VkPhysicalDeviceFeatures available_features, enabled_features;
vkGetPhysicalDeviceFeatures(r->physical_device, &available_features);
memset(&enabled_features, 0, sizeof(enabled_features));
struct {
const char *name;
VkBool32 available, *enabled;
bool required;
} desired_features[] = {
// clang-format off
#define F(n, req) { \
.name = #n, \
.available = physical_device_features.n, \
.enabled = &r->enabled_physical_device_features.n, \
.required = req, \
}
F(shaderClipDistance, true),
F(geometryShader, true),
F(shaderTessellationAndGeometryPointSize, true),
F(depthClamp, true),
F(occlusionQueryPrecise, true),
F(fillModeNonSolid, true),
F(wideLines, false),
} required_features[] = {
#define F(n) { #n, available_features.n, &enabled_features.n }
F(shaderClipDistance),
F(geometryShader),
F(shaderTessellationAndGeometryPointSize),
F(depthClamp),
F(occlusionQueryPrecise),
#undef F
// clang-format on
};
bool all_required_features_available = true;
for (int i = 0; i < ARRAY_SIZE(desired_features); i++) {
if (desired_features[i].required &&
desired_features[i].available != VK_TRUE) {
bool all_features_available = true;
for (int i = 0; i < ARRAY_SIZE(required_features); i++) {
if (required_features[i].available != VK_TRUE) {
fprintf(stderr,
"Error: Device does not support required feature %s\n",
desired_features[i].name);
all_required_features_available = false;
required_features[i].name);
all_features_available = false;
}
*desired_features[i].enabled = desired_features[i].available;
*required_features[i].enabled = VK_TRUE;
}
if (!all_required_features_available) {
if (!all_features_available) {
error_setg(errp, "Device does not support required features");
return false;
}
@ -596,7 +584,7 @@ static bool create_logical_device(PGRAPHState *pg, Error **errp)
.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
.queueCreateInfoCount = 1,
.pQueueCreateInfos = &queue_create_info,
.pEnabledFeatures = &r->enabled_physical_device_features,
.pEnabledFeatures = &enabled_features,
.enabledExtensionCount = enabled_extension_names->len,
.ppEnabledExtensionNames =
&g_array_index(enabled_extension_names, const char *, 0),

View File

@ -1,7 +1,7 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024-2025 Matt Borgerson
* Copyright (c) 2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -29,7 +29,7 @@
#include "hw/xbox/nv2a/nv2a_regs.h"
#include "hw/xbox/nv2a/pgraph/surface.h"
#include "hw/xbox/nv2a/pgraph/texture.h"
#include "hw/xbox/nv2a/pgraph/glsl/shaders.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include <vulkan/vulkan.h>
#include <glslang/Include/glslang_c_interface.h>
@ -77,7 +77,6 @@ typedef struct PipelineBinding {
VkPipeline pipeline;
VkRenderPass render_pass;
unsigned int draw_time;
bool has_dynamic_line_width;
} PipelineBinding;
enum Buffer {
@ -146,7 +145,6 @@ typedef struct SurfaceBinding {
} SurfaceBinding;
typedef struct ShaderModuleInfo {
int refcnt;
char *glsl;
GByteArray *spirv;
VkShaderModule module;
@ -156,44 +154,48 @@ typedef struct ShaderModuleInfo {
ShaderUniformLayout push_constants;
} ShaderModuleInfo;
/*
 * Lookup key for the per-stage shader module cache.
 *
 * `kind` is the Vulkan shader stage and selects which union member is read
 * when GLSL is generated for the key (vsh / geom / psh).
 *
 * The key is hashed and compared as raw bytes over
 * sizeof(ShaderModuleCacheKey), so callers zero the whole structure before
 * filling in the fields for the chosen stage; unused union bytes must not
 * contain stale data.
 */
typedef struct ShaderModuleCacheKey {
/* Shader stage this key describes. */
VkShaderStageFlagBits kind;
union {
/* Used when kind is VK_SHADER_STAGE_VERTEX_BIT. */
struct {
VshState state;
GenVshGlslOptions glsl_opts;
} vsh;
/* Used when kind is VK_SHADER_STAGE_GEOMETRY_BIT. */
struct {
GeomState state;
GenGeomGlslOptions glsl_opts;
} geom;
/* Used when kind is VK_SHADER_STAGE_FRAGMENT_BIT. */
struct {
PshState state;
GenPshGlslOptions glsl_opts;
} psh;
};
} ShaderModuleCacheKey;
/*
 * One slot of the shader module LRU cache.
 */
typedef struct ShaderModuleCacheEntry {
/* LRU linkage; the entry is recovered from it with container_of(). */
LruNode node;
/* Byte-for-byte copy of the key this entry was created for. */
ShaderModuleCacheKey key;
/* Module built for `key`; the entry holds a reference until eviction. */
ShaderModuleInfo *module_info;
} ShaderModuleCacheEntry;
typedef struct ShaderBinding {
LruNode node;
bool initialized;
ShaderState state;
struct {
ShaderModuleInfo *module_info;
VshUniformLocs uniform_locs;
} vsh;
struct {
ShaderModuleInfo *module_info;
} geom;
struct {
ShaderModuleInfo *module_info;
PshUniformLocs uniform_locs;
} psh;
ShaderModuleInfo *geometry;
ShaderModuleInfo *vertex;
ShaderModuleInfo *fragment;
int psh_constant_loc[9][2];
int alpha_ref_loc;
int bump_mat_loc[NV2A_MAX_TEXTURES];
int bump_scale_loc[NV2A_MAX_TEXTURES];
int bump_offset_loc[NV2A_MAX_TEXTURES];
int tex_scale_loc[NV2A_MAX_TEXTURES];
int surface_size_loc;
int clip_range_loc;
int clip_range_floc;
int depth_offset_loc;
int vsh_constant_loc;
uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
int inv_viewport_loc;
int ltctxa_loc;
int ltctxb_loc;
int ltc1_loc;
int fog_color_loc;
int fog_param_loc;
int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS];
int light_infinite_direction_loc[NV2A_MAX_LIGHTS];
int light_local_position_loc[NV2A_MAX_LIGHTS];
int light_local_attenuation_loc[NV2A_MAX_LIGHTS];
int clip_region_loc;
int material_alpha_loc;
int uniform_attrs_loc;
} ShaderBinding;
typedef struct TextureKey {
@ -329,7 +331,6 @@ typedef struct PGRAPHVkState {
bool memory_budget_extension_enabled;
VkPhysicalDevice physical_device;
VkPhysicalDeviceFeatures enabled_physical_device_features;
VkPhysicalDeviceProperties device_props;
VkDevice device;
VmaAllocator allocator;
@ -404,10 +405,6 @@ typedef struct PGRAPHVkState {
ShaderBinding *shader_binding;
ShaderModuleInfo *quad_vert_module, *solid_frag_module;
bool shader_bindings_changed;
bool use_push_constants_for_uniform_attrs;
Lru shader_module_cache;
ShaderModuleCacheEntry *shader_module_cache_entries;
// FIXME: Merge these into a structure
uint64_t uniform_buffer_hashes[2];
@ -464,8 +461,6 @@ VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r,
GByteArray *spv);
ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl(
PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl);
void pgraph_vk_ref_shader_module(ShaderModuleInfo *info);
void pgraph_vk_unref_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info);
void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info);
// buffer.c
@ -555,6 +550,7 @@ void pgraph_vk_init_shaders(PGRAPHState *pg);
void pgraph_vk_finalize_shaders(PGRAPHState *pg);
void pgraph_vk_update_descriptor_sets(PGRAPHState *pg);
void pgraph_vk_bind_shaders(PGRAPHState *pg);
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg);
// reports.c
void pgraph_vk_init_reports(PGRAPHState *pg);

View File

@ -1,7 +1,13 @@
/*
* Geforce NV2A PGRAPH Vulkan Renderer
*
* Copyright (c) 2024-2025 Matt Borgerson
* Copyright (c) 2024 Matt Borgerson
*
* Based on GL implementation:
*
* Copyright (c) 2015 espes
* Copyright (c) 2015 Jannik Vogel
* Copyright (c) 2018-2024 Matt Borgerson
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -18,13 +24,15 @@
*/
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/pgraph/shaders.h"
#include "hw/xbox/nv2a/pgraph/util.h"
#include "hw/xbox/nv2a/pgraph/glsl/geom.h"
#include "hw/xbox/nv2a/pgraph/glsl/vsh.h"
#include "hw/xbox/nv2a/pgraph/glsl/psh.h"
#include "qemu/fast-hash.h"
#include "qemu/mstring.h"
#include "renderer.h"
#define VSH_UBO_BINDING 0
#define PSH_UBO_BINDING 1
#define PSH_TEX_BINDING 2
#include <locale.h>
const size_t MAX_UNIFORM_ATTR_VALUES_SIZE = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float);
@ -151,8 +159,8 @@ void pgraph_vk_update_descriptor_sets(PGRAPHState *pg)
}
ShaderBinding *binding = r->shader_binding;
ShaderUniformLayout *layouts[] = { &binding->vsh.module_info->uniforms,
&binding->psh.module_info->uniforms };
ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
&binding->fragment->uniforms };
VkDeviceSize ubo_buffer_total_size = 0;
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
ubo_buffer_total_size += layouts[i]->total_size;
@ -228,72 +236,90 @@ void pgraph_vk_update_descriptor_sets(PGRAPHState *pg)
r->descriptor_set_index++;
}
static void update_shader_uniform_locs(ShaderBinding *binding)
static void update_shader_constant_locations(ShaderBinding *binding)
{
for (int i = 0; i < ARRAY_SIZE(binding->vsh.uniform_locs); i++) {
binding->vsh.uniform_locs[i] = uniform_index(
&binding->vsh.module_info->uniforms, VshUniformInfo[i].name);
char tmp[64];
/* lookup fragment shader uniforms */
for (int i = 0; i < 9; i++) {
for (int j = 0; j < 2; j++) {
snprintf(tmp, sizeof(tmp), "c%d_%d", j, i);
binding->psh_constant_loc[i][j] =
uniform_index(&binding->fragment->uniforms, tmp);
}
}
binding->alpha_ref_loc =
uniform_index(&binding->fragment->uniforms, "alphaRef");
binding->fog_color_loc =
uniform_index(&binding->fragment->uniforms, "fogColor");
for (int i = 1; i < NV2A_MAX_TEXTURES; i++) {
snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
binding->bump_mat_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
binding->bump_scale_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
binding->bump_offset_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
}
for (int i = 0; i < ARRAY_SIZE(binding->psh.uniform_locs); i++) {
binding->psh.uniform_locs[i] = uniform_index(
&binding->psh.module_info->uniforms, PshUniformInfo[i].name);
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
snprintf(tmp, sizeof(tmp), "texScale%d", i);
binding->tex_scale_loc[i] =
uniform_index(&binding->fragment->uniforms, tmp);
}
/* lookup vertex shader uniforms */
binding->vsh_constant_loc = uniform_index(&binding->vertex->uniforms, "c");
binding->surface_size_loc =
uniform_index(&binding->vertex->uniforms, "surfaceSize");
binding->clip_range_loc =
uniform_index(&binding->vertex->uniforms, "clipRange");
binding->clip_range_floc =
uniform_index(&binding->fragment->uniforms, "clipRange");
binding->depth_offset_loc =
uniform_index(&binding->fragment->uniforms, "depthOffset");
binding->fog_param_loc =
uniform_index(&binding->vertex->uniforms, "fogParam");
binding->inv_viewport_loc =
uniform_index(&binding->vertex->uniforms, "invViewport");
binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa");
binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb");
binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1");
for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
binding->light_infinite_half_vector_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
binding->light_infinite_direction_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
binding->light_local_position_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
binding->light_local_attenuation_loc[i] =
uniform_index(&binding->vertex->uniforms, tmp);
}
binding->clip_region_loc =
uniform_index(&binding->fragment->uniforms, "clipRegion");
binding->material_alpha_loc =
uniform_index(&binding->vertex->uniforms, "material_alpha");
binding->uniform_attrs_loc =
uniform_index(&binding->vertex->uniforms, "inlineValue");
}
static ShaderModuleInfo *
get_and_ref_shader_module_for_key(PGRAPHVkState *r,
const ShaderModuleCacheKey *key)
static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
uint64_t hash = fast_hash((void *)key, sizeof(ShaderModuleCacheKey));
LruNode *node = lru_lookup(&r->shader_module_cache, hash, key);
ShaderModuleCacheEntry *module =
container_of(node, ShaderModuleCacheEntry, node);
pgraph_vk_ref_shader_module(module->module_info);
return module->module_info;
}
static void shader_cache_entry_init(Lru *lru, LruNode *node, const void *state)
{
PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache);
ShaderBinding *binding = container_of(node, ShaderBinding, node);
memcpy(&binding->state, state, sizeof(ShaderState));
NV2A_VK_DPRINTF("cache miss");
nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
ShaderModuleCacheKey key;
bool need_geometry_shader = pgraph_glsl_need_geom(&binding->state.geom);
if (need_geometry_shader) {
memset(&key, 0, sizeof(key));
key.kind = VK_SHADER_STAGE_GEOMETRY_BIT;
key.geom.state = binding->state.geom;
key.geom.glsl_opts.vulkan = true;
binding->geom.module_info = get_and_ref_shader_module_for_key(r, &key);
} else {
binding->geom.module_info = NULL;
}
memset(&key, 0, sizeof(key));
key.kind = VK_SHADER_STAGE_VERTEX_BIT;
key.vsh.state = binding->state.vsh;
key.vsh.glsl_opts.vulkan = true;
key.vsh.glsl_opts.prefix_outputs = need_geometry_shader;
key.vsh.glsl_opts.use_push_constants_for_uniform_attrs =
r->use_push_constants_for_uniform_attrs;
key.vsh.glsl_opts.ubo_binding = VSH_UBO_BINDING;
binding->vsh.module_info = get_and_ref_shader_module_for_key(r, &key);
memset(&key, 0, sizeof(key));
key.kind = VK_SHADER_STAGE_FRAGMENT_BIT;
key.psh.state = binding->state.psh;
key.psh.glsl_opts.vulkan = true;
key.psh.glsl_opts.ubo_binding = PSH_UBO_BINDING;
key.psh.glsl_opts.tex_binding = PSH_TEX_BINDING;
binding->psh.module_info = get_and_ref_shader_module_for_key(r, &key);
update_shader_uniform_locs(binding);
ShaderBinding *snode = container_of(node, ShaderBinding, node);
memcpy(&snode->state, state, sizeof(ShaderState));
snode->initialized = false;
}
static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
@ -302,74 +328,25 @@ static void shader_cache_entry_post_evict(Lru *lru, LruNode *node)
ShaderBinding *snode = container_of(node, ShaderBinding, node);
ShaderModuleInfo *modules[] = {
snode->vsh.module_info,
snode->geom.module_info,
snode->psh.module_info,
snode->geometry,
snode->vertex,
snode->fragment,
};
for (int i = 0; i < ARRAY_SIZE(modules); i++) {
if (modules[i]) {
pgraph_vk_unref_shader_module(r, modules[i]);
pgraph_vk_destroy_shader_module(r, modules[i]);
}
}
snode->initialized = false;
}
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, const void *key)
static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
ShaderBinding *snode = container_of(node, ShaderBinding, node);
return memcmp(&snode->state, key, sizeof(ShaderState));
}
/*
 * LRU init callback for the shader module cache.
 *
 * Stores a copy of `key` in the entry, generates GLSL for the stage named
 * by key->kind, compiles it into a shader module and takes a reference on
 * that module on behalf of the cache entry.
 */
static void shader_module_cache_entry_init(Lru *lru, LruNode *node,
                                           const void *key)
{
    PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_module_cache);
    ShaderModuleCacheEntry *entry =
        container_of(node, ShaderModuleCacheEntry, node);
    ShaderModuleCacheKey *k = &entry->key;
    MString *glsl = NULL;

    /* Raw byte copy: the key is later compared with memcmp(). */
    memcpy(k, key, sizeof(*k));

    if (k->kind == VK_SHADER_STAGE_VERTEX_BIT) {
        glsl = pgraph_glsl_gen_vsh(&k->vsh.state, k->vsh.glsl_opts);
    } else if (k->kind == VK_SHADER_STAGE_GEOMETRY_BIT) {
        glsl = pgraph_glsl_gen_geom(&k->geom.state, k->geom.glsl_opts);
    } else if (k->kind == VK_SHADER_STAGE_FRAGMENT_BIT) {
        glsl = pgraph_glsl_gen_psh(&k->psh.state, k->psh.glsl_opts);
    } else {
        assert(!"Invalid shader module kind");
    }

    entry->module_info = pgraph_vk_create_shader_module_from_glsl(
        r, k->kind, mstring_get_str(glsl));
    /* The cache entry keeps its own reference until it is evicted. */
    pgraph_vk_ref_shader_module(entry->module_info);
    mstring_unref(glsl);
}
/*
 * LRU post-evict callback for the shader module cache: drops the reference
 * the entry held on its module and clears the pointer.
 */
static void shader_module_cache_entry_post_evict(Lru *lru, LruNode *node)
{
    ShaderModuleCacheEntry *entry =
        container_of(node, ShaderModuleCacheEntry, node);
    PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_module_cache);

    pgraph_vk_unref_shader_module(r, entry->module_info);
    entry->module_info = NULL;
}
/*
 * LRU compare callback for the shader module cache.
 *
 * Returns true when the entry's stored key differs bytewise from `key`
 * (i.e. memcmp() is non-zero) and false when the two keys are identical.
 */
static bool shader_module_cache_entry_compare(Lru *lru, LruNode *node,
                                              const void *key)
{
    ShaderModuleCacheEntry *entry =
        container_of(node, ShaderModuleCacheEntry, node);
    int diff = memcmp(&entry->key, key, sizeof(ShaderModuleCacheKey));

    return diff != 0;
}
static void shader_cache_init(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
@ -384,22 +361,6 @@ static void shader_cache_init(PGRAPHState *pg)
r->shader_cache.init_node = shader_cache_entry_init;
r->shader_cache.compare_nodes = shader_cache_entry_compare;
r->shader_cache.post_node_evict = shader_cache_entry_post_evict;
/* FIXME: Make this configurable */
const size_t shader_module_cache_size = 50 * 1024;
lru_init(&r->shader_module_cache);
r->shader_module_cache_entries =
g_malloc_n(shader_module_cache_size, sizeof(ShaderModuleCacheEntry));
assert(r->shader_module_cache_entries != NULL);
for (int i = 0; i < shader_module_cache_size; i++) {
lru_add_free(&r->shader_module_cache,
&r->shader_module_cache_entries[i].node);
}
r->shader_module_cache.init_node = shader_module_cache_entry_init;
r->shader_module_cache.compare_nodes = shader_module_cache_entry_compare;
r->shader_module_cache.post_node_evict =
shader_module_cache_entry_post_evict;
}
static void shader_cache_finalize(PGRAPHState *pg)
@ -409,78 +370,475 @@ static void shader_cache_finalize(PGRAPHState *pg)
lru_flush(&r->shader_cache);
g_free(r->shader_cache_entries);
r->shader_cache_entries = NULL;
lru_flush(&r->shader_module_cache);
g_free(r->shader_module_cache_entries);
r->shader_module_cache_entries = NULL;
}
static ShaderBinding *get_shader_binding_for_state(PGRAPHVkState *r,
const ShaderState *state)
static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state)
{
PGRAPHVkState *r = pg->vk_renderer_state;
uint64_t hash = fast_hash((void *)state, sizeof(*state));
LruNode *node = lru_lookup(&r->shader_cache, hash, state);
ShaderBinding *binding = container_of(node, ShaderBinding, node);
NV2A_VK_DPRINTF("shader state hash: %016" PRIx64 " %p", hash, binding);
return binding;
ShaderBinding *snode = container_of(node, ShaderBinding, node);
NV2A_VK_DPRINTF("shader state hash: %016" PRIx64 " %p", hash, snode);
if (!snode->initialized) {
NV2A_VK_DPRINTF("cache miss");
nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN);
char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL);
if (previous_numeric_locale) {
previous_numeric_locale = g_strdup(previous_numeric_locale);
}
/* Ensure numeric values are printed with '.' radix, no grouping */
setlocale(LC_NUMERIC, "C");
MString *geometry_shader_code = pgraph_gen_geom_glsl(
state->polygon_front_mode, state->polygon_back_mode,
state->primitive_mode, state->smooth_shading, true);
if (geometry_shader_code) {
NV2A_VK_DPRINTF("geometry shader: \n%s",
mstring_get_str(geometry_shader_code));
snode->geometry = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_GEOMETRY_BIT,
mstring_get_str(geometry_shader_code));
mstring_unref(geometry_shader_code);
} else {
snode->geometry = NULL;
}
MString *vertex_shader_code =
pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL);
NV2A_VK_DPRINTF("vertex shader: \n%s",
mstring_get_str(vertex_shader_code));
snode->vertex = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_VERTEX_BIT,
mstring_get_str(vertex_shader_code));
mstring_unref(vertex_shader_code);
MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh);
NV2A_VK_DPRINTF("fragment shader: \n%s",
mstring_get_str(fragment_shader_code));
snode->fragment = pgraph_vk_create_shader_module_from_glsl(
r, VK_SHADER_STAGE_FRAGMENT_BIT,
mstring_get_str(fragment_shader_code));
mstring_unref(fragment_shader_code);
if (previous_numeric_locale) {
setlocale(LC_NUMERIC, previous_numeric_locale);
g_free(previous_numeric_locale);
}
update_shader_constant_locations(snode);
snode->initialized = true;
}
return snode;
}
static void apply_uniform_updates(ShaderUniformLayout *layout,
const UniformInfo *info, int *locs,
void *values, size_t count)
static void update_uniform_attr_values(PGRAPHState *pg, ShaderBinding *binding)
{
for (int i = 0; i < count; i++) {
if (locs[i] != -1) {
uniform_copy(layout, locs[i], (char*)values + info[i].val_offs,
4, (info[i].size * info[i].count) / 4);
}
float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
int num_uniform_attrs = 0;
pgraph_get_inline_values(pg, binding->state.uniform_attrs, values,
&num_uniform_attrs);
if (num_uniform_attrs > 0) {
uniform1fv(&binding->vertex->uniforms, binding->uniform_attrs_loc,
num_uniform_attrs * 4, &values[0][0]);
}
}
// FIXME: Dirty tracking
static void update_shader_uniforms(PGRAPHState *pg)
// FIXME: Move to common
/*
 * Recompute the uniform values for the currently bound shaders from PGRAPH
 * registers/state and write them into binding->vertex->uniforms and
 * binding->fragment->uniforms.
 *
 * pg:              PGRAPH state the values are read from.
 * binding:         shader binding whose cached uniform locations (-1 means
 *                  "not present") and uniform layouts are used.
 * binding_changed: not read in this body.
 * vertex_program:  not read in this body.
 * fixed_function:  when true, lighting constants and the inverse viewport
 *                  matrix are uploaded as well.
 *
 * Many registers hold raw 32-bit float bit patterns; they are read as
 * uint32_t and reinterpreted through a float pointer.
 *
 * NOTE(review): the ltctxa/ltctxb/ltc1, vsh constant ("c") and clip region
 * uploads are issued without a `loc != -1` check, unlike the other
 * uniforms here -- presumably those uniforms always exist in the relevant
 * shaders; confirm.
 */
static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding,
bool binding_changed, bool vertex_program,
bool fixed_function)
{
ShaderState *state = &binding->state;
/* update combiner constants */
/* Indices 0-7 are the per-stage combiner factors; index 8 is the final
 * combiner, which reads the SPECFOGFACTOR registers instead. */
for (int i = 0; i < 9; i++) {
uint32_t constant[2];
if (i == 8) {
/* final combiner */
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1);
} else {
constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4);
constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4);
}
for (int j = 0; j < 2; j++) {
GLint loc = binding->psh_constant_loc[i][j];
if (loc != -1) {
/* Packed 32-bit ARGB register value expanded to four floats. */
float value[4];
pgraph_argb_pack32_to_rgba_float(constant[j], value);
uniform1fv(&binding->fragment->uniforms, loc, 4, value);
}
}
}
if (binding->alpha_ref_loc != -1) {
int alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0),
NV_PGRAPH_CONTROL_0_ALPHAREF);
uniform1i(&binding->fragment->uniforms, binding->alpha_ref_loc,
alpha_ref);
}
/* For each texture stage */
for (int i = 0; i < NV2A_MAX_TEXTURES; i++) {
int loc;
/* Bump luminance only during stages 1 - 3 */
if (i > 0) {
/* Bump registers are indexed from stage 1, hence the (i - 1). */
loc = binding->bump_mat_loc[i];
if (loc != -1) {
uint32_t m_u32[4];
m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1));
m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1));
m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1));
m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1));
float m[4];
m[0] = *(float*)&m_u32[0];
m[1] = *(float*)&m_u32[1];
m[2] = *(float*)&m_u32[2];
m[3] = *(float*)&m_u32[3];
uniformMatrix2fv(&binding->fragment->uniforms, loc, m);
}
loc = binding->bump_scale_loc[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4);
uniform1f(&binding->fragment->uniforms, loc,
*(float *)&v);
}
loc = binding->bump_offset_loc[i];
if (loc != -1) {
uint32_t v =
pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4);
uniform1f(&binding->fragment->uniforms, loc,
*(float *)&v);
}
}
loc = binding->tex_scale_loc[i];
if (loc != -1) {
/* The texture binding's scale is only applied for formats flagged
 * linear; every other format uses a scale of 1.0. */
assert(pg->vk_renderer_state->texture_bindings[i] != NULL);
float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale;
BasicColorFormatInfo f_basic = kelvin_color_format_info_map[pg->vk_renderer_state->texture_bindings[i]->key.state.color_format];
if (!f_basic.linear) {
scale = 1.0;
}
uniform1f(&binding->fragment->uniforms, loc, scale);
}
}
if (binding->fog_color_loc != -1) {
/* Each 8-bit channel is normalised by dividing by 255. */
uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR);
uniform4f(&binding->fragment->uniforms, binding->fog_color_loc,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
}
if (binding->fog_param_loc != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1);
uniform2f(&binding->vertex->uniforms,
binding->fog_param_loc, *(float *)&v[0],
*(float *)&v[1]);
}
/* Maximum depth value for the current zeta format: z_format selects the
 * f16_max/f24_max constant, otherwise the integer maximum is used. Any
 * other zeta format trips the assert. */
float zmax;
switch (pg->surface_shape.zeta_format) {
case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF;
break;
case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
zmax = pg->surface_shape.z_format ? f24_max : (float)0xFFFFFF;
break;
default:
assert(0);
}
if (fixed_function) {
/* update lighting constants */
struct {
uint32_t *v;
int locs;
size_t len;
} lighting_arrays[] = {
{ &pg->ltctxa[0][0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT },
{ &pg->ltctxb[0][0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT },
{ &pg->ltc1[0][0], binding->ltc1_loc, NV2A_LTC1_COUNT },
};
/* len counts 4-component entries, hence len * 4 scalar values. */
for (int i = 0; i < ARRAY_SIZE(lighting_arrays); i++) {
uniform1iv(
&binding->vertex->uniforms, lighting_arrays[i].locs,
lighting_arrays[i].len * 4, (void *)lighting_arrays[i].v);
}
for (int i = 0; i < NV2A_MAX_LIGHTS; i++) {
int loc = binding->light_infinite_half_vector_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_infinite_half_vector[i]);
}
loc = binding->light_infinite_direction_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_infinite_direction[i]);
}
loc = binding->light_local_position_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_local_position[i]);
}
loc = binding->light_local_attenuation_loc[i];
if (loc != -1) {
uniform1fv(&binding->vertex->uniforms, loc, 3,
pg->light_local_attenuation[i]);
}
}
/* estimate the viewport by assuming it matches the surface ... */
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
/* NOTE(review): the division by the AA factor is evaluated before the
 * 0.5 scale; if surface_binding_dim.width/height are integer types this
 * truncates -- confirm that is intended. */
float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width);
float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height);
float m33 = zmax;
float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0];
float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1];
float invViewport[16] = {
1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0,
0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22,
0, 1.0
};
if (binding->inv_viewport_loc != -1) {
uniformMatrix4fv(&binding->vertex->uniforms,
binding->inv_viewport_loc, &invViewport[0]);
}
}
/* update vertex program constants */
uniform1iv(&binding->vertex->uniforms, binding->vsh_constant_loc,
NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants);
if (binding->surface_size_loc != -1) {
unsigned int aa_width = 1, aa_height = 1;
pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height);
uniform2f(&binding->vertex->uniforms, binding->surface_size_loc,
pg->surface_binding_dim.width / aa_width,
pg->surface_binding_dim.height / aa_height);
}
/* clipRange may exist in the vertex shader, the fragment shader, or both;
 * each copy receives the same (0, zmax, zclip_min, zclip_max). */
if (binding->clip_range_loc != -1 || binding->clip_range_floc != -1) {
uint32_t v[2];
v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN);
v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX);
float zclip_min = *(float *)&v[0];
float zclip_max = *(float *)&v[1];
if (binding->clip_range_loc != -1) {
uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0,
zmax, zclip_min, zclip_max);
}
if (binding->clip_range_floc != -1) {
uniform4f(&binding->fragment->uniforms, binding->clip_range_floc, 0,
zmax, zclip_min, zclip_max);
}
}
if (binding->depth_offset_loc != -1) {
/* The bias stays 0 unless at least one polygon-offset enable bit
 * (fill, line or point) is set in SETUPRASTER. */
float zbias = 0.0f;
if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
(NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
zbias = *(float *)&zbias_u32;
if (pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR) != 0 &&
(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE)) {
/* TODO: emulate zfactor when z_perspective true, i.e.
* w-buffering. Perhaps calculate an additional offset based on
* triangle orientation in geometry shader and pass the result
* to fragment shader and add it to gl_FragDepth as well.
*/
NV2A_UNIMPLEMENTED("NV_PGRAPH_ZOFFSETFACTOR for w-buffering");
}
}
uniform1f(&binding->fragment->uniforms, binding->depth_offset_loc,
zbias);
}
/* Clipping regions */
/* NOTE(review): max_gl_width/max_gl_height are computed here but not read
 * again in this function. */
unsigned int max_gl_width = pg->surface_binding_dim.width;
unsigned int max_gl_height = pg->surface_binding_dim.height;
pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height);
/* Eight window clip rectangles stored as (x_min, y_min, x_max, y_max).
 * The register maxima get +1 before the anti-aliasing and surface scaling
 * factors are applied. */
uint32_t clip_regions[8][4];
for (int i = 0; i < 8; i++) {
uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4);
unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN);
unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1;
uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4);
unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN);
unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1;
pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min);
pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max);
pgraph_apply_scaling_factor(pg, &x_min, &y_min);
pgraph_apply_scaling_factor(pg, &x_max, &y_max);
clip_regions[i][0] = x_min;
clip_regions[i][1] = y_min;
clip_regions[i][2] = x_max;
clip_regions[i][3] = y_max;
}
uniform1iv(&binding->fragment->uniforms, binding->clip_region_loc,
8 * 4, (void *)clip_regions);
if (binding->material_alpha_loc != -1) {
uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc,
pg->material_alpha);
}
/* Inline attribute values go through the uniform buffer only when push
 * constants are not used for them and at least one attribute is flagged
 * as uniform. */
if (!state->use_push_constants_for_uniform_attrs && state->uniform_attrs) {
update_uniform_attr_values(pg, binding);
}
}
/*
 * Cheap pre-check run before a full shader state comparison.
 *
 * Returns true when there is no current shader binding, when program data
 * is flagged dirty, when any of the shader-relevant registers is dirty, or
 * when one of the cached state fields no longer matches PGRAPH. Returns
 * false (and bumps the NV2A_PROF_SHADER_BIND_NOTDIRTY counter) otherwise.
 */
static bool check_shaders_dirty(PGRAPHState *pg)
{
    /* Registers whose modification alone forces a full re-inspection. */
    static const unsigned int tracked_regs[] = {
        NV_PGRAPH_COMBINECTL,
        NV_PGRAPH_COMBINESPECFOG0,
        NV_PGRAPH_COMBINESPECFOG1,
        NV_PGRAPH_CONTROL_0,
        NV_PGRAPH_CONTROL_3,
        NV_PGRAPH_CSV0_C,
        NV_PGRAPH_CSV0_D,
        NV_PGRAPH_CSV1_A,
        NV_PGRAPH_CSV1_B,
        NV_PGRAPH_POINTSIZE,
        NV_PGRAPH_SETUPRASTER,
        NV_PGRAPH_SHADERCLIPMODE,
        NV_PGRAPH_SHADERCTL,
        NV_PGRAPH_SHADERPROG,
        NV_PGRAPH_SHADOWCTL,
        NV_PGRAPH_ZCOMPRESSOCCLUDE,
    };
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!r->shader_binding || pg->program_data_dirty) {
        return true;
    }

    /* Only the combiner stages currently enabled are inspected. */
    int stage_count = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF;
    for (int stage = 0; stage < stage_count; stage++) {
        const int offs = stage * 4;

        if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + offs) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + offs) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + offs) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + offs)) {
            return true;
        }
    }

    for (int n = 0; n < ARRAY_SIZE(tracked_regs); n++) {
        if (pgraph_is_reg_dirty(pg, tracked_regs[n])) {
            return true;
        }
    }

    /* State that is tracked outside the register file. */
    ShaderState *state = &r->shader_binding->state;
    if (pg->uniform_attrs != state->uniform_attrs) {
        return true;
    }
    if (pg->swizzle_attrs != state->swizzle_attrs) {
        return true;
    }
    if (pg->compressed_attrs != state->compressed_attrs) {
        return true;
    }
    if (pg->primitive_mode != state->primitive_mode) {
        return true;
    }
    if (pg->surface_scale_factor != state->surface_scale_factor) {
        return true;
    }

    /* Per-texture-stage state. */
    for (int tex = 0; tex < 4; tex++) {
        const int offs = tex * 4;

        if (pg->texture_matrix_enable[tex] !=
                state->texture_matrix_enable[tex] ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + offs) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + offs) ||
            pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + offs)) {
            return true;
        }
    }

    nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
    return false;
}
/*
 * Select the shader binding matching the current PGRAPH state and refresh
 * its uniforms.
 *
 * r->shader_bindings_changed is cleared on entry and set only when a
 * different binding is installed. The full shader state is rebuilt only if
 * check_shaders_dirty() reports a possible change; the uniform update runs
 * on every call regardless.
 */
void pgraph_vk_bind_shaders(PGRAPHState *pg)
{
NV2A_VK_DGROUP_BEGIN("%s", __func__);
PGRAPHVkState *r = pg->vk_renderer_state;
r->shader_bindings_changed = false;
if (check_shaders_dirty(pg)) {
ShaderState new_state;
/* NOTE(review): the memset is immediately followed by a whole-struct
 * assignment. The state is later compared with memcmp() and hashed as raw
 * bytes, so any padding bytes must be deterministic -- confirm that
 * pgraph_get_shader_state() returns a fully zero-initialised struct. */
memset(&new_state, 0, sizeof(ShaderState));
new_state = pgraph_get_shader_state(pg);
new_state.vulkan = true;
new_state.psh.vulkan = true;
/* Push constants are used for uniform attribute values only when the
 * device limit can hold the largest possible set of them. */
new_state.use_push_constants_for_uniform_attrs =
(r->device_props.limits.maxPushConstantsSize >=
MAX_UNIFORM_ATTR_VALUES_SIZE);
/* Swap bindings only when the state bytes actually differ. */
if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) {
r->shader_binding = gen_shaders(pg, &new_state);
r->shader_bindings_changed = true;
}
}
// FIXME: Use dirty bits
pgraph_vk_update_shader_uniforms(pg);
NV2A_VK_DGROUP_END();
}
void pgraph_vk_update_shader_uniforms(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
NV2A_VK_DGROUP_BEGIN("%s", __func__);
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND);
assert(r->shader_binding);
ShaderBinding *binding = r->shader_binding;
ShaderUniformLayout *layouts[] = { &binding->vsh.module_info->uniforms,
&binding->psh.module_info->uniforms };
VshUniformValues vsh_values;
pgraph_glsl_set_vsh_uniform_values(pg, &binding->state.vsh,
binding->vsh.uniform_locs, &vsh_values);
apply_uniform_updates(&binding->vsh.module_info->uniforms, VshUniformInfo,
binding->vsh.uniform_locs, &vsh_values,
VshUniform__COUNT);
PshUniformValues psh_values;
pgraph_glsl_set_psh_uniform_values(pg, binding->psh.uniform_locs,
&psh_values);
for (int i = 0; i < 4; i++) {
assert(r->texture_bindings[i] != NULL);
float scale = r->texture_bindings[i]->key.scale;
BasicColorFormatInfo f_basic =
kelvin_color_format_info_map[pg->vk_renderer_state
->texture_bindings[i]
->key.state.color_format];
if (!f_basic.linear) {
scale = 1.0;
}
psh_values.texScale[i] = scale;
}
apply_uniform_updates(&binding->psh.module_info->uniforms, PshUniformInfo,
binding->psh.uniform_locs, &psh_values,
PshUniform__COUNT);
ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms,
&binding->fragment->uniforms };
shader_update_constants(pg, r->shader_binding, true,
r->shader_binding->state.vertex_program,
r->shader_binding->state.fixed_function);
for (int i = 0; i < ARRAY_SIZE(layouts); i++) {
uint64_t hash =
fast_hash(layouts[i]->allocation, layouts[i]->total_size);
uint64_t hash = fast_hash(layouts[i]->allocation, layouts[i]->total_size);
r->uniforms_changed |= (hash != r->uniform_buffer_hashes[i]);
r->uniform_buffer_hashes[i] = hash;
}
@ -492,44 +850,13 @@ static void update_shader_uniforms(PGRAPHState *pg)
NV2A_VK_DGROUP_END();
}
/*
 * Select the shader binding for the current PGRAPH state and refresh its
 * uniforms.
 *
 * r->shader_bindings_changed is cleared on entry and set to true only when a
 * different binding is installed. When a binding already exists and the
 * state is not dirty, the NV2A_PROF_SHADER_BIND_NOTDIRTY counter is bumped
 * instead. update_shader_uniforms() runs on every call.
 */
void pgraph_vk_bind_shaders(PGRAPHState *pg)
{
    NV2A_VK_DGROUP_BEGIN("%s", __func__);

    PGRAPHVkState *r = pg->vk_renderer_state;
    r->shader_bindings_changed = false;

    // The short-circuit keeps the dirty check from touching a NULL binding.
    bool needs_lookup =
        !r->shader_binding ||
        pgraph_glsl_check_shader_state_dirty(pg, &r->shader_binding->state);

    if (!needs_lookup) {
        nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY);
    } else {
        ShaderState new_state = pgraph_glsl_get_shader_state(pg);

        // Swap bindings only if there is none yet or the state bytes differ.
        bool state_differs =
            !r->shader_binding ||
            memcmp(&r->shader_binding->state, &new_state,
                   sizeof(ShaderState)) != 0;

        if (state_differs) {
            r->shader_binding = get_shader_binding_for_state(r, &new_state);
            r->shader_bindings_changed = true;
        }
    }

    update_shader_uniforms(pg);
    NV2A_VK_DGROUP_END();
}
/*
 * Initialise the shader subsystem of the Vulkan renderer.
 *
 * Runs, in order: GLSL compiler init, descriptor pool creation, descriptor
 * set layout creation, descriptor set creation and shader cache init. It
 * then records whether push constants can be used for uniform attributes.
 */
void pgraph_vk_init_shaders(PGRAPHState *pg)
{
PGRAPHVkState *r = pg->vk_renderer_state;
pgraph_vk_init_glsl_compiler();
// NOTE(review): pool -> layout -> sets is presumably a required order
// (sets are normally allocated from a pool using a layout) - confirm
// before reordering.
create_descriptor_pool(pg);
create_descriptor_set_layout(pg);
create_descriptor_sets(pg);
shader_cache_init(pg);
// Push constants are used only when the device limit is at least
// MAX_UNIFORM_ATTR_VALUES_SIZE.
r->use_push_constants_for_uniform_attrs =
(r->device_props.limits.maxPushConstantsSize >=
MAX_UNIFORM_ATTR_VALUES_SIZE);
}
void pgraph_vk_finalize_shaders(PGRAPHState *pg)

View File

@ -524,8 +524,7 @@ void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface,
pgraph_vk_end_debug_marker(r, cmd);
}
static void pipeline_cache_entry_init(Lru *lru, LruNode *node,
const void *state)
static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
PGRAPHVkState *r = container_of(lru, PGRAPHVkState, compute.pipeline_cache);
ComputePipeline *snode = container_of(node, ComputePipeline, node);
@ -557,8 +556,7 @@ static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node)
pipeline_cache_release_node_resources(r, snode);
}
static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node,
const void *key)
static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
ComputePipeline *snode = container_of(node, ComputePipeline, node);
return memcmp(&snode->key, key, sizeof(ComputePipelineKey));

View File

@ -122,22 +122,17 @@ static void memcpy_image(void *dst, void const *src, int dst_stride,
}
}
/*
 * Report whether a surface intersects an address range.
 *
 * Both spans are treated as half-open: the surface covers
 * [vram_addr, vram_addr + size) and the range covers
 * [range_start, range_start + range_len). Spans that merely touch at an
 * end point do not count as overlapping.
 */
static bool check_surface_overlaps_range(const SurfaceBinding *surface,
                                         hwaddr range_start, hwaddr range_len)
{
    const hwaddr surface_limit = surface->vram_addr + surface->size;
    const hwaddr range_limit = range_start + range_len;

    // Each span must begin before the other one ends.
    return surface->vram_addr < range_limit && range_start < surface_limit;
}
void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg,
hwaddr start, hwaddr size)
void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size)
{
PGRAPHVkState *r = pg->vk_renderer_state;
SurfaceBinding *surface;
hwaddr end = start + size - 1;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
if (check_surface_overlaps_range(surface, start, size)) {
hwaddr surf_end = surface->vram_addr + surface->size - 1;
bool overlapping = !(surface->vram_addr >= end || start >= surf_end);
if (overlapping) {
pgraph_vk_surface_download_if_dirty(
container_of(pg, NV2AState, pgraph), surface);
}
@ -532,54 +527,33 @@ void pgraph_vk_download_dirty_surfaces(NV2AState *d)
static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr,
hwaddr len, bool write)
{
NV2AState *d = (NV2AState *)opaque;
qemu_mutex_lock(&d->pgraph.lock);
SurfaceBinding *e = opaque;
assert(addr >= e->vram_addr);
hwaddr offset = addr - e->vram_addr;
assert(offset < e->size);
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
bool wait_for_downloads = false;
SurfaceBinding *surface;
QTAILQ_FOREACH(surface, &r->surfaces, entry) {
if (!check_surface_overlaps_range(surface, addr, len)) {
continue;
if (qatomic_read(&e->draw_dirty)) {
trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
pgraph_vk_wait_for_surface_download(e);
}
hwaddr offset = addr - surface->vram_addr;
if (write) {
trace_nv2a_pgraph_surface_cpu_write(surface->vram_addr, offset);
} else {
trace_nv2a_pgraph_surface_cpu_read(surface->vram_addr, offset);
}
if (surface->draw_dirty) {
surface->download_pending = true;
wait_for_downloads = true;
}
if (write) {
surface->upload_pending = true;
}
}
qemu_mutex_unlock(&d->pgraph.lock);
if (wait_for_downloads) {
qemu_mutex_lock(&d->pfifo.lock);
qemu_event_reset(&r->downloads_complete);
qatomic_set(&r->downloads_pending, true);
pfifo_kick(d);
qemu_mutex_unlock(&d->pfifo.lock);
qemu_event_wait(&r->downloads_complete);
if (write && !qatomic_read(&e->upload_pending)) {
trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset);
qatomic_set(&e->upload_pending, true);
}
}
static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface)
{
if (tcg_enabled()) {
surface->access_cb = mem_access_callback_insert(
qemu_get_cpu(0), d->vram, surface->vram_addr, surface->size,
&surface_access_callback, d);
qemu_mutex_unlock(&d->pgraph.lock);
bql_lock();
mem_access_callback_insert(qemu_get_cpu(0),
d->vram, surface->vram_addr, surface->size,
&surface->access_cb, &surface_access_callback,
surface);
bql_unlock();
qemu_mutex_lock(&d->pgraph.lock);
}
}
@ -587,7 +561,11 @@ static void unregister_cpu_access_callback(NV2AState *d,
SurfaceBinding const *surface)
{
if (tcg_enabled()) {
qemu_mutex_unlock(&d->pgraph.lock);
bql_lock();
mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb);
bql_unlock();
qemu_mutex_lock(&d->pgraph.lock);
}
}
@ -649,26 +627,24 @@ static void invalidate_surface(NV2AState *d, SurfaceBinding *surface)
QTAILQ_INSERT_HEAD(&r->invalid_surfaces, surface, entry);
}
static bool check_surfaces_overlap(const SurfaceBinding *surface,
const SurfaceBinding *other_surface)
{
return check_surface_overlaps_range(surface, other_surface->vram_addr,
other_surface->size);
}
static void invalidate_overlapping_surfaces(NV2AState *d,
SurfaceBinding const *surface)
{
PGRAPHVkState *r = d->pgraph.vk_renderer_state;
SurfaceBinding *other_surface, *next_surface;
QTAILQ_FOREACH_SAFE (other_surface, &r->surfaces, entry, next_surface) {
if (check_surfaces_overlap(surface, other_surface)) {
uintptr_t e_end = surface->vram_addr + surface->size - 1;
SurfaceBinding *s, *next;
QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) {
uintptr_t s_end = s->vram_addr + s->size - 1;
bool overlapping =
!(s->vram_addr > e_end || surface->vram_addr > s_end);
if (overlapping) {
trace_nv2a_pgraph_surface_evict_overlapping(
other_surface->vram_addr, other_surface->width,
other_surface->height, other_surface->pitch);
pgraph_vk_surface_download_if_dirty(d, other_surface);
invalidate_surface(d, other_surface);
s->vram_addr, s->width, s->height,
s->pitch);
pgraph_vk_surface_download_if_dirty(d, s);
invalidate_surface(d, s);
}
}
}

View File

@ -1089,9 +1089,12 @@ static void create_texture(PGRAPHState *pg, int texture_idx)
BasicColorFormatInfo f_basic = kelvin_color_format_info_map[state.color_format];
const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx);
size_t texture_palette_data_size;
const hwaddr texture_palette_vram_offset =
pgraph_get_texture_palette_phys_addr_length(pg, texture_idx,
&texture_palette_data_size);
size_t texture_length = pgraph_get_texture_length(pg, &state);
hwaddr texture_palette_vram_offset = 0;
size_t texture_palette_data_size = 0;
uint32_t filter =
pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4);
@ -1099,21 +1102,14 @@ static void create_texture(PGRAPHState *pg, int texture_idx)
pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + texture_idx * 4);
uint32_t border_color_pack32 =
pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4);
bool is_indexed = (state.color_format ==
NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8);
TextureKey key;
memset(&key, 0, sizeof(key));
key.state = state;
key.texture_vram_offset = texture_vram_offset;
key.texture_length = texture_length;
if (is_indexed) {
texture_palette_vram_offset =
pgraph_get_texture_palette_phys_addr_length(
pg, texture_idx, &texture_palette_data_size);
key.palette_vram_offset = texture_palette_vram_offset;
key.palette_length = texture_palette_data_size;
}
key.scale = 1;
// FIXME: Separate sampler from texture
@ -1121,6 +1117,9 @@ static void create_texture(PGRAPHState *pg, int texture_idx)
key.address = address;
key.border_color = border_color_pack32;
bool is_indexed = (state.color_format ==
NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8);
bool possibly_dirty = false;
bool possibly_dirty_checked = false;
bool surface_to_texture = false;
@ -1433,7 +1432,7 @@ void pgraph_vk_bind_textures(NV2AState *d)
NV2A_VK_DGROUP_END();
}
static void texture_cache_entry_init(Lru *lru, LruNode *node, const void *state)
static void texture_cache_entry_init(Lru *lru, LruNode *node, void *state)
{
TextureBinding *snode = container_of(node, TextureBinding, node);
@ -1486,8 +1485,7 @@ static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
texture_cache_release_node_resources(r, snode);
}
static bool texture_cache_entry_compare(Lru *lru, LruNode *node,
const void *key)
static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
TextureBinding *snode = container_of(node, TextureBinding, node);
return memcmp(&snode->key, key, sizeof(TextureKey));

View File

@ -20,8 +20,8 @@
#ifndef HW_NV2A_VSH_H
#define HW_NV2A_VSH_H
#include "qemu/osdep.h"
#include "hw/xbox/nv2a/nv2a_regs.h"
#include <stdbool.h>
#include "qemu/mstring.h"
enum VshLight {
LIGHT_OFF,
@ -55,6 +55,8 @@ enum VshFoggen {
FOGGEN_RADIAL,
FOGGEN_PLANAR,
FOGGEN_ABS_PLANAR,
FOGGEN_ERROR4,
FOGGEN_ERROR5,
FOGGEN_FOG_X
};
@ -82,60 +84,6 @@ enum VshSkinning {
#define VSH_TOKEN_SIZE 4
#define VSH_D3DSCM_CORRECTION 96
/*
 * Kind of a vertex-shader parameter. PARAM_UNKNOWN is explicitly 0 and the
 * remaining values follow sequentially (1..3).
 * NOTE(review): R/V/C presumably stand for temporary register, vertex input
 * and constant - confirm against the shader decoder.
 */
typedef enum {
PARAM_UNKNOWN = 0,
PARAM_R,
PARAM_V,
PARAM_C
} VshParameterType;
/*
 * Destination kind of a vertex-shader output: OUTPUT_C is 0, OUTPUT_O is 1.
 * NOTE(review): presumably "constant" vs. "output register" - confirm.
 */
typedef enum {
OUTPUT_C = 0,
OUTPUT_O
} VshOutputType;
/*
 * Output multiplexer selection: OMUX_MAC is 0, OMUX_ILU is 1.
 * NOTE(review): presumably selects which unit (MAC or ILU) drives the
 * output - confirm against the decoder.
 */
typedef enum {
OMUX_MAC = 0,
OMUX_ILU
} VshOutputMux;
/*
 * ILU opcodes. ILU_NOP is explicitly 0; the rest are numbered sequentially
 * in declaration order (1..7).
 * NOTE(review): the numbering presumably mirrors the opcode field encoding
 * of the shader token - do not reorder without confirming.
 */
typedef enum {
ILU_NOP = 0,
ILU_MOV,
ILU_RCP,
ILU_RCC,
ILU_RSQ,
ILU_EXP,
ILU_LOG,
ILU_LIT
} VshILU;
/*
 * MAC opcodes. MAC_NOP is the first enumerator and therefore 0; the rest
 * are numbered sequentially in declaration order (1..13).
 * NOTE(review): the numbering presumably mirrors the opcode field encoding
 * of the shader token - do not reorder without confirming.
 */
typedef enum {
MAC_NOP,
MAC_MOV,
MAC_MUL,
MAC_ADD,
MAC_MAD,
MAC_DP3,
MAC_DPH,
MAC_DP4,
MAC_DST,
MAC_MIN,
MAC_MAX,
MAC_SLT,
MAC_SGE,
MAC_ARL
} VshMAC;
/*
 * Component selector used for swizzles: X, Y, Z and W are numbered 0..3.
 */
typedef enum {
SWIZZLE_X = 0,
SWIZZLE_Y,
SWIZZLE_Z,
SWIZZLE_W
} VshSwizzle;
typedef enum {
FLD_ILU = 0,
FLD_MAC,
@ -182,30 +130,4 @@ typedef enum {
uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name);
/*
 * Primitive topology a shader is generated for. PRIM_TYPE_INVALID is the
 * first enumerator and therefore 0; the valid modes are numbered 1..10 in
 * declaration order.
 * NOTE(review): the numbering presumably matches the hardware begin/end
 * primitive codes - confirm before reordering.
 */
enum ShaderPrimitiveMode {
PRIM_TYPE_INVALID,
PRIM_TYPE_POINTS,
PRIM_TYPE_LINES,
PRIM_TYPE_LINE_LOOP,
PRIM_TYPE_LINE_STRIP,
PRIM_TYPE_TRIANGLES,
PRIM_TYPE_TRIANGLE_STRIP,
PRIM_TYPE_TRIANGLE_FAN,
PRIM_TYPE_QUADS,
PRIM_TYPE_QUAD_STRIP,
PRIM_TYPE_POLYGON,
};
/*
 * Polygon rasterisation mode: fill (0), point (1) or line (2).
 */
enum ShaderPolygonMode {
POLY_MODE_FILL,
POLY_MODE_POINT,
POLY_MODE_LINE,
};
/*
 * Source selected for a material colour: the material itself (0), the
 * diffuse colour (1) or the specular colour (2).
 * NOTE(review): which material terms this applies to is not visible
 * here - confirm at the use sites.
 */
enum MaterialColorSource {
MATERIAL_COLOR_SRC_MATERIAL,
MATERIAL_COLOR_SRC_DIFFUSE,
MATERIAL_COLOR_SRC_SPECULAR,
};
#endif

View File

@ -11,8 +11,7 @@ nv2a_pgraph_method(uint32_t subchannel, uint32_t graphics_class, uint32_t method
nv2a_pgraph_method_abbrev(uint32_t subchannel, uint32_t graphics_class, uint32_t method, const char *name, unsigned int count) "%d: 0x%"PRIx32" -> 0x%04"PRIx32" %s * %d"
nv2a_pgraph_method_unhandled(uint32_t subchannel, uint32_t graphics_class, uint32_t method, uint32_t parameter) "%d: 0x%"PRIx32" -> 0x%04"PRIx32" 0x%"PRIx32
nv2a_pgraph_surface_compare_mismatch(const char *field, long int a, long int b) "%20s -- %8ld vs %8ld"
nv2a_pgraph_surface_cpu_read(uint32_t addr, uint32_t offset) "0x%08"PRIx32"+0x%"PRIx32
nv2a_pgraph_surface_cpu_write(uint32_t addr, uint32_t offset) "0x%08"PRIx32"+0x%"PRIx32
nv2a_pgraph_surface_cpu_access(uint32_t addr, uint32_t offset) "0x%08"PRIx32"+0x%"PRIx32
nv2a_pgraph_surface_create_color(uint32_t addr, uint32_t width, uint32_t height, const char *layout, uint32_t anti_aliasing, uint32_t clip_x, uint32_t clip_width, uint32_t clip_y, uint32_t clip_height, uint32_t pitch) "Create: [COLOR @ 0x%08" PRIx32 " (%dx%d)] (%s) aa:%d, clip:x=%d,w=%d,y=%d,h=%d,p=%d"
nv2a_pgraph_surface_create_zeta(uint32_t addr, uint32_t width, uint32_t height, const char *layout, uint32_t anti_aliasing, uint32_t clip_x, uint32_t clip_width, uint32_t clip_y, uint32_t clip_height, uint32_t pitch) " Create: [ZETA @ 0x%08" PRIx32 " (%dx%d)] (%s) aa:%d, clip:x=%d,w=%d,y=%d,h=%d,p=%d"
nv2a_pgraph_surface_download(const char *binding, const char *layout, uint32_t addr, uint32_t width, uint32_t height, uint32_t pitch, uint32_t bytes_per_pixel) "[GPU->RAM] %s (%s) surface @ 0x%08" PRIx32 " (w=%d,h=%d,p=%d,bpp=%d)"

1002
hw/xbox/nvnet.c Normal file

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More