From 3f557e6d45d4be4893d5e23cf2e587cd876bb962 Mon Sep 17 00:00:00 2001
From: Matt Borgerson <contact@mborgerson.com>
Date: Sat, 24 Mar 2018 01:04:15 -0700
Subject: [PATCH] Add hw/xbox sources from XQEMU 1.x @ 4d9107e

---
 hw/xbox/Makefile.objs         |   12 +
 hw/xbox/acpi_xbox.c           |  118 +
 hw/xbox/acpi_xbox.h           |   38 +
 hw/xbox/amd_smbus.c           |  256 ++
 hw/xbox/amd_smbus.h           |   45 +
 hw/xbox/chihiro-usb.c         |  331 ++
 hw/xbox/chihiro.c             |  292 ++
 hw/xbox/dsp/Makefile.objs     |    1 +
 hw/xbox/dsp/dsp.c             |  476 ++
 hw/xbox/dsp/dsp.h             |   59 +
 hw/xbox/dsp/dsp_cpu.c         | 1431 ++++++
 hw/xbox/dsp/dsp_cpu.h         |  257 ++
 hw/xbox/dsp/dsp_dis.inl       | 2217 +++++++++
 hw/xbox/dsp/dsp_dma.c         |  247 +
 hw/xbox/dsp/dsp_dma.h         |   54 +
 hw/xbox/dsp/dsp_emu.inl       | 8043 +++++++++++++++++++++++++++++++++
 hw/xbox/g-lru-cache.c         |  338 ++
 hw/xbox/g-lru-cache.h         |   80 +
 hw/xbox/lpc47m157.c           |  247 +
 hw/xbox/mcpx_aci.c            |   99 +
 hw/xbox/mcpx_apu.c            |  651 +++
 hw/xbox/nv2a.c                | 6439 ++++++++++++++++++++++++++
 hw/xbox/nv2a.h                |   25 +
 hw/xbox/nv2a_debug.c          |   94 +
 hw/xbox/nv2a_debug.h          |   60 +
 hw/xbox/nv2a_int.h            | 1284 ++++++
 hw/xbox/nv2a_psh.c            |  847 ++++
 hw/xbox/nv2a_psh.h            |   59 +
 hw/xbox/nv2a_shaders.c        |  944 ++++
 hw/xbox/nv2a_shaders.h        |  115 +
 hw/xbox/nv2a_shaders_common.h |   37 +
 hw/xbox/nv2a_vsh.c            |  768 ++++
 hw/xbox/nv2a_vsh.h            |  142 +
 hw/xbox/nvnet.c               | 1033 +++++
 hw/xbox/smbus_adm1032.c       |   86 +
 hw/xbox/smbus_cx25871.c       |  117 +
 hw/xbox/smbus_xbox_smc.c      |  227 +
 hw/xbox/swizzle.c             |  165 +
 hw/xbox/swizzle.h             |   62 +
 hw/xbox/xbox.c                |  358 ++
 hw/xbox/xbox.h                |   32 +
 hw/xbox/xbox_pci.c            |  555 +++
 hw/xbox/xbox_pci.h            |   83 +
 hw/xbox/xid.c                 |  445 ++
 44 files changed, 29269 insertions(+)
 create mode 100644 hw/xbox/Makefile.objs
 create mode 100644 hw/xbox/acpi_xbox.c
 create mode 100644 hw/xbox/acpi_xbox.h
 create mode 100644 hw/xbox/amd_smbus.c
 create mode 100644 hw/xbox/amd_smbus.h
 create mode 100644 hw/xbox/chihiro-usb.c
 create mode 100644 hw/xbox/chihiro.c
 create mode 100644 hw/xbox/dsp/Makefile.objs
 create mode 100644 hw/xbox/dsp/dsp.c
 create mode 100644 hw/xbox/dsp/dsp.h
 create mode 100644 hw/xbox/dsp/dsp_cpu.c
 create mode 100644 hw/xbox/dsp/dsp_cpu.h
 create mode 100644 hw/xbox/dsp/dsp_dis.inl
 create mode 100644 hw/xbox/dsp/dsp_dma.c
 create mode 100644 hw/xbox/dsp/dsp_dma.h
 create mode 100644 hw/xbox/dsp/dsp_emu.inl
 create mode 100644 hw/xbox/g-lru-cache.c
 create mode 100644 hw/xbox/g-lru-cache.h
 create mode 100644 hw/xbox/lpc47m157.c
 create mode 100644 hw/xbox/mcpx_aci.c
 create mode 100644 hw/xbox/mcpx_apu.c
 create mode 100644 hw/xbox/nv2a.c
 create mode 100644 hw/xbox/nv2a.h
 create mode 100644 hw/xbox/nv2a_debug.c
 create mode 100644 hw/xbox/nv2a_debug.h
 create mode 100644 hw/xbox/nv2a_int.h
 create mode 100644 hw/xbox/nv2a_psh.c
 create mode 100644 hw/xbox/nv2a_psh.h
 create mode 100644 hw/xbox/nv2a_shaders.c
 create mode 100644 hw/xbox/nv2a_shaders.h
 create mode 100644 hw/xbox/nv2a_shaders_common.h
 create mode 100644 hw/xbox/nv2a_vsh.c
 create mode 100644 hw/xbox/nv2a_vsh.h
 create mode 100644 hw/xbox/nvnet.c
 create mode 100644 hw/xbox/smbus_adm1032.c
 create mode 100644 hw/xbox/smbus_cx25871.c
 create mode 100644 hw/xbox/smbus_xbox_smc.c
 create mode 100644 hw/xbox/swizzle.c
 create mode 100644 hw/xbox/swizzle.h
 create mode 100644 hw/xbox/xbox.c
 create mode 100644 hw/xbox/xbox.h
 create mode 100644 hw/xbox/xbox_pci.c
 create mode 100644 hw/xbox/xbox_pci.h
 create mode 100644 hw/xbox/xid.c

diff --git a/hw/xbox/Makefile.objs b/hw/xbox/Makefile.objs
new file mode 100644
index 0000000000..d33e89902b
--- /dev/null
+++ b/hw/xbox/Makefile.objs
@@ -0,0 +1,12 @@
+obj-y += xbox.o chihiro.o
+obj-y += xbox_pci.o acpi_xbox.o
+obj-y += amd_smbus.o smbus_xbox_smc.o smbus_cx25871.o smbus_adm1032.o
+obj-y += nvnet.o
+obj-y += nv2a.o nv2a_vsh.o nv2a_psh.o nv2a_shaders.o nv2a_debug.o
+obj-y += swizzle.o g-lru-cache.o
+obj-y += mcpx_apu.o mcpx_aci.o
+obj-y += lpc47m157.o
+obj-y += xid.o
+obj-y += chihiro-usb.o
+
+obj-y += dsp/
diff --git a/hw/xbox/acpi_xbox.c b/hw/xbox/acpi_xbox.c
new file mode 100644
index 0000000000..ea138bd9da
--- /dev/null
+++ b/hw/xbox/acpi_xbox.c
@@ -0,0 +1,118 @@
+/*
+ * Xbox ACPI implementation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * Based on acpi.c, acpi_ich9.c, acpi_piix4.c
+ * Copyright (c) 2006 Fabrice Bellard
+ * Copyright (c) 2009 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan K.K.
+ * Copyright (c) 2012 Jason Baron <jbaron@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+#include "qemu/timer.h"
+#include "sysemu/sysemu.h"
+#include "hw/acpi/acpi.h"
+#include "hw/xbox/xbox_pci.h"
+
+#include "hw/xbox/acpi_xbox.h"
+
+#define DEBUG /* NOTE(review): defined unconditionally, so XBOX_DPRINTF always prints */
+#ifdef DEBUG
+# define XBOX_DPRINTF(format, ...)     printf(format, ## __VA_ARGS__)
+#else
+# define XBOX_DPRINTF(format, ...)     do { } while (0)
+#endif
+
+#define XBOX_PM_GPIO_BASE 0xC0 /* offset of the GPIO window inside the PM I/O region */
+#define XBOX_PM_GPIO_LEN 26 /* size given to the GPIO window region */
+
+static int field_pin = 0; /* last value of the toggling bit handed out by reads of offset 0 */
+/* GPIO read: offset 0 yields a bit-5 value that flips on each read; all else reads 0. */
+static uint64_t xbox_pm_gpio_read(void *opaque, hwaddr addr, unsigned width)
+{
+    uint64_t r = 0; /* logged via PRIx64/HWADDR_PRIx: not 'long long' on every host */
+    switch (addr) {
+    case 0:
+        // field pin from tv encoder?
+        field_pin = (field_pin+1)&1;
+        r = field_pin << 5;
+        break;
+    default:
+        break;
+    }
+    XBOX_DPRINTF("pm gpio read [0x%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", addr, r);
+    return r;
+}
+
+static void xbox_pm_gpio_write(void *opaque, hwaddr addr, uint64_t val,
+                               unsigned width)
+{   /* Writes are only logged, nothing is stored; format macros match hwaddr/uint64_t. */
+    XBOX_DPRINTF("pm gpio write [0x%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", addr, val);
+}
+
+static const MemoryRegionOps xbox_pm_gpio_ops = { /* access callbacks for the GPIO window */
+    .read = xbox_pm_gpio_read,
+    .write = xbox_pm_gpio_write,
+};
+
+static void pm_update_sci(XBOX_PMRegs *pm)
+{   /* Recompute the SCI line from PM1 status/enable and update the PM timer request. */
+    ACPIREGS *regs = &pm->acpi_regs;
+    const int status = acpi_pm1_evt_get_sts(regs);
+    const int sci_sources = ACPI_BITMASK_RT_CLOCK_ENABLE |
+                            ACPI_BITMASK_POWER_BUTTON_ENABLE |
+                            ACPI_BITMASK_GLOBAL_LOCK_ENABLE |
+                            ACPI_BITMASK_TIMER_ENABLE;
+    const int pending = status & regs->pm1.evt.en & sci_sources;
+    const int timer_enabled = (regs->pm1.evt.en & ACPI_BITMASK_TIMER_ENABLE) != 0;
+    const int timer_fired = (status & ACPI_BITMASK_TIMER_STATUS) != 0;
+
+    /* The SCI is asserted while any enabled source has its status bit set. */
+    qemu_set_irq(pm->irq, pending != 0);
+
+    /* schedule a timer interruption if needed */
+    acpi_pm_tmr_update(regs, timer_enabled && !timer_fired);
+}
+
+static void xbox_pm_update_sci_fn(ACPIREGS *regs)
+{
+    /* Callback handed to the ACPI helpers: recover the owning XBOX_PMRegs and refresh. */
+    pm_update_sci(container_of(regs, XBOX_PMRegs, acpi_regs));
+}
+
+#define XBOX_PM_BASE_BAR 0
+/* Build the PM I/O container (BAR 0): ACPI timer/event/control blocks and the GPIO window. */
+void xbox_pm_init(PCIDevice *dev, XBOX_PMRegs *pm, qemu_irq sci_irq)
+{
+    MemoryRegion *io = &pm->io;
+
+    memory_region_init(io, OBJECT(dev), "xbox-pm", 256);
+    pci_register_bar(dev, XBOX_PM_BASE_BAR, PCI_BASE_ADDRESS_SPACE_IO, io);
+
+    /* The ACPI helpers receive the container, presumably to map their registers in it. */
+    acpi_pm_tmr_init(&pm->acpi_regs, xbox_pm_update_sci_fn, io);
+    acpi_pm1_evt_init(&pm->acpi_regs, xbox_pm_update_sci_fn, io);
+    acpi_pm1_cnt_init(&pm->acpi_regs, io, 2);
+
+    memory_region_init_io(&pm->io_gpio, OBJECT(dev), &xbox_pm_gpio_ops, pm,
+                          "xbox-pm-gpio", XBOX_PM_GPIO_LEN);
+    memory_region_add_subregion(io, XBOX_PM_GPIO_BASE, &pm->io_gpio);
+    pm->irq = sci_irq;
+}
\ No newline at end of file
diff --git a/hw/xbox/acpi_xbox.h b/hw/xbox/acpi_xbox.h
new file mode 100644
index 0000000000..e5ac3c9f5f
--- /dev/null
+++ b/hw/xbox/acpi_xbox.h
@@ -0,0 +1,38 @@
+/*
+ * QEMU Xbox PM Emulation
+ *
+ *  Copyright (c) 2012 espes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#ifndef HW_ACPI_XBOX_H
+#define HW_ACPI_XBOX_H
+
+#include "hw/acpi/acpi.h"
+
+typedef struct XBOX_PMRegs {
+    ACPIREGS acpi_regs; /* ACPI register state; container_of() anchor in acpi_xbox.c */
+
+    MemoryRegion io; /* 256-byte PM I/O container registered as a PCI I/O BAR */
+    MemoryRegion io_gpio; /* GPIO window mapped inside io */
+
+    qemu_irq irq;   /* SCI line driven by pm_update_sci() */
+} XBOX_PMRegs;
+
+void xbox_pm_init(PCIDevice *dev, XBOX_PMRegs *pm, qemu_irq sci_irq);
+//void xbox_pm_iospace_update(MCPX_PMRegs *pm, uint32_t pm_io_base);
+
+
+#endif
\ No newline at end of file
diff --git a/hw/xbox/amd_smbus.c b/hw/xbox/amd_smbus.c
new file mode 100644
index 0000000000..ae097764d1
--- /dev/null
+++ b/hw/xbox/amd_smbus.c
@@ -0,0 +1,256 @@
+/*
+ * AMD756 SMBus implementation
+ *
+ * Copyright (C) 2012 espes
+ *
+ * Based on pm_smbus.c
+ * Copyright (c) 2006 Fabrice Bellard
+ * Based on Linux drivers/i2c/busses/i2c-amd756.c
+ * Copyright (c) 1999-2002 Merlin Hughes <merlin@merlin.org>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "hw/xbox/amd_smbus.h"
+#include "hw/i2c/smbus.h"
+
+/* AMD756 SMBus address offsets */
+#define SMB_ADDR_OFFSET     0xE0
+#define SMB_IOSIZE      16
+/* Register offsets decoded by the ioport read/write handlers (after addr & 0x3f). */
+#define SMB_GLOBAL_STATUS       0x0
+#define SMB_GLOBAL_ENABLE       0x2
+#define SMB_HOST_ADDRESS        0x4
+#define SMB_HOST_DATA           0x6 /* the handlers also decode offset + 1 (high byte) */
+#define SMB_HOST_COMMAND        0x8
+#define SMB_HOST_BLOCK_DATA     0x9
+#define SMB_HAS_DATA            0xA
+#define SMB_HAS_DEVICE_ADDRESS  0xC
+#define SMB_HAS_HOST_ADDRESS    0xE
+#define SMB_SNOOP_ADDRESS       0xF
+
+/* AMD756 constants: cycle types selected by the low three bits of the enable register */
+#define AMD756_QUICK        0x00
+#define AMD756_BYTE         0x01
+#define AMD756_BYTE_DATA    0x02
+#define AMD756_WORD_DATA    0x03
+#define AMD756_PROCESS_CALL 0x04 /* not handled by amd756_smb_transaction() */
+#define AMD756_BLOCK_DATA   0x05
+
+/*
+  SMBUS event = I/O 28-29 bit 11
+     see E0 for the status bits and enabled in E2
+*/
+#define GS_ABRT_STS (1 << 0) /* set when GE_ABORT is written */
+#define GS_COL_STS  (1 << 1)
+#define GS_PRERR_STS    (1 << 2) /* set for an unsupported cycle type */
+#define GS_HST_STS  (1 << 3)
+#define GS_HCYC_STS (1 << 4) /* set after a cycle has been carried out */
+#define GS_TO_STS   (1 << 5)
+#define GS_SMB_STS  (1 << 11) /* NOTE(review): does not fit the uint8_t smb_stat field */
+/* Status bits whose presence in a status write resets the whole status register. */
+#define GS_CLEAR_STS    (GS_ABRT_STS | GS_COL_STS | GS_PRERR_STS | \
+             GS_HCYC_STS | GS_TO_STS )
+
+#define GE_CYC_TYPE_MASK    (7) /* extracts the AMD756_* cycle type */
+#define GE_HOST_STC     (1 << 3) /* writing this bit starts a cycle */
+
+#define GE_HCYC_EN      (1 << 4) /* gates raising the interrupt line */
+#define GE_ABORT        (1 << 5) /* writing this bit sets GS_ABRT_STS */
+
+
+
+//#define DEBUG
+
+#ifdef DEBUG
+# define SMBUS_DPRINTF(format, ...)     printf(format, ## __VA_ARGS__)
+#else
+# define SMBUS_DPRINTF(format, ...)     do { } while (0)
+#endif
+
+static void amd756_smb_transaction(AMD756SMBus *s)
+{
+    /* Execute the SMBus cycle described by the host registers and record its outcome. */
+    /* Decode the latched registers: cycle type, R/W bit, command, 7-bit address. */
+    uint8_t prot = s->smb_ctl & GE_CYC_TYPE_MASK;
+    uint8_t read = s->smb_addr & 0x01;
+    uint8_t cmd = s->smb_cmd;
+    uint8_t addr = (s->smb_addr >> 1) & 0x7f;
+    i2c_bus *bus = s->smbus;
+
+    SMBUS_DPRINTF("SMBus trans addr=0x%02x prot=0x%02x\n", addr, prot);
+    switch (prot) {
+    case AMD756_QUICK:
+        smbus_quick_command(bus, addr, read);
+        break;
+    case AMD756_BYTE:
+        if (read) {
+            s->smb_data0 = smbus_receive_byte(bus, addr);
+        } else {
+            smbus_send_byte(bus, addr, cmd);
+        }
+        break;
+    case AMD756_BYTE_DATA:
+        if (read) {
+            s->smb_data0 = smbus_read_byte(bus, addr, cmd);
+        } else {
+            smbus_write_byte(bus, addr, cmd, s->smb_data0);
+        }
+        break;
+    case AMD756_WORD_DATA:
+        if (read) {
+            uint16_t val = smbus_read_word(bus, addr, cmd);
+            s->smb_data0 = val;
+            s->smb_data1 = val >> 8;
+        } else {
+            /* Send both bytes: data1 is the high byte, mirroring the read path. */
+            uint16_t val = ((uint16_t)s->smb_data1 << 8) | s->smb_data0;
+            smbus_write_word(bus, addr, cmd, val);
+        }
+        break;
+    case AMD756_BLOCK_DATA:
+        if (read) {
+            s->smb_data0 = smbus_read_block(bus, addr, cmd, s->smb_data);
+        } else {
+            smbus_write_block(bus, addr, cmd, s->smb_data, s->smb_data0);
+        }
+        break;
+    default:
+        /* Unsupported cycle type (e.g. AMD756_PROCESS_CALL): flag a protocol error. */
+        s->smb_stat |= GS_PRERR_STS;
+        return;
+    }
+
+    /* Every supported cycle type is reported as completed. */
+    s->smb_stat |= GS_HCYC_STS;
+}
+
+/* Status-register write: update the interrupt line, then reset status and cursor. */
+static void amd756_smb_write_status(AMD756SMBus *s, uint32_t val)
+{
+    uint32_t clear_bits = val & GS_CLEAR_STS;
+
+    if (s->irq) {
+        /* Raise an irq if interrupts are enabled and a new status is being set */
+        if ((s->smb_ctl & GE_HCYC_EN) && (clear_bits & ~(uint32_t)s->smb_stat)) {
+            qemu_irq_raise(s->irq);
+        } else {
+            qemu_irq_lower(s->irq);
+        }
+    }
+    /* Any GS_CLEAR_STS bit empties the status; any other value leaves GS_HCYC_STS. */
+    s->smb_stat = clear_bits ? 0 : GS_HCYC_STS;
+    s->smb_index = 0;
+}
+
+/* Enable-register write: latch the control byte and start a cycle when requested. */
+static void amd756_smb_write_enable(AMD756SMBus *s, uint32_t val)
+{
+    s->smb_ctl = val;
+    if (val & GE_ABORT) {
+        s->smb_stat |= GS_ABRT_STS;
+    }
+    if (!(val & GE_HOST_STC)) {
+        return;
+    }
+    amd756_smb_transaction(s);
+    if (s->irq && (val & GE_HCYC_EN) && (s->smb_stat & GS_CLEAR_STS)) {
+        qemu_irq_raise(s->irq);
+    }
+}
+
+/* Byte write to the SMBus register file; the offset is reduced to its low six bits. */
+void amd756_smb_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
+{
+    AMD756SMBus *s = opaque;
+    addr &= 0x3f;
+    SMBUS_DPRINTF("SMB writeb port=0x%04x val=0x%02x\n", addr, val);
+    switch (addr) {
+    case SMB_GLOBAL_STATUS:
+        amd756_smb_write_status(s, val);
+        break;
+    case SMB_GLOBAL_ENABLE:
+        amd756_smb_write_enable(s, val);
+        break;
+    case SMB_HOST_ADDRESS:
+        s->smb_addr = val;
+        break;
+    case SMB_HOST_DATA:
+        s->smb_data0 = val;
+        break;
+    case SMB_HOST_DATA + 1:
+        s->smb_data1 = val;
+        break;
+    case SMB_HOST_COMMAND:
+        s->smb_cmd = val;
+        break;
+    case SMB_HOST_BLOCK_DATA:
+        s->smb_data[s->smb_index++] = val;
+        if (s->smb_index > 31) {
+            s->smb_index = 0;
+        }
+        break;
+    default:
+        break;
+    }
+}
+
+uint32_t amd756_smb_ioport_readb(void *opaque, uint32_t addr)
+{
+    AMD756SMBus *s = opaque;
+    uint32_t result = 0; /* offsets without a case below read as zero */
+    addr &= 0x3f;
+    switch (addr) {
+    case SMB_GLOBAL_STATUS:
+        result = s->smb_stat;
+        break;
+    case SMB_GLOBAL_ENABLE:
+        /* Only the low five bits of the control byte are returned. */
+        result = s->smb_ctl & 0x1f;
+        break;
+    case SMB_HOST_ADDRESS:
+        result = s->smb_addr;
+        break;
+    case SMB_HOST_DATA:
+        result = s->smb_data0;
+        break;
+    case SMB_HOST_DATA + 1:
+        result = s->smb_data1;
+        break;
+    case SMB_HOST_COMMAND:
+        result = s->smb_cmd;
+        break;
+    case SMB_HOST_BLOCK_DATA:
+        /* Reading advances the block cursor, which wraps after slot 31. */
+        result = s->smb_data[s->smb_index++];
+        if (s->smb_index > 31) {
+            s->smb_index = 0;
+        }
+        break;
+    default:
+        break;
+    }
+    SMBUS_DPRINTF("SMB readb port=0x%04x val=0x%02x\n", addr, result);
+    return result;
+}
+
+void amd756_smbus_init(DeviceState *parent, AMD756SMBus *smb, qemu_irq irq)
+{   /* Create the "i2c" bus under parent, clear the status and remember the irq line. */
+    smb->irq = irq;
+    smb->smb_stat = 0;
+    smb->smbus = i2c_init_bus(parent, "i2c");
+    /* NOTE(review): other registers are not reset here; presumably *smb arrives zeroed. */
+}
\ No newline at end of file
diff --git a/hw/xbox/amd_smbus.h b/hw/xbox/amd_smbus.h
new file mode 100644
index 0000000000..86df54c36e
--- /dev/null
+++ b/hw/xbox/amd_smbus.h
@@ -0,0 +1,45 @@
+/*
+ * AMD756 SMBus implementation
+ *
+ * Copyright (C) 2012 espes
+ *
+ * Based on pm_smbus.c
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef AMD_SMBUS_H
+#define AMD_SMBUS_H
+
+typedef struct AMD756SMBus {
+    i2c_bus *smbus; /* bus created by amd756_smbus_init() */
+
+    uint8_t smb_stat; /* status register (GS_* bits) */
+    uint8_t smb_ctl; /* enable/control register (GE_* bits and cycle type) */
+    uint8_t smb_cmd; /* host command byte */
+    uint8_t smb_addr; /* host address: bit 0 = read flag, bits 7..1 = device address */
+    uint8_t smb_data0; /* low data byte; also carries the block transfer count */
+    uint8_t smb_data1; /* high data byte for word cycles */
+    uint8_t smb_data[32]; /* block data buffer */
+    uint8_t smb_index; /* cursor into smb_data, wraps after 31 */
+
+    qemu_irq irq; /* interrupt line; handlers check it for NULL before use */
+} AMD756SMBus;
+
+void amd756_smbus_init(DeviceState *parent, AMD756SMBus *smb, qemu_irq irq);
+void amd756_smb_ioport_writeb(void *opaque, uint32_t addr, uint32_t val);
+uint32_t amd756_smb_ioport_readb(void *opaque, uint32_t addr);
+
+#endif /* !AMD_SMBUS_H */
diff --git a/hw/xbox/chihiro-usb.c b/hw/xbox/chihiro-usb.c
new file mode 100644
index 0000000000..4ef0635cb0
--- /dev/null
+++ b/hw/xbox/chihiro-usb.c
@@ -0,0 +1,331 @@
+/*
+ * QEMU Chihiro USB Devices
+ *
+ * Copyright (c) 2016 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "ui/console.h"
+#include "hw/usb.h"
+#include "hw/usb/desc.h"
+
+#define DEBUG_CUSB /* NOTE(review): always defined, so DPRINTF below always prints */
+#ifdef DEBUG_CUSB
+#define DPRINTF(s, ...) printf("chihiro-usb: " s, ## __VA_ARGS__)
+#else
+#define DPRINTF(...)
+#endif
+
+typedef struct ChihiroUSBState {
+    USBDevice dev; /* base USB device; no additional per-device state is kept */
+} ChihiroUSBState;
+
+enum chihiro_usb_strings {
+    STRING_SERIALNUMBER, /* first enumerator, i.e. index 0 */
+    STRING_MANUFACTURER,
+    STRING_PRODUCT,
+};
+/* String descriptors shared by both Chihiro devices, indexed by chihiro_usb_strings. */
+static const USBDescStrings chihiro_usb_stringtable = {
+    [STRING_SERIALNUMBER]       = "\x00",
+    [STRING_MANUFACTURER]       = "SEGA",
+    [STRING_PRODUCT]            = "BASEBD" // different for qc?
+};
+
+static const USBDescIface desc_iface_chihiro_an2131qc = { /* sole interface of the "qc" device */
+    .bInterfaceNumber              = 0,
+    .bNumEndpoints                 = 10, /* matches the ten entries in .eps */
+    .bInterfaceClass               = USB_CLASS_VENDOR_SPEC,
+    .bInterfaceSubClass            = 0x00,
+    .bInterfaceProtocol            = 0x00,
+    .eps = (USBDescEndpoint[]) { /* bulk OUT endpoints 1-5, then bulk IN endpoints 1-5 */
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x01,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x03,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x04,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x05,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x01,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x03,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x04,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x05,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+    },
+};
+
+static const USBDescDevice desc_device_chihiro_an2131qc = { /* device descriptor, "qc" variant */
+    .bcdUSB                        = 0x0100,
+    .bMaxPacketSize0               = 0x40,
+    .bNumConfigurations            = 1,
+    .confs = (USBDescConfig[]) { /* single configuration with a single interface */
+        {
+            .bNumInterfaces        = 1,
+            .bConfigurationValue   = 1,
+            .bmAttributes          = 0x80,
+            .bMaxPower             = 0x96,
+            .nif = 1,
+            .ifs = &desc_iface_chihiro_an2131qc,
+        },
+    },
+};
+
+static const USBDesc desc_chihiro_an2131qc = { /* descriptor bundle installed by the qc class */
+    .id = {
+        .idVendor          = 0x0CA3,
+        .idProduct         = 0x0002, /* the sc variant uses 0x0003 */
+        .bcdDevice         = 0x0108,
+        .iManufacturer     = STRING_MANUFACTURER,
+        .iProduct          = STRING_PRODUCT,
+        .iSerialNumber     = STRING_SERIALNUMBER,
+    },
+    .full = &desc_device_chihiro_an2131qc,
+    .str  = chihiro_usb_stringtable,
+};
+
+
+
+
+
+static const USBDescIface desc_iface_chihiro_an2131sc = { /* sole interface of the "sc" device */
+    .bInterfaceNumber              = 0,
+    .bNumEndpoints                 = 6, /* matches the six entries in .eps */
+    .bInterfaceClass               = USB_CLASS_VENDOR_SPEC,
+    .bInterfaceSubClass            = 0x00,
+    .bInterfaceProtocol            = 0x00,
+    .eps = (USBDescEndpoint[]) { /* bulk OUT endpoints 1-3, then bulk IN endpoints 1-3 */
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x01,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_OUT | 0x03,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x01,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x03,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 0x0040,
+            .bInterval             = 0,
+        },
+    },
+};
+
+static const USBDescDevice desc_device_chihiro_an2131sc = { /* device descriptor, "sc" variant */
+    .bcdUSB                        = 0x0100,
+    .bMaxPacketSize0               = 0x40,
+    .bNumConfigurations            = 1,
+    .confs = (USBDescConfig[]) { /* single configuration with a single interface */
+        {
+            .bNumInterfaces        = 1,
+            .bConfigurationValue   = 1,
+            .bmAttributes          = 0x80,
+            .bMaxPower             = 0x96,
+            .nif = 1,
+            .ifs = &desc_iface_chihiro_an2131sc,
+        },
+    },
+};
+
+static const USBDesc desc_chihiro_an2131sc = { /* descriptor bundle installed by the sc class */
+    .id = {
+        .idVendor          = 0x0CA3,
+        .idProduct         = 0x0003, /* the qc variant uses 0x0002 */
+        .bcdDevice         = 0x0110,
+        .iManufacturer     = STRING_MANUFACTURER,
+        .iProduct          = STRING_PRODUCT,
+        .iSerialNumber     = STRING_SERIALNUMBER,
+    },
+    .full = &desc_device_chihiro_an2131sc,
+    .str  = chihiro_usb_stringtable,
+};
+
+
+
+static void handle_reset(USBDevice *dev)
+{ /* Reset hook shared by both devices: only logs, no state is touched. */
+    DPRINTF("usb reset\n");
+}
+
+static void handle_control(USBDevice *dev, USBPacket *p,
+               int request, int value, int index, int length, uint8_t *data)
+{
+    DPRINTF("handle control %d %d %d %d\n", request, value, index, length);
+    int handled = usb_desc_handle_control(dev, p, request, value, index, length, data);
+    if (handled < 0) {
+        /* NOTE(review): requests the generic code rejects are ignored; p is not updated. */
+        return;
+    }
+    DPRINTF("handled by usb_desc_handle_control: %d\n", handled);
+}
+
+static void handle_data(USBDevice *dev, USBPacket *p)
+{ /* Data hook: logs pid, endpoint number and buffer size; no data is moved. */
+    DPRINTF("handle_data 0x%x %d 0x%zx\n", p->pid, p->ep->nr, p->iov.size);
+}
+
+static void handle_destroy(USBDevice *dev)
+{   /* Destroy hook: nothing to free, only logs (the message used to say "usb reset"). */
+    DPRINTF("usb destroy\n");
+}
+
+
+static int chihiro_an2131qc_initfn(USBDevice *dev)
+{
+    /* Only the descriptor state needs initialising; this always returns 0. */
+    usb_desc_init(dev);
+
+    return 0;
+}
+
+static void chihiro_an2131qc_class_initfn(ObjectClass *klass, void *data)
+{
+    /* Class setup for the "qc" device: descriptor table plus the shared stub handlers. */
+    USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
+
+    uc->init           = chihiro_an2131qc_initfn;
+    uc->product_desc   = "Chihiro an2131qc";
+    uc->usb_desc       = &desc_chihiro_an2131qc;
+
+    uc->handle_reset   = handle_reset;
+    uc->handle_control = handle_control;
+    uc->handle_data    = handle_data;
+    uc->handle_destroy = handle_destroy;
+    uc->handle_attach  = usb_desc_attach;
+
+    /* TODO: no vmstate (DeviceClass vmsd) is registered yet; unused 'dc' local dropped. */
+}
+
+static const TypeInfo chihiro_an2131qc_info = { /* type record for the "qc" USB device */
+    .name          = "chihiro-an2131qc",
+    .parent        = TYPE_USB_DEVICE,
+    .instance_size = sizeof(ChihiroUSBState),
+    .class_init    = chihiro_an2131qc_class_initfn,
+};
+
+
+
+static int chihiro_an2131sc_initfn(USBDevice *dev)
+{
+    /* Only the descriptor state needs initialising; this always returns 0. */
+    usb_desc_init(dev);
+
+    return 0;
+}
+
+static void chihiro_an2131sc_class_initfn(ObjectClass *klass, void *data)
+{
+    /* Class setup for the "sc" device: descriptor table plus the shared stub handlers. */
+    USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
+
+    uc->init           = chihiro_an2131sc_initfn;
+    uc->product_desc   = "Chihiro an2131sc";
+    uc->usb_desc       = &desc_chihiro_an2131sc;
+
+    uc->handle_reset   = handle_reset;
+    uc->handle_control = handle_control;
+    uc->handle_data    = handle_data;
+    uc->handle_destroy = handle_destroy;
+    uc->handle_attach  = usb_desc_attach;
+
+    /* TODO: no vmstate (DeviceClass vmsd) is registered yet; unused 'dc' local dropped. */
+}
+
+static const TypeInfo chihiro_an2131sc_info = { /* type record for the "sc" USB device */
+    .name          = "chihiro-an2131sc",
+    .parent        = TYPE_USB_DEVICE,
+    .instance_size = sizeof(ChihiroUSBState),
+    .class_init    = chihiro_an2131sc_class_initfn,
+};
+
+
+static void chihiro_usb_register_types(void)
+{ /* Registers both Chihiro USB device types. */
+    type_register_static(&chihiro_an2131qc_info);
+    type_register_static(&chihiro_an2131sc_info);
+}
+/* Hooks the registration function into QEMU's type initialisation. */
+type_init(chihiro_usb_register_types)
diff --git a/hw/xbox/chihiro.c b/hw/xbox/chihiro.c
new file mode 100644
index 0000000000..6897ac3f72
--- /dev/null
+++ b/hw/xbox/chihiro.c
@@ -0,0 +1,292 @@
+/*
+ * QEMU Chihiro emulation
+ *
+ * Copyright (c) 2013 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "hw/boards.h"
+#include "hw/ide.h"
+#include "hw/loader.h"
+#include "hw/isa/isa.h"
+#include "exec/memory.h"
+#include "qemu/config-file.h"
+#include "sysemu/blockdev.h"
+#include "block/blkmemory.h"
+#include "hw/xbox/xbox.h"
+
+
+#define SEGA_CHIP_REVISION                  0xF0
+#   define SEGA_CHIP_REVISION_CHIP_ID            0xFF00
+#       define SEGA_CHIP_REVISION_FPGA_CHIP_ID      0x0000
+#       define SEGA_CHIP_REVISION_ASIC_CHIP_ID      0x0100
+#   define SEGA_CHIP_REVISION_REVISION_ID_MASK   0x00FF
+#define SEGA_DIMM_SIZE                      0xF4
+#   define SEGA_DIMM_SIZE_128M                  0
+#   define SEGA_DIMM_SIZE_256M                  1
+#   define SEGA_DIMM_SIZE_512M                  2
+#   define SEGA_DIMM_SIZE_1024M                 3
+
+//#define DEBUG_CHIHIRO
+
+typedef struct ChihiroLPCState {
+    ISADevice dev;
+    MemoryRegion ioport;
+} ChihiroLPCState;
+
+#define CHIHIRO_LPC_DEVICE(obj) \
+    OBJECT_CHECK(ChihiroLPCState, (obj), "chihiro-lpc")
+
+
+static uint64_t chhiro_lpc_io_read(void *opaque, hwaddr addr,
+                                   unsigned size)
+{
+    uint64_t r = 0; /* offsets without a case below read as zero */
+    switch (addr) {
+    case SEGA_CHIP_REVISION:
+        r = SEGA_CHIP_REVISION_ASIC_CHIP_ID;
+        break;
+    case SEGA_DIMM_SIZE:
+        r = SEGA_DIMM_SIZE_128M;
+        break;
+    }
+#ifdef DEBUG_CHIHIRO
+    printf("chihiro lpc read [0x%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", addr, r);
+#endif
+    return r;
+}
+
+static void chhiro_lpc_io_write(void *opaque, hwaddr addr, uint64_t val,
+                                unsigned size)
+{
+#ifdef DEBUG_CHIHIRO
+    printf("chihiro lpc write [0x%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", addr, val);
+#endif /* apart from the optional trace, written values are discarded */
+}
+
+static const MemoryRegionOps chihiro_lpc_io_ops = {
+    .read = chhiro_lpc_io_read,
+    .write = chhiro_lpc_io_write,
+    .impl = {
+        .min_access_size = 2,
+        .max_access_size = 2,
+    },
+};
+
+static void chihiro_lpc_realize(DeviceState *dev, Error **errp)
+{
+    ChihiroLPCState *s = CHIHIRO_LPC_DEVICE(dev);
+    ISADevice *isa = ISA_DEVICE(dev);
+    /* Create the 0x100-byte register region and register it at port 0x4000. */
+    memory_region_init_io(&s->ioport, OBJECT(dev), &chihiro_lpc_io_ops, s,
+                          "chihiro-lpc-io", 0x100);
+    isa_register_ioport(isa, &s->ioport, 0x4000);
+}
+
+static void chihiro_lpc_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    dc->realize = chihiro_lpc_realize;
+    dc->desc = "Chihiro LPC";
+}
+
+static const TypeInfo chihiro_lpc_info = {
+    .name          = "chihiro-lpc",
+    .parent        = TYPE_ISA_DEVICE,
+    .instance_size = sizeof(ChihiroLPCState),
+    .class_init    = chihiro_lpc_class_initfn,
+};
+
+static void chihiro_register_types(void)
+{
+    type_register_static(&chihiro_lpc_info);
+}
+
+type_init(chihiro_register_types)
+
+
+/* The chihiro baseboard communicates with the xbox by acting as an IDE
+ * device. The device maps the boot rom from the mediaboard, a communication
+ * area for interfacing with the network board, and the ram on the baseboard.
+ * The baseboard ram is populated at boot from the gd-rom drive on the
+ * mediaboard containing something like a combined disc+hdd image.
+ */
+
+#define FILESYSTEM_START         0
+#define ROM_START                0x8000000
+#define ROM_SECTORS              0x2000
+#define COMMUNICATION_START      0x9000000
+#define COMMUNICATION_SECTORS    0x10000
+#define SECTOR_SIZE              512
+
+static void chihiro_ide_interface_init(const char *rom_file,
+                                       const char *filesystem_file)
+{
+    if (drive_get(IF_IDE, 0, 1)) {
+        fprintf(stderr, "chihiro ide interface needs to be attached "
+                        "to IDE device 1 but it's already in use.\n");
+        exit(1);
+    }
+
+    MemoryRegion *interface, *rom, *filesystem;
+    interface = g_malloc(sizeof(*interface));
+    memory_region_init(interface, NULL, "chihiro.interface",
+                       (uint64_t)0x10000000 * SECTOR_SIZE);
+
+    rom = g_malloc(sizeof(*rom));
+    memory_region_init_ram(rom, NULL, "chihiro.interface.rom",
+                           ROM_SECTORS * SECTOR_SIZE);
+    memory_region_add_subregion(interface,
+                                (uint64_t)ROM_START * SECTOR_SIZE, rom);
+
+
+    /* limited by the size of the board ram, which we emulate as 128M for now */
+    filesystem = g_malloc(sizeof(*filesystem));
+    memory_region_init_ram(filesystem, NULL, "chihiro.interface.filesystem",
+                           128 * 1024 * 1024);
+    memory_region_add_subregion(interface,
+                                (uint64_t)FILESYSTEM_START * SECTOR_SIZE,
+                                filesystem);
+
+    /* Address space over the whole interface; handed to bdrv_memory_open(). */
+    AddressSpace *interface_space;
+    interface_space = g_malloc(sizeof(*interface_space));
+    address_space_init(interface_space, interface, "chihiro-interface");
+
+    /* read files */
+    int rc, fd = -1;
+
+    if (!rom_file) rom_file = "fpr21042_m29w160et.bin";
+    char *rom_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, rom_file);
+    if (rom_filename) {
+        int rom_size = get_image_size(rom_filename);
+        assert(rom_size < memory_region_size(rom));
+
+        fd = open(rom_filename, O_RDONLY | O_BINARY);
+        assert(fd != -1);
+        rc = read(fd, memory_region_get_ram_ptr(rom), rom_size);
+        assert(rc == rom_size);
+        close(fd);
+    }
+
+    /* The filesystem image must land in the filesystem RAM, not the rom. */
+    if (filesystem_file) {
+        assert(access(filesystem_file, R_OK) == 0);
+
+        int filesystem_size = get_image_size(filesystem_file);
+        assert(filesystem_size < memory_region_size(filesystem));
+
+        fd = open(filesystem_file, O_RDONLY | O_BINARY);
+        assert(fd != -1);
+        rc = read(fd, memory_region_get_ram_ptr(filesystem), filesystem_size);
+        assert(rc == filesystem_size);
+        close(fd);
+    }
+
+    /* create the device */
+    DriveInfo *dinfo;
+    dinfo = g_malloc0(sizeof(*dinfo));
+    dinfo->id = g_strdup("chihiro-interface");
+    dinfo->bdrv = bdrv_new(dinfo->id);
+    dinfo->type = IF_IDE;
+    dinfo->bus = 0;
+    dinfo->unit = 1;
+    dinfo->refcount = 1;
+
+    assert(!bdrv_memory_open(dinfo->bdrv, interface_space,
+                             memory_region_size(interface)));
+
+    drive_append(dinfo);
+}
+
+static void chihiro_init(QEMUMachineInitArgs *args)
+{
+    /* Placeholder blank eeprom for chihiro:
+     *   Serial number 000000000000
+     *   Mac address 00:00:00:00:00:00
+     *   ...etc.
+     */
+    const uint8_t eeprom[] = {
+        0xA7, 0x65, 0x60, 0x76, 0xB7, 0x2F, 0xFE, 0xD8,
+        0x20, 0xBC, 0x8B, 0x15, 0x13, 0xBF, 0x73, 0x9C,
+        0x8C, 0x3F, 0xD8, 0x07, 0x75, 0x55, 0x5F, 0x8B,
+        0x09, 0xD1, 0x25, 0xD1, 0x1A, 0xA2, 0xD5, 0xB7,
+        0x01, 0x7D, 0x9A, 0x31, 0xCD, 0x9C, 0x83, 0x6B,
+        0x2C, 0xAB, 0xAD, 0x6F, 0xAC, 0x36, 0xDE, 0xEF,
+        0x6F, 0x6E, 0x2F, 0x6F, 0x30, 0x30, 0x30, 0x30,
+        0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    };
+
+    QemuOpts *machine_opts = qemu_opts_find(qemu_find_opts("machine"), NULL);
+    if (machine_opts) {
+        const char *mediaboard_rom_file =
+            qemu_opt_get(machine_opts, "mediaboard_rom");
+        const char *mediaboard_filesystem_file =
+            qemu_opt_get(machine_opts, "mediaboard_filesystem");
+        
+        if (mediaboard_rom_file || mediaboard_filesystem_file) {
+            chihiro_ide_interface_init(mediaboard_rom_file,
+                                       mediaboard_filesystem_file);
+        }
+    }
+
+    ISABus *isa_bus;
+    xbox_init_common(args, (uint8_t*)eeprom, &isa_bus);
+
+    isa_create_simple(isa_bus, "chihiro-lpc");
+}
+
+
+static QEMUMachine chihiro_machine = {
+    .name = "chihiro",
+    .desc = "Sega Chihiro",
+    .init = chihiro_init,
+    .max_cpus = 1,
+    .no_floppy = 1,
+    .no_cdrom = 1,
+    .no_sdcard = 1,
+    PC_DEFAULT_MACHINE_OPTIONS
+};
+
+static void chihiro_machine_init(void) {
+    qemu_register_machine(&chihiro_machine);
+}
+machine_init(chihiro_machine_init);
diff --git a/hw/xbox/dsp/Makefile.objs b/hw/xbox/dsp/Makefile.objs
new file mode 100644
index 0000000000..93a180dee6
--- /dev/null
+++ b/hw/xbox/dsp/Makefile.objs
@@ -0,0 +1 @@
+obj-y += dsp.o dsp_cpu.o dsp_dma.o
diff --git a/hw/xbox/dsp/dsp.c b/hw/xbox/dsp/dsp.c
new file mode 100644
index 0000000000..c3b86b18aa
--- /dev/null
+++ b/hw/xbox/dsp/dsp.c
@@ -0,0 +1,476 @@
+/*
+ * MCPX DSP emulator
+ *
+ * Copyright (c) 2015 espes
+ *
+ * Adapted from Hatari DSP M56001 emulation
+ * (C) 2001-2008 ARAnyM developer team
+ * Adaption to Hatari (C) 2008 by Thomas Huth
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+
+#include "qemu-common.h"
+
+#include "dsp_cpu.h"
+#include "dsp_dma.h"
+
+#include "dsp.h"
+
+/* Defines */
+#define BITMASK(x)  ((1<<(x))-1)
+#define ARRAYSIZE(x) (int)(sizeof(x)/sizeof(x[0]))
+
+#define INTERRUPT_ABORT_FRAME (1 << 0)
+#define INTERRUPT_START_FRAME (1 << 1)
+#define INTERRUPT_DMA_EOL (1 << 7)
+
+#define DPRINTF(s, ...) printf(s, ## __VA_ARGS__)
+
+struct DSPState {
+    dsp_core_t core;
+    DSPDMAState dma;
+    int save_cycles;
+
+    uint32_t interrupts;
+};
+
+static uint32_t read_peripheral(dsp_core_t* core, uint32_t address);
+static void write_peripheral(dsp_core_t* core, uint32_t address, uint32_t value);
+
+DSPState* dsp_init(void* scratch_rw_opaque, dsp_scratch_rw_func scratch_rw)
+{
+    DPRINTF("dsp_init\n");
+    /* Allocate a zeroed state; abort rather than dereference NULL on OOM. */
+    DSPState* dsp = (DSPState*)calloc(1, sizeof(*dsp));
+    if (dsp == NULL) abort();
+
+    dsp->core.read_peripheral = read_peripheral;
+    dsp->core.write_peripheral = write_peripheral;
+
+    dsp->dma.core = &dsp->core;
+    dsp->dma.scratch_rw_opaque = scratch_rw_opaque;
+    dsp->dma.scratch_rw = scratch_rw;
+
+    dsp_reset(dsp);
+
+    return dsp;
+}
+
+void dsp_reset(DSPState* dsp)
+{
+    dsp56k_reset_cpu(&dsp->core);
+    dsp->save_cycles = 0;
+}
+
+void dsp_destroy(DSPState* dsp)
+{
+    free(dsp);
+}
+
+static uint32_t read_peripheral(dsp_core_t* core, uint32_t address) {
+    DSPState* dsp = container_of(core, DSPState, core);
+
+    // printf("read_peripheral 0x%06x\n", address);
+
+    uint32_t v = 0xababa; /* returned as-is for addresses not handled below */
+    switch(address) {
+    case 0xFFFFC5: /* pending interrupt bits, with the DMA EOL flag folded in */
+        v = dsp->interrupts;
+        if (dsp->dma.eol) {
+            v |= INTERRUPT_DMA_EOL;
+        }
+        break;
+    case 0xFFFFD4: /* 0xFFFFD4..0xFFFFD7 forward to the DMA register file */
+        v = dsp_dma_read(&dsp->dma, DMA_NEXT_BLOCK);
+        break;
+    case 0xFFFFD5:
+        v = dsp_dma_read(&dsp->dma, DMA_START_BLOCK);
+        break;
+    case 0xFFFFD6:
+        v = dsp_dma_read(&dsp->dma, DMA_CONTROL);
+        break;
+    case 0xFFFFD7:
+        v = dsp_dma_read(&dsp->dma, DMA_CONFIGURATION);
+        break;
+    }
+
+    // printf(" -> 0x%06x\n", v);
+    return v;
+}
+
+static void write_peripheral(dsp_core_t* core, uint32_t address, uint32_t value) {
+    DSPState* dsp = container_of(core, DSPState, core);
+
+    // printf("write_peripheral [0x%06x] = 0x%06x\n", address, value);
+
+    switch(address) { /* writes to any other address are ignored */
+    case 0xFFFFC5: /* each 1 bit written clears that pending interrupt bit */
+        dsp->interrupts &= ~value;
+        if (value & INTERRUPT_DMA_EOL) {
+            dsp->dma.eol = false;
+        }
+        break;
+    case 0xFFFFD4: /* 0xFFFFD4..0xFFFFD7 forward to the DMA register file */
+        dsp_dma_write(&dsp->dma, DMA_NEXT_BLOCK, value);
+        break;
+    case 0xFFFFD5:
+        dsp_dma_write(&dsp->dma, DMA_START_BLOCK, value);
+        break;
+    case 0xFFFFD6:
+        dsp_dma_write(&dsp->dma, DMA_CONTROL, value);
+        break;
+    case 0xFFFFD7:
+        dsp_dma_write(&dsp->dma, DMA_CONFIGURATION, value);
+        break;
+    }
+}
+
+
+void dsp_step(DSPState* dsp)
+{
+    dsp56k_execute_instruction(&dsp->core);
+}
+
+void dsp_run(DSPState* dsp, int cycles)
+{
+    dsp->save_cycles += cycles;
+
+    if (dsp->save_cycles <= 0) return;
+
+    // if (unlikely(bDspDebugging)) {
+    //     while (dsp->core.save_cycles > 0)
+    //     {
+    //         dsp56k_execute_instruction();
+    //         dsp->core.save_cycles -= dsp->core.instr_cycle;
+    //         DebugDsp_Check();
+    //     }
+    // } else {
+    //  printf("--> %d\n", dsp->core.save_cycles);
+    while (dsp->save_cycles > 0)
+    {
+        dsp56k_execute_instruction(&dsp->core);
+        dsp->save_cycles -= dsp->core.instr_cycle;
+    }
+
+} 
+
+void dsp_bootstrap(DSPState* dsp)
+{
+    // scratch memory is dma'd in to pram by the bootrom
+    dsp->dma.scratch_rw(dsp->dma.scratch_rw_opaque,
+        (uint8_t*)dsp->core.pram, 0, 0x800*4, false);
+}
+
+void dsp_start_frame(DSPState* dsp)
+{
+    dsp->interrupts |= INTERRUPT_START_FRAME;
+}
+
+/**
+ * Disassemble DSP code between given addresses, return next PC address
+ */
+uint32_t dsp_disasm_address(DSPState* dsp, FILE *out, uint32_t lowerAdr, uint32_t UpperAdr)
+{
+    uint32_t dsp_pc;
+
+    for (dsp_pc=lowerAdr; dsp_pc<=UpperAdr; dsp_pc++) {
+        dsp_pc += dsp56k_execute_one_disasm_instruction(&dsp->core, out, dsp_pc);
+    }
+    return dsp_pc;
+}
+
+uint32_t dsp_read_memory(DSPState* dsp, char space_id, uint32_t address)
+{
+    int space;
+
+    switch (space_id) {
+    case 'X':
+        space = DSP_SPACE_X;
+        break;
+    case 'Y':
+        space = DSP_SPACE_Y;
+        break;
+    case 'P':
+        space = DSP_SPACE_P;
+        break;
+    default:
+        assert(false);
+    }
+
+    return dsp56k_read_memory(&dsp->core, space, address);
+}
+
+
+/**
+ * Output memory values between given addresses in given DSP address space.
+ * Return next DSP address value.
+ */
+uint32_t dsp_disasm_memory(DSPState* dsp, uint32_t dsp_memdump_addr, uint32_t dsp_memdump_upper, char space)
+{
+    uint32_t mem, value;
+
+    for (mem = dsp_memdump_addr; mem <= dsp_memdump_upper; mem++) {
+        value = dsp_read_memory(dsp, space, mem);
+        printf("%04x  %06x\n", mem, value);
+    }
+    return dsp_memdump_upper+1;
+}
+
+/**
+ * Show information on DSP core state which isn't
+ * shown by any of the other commands (dd, dm, dr).
+ */
+void dsp_info(DSPState* dsp)
+{
+    int i, j;
+    const char *stackname[] = { "SSH", "SSL" };
+
+    printf("DSP core information:\n");
+
+    for (i = 0; i < ARRAYSIZE(stackname); i++) {
+        printf("- %s stack:", stackname[i]);
+        for (j = 0; j < ARRAYSIZE(dsp->core.stack[0]); j++) {
+            printf(" %04x", dsp->core.stack[i][j]);
+        }
+        printf("\n");
+    }
+
+    printf("- Interrupt IPL:");
+    for (i = 0; i < ARRAYSIZE(dsp->core.interrupt_ipl); i++) {
+        printf(" %04x", dsp->core.interrupt_ipl[i]);
+    }
+    printf("\n");
+
+    printf("- Pending ints: ");
+    for (i = 0; i < ARRAYSIZE(dsp->core.interrupt_is_pending); i++) {
+        printf(" %04hx", dsp->core.interrupt_is_pending[i]);
+    }
+    printf("\n");
+}
+
+/**
+ * Show DSP register contents
+ */
+void dsp_print_registers(DSPState* dsp)
+{
+    uint32_t i;
+
+    printf("A: A2: %02x  A1: %06x  A0: %06x\n",
+        dsp->core.registers[DSP_REG_A2], dsp->core.registers[DSP_REG_A1], dsp->core.registers[DSP_REG_A0]);
+    printf("B: B2: %02x  B1: %06x  B0: %06x\n",
+        dsp->core.registers[DSP_REG_B2], dsp->core.registers[DSP_REG_B1], dsp->core.registers[DSP_REG_B0]);
+    
+    printf("X: X1: %06x  X0: %06x\n", dsp->core.registers[DSP_REG_X1], dsp->core.registers[DSP_REG_X0]);
+    printf("Y: Y1: %06x  Y0: %06x\n", dsp->core.registers[DSP_REG_Y1], dsp->core.registers[DSP_REG_Y0]);
+
+    for (i=0; i<8; i++) {
+        printf("R%01x: %04x   N%01x: %04x   M%01x: %04x\n", 
+            i, dsp->core.registers[DSP_REG_R0+i],
+            i, dsp->core.registers[DSP_REG_N0+i],
+            i, dsp->core.registers[DSP_REG_M0+i]);
+    }
+
+    printf("LA: %04x   LC: %04x   PC: %04x\n", dsp->core.registers[DSP_REG_LA], dsp->core.registers[DSP_REG_LC], dsp->core.pc);
+    printf("SR: %04x  OMR: %02x\n", dsp->core.registers[DSP_REG_SR], dsp->core.registers[DSP_REG_OMR]);
+    printf("SP: %02x    SSH: %04x  SSL: %04x\n", 
+        dsp->core.registers[DSP_REG_SP], dsp->core.registers[DSP_REG_SSH], dsp->core.registers[DSP_REG_SSL]);
+}
+
+
+/**
+ * Get given DSP register address and required bit mask.
+ * Works for A0-2, B0-2, LA, LC, M0-7, N0-7, R0-7, X0-1, Y0-1, PC, SR, SP,
+ * OMR, SSH & SSL registers, but note that the SP, SSH & SSL registers
+ * need special handling (in DSP*SetRegister()) when they are set.
+ * Return the register width in bits or zero for an error.
+ */
+int dsp_get_register_address(DSPState* dsp, const char *regname, uint32_t **addr, uint32_t *mask)
+{
+#define MAX_REGNAME_LEN 4
+    typedef struct {
+        const char name[MAX_REGNAME_LEN];
+        uint32_t *addr;
+        size_t bits;
+        uint32_t mask;
+    } reg_addr_t;
+
+    /* MUST stay sorted by name (byte order): the lookup below bisects it */
+    const reg_addr_t registers[] = {
+
+        /* 56-bit A register */
+        { "A0",  &dsp->core.registers[DSP_REG_A0],  32, BITMASK(24) },
+        { "A1",  &dsp->core.registers[DSP_REG_A1],  32, BITMASK(24) },
+        { "A2",  &dsp->core.registers[DSP_REG_A2],  32, BITMASK(8) },
+
+        /* 56-bit B register */
+        { "B0",  &dsp->core.registers[DSP_REG_B0],  32, BITMASK(24) },
+        { "B1",  &dsp->core.registers[DSP_REG_B1],  32, BITMASK(24) },
+        { "B2",  &dsp->core.registers[DSP_REG_B2],  32, BITMASK(8) },
+
+        /* 16-bit LA & LC registers */
+        { "LA",  &dsp->core.registers[DSP_REG_LA],  32, BITMASK(16) },
+        { "LC",  &dsp->core.registers[DSP_REG_LC],  32, BITMASK(16) },
+
+        /* 16-bit M registers */
+        { "M0",  &dsp->core.registers[DSP_REG_M0],  32, BITMASK(16) },
+        { "M1",  &dsp->core.registers[DSP_REG_M1],  32, BITMASK(16) },
+        { "M2",  &dsp->core.registers[DSP_REG_M2],  32, BITMASK(16) },
+        { "M3",  &dsp->core.registers[DSP_REG_M3],  32, BITMASK(16) },
+        { "M4",  &dsp->core.registers[DSP_REG_M4],  32, BITMASK(16) },
+        { "M5",  &dsp->core.registers[DSP_REG_M5],  32, BITMASK(16) },
+        { "M6",  &dsp->core.registers[DSP_REG_M6],  32, BITMASK(16) },
+        { "M7",  &dsp->core.registers[DSP_REG_M7],  32, BITMASK(16) },
+
+        /* 16-bit N registers */
+        { "N0",  &dsp->core.registers[DSP_REG_N0],  32, BITMASK(16) },
+        { "N1",  &dsp->core.registers[DSP_REG_N1],  32, BITMASK(16) },
+        { "N2",  &dsp->core.registers[DSP_REG_N2],  32, BITMASK(16) },
+        { "N3",  &dsp->core.registers[DSP_REG_N3],  32, BITMASK(16) },
+        { "N4",  &dsp->core.registers[DSP_REG_N4],  32, BITMASK(16) },
+        { "N5",  &dsp->core.registers[DSP_REG_N5],  32, BITMASK(16) },
+        { "N6",  &dsp->core.registers[DSP_REG_N6],  32, BITMASK(16) },
+        { "N7",  &dsp->core.registers[DSP_REG_N7],  32, BITMASK(16) },
+
+        { "OMR", &dsp->core.registers[DSP_REG_OMR], 32, 0x5f },
+
+        /* 24-bit program counter */
+        { "PC",  (uint32_t*)(&dsp->core.pc),  24, BITMASK(24) },
+
+        /* 16-bit DSP R (address) registers */
+        { "R0",  &dsp->core.registers[DSP_REG_R0],  32, BITMASK(16) },
+        { "R1",  &dsp->core.registers[DSP_REG_R1],  32, BITMASK(16) },
+        { "R2",  &dsp->core.registers[DSP_REG_R2],  32, BITMASK(16) },
+        { "R3",  &dsp->core.registers[DSP_REG_R3],  32, BITMASK(16) },
+        { "R4",  &dsp->core.registers[DSP_REG_R4],  32, BITMASK(16) },
+        { "R5",  &dsp->core.registers[DSP_REG_R5],  32, BITMASK(16) },
+        { "R6",  &dsp->core.registers[DSP_REG_R6],  32, BITMASK(16) },
+        { "R7",  &dsp->core.registers[DSP_REG_R7],  32, BITMASK(16) },
+
+        /* stack pointer and 16-bit status register ("SP" < "SR" < "SSH") */
+        { "SP",  &dsp->core.registers[DSP_REG_SP],  32, BITMASK(6) },
+        { "SR",  &dsp->core.registers[DSP_REG_SR],  32, 0xefff },
+
+        { "SSH", &dsp->core.registers[DSP_REG_SSH], 32, BITMASK(16) },
+        { "SSL", &dsp->core.registers[DSP_REG_SSL], 32, BITMASK(16) },
+
+        /* 48-bit X register */
+        { "X0",  &dsp->core.registers[DSP_REG_X0],  32, BITMASK(24) },
+        { "X1",  &dsp->core.registers[DSP_REG_X1],  32, BITMASK(24) },
+
+        /* 48-bit Y register */
+        { "Y0",  &dsp->core.registers[DSP_REG_Y0],  32, BITMASK(24) },
+        { "Y1",  &dsp->core.registers[DSP_REG_Y1],  32, BITMASK(24) }
+    };
+    /* left, right, middle, direction */
+    int l, r, m, dir = 0;
+    unsigned int i;
+    char reg[MAX_REGNAME_LEN] = { 0 };
+
+    for (i = 0; i < sizeof(reg) && regname[i]; i++) {
+        reg[i] = toupper((unsigned char)regname[i]);
+    }
+    if (i < 2 || regname[i]) {
+        /* too short or longer than any of the names */
+        return 0;
+    }
+    /* reg is NUL-padded like the table names, so comparing every byte
+     * gives an exact match instead of accepting prefixes such as "OM" */
+    /* bisect */
+    l = 0;
+    r = ARRAYSIZE(registers) - 1;
+    do {
+        m = (l+r) >> 1;
+        for (i = 0; i < MAX_REGNAME_LEN; i++) {
+            dir = (int)(unsigned char)reg[i] - (unsigned char)registers[m].name[i];
+            if (dir) {
+                break;
+            }
+        }
+        if (dir == 0) {
+            *addr = registers[m].addr;
+            *mask = registers[m].mask;
+            return registers[m].bits;
+        }
+        if (dir < 0) {
+            r = m-1;
+        } else {
+            l = m+1;
+        }
+    } while (l <= r);
+#undef MAX_REGNAME_LEN
+    return 0;
+}
+
+
+/**
+ * Set given DSP register value, return false if unknown register given
+ */
+bool dsp_disasm_set_register(DSPState* dsp, const char *arg, uint32_t value)
+{
+    uint32_t *addr, mask, sp_value;
+    int bits;
+
+    /* first check registers needing special handling... */
+    if (arg[0]=='S' || arg[0]=='s') {
+        if (arg[1]=='P' || arg[1]=='p') {
+            dsp->core.registers[DSP_REG_SP] = value & BITMASK(6);
+            value &= BITMASK(4);
+            dsp->core.registers[DSP_REG_SSH] = dsp->core.stack[0][value];
+            dsp->core.registers[DSP_REG_SSL] = dsp->core.stack[1][value];
+            return true;
+        }
+        if (arg[1]=='S' || arg[1]=='s') {
+            sp_value = dsp->core.registers[DSP_REG_SP] & BITMASK(4);
+            if (arg[2]=='H' || arg[2]=='h') {
+                if (sp_value == 0) {
+                    dsp->core.registers[DSP_REG_SSH] = 0;
+                    dsp->core.stack[0][sp_value] = 0;
+                } else {
+                    dsp->core.registers[DSP_REG_SSH] = value & BITMASK(16);
+                    dsp->core.stack[0][sp_value] = value & BITMASK(16);
+                }
+                return true;
+            }
+            if (arg[2]=='L' || arg[2]=='l') {
+                if (sp_value == 0) {
+                    dsp->core.registers[DSP_REG_SSL] = 0;
+                    dsp->core.stack[1][sp_value] = 0;
+                } else {
+                    dsp->core.registers[DSP_REG_SSL] = value & BITMASK(16);
+                    dsp->core.stack[1][sp_value] = value & BITMASK(16);
+                }
+                return true;
+            }
+        }
+    }
+
+    /* ...then registers where address & mask are enough */
+    bits = dsp_get_register_address(dsp, arg, &addr, &mask);
+    switch (bits) {
+    case 32:
+        *addr = value & mask;
+        return true;
+    case 24: /* PC; assumes core.pc is 32 bits wide, as the table's cast implies */
+        *addr = value & mask;
+        return true;
+    }
+    return false;
+}
diff --git a/hw/xbox/dsp/dsp.h b/hw/xbox/dsp/dsp.h
new file mode 100644
index 0000000000..94100f882b
--- /dev/null
+++ b/hw/xbox/dsp/dsp.h
@@ -0,0 +1,59 @@
+/*
+ * MCPX DSP emulator
+
+ * Copyright (c) 2015 espes
+
+ * Adapted from Hatari DSP M56001 emulation
+ * (C) 2001-2008 ARAnyM developer team
+ * Adaption to Hatari (C) 2008 by Thomas Huth
+
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef DSP_H
+#define DSP_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdio.h>
+
+typedef struct DSPState DSPState;
+
+typedef void (*dsp_scratch_rw_func)(
+    void* opaque, uint8_t* ptr, uint32_t addr, size_t len, bool dir);
+
+/* Dsp commands */
+DSPState* dsp_init(void* scratch_rw_opaque, dsp_scratch_rw_func scratch_rw);
+void dsp_destroy(DSPState* dsp);
+void dsp_reset(DSPState* dsp);
+
+void dsp_step(DSPState* dsp);
+void dsp_run(DSPState* dsp, int cycles);
+
+void dsp_bootstrap(DSPState* dsp);
+void dsp_start_frame(DSPState* dsp);
+
+
+/* Dsp Debugger commands */
+uint32_t dsp_read_memory(DSPState* dsp, char space, uint32_t addr);
+uint32_t dsp_disasm_memory(DSPState* dsp, uint32_t dsp_memdump_addr, uint32_t dsp_memdump_upper, char space);
+uint32_t dsp_disasm_address(DSPState* dsp, FILE *out, uint32_t lowerAdr, uint32_t UpperAdr);
+void dsp_info(DSPState* dsp);
+void dsp_print_registers(DSPState* dsp);
+int dsp_get_register_address(DSPState* dsp, const char *arg, uint32_t **addr, uint32_t *mask);
+bool dsp_disasm_set_register(DSPState* dsp, const char *arg, uint32_t value);
+
+
+#endif /* DSP_H */
diff --git a/hw/xbox/dsp/dsp_cpu.c b/hw/xbox/dsp/dsp_cpu.c
new file mode 100644
index 0000000000..ff00e543d1
--- /dev/null
+++ b/hw/xbox/dsp/dsp_cpu.c
@@ -0,0 +1,1431 @@
+/*
+ * DSP56300 emulator
+ *
+ * Copyright (c) 2015 espes
+ *
+ * Adapted from Hatari DSP M56001 emulation
+ * (C) 2003-2008 ARAnyM developer team
+ * Adaption to Hatari (C) 2008 by Thomas Huth
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <stdbool.h>
+#include <string.h>
+#include <assert.h>
+
+#include "dsp_cpu.h"
+
+#define TRACE_DSP_DISASM 0      /* nonzero: dsp56k_execute_instruction() prints each instruction's disassembly */
+#define TRACE_DSP_DISASM_REG 0  /* nonzero (together with TRACE_DSP_DISASM): also print registers the instruction changed */
+#define TRACE_DSP_DISASM_MEM 0  /* nonzero (together with TRACE_DSP_DISASM): also print the recorded memory changes */
+/* printf wrapper; ", ## __VA_ARGS__" is the GNU form that tolerates an empty argument list. */
+#define DPRINTF(s, ...) printf(s, ## __VA_ARGS__)
+/* BITMASK(x): low x bits set (int arithmetic, so keep x below 31). ARRAYSIZE(x): element count of a real array. */
+#define BITMASK(x)  ((1<<(x))-1)
+#define ARRAYSIZE(x) (sizeof(x)/sizeof(x[0]))
+
+// #define DSP_COUNT_IPS     /* Count instruction per seconds */
+
+
+/**********************************
+ *  Defines
+ **********************************/
+
+#define SIGN_PLUS  0
+#define SIGN_MINUS 1
+
+/**********************************
+ *  Functions
+ **********************************/
+
+static void dsp_postexecute_update_pc(dsp_core_t* dsp);
+static void dsp_postexecute_interrupts(dsp_core_t* dsp);
+
+static uint32_t read_memory_p(dsp_core_t* dsp, uint32_t address);
+static uint32_t read_memory_disasm(dsp_core_t* dsp, int space, uint32_t address);
+
+static void write_memory_raw(dsp_core_t* dsp, int space, uint32_t address, uint32_t value);
+static void write_memory_disasm(dsp_core_t* dsp, int space, uint32_t address, uint32_t value);
+
+static void dsp_write_reg(dsp_core_t* dsp, uint32_t numreg, uint32_t value); 
+
+static void dsp_stack_push(dsp_core_t* dsp, uint32_t curpc, uint32_t cursr, uint16_t sshOnly);
+static void dsp_stack_pop(dsp_core_t* dsp, uint32_t *curpc, uint32_t *cursr);
+static void dsp_compute_ssh_ssl(dsp_core_t* dsp);
+
+/* 56bits arithmetic */
+static uint16_t dsp_abs56(uint32_t *dest);
+static uint16_t dsp_asl56(uint32_t *dest, int n);
+static uint16_t dsp_asr56(uint32_t *dest, int n);
+static uint16_t dsp_add56(uint32_t *source, uint32_t *dest);
+static uint16_t dsp_sub56(uint32_t *source, uint32_t *dest);
+static void dsp_mul56(uint32_t source1, uint32_t source2, uint32_t *dest, uint8_t signe);
+static void dsp_rnd56(dsp_core_t* dsp, uint32_t *dest);
+static uint32_t dsp_signextend(int bits, uint32_t v);
+
+static const dsp_interrupt_t dsp_interrupt[12] = {  /* NOTE(review): columns appear to be {interrupt id, vector address, source group, display name} -- confirm against dsp_interrupt_t */
+    {DSP_INTER_RESET    ,   0x00, 0, "Reset"},
+    {DSP_INTER_ILLEGAL  ,   0x3e, 0, "Illegal"},
+    {DSP_INTER_STACK_ERROR  ,   0x02, 0, "Stack Error"},
+    {DSP_INTER_TRACE    ,   0x04, 0, "Trace"},
+    {DSP_INTER_SWI      ,   0x06, 0, "Swi"},
+    {DSP_INTER_HOST_COMMAND ,   0xff, 1, "Host Command"},
+    {DSP_INTER_HOST_RCV_DATA,   0x20, 1, "Host receive"},
+    {DSP_INTER_HOST_TRX_DATA,   0x22, 1, "Host transmit"},
+    {DSP_INTER_SSI_RCV_DATA_E,  0x0e, 2, "SSI receive with exception"},
+    {DSP_INTER_SSI_RCV_DATA ,   0x0c, 2, "SSI receive"},
+    {DSP_INTER_SSI_TRX_DATA_E,  0x12, 2, "SSI transmit with exception"},
+    {DSP_INTER_SSI_TRX_DATA ,   0x10, 2, "SSI transmit"}
+};
+
+static const int registers_tcc[16][2] = { /* 16 register pairs; NOTE(review): presumably looked up by the tcc handlers, and which column is source vs. destination is not visible here -- confirm */
+    {DSP_REG_B,DSP_REG_A},
+    {DSP_REG_A,DSP_REG_B},
+    {DSP_REG_NULL,DSP_REG_NULL},
+    {DSP_REG_NULL,DSP_REG_NULL},
+    /* entries 4-7: DSP_REG_NULL in both columns */
+    {DSP_REG_NULL,DSP_REG_NULL},
+    {DSP_REG_NULL,DSP_REG_NULL},
+    {DSP_REG_NULL,DSP_REG_NULL},
+    {DSP_REG_NULL,DSP_REG_NULL},
+    /* entries 8-11: X0 or Y0 paired with A or B */
+    {DSP_REG_X0,DSP_REG_A},
+    {DSP_REG_X0,DSP_REG_B},
+    {DSP_REG_Y0,DSP_REG_A},
+    {DSP_REG_Y0,DSP_REG_B},
+    /* entries 12-15: X1 or Y1 paired with A or B */
+    {DSP_REG_X1,DSP_REG_A},
+    {DSP_REG_X1,DSP_REG_B},
+    {DSP_REG_Y1,DSP_REG_A},
+    {DSP_REG_Y1,DSP_REG_B}
+};
+
+static const int registers_mask[64] = { /* 64 values, four per row; NOTE(review): presumably per-register bit widths indexed by DSP_REG_* -- confirm against the users in dsp_emu.inl */
+    0, 0, 0, 0,
+    24, 24, 24, 24,
+    24, 24, 8, 8,
+    24, 24, 24, 24,
+    /* entries 16-31 */
+    16, 16, 16, 16,
+    16, 16, 16, 16,
+    16, 16, 16, 16,
+    16, 16, 16, 16,
+    /* entries 32-47 */
+    16, 16, 16, 16,
+    16, 16, 16, 16,
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    /* entries 48-63 */
+    0, 0, 0, 0,
+    0, 0, 0, 0,
+    0, 16, 8, 6,
+    16, 16, 16, 16
+};
+
+#include "dsp_emu.inl"
+
+#include "dsp_dis.inl"
+
+typedef bool (*match_func_t)(uint32_t op); /* extra predicate applied to an opcode word after the bit-pattern test */
+
+typedef struct OpcodeEntry {
+    const char* template;    /* 24 chars, most significant bit first: '0'/'1' are fixed bits, any other char is a don't-care */
+    const char* name;        /* mnemonic text; used as the disassembly when dis_func is NULL */
+    dis_func_t dis_func;     /* disassembler callback, may be NULL */
+    emu_func_t emu_func;     /* emulation callback, may be NULL (then emu_undefined() is run instead) */
+    match_func_t match_func; /* optional; lookup skips the entry when this returns false */
+} OpcodeEntry;
+
+static bool match_MMMRRR(uint32_t op)
+{
+    /* Bits 13..11 hold the MMM field and bits 10..8 the RRR field.
+     * Every MMM value is accepted except 6, which is accepted only
+     * together with RRR equal to 0 or 4. */
+    const uint32_t mode = (op >> 11) & BITMASK(3);
+    const uint32_t reg = (op >> 8) & BITMASK(3);
+    return mode != 0x6 || reg == 0x0 || reg == 0x4;
+}
+
+static const OpcodeEntry nonparallel_opcodes[] = { /* columns: 24-bit template, display name, disassembler, emulator, optional extra match predicate (omitted = NULL) */
+    { "0000000101iiiiii1000d000", "add #xx, D", dis_add_imm, emu_add_imm },
+    { "00000001010000001100d000", "add #xxxx, D", dis_add_long, emu_add_long },
+    { "0000000101iiiiii1000d110", "and #xx, D", dis_and_imm, emu_and_imm },
+    { "00000001010000001100d110", "and #xxxx, D", dis_and_long, emu_and_long },
+    { "00000000iiiiiiii101110EE", "andi #xx, D", dis_andi, emu_andi },
+    { "0000110000011101SiiiiiiD", "asl #ii, S2, D", dis_asl_imm, emu_asl_imm },
+    { "0000110000011110010SsssD", "asl S1, S2, D", NULL, NULL },
+    { "0000110000011100SiiiiiiD", "asr #ii, S2, D", dis_asr_imm, emu_asr_imm },
+    { "0000110000011110011SsssD", "asr S1, S2, D", NULL, NULL },
+    { "00001101000100000100CCCC", "bcc xxxx", dis_bcc_long, emu_bcc_long }, //??
+    { "00000101CCCC01aaaa0aaaaa", "bcc xxx", dis_bcc_imm, emu_bcc_imm },
+    { "0000110100011RRR0100CCCC", "bcc Rn", NULL, NULL },
+    { "0000101101MMMRRR0S00bbbb", "bchg #n, [X or Y]:ea", dis_bchg_ea, emu_bchg_ea, match_MMMRRR },
+    { "0000101100aaaaaa0S00bbbb", "bchg #n, [X or Y]:aa", dis_bchg_aa, emu_bchg_aa },
+    { "0000101110pppppp0S00bbbb", "bchg #n, [X or Y]:pp", dis_bchg_pp, emu_bchg_pp },
+    { "0000000101qqqqqq0S0bbbbb", "bchg #n, [X or Y]:qq", NULL, NULL },
+    { "0000101111DDDDDD010bbbbb", "bchg, #n, D", dis_bchg_reg, emu_bchg_reg },
+    { "0000101001MMMRRR0S00bbbb", "bclr #n, [X or Y]:ea", dis_bclr_ea, emu_bclr_ea, match_MMMRRR },
+    { "0000101000aaaaaa0S00bbbb", "bclr #n, [X or Y]:aa", dis_bclr_aa, emu_bclr_aa },
+    { "0000101010pppppp0S00bbbb", "bclr #n, [X or Y]:pp", dis_bclr_pp, emu_bclr_pp },
+    { "0000000100qqqqqq0S00bbbb", "bclr #n, [X or Y]:qq", NULL, NULL },
+    { "0000101011DDDDDD010bbbbb", "bclr #n, D", dis_bclr_reg, emu_bclr_reg },
+    { "000011010001000011000000", "bra xxxx", dis_bra_long, emu_bra_long },
+    { "00000101000011aaaa0aaaaa", "bra xxx", dis_bra_imm, emu_bra_imm },
+    { "0000110100011RRR11000000", "bra Rn", NULL, NULL },
+    { "0000110010MMMRRR0S0bbbbb", "brclr #n, [X or Y]:ea, xxxx", NULL, NULL, match_MMMRRR },
+    { "0000110010aaaaaa1S0bbbbb", "brclr #n, [X or Y]:aa, xxxx", NULL, NULL },
+    { "0000110011pppppp0S0bbbbb", "brclr #n, [X or Y]:pp, xxxx", dis_brclr_pp, emu_brclr_pp },
+    { "0000010010qqqqqq0S0bbbbb", "brclr #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000110011DDDDDD100bbbbb", "brclr #n, S, xxxx", dis_brclr_reg, emu_brclr_reg },
+    { "00000000000000100001CCCC", "brkcc", NULL, NULL },
+    { "0000110010MMMRRR0S1bbbbb", "brset #n, [X or Y]:ea, xxxx", NULL, NULL, match_MMMRRR },
+    { "0000110010aaaaaa1S1bbbbb", "brset #n, [X or Y]:aa, xxxx", NULL, NULL },
+    { "0000110011pppppp0S1bbbbb", "brset #n, [X or Y]:pp, xxxx", dis_brset_pp, emu_brset_pp },
+    { "0000010010qqqqqq0S1bbbbb", "brset #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000110011DDDDDD101bbbbb", "brset #n, S, xxxx", dis_brset_reg, emu_brset_reg },
+    { "00001101000100000000CCCC", "bscc xxxx", NULL, NULL },
+    { "00000101CCCC00aaaa0aaaaa", "bscc xxx", NULL, NULL },
+    { "0000110100011RRR0000CCCC", "bscc Rn", NULL, NULL },
+    { "0000110110MMMRRR0S0bbbbb", "bsclr #n, [X or Y]:ea, xxxx", NULL, NULL, match_MMMRRR },
+    { "0000110110aaaaaa1S0bbbbb", "bsclr #n, [X or Y]:aa, xxxx", NULL, NULL },
+    { "0000010010qqqqqq1S0bbbbb", "bsclr #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000110111pppppp0S0bbbbb", "bsclr #n, [X or Y]:pp, xxxx", NULL, NULL },
+    { "0000110111DDDDDD100bbbbb", "bsclr, #n, S, xxxx", NULL, NULL },
+    { "0000101001MMMRRR0S1bbbbb", "bset #n, [X or Y]:ea", dis_bset_ea, emu_bset_ea, match_MMMRRR },
+    { "0000101000aaaaaa0S1bbbbb", "bset #n, [X or Y]:aa", dis_bset_aa, emu_bset_aa },
+    { "0000101010pppppp0S1bbbbb", "bset #n, [X or Y]:pp", dis_bset_pp, emu_bset_pp },
+    { "0000000100qqqqqq0S1bbbbb", "bset #n, [X or Y]:qq", NULL, NULL },
+    { "0000101011DDDDDD011bbbbb", "bset, #n, D", dis_bset_reg, emu_bset_reg },
+    { "000011010001000010000000", "bsr xxxx", dis_bsr_long, emu_bsr_long },
+    { "00000101000010aaaa0aaaaa", "bsr xxx", dis_bsr_imm, emu_bsr_imm },
+    { "0000110100011RRR10000000", "bsr Rn", NULL, NULL },
+    { "0000110110MMMRRR0S1bbbbb", "bsset #n, [X or Y]:ea, xxxx", NULL, NULL, match_MMMRRR },
+    { "0000110110aaaaaa1S1bbbbb", "bsset #n, [X or Y]:aa, xxxx", NULL, NULL },
+    { "0000110111pppppp0S1bbbbb", "bsset #n, [X or Y]:pp, xxxx", NULL, NULL },
+    { "0000010010qqqqqq1S1bbbbb", "bsset #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000110111DDDDDD101bbbbb", "bsset #n, S, xxxx", NULL, NULL },
+    { "0000101101MMMRRR0S10bbbb", "btst #n, [X or Y]:ea", dis_btst_ea, emu_btst_ea, match_MMMRRR },
+    { "0000101100aaaaaa0S10bbbb", "btst #n, [X or Y]:aa", dis_btst_aa, emu_btst_aa },
+    { "0000101110pppppp0S10bbbb", "btst #n, [X or Y]:pp", dis_btst_pp, emu_btst_pp },
+    { "0000000101qqqqqq0S10bbbb", "btst #n, [X or Y]:qq", NULL, NULL },
+    { "0000101111DDDDDD0110bbbb", "btst #n, D", dis_btst_reg, emu_btst_reg },
+    { "0000110000011110000000SD", "clb S, D", NULL, NULL },
+    { "0000000101iiiiii1000d101", "cmp #xx, S2", dis_cmp_imm, emu_cmp_imm },
+    { "00000001010000001100d101", "cmp #xxxx, S2", dis_cmp_long, emu_cmp_long },
+    { "00001100000111111111gggd", "cmpu S1, S2", dis_cmpu, emu_cmpu },
+    { "000000000000001000000000", "debug", NULL, NULL },
+    { "00000000000000110000CCCC", "debugcc", NULL, NULL },
+    { "00000000000000000000101d", "dec D", NULL, NULL, /*dis_dec, emu_dec*/ },
+    { "000000011000000001JJd000", "div S, D", dis_div, emu_div },
+    { "000000010010010s1sdkQQQQ", "dmac S1, S2, D", NULL, NULL },
+    { "0000011001MMMRRR0S000000", "do [X or Y]:ea, expr", dis_do_ea, emu_do_ea, match_MMMRRR },
+    { "0000011000aaaaaa0S000000", "do [X or Y]:aa, expr", dis_do_aa, emu_do_aa },
+    { "00000110iiiiiiii1000hhhh", "do #xxx, expr", dis_do_imm, emu_do_imm },
+    { "0000011011DDDDDD00000000", "do S, expr", dis_do_reg, emu_do_reg },
+    { "000000000000001000000011", "do_f", NULL, NULL },
+    { "0000011001MMMRRR0S010000", "dor [X or Y]:ea, label", NULL, NULL, match_MMMRRR },
+    { "0000011000aaaaaa0S010000", "dor [X or Y]:aa, label", NULL, NULL },
+    { "00000110iiiiiiii1001hhhh", "dor #xxx, label", dis_dor_imm, emu_dor_imm },
+    { "0000011011DDDDDD00010000", "dor S, label", dis_dor_reg, emu_dor_reg },
+    { "000000000000001000000010", "dor_f", NULL, NULL },
+    { "000000000000000010001100", "enddo", NULL, emu_enddo },
+    { "0000000101iiiiii1000d011", "eor #xx, D", NULL, NULL },
+    { "00000001010000001100d011", "eor #xxxx, D", NULL, NULL },
+    { "0000110000011010000sSSSD", "extract S1, S2, D", NULL, NULL },
+    { "0000110000011000000s000D", "extract #CO, S2, D", NULL, NULL },
+    { "0000110000011010100sSSSD", "extractu S1, S2, D", NULL, NULL },
+    { "0000110000011000100s000D", "extractu #CO, S2, D", NULL, NULL },
+    { "000000000000000000000101", "ill", NULL, emu_illegal },
+    { "00000000000000000000100d", "inc D", NULL, NULL },
+    { "00001100000110110qqqSSSD", "insert S1, S2, D", NULL, NULL },
+    { "00001100000110010qqq000D", "insert #CO, S2, D", NULL, NULL },
+    { "00001110CCCCaaaaaaaaaaaa", "jcc xxx", dis_jcc_imm, emu_jcc_imm },
+    { "0000101011MMMRRR1010CCCC", "jcc ea", dis_jcc_ea, emu_jcc_ea, match_MMMRRR },
+    { "0000101001MMMRRR1S00bbbb", "jclr #n, [X or Y]:ea, xxxx", dis_jclr_ea, emu_jclr_ea, match_MMMRRR },
+    { "0000101000aaaaaa1S00bbbb", "jclr #n, [X or Y]:aa, xxxx", dis_jclr_aa, emu_jclr_aa },
+    { "0000101010pppppp1S00bbbb", "jclr #n, [X or Y]:pp, xxxx", dis_jclr_pp, emu_jclr_pp },
+    { "0000000110qqqqqq1S00bbbb", "jclr #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000101011DDDDDD0000bbbb", "jclr #n, S, xxxx", dis_jclr_reg, emu_jclr_reg },
+    { "0000101011MMMRRR10000000", "jmp ea", dis_jmp_ea, emu_jmp_ea, match_MMMRRR },
+    { "000011000000aaaaaaaaaaaa", "jmp xxx", dis_jmp_imm, emu_jmp_imm },
+    { "00001111CCCCaaaaaaaaaaaa", "jscc xxx", dis_jscc_imm, emu_jscc_imm },
+    { "0000101111MMMRRR1010CCCC", "jscc ea", dis_jscc_ea, emu_jscc_ea, match_MMMRRR },
+    { "0000101101MMMRRR1S00bbbb", "jsclr #n, [X or Y]:ea, xxxx", dis_jsclr_ea, emu_jsclr_ea, match_MMMRRR },
+    { "0000101100MMMRRR1S00bbbb", "jsclr #n, [X or Y]:aa, xxxx", dis_jsclr_aa, emu_jsclr_aa, match_MMMRRR }, /* NOTE(review): aa form, yet filtered by match_MMMRRR (most other aa entries are not) -- confirm */
+    { "0000101110pppppp1S0bbbbb", "jsclr #n, [X or Y]:pp, xxxx", dis_jsclr_pp, emu_jsclr_pp },
+    { "0000000111qqqqqq1S0bbbbb", "jsclr #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000101111DDDDDD000bbbbb", "jsclr #n, S, xxxx", dis_jsclr_reg, emu_jsclr_reg },
+    { "0000101001MMMRRR1S10bbbb", "jset #n, [X or Y]:ea, xxxx", dis_jset_ea, emu_jset_ea, match_MMMRRR },
+    { "0000101000MMMRRR1S10bbbb", "jset #n, [X or Y]:aa, xxxx", dis_jset_aa, emu_jset_aa, match_MMMRRR }, /* NOTE(review): aa form, yet filtered by match_MMMRRR (most other aa entries are not) -- confirm */
+    { "0000101010pppppp1S10bbbb", "jset #n, [X or Y]:pp, xxxx", dis_jset_pp, emu_jset_pp },
+    { "0000000110qqqqqq1S10bbbb", "jset #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000101011DDDDDD0010bbbb", "jset #n, S, xxxx", dis_jset_reg, emu_jset_reg },
+    { "0000101111MMMRRR10000000", "jsr ea", dis_jsr_ea, emu_jsr_ea, match_MMMRRR },
+    { "000011010000aaaaaaaaaaaa", "jsr xxx", dis_jsr_imm, emu_jsr_imm },
+    { "0000101101MMMRRR1S10bbbb", "jsset #n, [X or Y]:ea, xxxx", dis_jsset_ea, emu_jsset_ea, match_MMMRRR },
+    { "0000101100aaaaaa1S10bbbb", "jsset #n, [X or Y]:aa, xxxx", dis_jsset_aa, emu_jsset_aa },
+    { "0000101110pppppp1S1bbbbb", "jsset #n, [X or Y]:pp, xxxx", dis_jsset_pp, emu_jsset_pp },
+    { "0000000111qqqqqq1S1bbbbb", "jsset #n, [X or Y]:qq, xxxx", NULL, NULL },
+    { "0000101111DDDDDD001bbbbb", "jsset #n, S, xxxx", dis_jsset_reg, emu_jsset_reg },
+    { "0000010011000RRR000ddddd", "lra Rn, D", NULL, NULL },
+    { "0000010001000000010ddddd", "lra xxxx, D", NULL, NULL },
+    { "000011000001111010iiiiiD", "lsl #ii, D", NULL, NULL },
+    { "00001100000111100001sssD", "lsl S, D", NULL, NULL },
+    { "000011000001111011iiiiiD", "lsr #ii, D", NULL, NULL },
+    { "00001100000111100011sssD", "lsr S, D", NULL, NULL },
+    { "00000100010MMRRR000ddddd", "lua ea, D", dis_lua, emu_lua },
+    { "0000010000aaaRRRaaaadddd", "lua (Rn + aa), D", dis_lua_rel, emu_lua_rel },
+    { "00000001000sssss11QQdk10", "mac S, #n, D", NULL, NULL },
+    { "000000010100000111qqdk10", "maci #xxxx, S, D", NULL, NULL },
+    { "00000001001001101sdkQQQQ", "mac_s_u S1, S2, D", NULL, NULL },
+    { "00000001000sssss11QQdk11", "macr S1, S2, D", NULL, NULL },
+    { "000000010100000111qqdk11", "macri #xxxx, S, D", NULL, NULL },
+    { "00001100000110111000sssD", "merge S, D", NULL, NULL },
+    { "0000101001110RRR1WDDDDDD", "move X:(Rn + xxxx) <-> R", dis_move_x_long, emu_move_x_long },
+    { "0000101101110RRR1WDDDDDD", "move Y:(Rn + xxxx) <-> R", NULL, NULL },
+    { "0000001aaaaaaRRR1a0WDDDD", "move X:(Rn + xxx) <-> R", dis_move_x_imm, emu_move_x_imm },
+    { "0000001aaaaaaRRR1a1WDDDD", "move Y:(Rn + xxx) <-> R", dis_move_y_imm, emu_move_y_imm },
+    { "00000101W1MMMRRR0s1ddddd", "movec [X or Y]:ea <-> R", dis_movec_ea, emu_movec_ea, match_MMMRRR },
+    { "00000101W0aaaaaa0s1ddddd", "movec [X or Y]:aa <-> R", dis_movec_aa, emu_movec_aa, match_MMMRRR }, /* NOTE(review): aa form, yet filtered by match_MMMRRR (most other aa entries are not) -- confirm */
+    { "00000100W1eeeeee101ddddd", "movec R1, R2", dis_movec_reg, emu_movec_reg },
+    { "00000101iiiiiiii101ddddd", "movec #xx, D1", dis_movec_imm, emu_movec_imm },
+    { "00000111W1MMMRRR10dddddd", "movem P:ea <-> R", dis_movem_ea, emu_movem_ea, match_MMMRRR },
+    { "00000111W0aaaaaa00dddddd", "movem P:ea <-> R", dis_movem_aa, emu_movem_aa, match_MMMRRR }, /* NOTE(review): aa form (name text repeats the ea entry), yet filtered by match_MMMRRR -- confirm */
+    { "0000100sW1MMMRRR1Spppppp", "movep [X or Y]:ea <-> [X or Y]:pp", dis_movep_23, emu_movep_23, match_MMMRRR },
+    { "00000111W1MMMRRR0Sqqqqqq", "movep [X or Y]:ea <-> X:qq", dis_movep_x_qq, emu_movep_x_qq, match_MMMRRR },
+    { "00000111W0MMMRRR1Sqqqqqq", "movep [X or Y]:ea <-> Y:qq", NULL, NULL, match_MMMRRR },
+    { "0000100sW1MMMRRR01pppppp", "movep [X or Y]:pp <-> P:ea", dis_movep_1, emu_movep_1, match_MMMRRR },
+    { "000000001WMMMRRR0sqqqqqq", "movep [X or Y]:qq <-> P:ea", NULL, NULL, match_MMMRRR },
+    { "0000100sW1dddddd00pppppp", "movep [X or Y]:pp <-> R", dis_movep_0, emu_movep_0 },
+    { "00000100W1dddddd1q0qqqqq", "movep X:qq <-> R", NULL, NULL },
+    { "00000100W1dddddd0q1qqqqq", "movep Y:qq <-> R", NULL, NULL },
+    { "00000001000sssss11QQdk00", "mpy S, #n, D", NULL, NULL },
+    { "00000001001001111sdkQQQQ", "mpy_s_u S1, S2, D", NULL, NULL },
+    { "000000010100000111qqdk00", "mpyi #xxxx, S, D", dis_mpyi, emu_mpyi },
+    { "00000001000sssss11QQdk01", "mpyr S, #n, D", NULL, NULL },
+    { "000000010100000111qqdk01", "mpyri #xxxx, S, D", NULL, NULL },
+    { "000000000000000000000000", "nop", NULL, emu_nop},
+    { "0000000111011RRR0001d101", "norm Rn, D", dis_norm, emu_norm },
+    { "00001100000111100010sssD", "normf S, D", NULL, NULL },
+    { "0000000101iiiiii1000d010", "or #xx, D", NULL, NULL },
+    { "00000001010000001100d010", "or #xxxx, D", dis_or_long, emu_or_long },
+    { "00000000iiiiiiii111110EE", "ori #xx, D", dis_ori, emu_ori },
+    { "000000000000000000000011", "pflush", NULL, NULL },
+    { "000000000000000000000001", "pflushun", NULL, NULL },
+    { "000000000000000000000010", "pfree", NULL, NULL },
+    { "0000101111MMMRRR10000001", "plock ea", NULL, NULL, match_MMMRRR },
+    { "000000000000000000001111", "plockr xxxx", NULL, NULL },
+    { "0000101011MMMRRR10000001", "punlock ea", NULL, NULL, match_MMMRRR },
+    { "000000000000000000001110", "punlockr xxxx", NULL, NULL },
+    { "0000011001MMMRRR0S100000", "rep [X or Y]:ea", dis_rep_ea, emu_rep_ea, match_MMMRRR },
+    { "0000011000aaaaaa0S100000", "rep [X or Y]:aa", dis_rep_aa, emu_rep_aa },
+    { "00000110iiiiiiii1010hhhh", "rep #xxx", dis_rep_imm, emu_rep_imm },
+    { "0000011011dddddd00100000", "rep S", dis_rep_reg, emu_rep_reg },
+    { "000000000000000010000100", "reset", NULL, emu_reset },
+    { "000000000000000000000100", "rti", NULL, emu_rti },
+    { "000000000000000000001100", "rts", NULL, emu_rts },
+    { "000000000000000010000111", "stop", NULL, emu_stop },
+    { "0000000101iiiiii1000d100", "sub #xx, D", dis_sub_imm, emu_sub_imm },
+    { "00000001010000001100d100", "sub #xxxx, D", dis_sub_long, emu_sub_long },
+    { "00000010CCCC00000JJJd000", "tcc S1, D1", dis_tcc, emu_tcc },
+    { "00000011CCCC0ttt0JJJdTTT", "tcc S1,D2 S2,D2", dis_tcc, emu_tcc },
+    { "00000010CCCC1ttt00000TTT", "tcc S2, D2", dis_tcc, emu_tcc },
+    { "000000000000000000000110", "trap", NULL, NULL },
+    { "00000000000000000001CCCC", "trapcc", NULL, NULL },
+    { "0000101S11MMMRRR110i0000", "vsl", NULL, NULL, match_MMMRRR },
+    { "000000000000000010000110", "wait", NULL, emu_wait },
+};
+
+static bool matches_initialised; /* guards the one-time fill of nonparallel_matches in dsp56k_reset_cpu() */
+static uint32_t nonparallel_matches[ARRAYSIZE(nonparallel_opcodes)][2]; /* per opcode entry: [0] = mask of fixed bits, [1] = required value of those bits */
+
+/**********************************
+ *  Emulator kernel
+ **********************************/
+
+void dsp56k_reset_cpu(dsp_core_t* dsp)
+{
+    int n;
+
+    /* First call only: turn each 24-character opcode template into a
+     * (mask, match) pair.  The string is read most significant bit first;
+     * '0' and '1' are fixed bits and any other character is a don't-care. */
+    if (!matches_initialised) {
+        matches_initialised = true;
+        for (n = 0; n < ARRAYSIZE(nonparallel_opcodes); n++) {
+            const char* pattern = nonparallel_opcodes[n].template;
+            uint32_t fixed_bits = 0;
+            uint32_t expected = 0;
+            int bit;
+
+            assert(strlen(pattern) == 24);
+            for (bit = 23; bit >= 0; bit--) {
+                const char c = pattern[23 - bit];
+                if (c != '0' && c != '1') {
+                    continue;
+                }
+                fixed_bits |= 1 << bit;
+                expected |= (c - '0') << bit;
+            }
+
+            nonparallel_matches[n][0] = fixed_bits;
+            nonparallel_matches[n][1] = expected;
+        }
+    }
+
+    /* Zero the register, stack and peripheral arrays */
+    memset(dsp->registers, 0, sizeof(dsp->registers));
+    memset(dsp->stack, 0, sizeof(dsp->stack));
+    memset(dsp->periph, 0, sizeof(dsp->periph));
+
+    /* Program counter and the non-zero register defaults */
+    dsp->pc = 0x0000;
+    dsp->registers[DSP_REG_OMR] = 0x02;
+    for (n = 0; n < 8; n++) {
+        dsp->registers[DSP_REG_M0 + n] = 0x00ffff;
+    }
+
+    /* Interrupt state: nothing pending */
+    memset(dsp->interrupt_is_pending, 0, sizeof(dsp->interrupt_is_pending));
+    dsp->interrupt_state = DSP_INTERRUPT_NONE;
+    dsp->interrupt_instr_fetch = -1;
+    dsp->interrupt_save_pc = -1;
+    dsp->interrupt_counter = 0;
+    dsp->interrupt_pipeline_count = 0;
+    for (n = 0; n < 12; n++) {
+        /* entries 0..4 are set to 3, entries 5..11 to -1 */
+        dsp->interrupt_ipl[n] = (n < 5) ? 3 : -1;
+    }
+
+    /* No REP in progress */
+    dsp->loop_rep = 0;
+
+    /* Tracing / disassembly bookkeeping */
+    dsp->executing_for_disasm = false;
+    dsp->num_inst = 0;
+    dsp->exception_debugging = true;
+    dsp->disasm_prev_inst_pc = 0xFFFFFFFF;
+}
+
+static OpcodeEntry lookup_opcode(uint32_t op) {
+    /* Full scan, no early exit: a second hit is printed and trips the assert; a zeroed result means no match. */
+    OpcodeEntry found = {0};
+    int idx;
+    for (idx = 0; idx < ARRAYSIZE(nonparallel_opcodes); idx++) {
+        const OpcodeEntry* cand = &nonparallel_opcodes[idx];
+        if ((op & nonparallel_matches[idx][0]) != nonparallel_matches[idx][1]) continue;
+        if (cand->match_func && !cand->match_func(op)) continue;
+        if (found.template != NULL) {
+            printf("qqq %x %s\n", op, found.template);
+        }
+        assert(found.template == NULL);
+        found = *cand;
+    }
+    return found;
+}
+
+/* Disassemble the instruction word at dsp->pc and return dsp->disasm_cur_inst_len.
+ * In DSP_TRACE_MODE a PC equal to the previous call's PC returns 0 instead. */
+static uint16_t disasm_instruction(dsp_core_t* dsp, dsp_trace_disasm_t mode)
+{
+    dsp->disasm_mode = mode;
+    if (mode == DSP_TRACE_MODE && dsp->disasm_prev_inst_pc == dsp->pc) {
+        if (!dsp->disasm_is_looping) {
+            printf( "Looping on DSP instruction at PC = $%04x\n", dsp->disasm_prev_inst_pc);
+            dsp->disasm_is_looping = true;
+        }
+        return 0;
+    }
+
+    dsp->disasm_prev_inst_pc = dsp->pc;
+    dsp->disasm_is_looping = false;
+
+    dsp->disasm_cur_inst = dsp56k_read_memory(dsp, DSP_SPACE_P, dsp->pc);
+    dsp->disasm_cur_inst_len = 1;
+    dsp->disasm_parallelmove_name[0] = 0;
+
+    if (dsp->disasm_cur_inst < 0x100000) {
+        const OpcodeEntry op = lookup_opcode(dsp->disasm_cur_inst);
+        if (op.template) {
+            if (op.dis_func) {
+                op.dis_func(dsp);
+            } else {
+                /* No disassembler for this entry: use the table name, bounded to the buffer */
+                snprintf(dsp->disasm_str_instr, sizeof(dsp->disasm_str_instr), "%s", op.name);
+            }
+        } else {
+            dis_undefined(dsp);
+        }
+    } else {
+        dis_pm(dsp);
+        snprintf(dsp->disasm_str_instr, sizeof(dsp->disasm_str_instr), "%s %s",
+            disasm_opcodes_alu[dsp->disasm_cur_inst & BITMASK(8)], dsp->disasm_parallelmove_name);
+    }
+    return dsp->disasm_cur_inst_len;
+}
+
+static void disasm_reg_save(dsp_core_t* dsp)
+{
+#ifdef DSP_DISASM_REG_PC
+    dsp->pc_save = dsp->pc;
+#endif
+    memcpy(dsp->disasm_registers_save, dsp->registers, sizeof dsp->disasm_registers_save); /* snapshot read back by disasm_reg_compare() */
+}
+
+/* Print every register (indices 4..63) whose value differs from the snapshot
+ * taken by disasm_reg_save(); A and B are each reported once, as a whole. */
+static void disasm_reg_compare(dsp_core_t* dsp)
+{
+    int i;
+    bool bRegA = false, bRegB = false;
+    for (i=4; i<64; i++) {
+        if (dsp->disasm_registers_save[i] == dsp->registers[i]) {
+            continue;
+        }
+
+        switch(i) {
+            case DSP_REG_X0:
+            case DSP_REG_X1:
+            case DSP_REG_Y0:
+            case DSP_REG_Y1:
+                printf("\tReg: %s  $%06x -> $%06x\n",
+                    registers_name[i], dsp->disasm_registers_save[i], dsp->registers[i]);
+                break;
+            case DSP_REG_R0:
+            case DSP_REG_R1:
+            case DSP_REG_R2:
+            case DSP_REG_R3:
+            case DSP_REG_R4:
+            case DSP_REG_R5:
+            case DSP_REG_R6:
+            case DSP_REG_R7:
+            case DSP_REG_M0:
+            case DSP_REG_M1:
+            case DSP_REG_M2:
+            case DSP_REG_M3:
+            case DSP_REG_M4:
+            case DSP_REG_M5:
+            case DSP_REG_M6:
+            case DSP_REG_M7:
+            case DSP_REG_N0:
+            case DSP_REG_N1:
+            case DSP_REG_N2:
+            case DSP_REG_N3:
+            case DSP_REG_N4:
+            case DSP_REG_N5:
+            case DSP_REG_N6:
+            case DSP_REG_N7:
+            case DSP_REG_SR:
+            case DSP_REG_LA:
+            case DSP_REG_LC:
+                printf("\tReg: %s  $%04x -> $%04x\n",
+                    registers_name[i], dsp->disasm_registers_save[i], dsp->registers[i]);
+                break;
+            case DSP_REG_OMR:
+            case DSP_REG_SP:
+            case DSP_REG_SSH:
+            case DSP_REG_SSL:
+                printf("\tReg: %s  $%02x -> $%02x\n",
+                    registers_name[i], dsp->disasm_registers_save[i], dsp->registers[i]);
+                break;
+            case DSP_REG_A0:
+            case DSP_REG_A1:
+            case DSP_REG_A2:
+                if (bRegA == false) {
+                    printf("\tReg: a   $%02x:%06x:%06x -> $%02x:%06x:%06x\n",
+                        dsp->disasm_registers_save[DSP_REG_A2], dsp->disasm_registers_save[DSP_REG_A1], dsp->disasm_registers_save[DSP_REG_A0],
+                        dsp->registers[DSP_REG_A2], dsp->registers[DSP_REG_A1], dsp->registers[DSP_REG_A0]
+                    );
+                    bRegA = true;
+                }
+                break;
+            case DSP_REG_B0:
+            case DSP_REG_B1:
+            case DSP_REG_B2:
+                if (bRegB == false) {
+                    printf("\tReg: b   $%02x:%06x:%06x -> $%02x:%06x:%06x\n",
+                        dsp->disasm_registers_save[DSP_REG_B2], dsp->disasm_registers_save[DSP_REG_B1], dsp->disasm_registers_save[DSP_REG_B0],
+                        dsp->registers[DSP_REG_B2], dsp->registers[DSP_REG_B1], dsp->registers[DSP_REG_B0]
+                    );
+                    bRegB = true;
+                }
+                break;
+        }
+    }
+
+#ifdef DSP_DISASM_REG_PC
+    if (dsp->pc_save != dsp->pc) { /* same field that disasm_reg_save() writes */
+        printf("\tReg: pc  $%04x -> $%04x\n", dsp->pc_save, dsp->pc);
+    }
+#endif
+}
+
+/* Build the one-line trace text for the last disassembled instruction in
+ * dsp->disasm_str_instr2 and return that buffer. */
+static const char* disasm_get_instruction_text(dsp_core_t* dsp)
+{
+    /* The mnemonic column is padded to the full size of disasm_str_instr */
+    const int len = sizeof(dsp->disasm_str_instr);
+
+    if (dsp->disasm_is_looping) {
+        dsp->disasm_str_instr2[0] = 0;
+    }
+    if (dsp->disasm_cur_inst_len == 1) {
+        sprintf(dsp->disasm_str_instr2, "p:%04x  %06x         (%02d cyc)  %-*s\n",
+            dsp->disasm_prev_inst_pc, dsp->disasm_cur_inst,
+            dsp->instr_cycle, len, dsp->disasm_str_instr);
+    } else {
+        /* Longer instruction: also show the following program word */
+        sprintf(dsp->disasm_str_instr2, "p:%04x  %06x %06x  (%02d cyc)  %-*s\n",
+            dsp->disasm_prev_inst_pc, dsp->disasm_cur_inst,
+            read_memory_p(dsp, dsp->disasm_prev_inst_pc + 1),
+            dsp->instr_cycle, len, dsp->disasm_str_instr);
+    }
+    return dsp->disasm_str_instr2;
+}
+
+/**
+ * Disassemble and execute the single instruction at 'pc', print the
+ * resulting trace line to 'out', then restore the DSP core state.
+ *
+ * Returns the value of disasm_instruction() minus one.
+ */
+uint16_t dsp56k_execute_one_disasm_instruction(dsp_core_t* dsp, FILE *out, uint32_t pc)
+{
+    dsp_core_t snapshot;
+    uint16_t extra_words;
+
+    /* The flag is raised before the snapshot is taken, so the restore
+     * below brings it back raised; it is cleared explicitly at the end. */
+    dsp->executing_for_disasm = true;
+    memcpy(&snapshot, dsp, sizeof(snapshot));
+
+    /* Disassemble at the requested address */
+    dsp->pc = pc;
+    extra_words = disasm_instruction(dsp, DSP_DISASM_MODE) - 1;
+
+    /* Execute it as well, so the cycle count shown in the trace line
+     * is the one this instruction produced */
+    dsp56k_execute_instruction(dsp);
+
+    fprintf(out, "%s", disasm_get_instruction_text(dsp));
+
+    /* Put back the core state saved above */
+    memcpy(dsp, &snapshot, sizeof(snapshot));
+    dsp->executing_for_disasm = false;
+
+    return extra_words;
+}
+
+/* Fetch the program word at dsp->pc, run it (opcode-table handler or
+ * parallel-move handler), then update the PC and process interrupts.
+ * Optional tracing is compiled in through the TRACE_DSP_DISASM* switches. */
+void dsp56k_execute_instruction(dsp_core_t* dsp)
+{
+    uint32_t disasm_return = 0;
+    dsp->disasm_memory_ptr = 0;
+
+    /* Decode and execute current instruction */
+    dsp->cur_inst = read_memory_p(dsp, dsp->pc);
+    /* Initialize instruction size and cycle counter */
+    dsp->cur_inst_len = 1;
+    dsp->instr_cycle = 2;
+
+    /* Disasm current instruction ? (trace mode only) */
+    if (TRACE_DSP_DISASM) {    
+        /* Call disasm_instruction only when DSP is called in trace mode */
+        if (!dsp->executing_for_disasm) {
+            disasm_return = disasm_instruction(dsp, DSP_TRACE_MODE);
+            if (disasm_return) {
+                printf( "%s", disasm_get_instruction_text(dsp));
+            }
+            if (disasm_return != 0 && TRACE_DSP_DISASM_REG) {
+                /* DSP regs trace enabled only if DSP DISASM is enabled */
+                disasm_reg_save(dsp);
+            }
+        }
+    }
+            
+    if (dsp->cur_inst < 0x100000) {
+        const OpcodeEntry op = lookup_opcode(dsp->cur_inst);
+        if (op.emu_func) {
+            op.emu_func(dsp);
+        } else {
+            /* op.name is NULL when no table entry matched; never hand NULL to %s */
+            printf("%x - %s\n", dsp->cur_inst, op.name ? op.name : "(no matching opcode)");
+            emu_undefined(dsp);
+        }
+    } else {
+        /* Do parallel move read */
+        opcodes_parmove[(dsp->cur_inst>>20) & BITMASK(4)](dsp);
+    }
+
+    /* Disasm current instruction ? (trace mode only) */
+    if (TRACE_DSP_DISASM) {
+        /* Display only when DSP is called in trace mode */
+        if (!dsp->executing_for_disasm) {
+            if (disasm_return != 0) {
+                /* DSP regs trace enabled only if DSP DISASM is enabled */
+                if (TRACE_DSP_DISASM_REG)
+                    disasm_reg_compare(dsp);
+
+                if (TRACE_DSP_DISASM_MEM) {
+                    /* 1 memory change to display ? */
+                    if (dsp->disasm_memory_ptr == 1)
+                        printf( "\t%s\n", dsp->str_disasm_memory[0]);
+                    /* 2 memory changes to display ? */
+                    else if (dsp->disasm_memory_ptr == 2) {
+                        printf( "\t%s\n", dsp->str_disasm_memory[0]);
+                        printf( "\t%s\n", dsp->str_disasm_memory[1]);
+                    }
+                }
+            }
+        }
+    }
+
+    /* Process the PC */
+    dsp_postexecute_update_pc(dsp);
+
+    /* Process Interrupts */
+    dsp_postexecute_interrupts(dsp);
+
+#ifdef DSP_COUNT_IPS
+    ++dsp->num_inst;
+    if ((dsp->num_inst & 63) == 0) {
+        /* Evaluate time after <N> instructions have been executed to avoid asking too frequently */
+        uint32_t cur_time = SDL_GetTicks();
+        if (cur_time-start_time>1000) {
+            printf( "Dsp: %d i/s\n", (dsp->num_inst*1000)/(cur_time-start_time));
+            start_time=cur_time;
+            dsp->num_inst=0;
+        }
+    }
+#endif
+}
+
+/**********************************
+ *  Update the PC
+**********************************/
+
+static void dsp_postexecute_update_pc(dsp_core_t* dsp)
+{
+    /* Advance the PC past the instruction just executed, applying the REP */
+    /* and DO hardware-loop bookkeeping (LC countdown, loop-back, unwind). */
+    if (dsp->loop_rep) {
+        if (dsp->pc_on_rep) {
+            /* PC is still on the REP itself: a count of 0 means 65536 */
+            if (dsp->registers[DSP_REG_LC] == 0) {
+                dsp->registers[DSP_REG_LC] = 0x010000;
+            }
+            dsp->pc_on_rep = 0;
+        } else {
+            /* PC is on the repeated instruction: one iteration is done */
+            --dsp->registers[DSP_REG_LC];
+            dsp->registers[DSP_REG_LC] &= BITMASK(16);
+            if (dsp->registers[DSP_REG_LC] > 0) {
+                dsp->cur_inst_len = 0;   /* Stay on this instruction */
+            } else {
+                /* REP finished: put back the LC value kept in LCSAVE */
+                dsp->loop_rep = 0;
+                dsp->registers[DSP_REG_LC] = dsp->registers[DSP_REG_LCSAVE];
+            }
+        }
+    }
+
+    /* Normal execution, go to next instruction */
+    dsp->pc += dsp->cur_inst_len;
+
+    /* A DO loop is running only while SR.LF is set, and its end is tested */
+    /* with the updated PC: LA + 1 means the last instruction just ran. */
+    if (!(dsp->registers[DSP_REG_SR] & (1<<DSP_SR_LF))
+            || dsp->pc != dsp->registers[DSP_REG_LA] + 1) {
+        return;
+    }
+
+    --dsp->registers[DSP_REG_LC];
+    dsp->registers[DSP_REG_LC] &= BITMASK(16);
+    if (dsp->registers[DSP_REG_LC] != 0) {
+        /* Loop one more time */
+        dsp->pc = dsp->registers[DSP_REG_SSH];
+        return;
+    }
+
+    /* End of loop. Only LF is restored from the stacked SR, so only LF may */
+    /* be cleared first: a 0x7f mask would also wipe I0/I1, S0/S1 and T. */
+    uint32_t saved_pc, saved_sr;
+    dsp_stack_pop(dsp, &saved_pc, &saved_sr);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16) - (1<<DSP_SR_LF);
+    dsp->registers[DSP_REG_SR] |= saved_sr & (1<<DSP_SR_LF);
+    dsp_stack_pop(dsp, &dsp->registers[DSP_REG_LA], &dsp->registers[DSP_REG_LC]);
+}
+
+/**********************************
+ *  Interrupts
+**********************************/
+
+/* Post interrupt `inter` (a DSP_INTER_* index) to the pending interrupts table. */
+void dsp56k_add_interrupt(dsp_core_t* dsp, uint16_t inter)
+{
+    /* An index past the end of the per-interrupt tables would read and write */
+    /* out of bounds: drop it, like an interrupt that is unused (IPL == -1). */
+    if (inter >= ARRAYSIZE(dsp->interrupt_ipl) || dsp->interrupt_ipl[inter] == -1)
+        return;
+    /* Count it only once: posting an already pending interrupt changes nothing */
+    if (dsp->interrupt_is_pending[inter] == 0) {
+        dsp->interrupt_is_pending[inter] = 1;
+        dsp->interrupt_counter ++;
+    }
+}
+
+static void dsp_postexecute_interrupts(dsp_core_t* dsp)
+{
+    uint32_t index, instr, i;
+    int32_t ipl_to_raise, ipl_sr;
+    /* Called after each instruction: steps a fast interrupt entry that is under way, else arbitrates the pending interrupts */
+    /* REP is not interruptible */
+    if (dsp->loop_rep) {
+        return;
+    }
+
+    /* A fast interrupt can not be interrupted. */
+    if (dsp->interrupt_state == DSP_INTERRUPT_DISABLED) {
+        /* interrupt_pipeline_count walks from 5 down to 0, one step per call */
+        switch (dsp->interrupt_pipeline_count) {
+            case 5:     /* first call after arbitration: nothing to do yet */
+                dsp->interrupt_pipeline_count --;
+                return;
+            case 4:
+                /* Prefetch interrupt instruction 1: the PC is redirected to the vector */
+                dsp->interrupt_save_pc = dsp->pc;
+                dsp->pc = dsp->interrupt_instr_fetch;
+
+                /* is it a LONG interrupt ? NOTE(review): the two masks presumably match the JSR encodings - confirm */
+                instr = read_memory_p(dsp, dsp->interrupt_instr_fetch);
+                if ( ((instr & 0xfff000) == 0x0d0000) || ((instr & 0xffc0ff) == 0x0bc080) ) {
+                    dsp->interrupt_state = DSP_INTERRUPT_LONG;
+                    dsp_stack_push(dsp, dsp->interrupt_save_pc, dsp->registers[DSP_REG_SR], 0); /* stack the return PC and SR */
+                    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_LF)|(1<<DSP_SR_T)  |
+                                            (1<<DSP_SR_S1)|(1<<DSP_SR_S0) |
+                                            (1<<DSP_SR_I0)|(1<<DSP_SR_I1));
+                    dsp->registers[DSP_REG_SR] |= dsp->interrupt_ipl_to_raise<<DSP_SR_I0; /* new interrupt mask level */
+                }
+                dsp->interrupt_pipeline_count --;
+                return;
+            case 3:
+                /* Prefetch interrupt instruction 2, only when the PC moved on to the second vector word */
+                if (dsp->pc == dsp->interrupt_instr_fetch+1) {
+                    instr = read_memory_p(dsp, dsp->pc);
+                    if ( ((instr & 0xfff000) == 0x0d0000) || ((instr & 0xffc0ff) == 0x0bc080) ) {
+                        dsp->interrupt_state = DSP_INTERRUPT_LONG;
+                        dsp_stack_push(dsp, dsp->interrupt_save_pc, dsp->registers[DSP_REG_SR], 0);
+                        dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_LF)|(1<<DSP_SR_T)  |
+                                                (1<<DSP_SR_S1)|(1<<DSP_SR_S0) |
+                                                (1<<DSP_SR_I0)|(1<<DSP_SR_I1));
+                        dsp->registers[DSP_REG_SR] |= dsp->interrupt_ipl_to_raise<<DSP_SR_I0;
+                    }
+                }
+                dsp->interrupt_pipeline_count --;
+                return;
+            case 2:
+                /* 1 instruction executed after interrupt */
+                /* before re enable interrupts */
+                /* Was it a FAST interrupt ? (PC ran past both vector words: go back to the interrupted code) */
+                if (dsp->pc == dsp->interrupt_instr_fetch+2) {
+                    dsp->pc = dsp->interrupt_save_pc;
+                }
+                dsp->interrupt_pipeline_count --;
+                return;
+            case 1:
+                /* Last instruction executed after interrupt */
+                /* before re enable interrupts */
+                dsp->interrupt_pipeline_count --;
+                return;
+            case 0:
+                /* Re enable interrupts */
+                /* All 6 instruction are done, Interrupts can be enabled again */
+                dsp->interrupt_save_pc = -1;        /* -1 ends up as 0xffff in these uint16_t fields */
+                dsp->interrupt_instr_fetch = -1;
+                dsp->interrupt_state = DSP_INTERRUPT_NONE;
+                break;
+        }
+    }
+
+    /* Trace Interrupt ? */
+    if (dsp->registers[DSP_REG_SR] & (1<<DSP_SR_T)) {
+        dsp56k_add_interrupt(dsp, DSP_INTER_TRACE);
+    }
+
+    /* No interrupt to execute */
+    if (dsp->interrupt_counter == 0) {
+        return;
+    }
+
+    /* search for an interrupt */
+    ipl_sr = (dsp->registers[DSP_REG_SR]>>DSP_SR_I0) & BITMASK(2);   /* current mask level, bits I1:I0 */
+    index = 0xffff;     /* sentinel: nothing selected yet */
+    ipl_to_raise = -1;
+
+    /* Arbitrate between all pending interrupts */
+    for (i=0; i<12; i++) {      /* 12 = entries in interrupt_is_pending[] and interrupt_ipl[] */
+        if (dsp->interrupt_is_pending[i] == 1) {
+
+            /* level 3 interrupt ? Taken at once, whatever the SR mask; the lowest index wins */
+            if (dsp->interrupt_ipl[i] == 3) {
+                index = i;
+                break;
+            }
+
+            /* level 0, 1 ,2 interrupt ? */
+            /* if interrupt is masked in SR, don't process it */
+            if (dsp->interrupt_ipl[i] < ipl_sr)
+                continue;
+
+            /* if interrupt is lower or equal than current arbitrated interrupt (ties keep the lower index) */
+            if (dsp->interrupt_ipl[i] <= ipl_to_raise)
+                continue;
+
+            /* save current arbitrated interrupt */
+            index = i;
+            ipl_to_raise = dsp->interrupt_ipl[i];
+        }
+    }
+
+    /* If there's no interrupt to process, return */
+    if (index == 0xffff) {
+        return;
+    }
+
+    /* remove this interrupt from the pending interrupts table */
+    dsp->interrupt_is_pending[index] = 0;
+    dsp->interrupt_counter --;
+
+    /* process arbitrated interrupt: the level a long interrupt writes to SR is its IPL + 1, capped at 3 */
+    ipl_to_raise = dsp->interrupt_ipl[index] + 1;
+    if (ipl_to_raise > 3) {
+        ipl_to_raise = 3;
+    }
+
+    dsp->interrupt_instr_fetch = dsp_interrupt[index].vectorAddr;
+    dsp->interrupt_pipeline_count = 5;
+    dsp->interrupt_state = DSP_INTERRUPT_DISABLED;
+    dsp->interrupt_ipl_to_raise = ipl_to_raise;
+
+    DPRINTF("Dsp interrupt: %s\n", dsp_interrupt[index].name);
+
+    /* SSI receive data with exception ? (not handled here: the original handling is commented out and this asserts) */
+    if (dsp->interrupt_instr_fetch == 0xe) {
+        // dsp->periph[DSP_SPACE_X][DSP_SSI_SR] &= 0xff-(1<<DSP_SSI_SR_ROE);
+        assert(false);
+    }
+
+    /* SSI transmit data with exception ? (not handled here: asserts) */
+    else if (dsp->interrupt_instr_fetch == 0x12) {
+        // dsp->periph[DSP_SPACE_X][DSP_SSI_SR] &= 0xff-(1<<DSP_SSI_SR_TUE);
+        assert(false);
+    }
+
+    /* host command ? (not handled here: asserts) */
+    else if (dsp->interrupt_instr_fetch == 0xff) {
+        /* Clear HC and HCP interrupt */
+        // dsp->periph[DSP_SPACE_X][DSP_HOST_HSR] &= 0xff - (1<<DSP_HOST_HSR_HCP);
+        // dsp->hostport[CPU_HOST_CVR] &= 0xff - (1<<CPU_HOST_CVR_HC);
+
+        // dsp->interrupt_instr_fetch = dsp->hostport[CPU_HOST_CVR] & BITMASK(5);
+        // dsp->interrupt_instr_fetch *= 2;
+        assert(false);
+    }
+}
+
+/**********************************
+ *  Read/Write memory functions
+ **********************************/
+
+static uint32_t read_memory_p(dsp_core_t* dsp, uint32_t address)
+{   /* Fetch the word at `address` in P memory (dsp->pram) */
+    assert((address & 0xFF000000) == 0);    /* addresses are at most 24 bits wide */
+    assert(address < DSP_PRAM_SIZE);
+    uint32_t r = dsp->pram[address];
+    assert((r & 0xFF000000) == 0);          /* stored words must fit in 24 bits too */
+    return r;
+}
+
+uint32_t dsp56k_read_memory(dsp_core_t* dsp, int space, uint32_t address)
+{
+    /* Read a word from X/Y/P space; X overlays peripherals and mixbuffer[]. */
+    assert((address & 0xFF000000) == 0);
+    switch (space) {
+    case DSP_SPACE_X:
+        if (address >= DSP_PERIPH_BASE) {
+            assert(dsp->read_peripheral);
+            return dsp->read_peripheral(dsp, address);
+        }
+        if (address >= DSP_MIXBUFFER_BASE && address < DSP_MIXBUFFER_BASE+DSP_MIXBUFFER_SIZE)
+            return dsp->mixbuffer[address-DSP_MIXBUFFER_BASE];
+        if (address >= DSP_MIXBUFFER_READ_BASE && address < DSP_MIXBUFFER_READ_BASE+DSP_MIXBUFFER_SIZE)
+            return dsp->mixbuffer[address-DSP_MIXBUFFER_READ_BASE];
+        assert(address < DSP_XRAM_SIZE);
+        return dsp->xram[address];
+    case DSP_SPACE_Y:
+        assert(address < DSP_YRAM_SIZE);
+        return dsp->yram[address];
+    case DSP_SPACE_P:
+        return read_memory_p(dsp, address);
+    default:
+        assert(false);
+        return 0;
+    }
+}
+
+void dsp56k_write_memory(dsp_core_t* dsp, int space, uint32_t address, uint32_t value)
+{
+    /* Store a 24-bit word; the write is logged when memory tracing is enabled */
+    assert((value & 0xFF000000) == 0);
+    assert((address & 0xFF000000) == 0);
+    if (!(TRACE_DSP_DISASM_MEM))
+        write_memory_raw(dsp, space, address, value);
+    else
+        write_memory_disasm(dsp, space, address, value);
+}
+
+static void write_memory_raw(dsp_core_t* dsp, int space, uint32_t address, uint32_t value)
+{
+    /* Untraced store of one word. Y and P are flat arrays; X is split into the */
+    /* peripheral callback, the two windows onto mixbuffer[], and plain XRAM. */
+    assert((value & 0xFF000000) == 0);
+    assert((address & 0xFF000000) == 0);
+
+    if (space == DSP_SPACE_Y) {
+        assert(address < DSP_YRAM_SIZE);
+        dsp->yram[address] = value;
+    } else if (space == DSP_SPACE_P) {
+        assert(address < DSP_PRAM_SIZE);
+        dsp->pram[address] = value;
+    } else if (space != DSP_SPACE_X) {
+        /* Unknown memory space */
+        assert(false);
+    } else if (address >= DSP_PERIPH_BASE) {
+        assert(dsp->write_peripheral);
+        dsp->write_peripheral(dsp, address, value);
+    } else if (address >= DSP_MIXBUFFER_BASE && address < DSP_MIXBUFFER_BASE+DSP_MIXBUFFER_SIZE) {
+        dsp->mixbuffer[address-DSP_MIXBUFFER_BASE] = value;
+    } else if (address >= DSP_MIXBUFFER_READ_BASE && address < DSP_MIXBUFFER_READ_BASE+DSP_MIXBUFFER_SIZE) {
+        dsp->mixbuffer[address-DSP_MIXBUFFER_READ_BASE] = value;
+    } else {
+        assert(address < DSP_XRAM_SIZE);
+        dsp->xram[address] = value;
+    }
+}
+
+static uint32_t read_memory_disasm(dsp_core_t* dsp, int space, uint32_t address)
+{   /* Read helper called by write_memory_disasm(); a plain pass-through to dsp56k_read_memory() */
+    return dsp56k_read_memory(dsp, space, address);
+}
+
+static void write_memory_disasm(dsp_core_t* dsp, int space, uint32_t address, uint32_t value)
+{
+    /*
+     * Traced store: performs the write through write_memory_raw() and, while
+     * a slot is free in dsp->str_disasm_memory, records "old -> new" text for
+     * the disassembly trace. Both values are read through dsp56k_read_memory(),
+     * so tracing a peripheral address also calls the read_peripheral callback.
+     */
+    uint32_t oldvalue, curvalue;
+    char space_c = '?';   /* stays defined even when assert() is compiled out */
+
+    oldvalue = read_memory_disasm(dsp, space, address);
+    write_memory_raw(dsp, space, address, value);
+    curvalue = read_memory_disasm(dsp, space, address);
+
+    switch(space) {
+        case DSP_SPACE_X: space_c = 'x'; break;
+        case DSP_SPACE_Y: space_c = 'y'; break;
+        case DSP_SPACE_P: space_c = 'p'; break;
+        default: assert(false);
+    }
+
+    if (dsp->disasm_memory_ptr < ARRAYSIZE(dsp->str_disasm_memory)) {
+        /* snprintf() keeps the text inside the slot whatever the values are */
+        snprintf(dsp->str_disasm_memory[dsp->disasm_memory_ptr], sizeof(dsp->str_disasm_memory[0]),
+                 "Mem: %c:0x%04x  0x%06x -> 0x%06x", space_c, address, oldvalue, curvalue);
+        dsp->disasm_memory_ptr ++;
+    }
+}
+
+static void dsp_write_reg(dsp_core_t* dsp, uint32_t numreg, uint32_t value)
+{
+    /* Register store honouring the special cases: A/B, OMR, SR, SP, SSH, SSL. */
+    const uint32_t sp_err_bits = 3<<DSP_SP_SE;   /* SE and UF flags of SP */
+    uint32_t level;
+    switch (numreg) {
+        case DSP_REG_A:   /* whole accumulator: A1 = value, A0 = 0, A2 = sign fill */
+            dsp->registers[DSP_REG_A0] = 0;
+            dsp->registers[DSP_REG_A1] = value;
+            dsp->registers[DSP_REG_A2] = (value & (1<<23)) ? 0xff : 0x0;
+            break;
+        case DSP_REG_B:
+            dsp->registers[DSP_REG_B0] = 0;
+            dsp->registers[DSP_REG_B1] = value;
+            dsp->registers[DSP_REG_B2] = (value & (1<<23)) ? 0xff : 0x0;
+            break;
+        case DSP_REG_OMR:
+            dsp->registers[DSP_REG_OMR] = value & 0xc7;
+            break;
+        case DSP_REG_SR:
+            dsp->registers[DSP_REG_SR] = value & 0xaf7f;
+            break;
+        case DSP_REG_SP:
+            if ((dsp->registers[DSP_REG_SP] & sp_err_bits) != 0 || (value & sp_err_bits) == 0) {
+                dsp->registers[DSP_REG_SP] = value & BITMASK(6);
+            } else {
+                /* An error flag is newly set by this write: raise the interrupt */
+                dsp56k_add_interrupt(dsp, DSP_INTER_STACK_ERROR);
+                dsp->registers[DSP_REG_SP] = value & sp_err_bits;
+                if (!dsp->executing_for_disasm) {
+                    printf( "Dsp: Stack Overflow or Underflow\n");
+                }
+                if (dsp->exception_debugging) {
+                    assert(false);
+                }
+            }
+            dsp_compute_ssh_ssl(dsp);
+            break;
+        case DSP_REG_SSH:
+            dsp_stack_push(dsp, value, 0, 1);
+            break;
+        case DSP_REG_SSL:
+            /* Writes at stack level 0 are forced to 0 */
+            level = dsp->registers[DSP_REG_SP] & BITMASK(4);
+            if (level == 0) {
+                value = 0;
+            }
+            dsp->stack[1][level] = value & BITMASK(16);
+            dsp->registers[DSP_REG_SSL] = value & BITMASK(16);
+            break;
+        default:
+            dsp->registers[numreg] = value & BITMASK(registers_mask[numreg]);
+            break;
+    }
+}
+
+/**********************************
+ *  Stack push/pop
+ **********************************/
+
+static void dsp_stack_push(dsp_core_t* dsp, uint32_t curpc, uint32_t cursr, uint16_t sshOnly)
+{
+    /* Push curpc on SSH and, unless sshOnly is set, cursr on SSL. Stepping past */
+    /* level 15 carries into the SE bit of SP; if SE was clear, an interrupt is posted. */
+    const uint32_t sp = dsp->registers[DSP_REG_SP];
+    const uint32_t had_error = sp & (1<<DSP_SP_SE);
+    const uint32_t uf_flag = sp & (1<<DSP_SP_UF);
+    uint32_t level = (sp & BITMASK(4)) + 1;
+
+    if (!had_error && (level & (1<<DSP_SP_SE))) {
+        /* Stack full, raise interrupt */
+        dsp56k_add_interrupt(dsp, DSP_INTER_STACK_ERROR);
+        if (!dsp->executing_for_disasm) {
+            printf("Dsp: Stack Overflow\n");
+        }
+        if (dsp->exception_debugging) {
+            assert(false);
+        }
+    }
+
+    dsp->registers[DSP_REG_SP] = (uf_flag | had_error | level) & BITMASK(6);
+    level &= BITMASK(4);
+    if (level == 0) {
+        /* Wrapped around: entry 0 is kept at zero */
+        dsp->stack[0][0] = 0;
+        dsp->stack[1][0] = 0;
+    } else {
+        dsp->stack[0][level] = curpc & BITMASK(16);
+        /* SSL part, if instruction is not like "MOVEC xx, SSH"  */
+        if (!sshOnly) {
+            dsp->stack[1][level] = cursr & BITMASK(16);
+        }
+    }
+    /* Update SSH and SSL registers */
+    dsp->registers[DSP_REG_SSH] = dsp->stack[0][level];
+    dsp->registers[DSP_REG_SSL] = dsp->stack[1][level];
+}
+
+static void dsp_stack_pop(dsp_core_t* dsp, uint32_t *newpc, uint32_t *newsr)
+{
+    /* Hand the current SSH/SSL back through newpc/newsr and step SP down one */
+    /* level; popping at level 0 wraps around and, if SE was clear, posts an interrupt. */
+    const uint32_t sp = dsp->registers[DSP_REG_SP];
+    const uint32_t had_error = sp & (1<<DSP_SP_SE);
+    const uint32_t uf_flag = sp & (1<<DSP_SP_UF);
+    uint32_t level = (sp & BITMASK(4)) - 1;
+
+    if (!had_error && (level & (1<<DSP_SP_SE))) {
+        /* Stack empty */
+        dsp56k_add_interrupt(dsp, DSP_INTER_STACK_ERROR);
+        if (!dsp->executing_for_disasm)
+            printf("Dsp: Stack underflow\n");
+        if (dsp->exception_debugging)
+            assert(false);
+    }
+
+    dsp->registers[DSP_REG_SP] = (uf_flag | had_error | level) & BITMASK(6);
+    level &= BITMASK(4);
+    *newpc = dsp->registers[DSP_REG_SSH];   /* old top of stack, read before the reload */
+    *newsr = dsp->registers[DSP_REG_SSL];
+    dsp->registers[DSP_REG_SSH] = dsp->stack[0][level];
+    dsp->registers[DSP_REG_SSL] = dsp->stack[1][level];
+}
+
+static void dsp_compute_ssh_ssl(dsp_core_t* dsp)
+{
+    /* Reload the SSH and SSL registers from the stack entry selected by */
+    /* the low 4 bits of SP. */
+    const uint32_t level = dsp->registers[DSP_REG_SP] & BITMASK(4);
+
+    dsp->registers[DSP_REG_SSH] = dsp->stack[0][level];
+    dsp->registers[DSP_REG_SSL] = dsp->stack[1][level];
+}
+
+
+
+/**********************************
+ *  56bit arithmetic
+ **********************************/
+
+/* source,dest[0] is 55:48 */
+/* source,dest[1] is 47:24 */
+/* source,dest[2] is 23:00 */
+
+static uint16_t dsp_abs56(uint32_t *dest)
+{
+    /* D=|D| : dest[0] holds bits 55:48, so its bit 7 is the sign of D. */
+    /* A negative D is replaced by 0 - D; the return value is the flag word */
+    /* of that subtraction, or 0 when D was already non-negative. */
+    uint32_t negated[3] = { 0, 0, 0 };
+    uint16_t flags;
+
+    if (!(dest[0] & (1<<7))) {
+        return 0;
+    }
+
+    /* dsp_sub56() leaves its result in the second argument: negated = 0 - D */
+    flags = dsp_sub56(dest, negated);
+    /* Copy the negated value back over D */
+    for (int i = 0; i < 3; i++) {
+        dest[i] = negated[i];
+    }
+
+    return flags;
+}
+
+static uint16_t dsp_asl56(uint32_t *dest, int n)
+{
+    /* Shift left dest n bits: D<<=n. Returns the L, V and C bits for SR. */
+    /* Counts outside 0..56 would be undefined shifts below, so n is clamped; */
+    /* from 57 on every bit, and the carry, has left the 56-bit value. */
+    n = (n < 0) ? 0 : (n > 57) ? 57 : n;
+    uint64_t dest_v = dest[2] | ((uint64_t)dest[1] << 24) | ((uint64_t)dest[0] << 48);
+    /* Bits pushed past bit 55; the lowest of them is the last one shifted out */
+    uint64_t spilled = (n <= 56) ? dest_v >> (56-n) : dest_v << (n-56);
+    uint64_t dest_s = dest_v << n;
+    dest[2] = dest_s & BITMASK(24);
+    dest[1] = (dest_s >> 24) & BITMASK(24);
+    dest[0] = (dest_s >> 48) & BITMASK(8);
+    uint32_t carry = spilled & 1;
+    uint32_t overflow = spilled != 0;
+    uint32_t v = ((dest_v >> 55) & 1) != ((dest_s >> 55) & 1);
+    return (overflow<<DSP_SR_L)|(v<<DSP_SR_V)|(carry<<DSP_SR_C);
+}
+
+static uint16_t dsp_asr56(uint32_t *dest, int n)
+{
+    /* Arithmetic shift right: D>>=n with bit 55, the sign, copied into the */
+    /* vacated bits (a plain >> would turn negative values positive). Returns */
+    /* C = last bit shifted out; n <= 0 shifts nothing, n > 56 acts like 56. */
+    uint64_t dest_v = dest[2] | ((uint64_t)dest[1] << 24) | ((uint64_t)dest[0] << 48);
+    uint64_t sign = (dest_v >> 55) & 1;
+    if (n > 56) n = 56;
+    uint16_t carry = (n > 0) ? (dest_v >> (n-1)) & 1 : 0;
+    if (n > 0) dest_v = (dest_v >> n) | (sign ? ~(uint64_t)0 << (56-n) : 0);
+    dest[2] = dest_v & BITMASK(24);
+    dest[1] = (dest_v >> 24) & BITMASK(24);
+    dest[0] = (dest_v >> 48) & BITMASK(8);
+    return (carry<<DSP_SR_C);
+}
+
+static uint16_t dsp_add56(uint32_t *source, uint32_t *dest)
+{
+    /* Add source to dest: D = D+S, one 24/24/8-bit limb at a time with the */
+    /* carry out of each limb (its bit 24) rippling into the next one up. */
+    /* Returns C (carry out of bit 55) plus L and V on signed overflow. */
+    const uint16_t sign_s = (source[0]>>7) & 1;
+    const uint16_t sign_d = (dest[0]>>7) & 1;
+    uint16_t sign_r, carry_out, ovf;
+
+    dest[2] += source[2];
+    dest[1] += source[1]+((dest[2]>>24) & 1);
+    dest[0] += source[0]+((dest[1]>>24) & 1);
+    carry_out = (dest[0]>>8) & 1;
+
+    /* Trim every limb back to its width now that the carries are consumed */
+    dest[0] &= BITMASK(8);
+    dest[1] &= BITMASK(24);
+    dest[2] &= BITMASK(24);
+
+    /* Overflow when the result sign differs from both operand signs */
+    sign_r = (dest[0]>>7) & 1;
+    ovf = (sign_s ^ sign_r) & (sign_d ^ sign_r);
+
+    return (ovf<<DSP_SR_L)|(ovf<<DSP_SR_V)|(carry_out<<DSP_SR_C);
+}
+
+static uint16_t dsp_sub56(uint32_t *source, uint32_t *dest)
+{
+    /* Subtract source from dest: D = D-S, limb by limb; a limb that wraps */
+    /* below zero shows bit 24 set, which is borrowed from the next limb up. */
+    /* Returns C (borrow out of bit 55) plus L and V on signed overflow. */
+    const uint16_t sign_d = (dest[0]>>7) & 1;   /* sign of D before the update */
+    uint16_t sign_s, sign_r, borrow_out, ovf;
+
+    dest[2] -= source[2];
+    dest[1] -= source[1]+((dest[2]>>24) & 1);
+    dest[0] -= source[0]+((dest[1]>>24) & 1);
+    borrow_out = (dest[0]>>8) & 1;
+
+    /* Trim every limb back to its width now that the borrows are consumed */
+    dest[0] &= BITMASK(8);
+    dest[1] &= BITMASK(24);
+    dest[2] &= BITMASK(24);
+
+    sign_s = (source[0]>>7) & 1;
+    sign_r = (dest[0]>>7) & 1;
+
+    /* Overflow when the operand signs differ and the result sign left D's */
+    ovf = (sign_s ^ sign_d) & (sign_r ^ sign_d);
+
+    return (ovf<<DSP_SR_L)|(ovf<<DSP_SR_V)|(borrow_out<<DSP_SR_C);
+}
+
+static void dsp_mul56(uint32_t source1, uint32_t source2, uint32_t *dest, uint8_t signe)
+{
+    /*
+     * Multiply: D = S1*S2. Bit 23 is taken as the sign of each operand; the
+     * magnitudes are multiplied as four 12x12-bit partial products, the
+     * product is shifted left once, and it is negated when `signe`, toggled
+     * once per negative operand, ends up non-zero.
+     */
+    uint32_t lo1, hi1, lo2, hi2;
+    uint32_t lo_lo, hi_lo, lo_hi, hi_hi, carry;
+
+    /* Work on magnitudes */
+    if (source1 & (1<<23)) {
+        source1 = (1<<24) - source1;
+        signe ^= 1;
+    }
+    if (source2 & (1<<23)) {
+        source2 = (1<<24) - source2;
+        signe ^= 1;
+    }
+
+    /* Split each operand into 12-bit halves */
+    lo1 = source1 & BITMASK(12);
+    hi1 = (source1>>12) & BITMASK(12);
+    lo2 = source2 & BITMASK(12);
+    hi2 = (source2>>12) & BITMASK(12);
+    lo_lo = lo1*lo2;    /* weight 2^0  */
+    hi_lo = hi1*lo2;    /* weight 2^12 */
+    lo_hi = lo1*hi2;    /* weight 2^12 */
+    hi_hi = hi1*hi2;    /* weight 2^24 */
+
+    /* Bits 23:0 of the product, possibly with a carry above bit 23 */
+    dest[2] = lo_lo;
+    dest[2] += (hi_lo & BITMASK(12)) << 12;
+    dest[2] += (lo_hi & BITMASK(12)) << 12;
+
+    /* Bits 47:24 */
+    dest[1] = hi_hi;
+    dest[1] += (hi_lo>>12) & BITMASK(12);
+    dest[1] += (lo_hi>>12) & BITMASK(12);
+
+    /* Move what overflowed each 24-bit limb into the next one up */
+    carry = (dest[2]>>24) & BITMASK(8);
+    dest[1] += carry;
+    dest[2] &= BITMASK(24);
+    carry = (dest[1]>>24) & BITMASK(8);
+    dest[0] = carry;
+    dest[1] &= BITMASK(24);
+
+    /* Get rid of extra sign bit */
+    dsp_asl56(dest, 1);
+
+    if (signe) {
+        /* D = 0 - D */
+        uint32_t negated[3] = { 0, 0, 0 };
+        dsp_sub56(dest, negated);
+        dest[0] = negated[0];
+        dest[1] = negated[1];
+        dest[2] = negated[2];
+    }
+}
+
+static void dsp_rnd56(dsp_core_t* dsp, uint32_t *dest)
+{
+    /* Round dest at the position selected by the SR scaling bits (S0 is */
+    /* tested before S1): a rounding constant is added with dsp_add56(), */
+    /* then the bits below the rounding position are cleared. */
+    const uint32_t sr = dsp->registers[DSP_REG_SR];
+    uint32_t half[3] = { 0, 0, 0 };
+
+    if (sr & (1<<DSP_SR_S0)) {
+        /* Scaling mode S0 */
+        half[1] = 1;
+        dsp_add56(half, dest);
+
+        if ((dest[2]==0) && ((dest[1] & 1) == 0)) {
+            dest[1] &= (0xffffff - 0x3);
+        }
+        dest[1] &= 0xfffffe;
+        dest[2]=0;
+        return;
+    }
+    if (sr & (1<<DSP_SR_S1)) {
+        /* Scaling mode S1 */
+        half[2] = (1<<22);
+        dsp_add56(half, dest);
+
+        if ((dest[2] & 0x7fffff) == 0){
+            dest[2] = 0;
+        }
+        dest[2] &= 0x800000;
+        return;
+    }
+
+    /* No Scaling */
+    half[2] = (1<<23);
+    dsp_add56(half, dest);
+
+    if (dest[2] == 0) {
+        dest[1] &= 0xfffffe;
+    }
+    dest[2]=0;
+}
+
+static uint32_t dsp_signextend(int bits, uint32_t v) {
+    /* Sign-extend the low `bits` bits of v (bits in 1..31) to 32 bits. */
+    assert(bits > 0 && bits < 32);
+    const uint32_t sign = (uint32_t)1 << (bits - 1);   /* sign bit of the field */
+    return ((v & (sign | (sign - 1))) ^ sign) - sign;  /* unsigned only: no signed-shift UB */
+}
+
diff --git a/hw/xbox/dsp/dsp_cpu.h b/hw/xbox/dsp/dsp_cpu.h
new file mode 100644
index 0000000000..86b31561aa
--- /dev/null
+++ b/hw/xbox/dsp/dsp_cpu.h
@@ -0,0 +1,257 @@
+/*
+ * DSP56300 emulator
+ *
+ * Copyright (c) 2015 espes
+ *
+ * Adapted from Hatari DSP M56001 emulation
+ * (C) 2003-2008 ARAnyM developer team
+ * Adaption to Hatari (C) 2008 by Thomas Huth
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef DSP_CPU_H
+#define DSP_CPU_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+/* OMR bits. NOTE(review): presumably bit numbers like DSP_SR_* below; not used in the code shown here - confirm */
+#define DSP_OMR_MA  0x00
+#define DSP_OMR_MB  0x01
+#define DSP_OMR_DE  0x02
+#define DSP_OMR_SD  0x06
+#define DSP_OMR_EA  0x07
+/* Bit numbers in the status register SR; the code uses them as (1<<DSP_SR_x) */
+#define DSP_SR_C    0x00    /* carry */
+#define DSP_SR_V    0x01    /* overflow */
+#define DSP_SR_Z    0x02
+#define DSP_SR_N    0x03
+#define DSP_SR_U    0x04
+#define DSP_SR_E    0x05
+#define DSP_SR_L    0x06    /* raised together with V by dsp_add56()/dsp_sub56() */
+
+#define DSP_SR_I0   0x08    /* I1:I0 = interrupt mask level */
+#define DSP_SR_I1   0x09
+#define DSP_SR_S0   0x0a    /* S1:S0 = scaling mode */
+#define DSP_SR_S1   0x0b
+#define DSP_SR_T    0x0d    /* trace: requests DSP_INTER_TRACE */
+#define DSP_SR_LF   0x0f    /* a DO loop is active */
+/* Bit numbers in the stack pointer SP, above its 4-bit stack level */
+#define DSP_SP_SE   0x04    /* stack error */
+#define DSP_SP_UF   0x05    /* underflow */
+
+/* Register numbers: indices into dsp_core_s.registers[] */
+#define DSP_REG_X0  0x04
+#define DSP_REG_X1  0x05
+#define DSP_REG_Y0  0x06
+#define DSP_REG_Y1  0x07
+#define DSP_REG_A0  0x08
+#define DSP_REG_B0  0x09
+#define DSP_REG_A2  0x0a
+#define DSP_REG_B2  0x0b
+#define DSP_REG_A1  0x0c
+#define DSP_REG_B1  0x0d
+#define DSP_REG_A   0x0e    /* whole accumulator: dsp_write_reg() spreads it over A0/A1/A2 */
+#define DSP_REG_B   0x0f    /* same for B0/B1/B2 */
+
+#define DSP_REG_R0  0x10
+#define DSP_REG_R1  0x11
+#define DSP_REG_R2  0x12
+#define DSP_REG_R3  0x13
+#define DSP_REG_R4  0x14
+#define DSP_REG_R5  0x15
+#define DSP_REG_R6  0x16
+#define DSP_REG_R7  0x17
+
+#define DSP_REG_N0  0x18
+#define DSP_REG_N1  0x19
+#define DSP_REG_N2  0x1a
+#define DSP_REG_N3  0x1b
+#define DSP_REG_N4  0x1c
+#define DSP_REG_N5  0x1d
+#define DSP_REG_N6  0x1e
+#define DSP_REG_N7  0x1f
+
+#define DSP_REG_M0  0x20
+#define DSP_REG_M1  0x21
+#define DSP_REG_M2  0x22
+#define DSP_REG_M3  0x23
+#define DSP_REG_M4  0x24
+#define DSP_REG_M5  0x25
+#define DSP_REG_M6  0x26
+#define DSP_REG_M7  0x27
+
+#define DSP_REG_SR  0x39
+#define DSP_REG_OMR 0x3a
+#define DSP_REG_SP  0x3b
+#define DSP_REG_SSH 0x3c
+#define DSP_REG_SSL 0x3d
+#define DSP_REG_LA  0x3e
+#define DSP_REG_LC  0x3f
+
+#define DSP_REG_NULL    0x00
+#define DSP_REG_LCSAVE  0x30    /* slot holding the LC value restored when a REP ends */
+
+#define DSP_REG_MAX 0x40        /* number of entries in registers[] */
+
+/* Memory space selectors: the `space` argument of dsp56k_read_memory()/dsp56k_write_memory() */
+#define DSP_SPACE_X 0x00
+#define DSP_SPACE_Y 0x01
+#define DSP_SPACE_P 0x02
+/* Sizes of the xram[], yram[] and pram[] arrays, in words */
+#define DSP_XRAM_SIZE 3072
+#define DSP_YRAM_SIZE 2048
+#define DSP_PRAM_SIZE 4096
+/* X-space addresses of the two windows onto mixbuffer[]; both map the same array */
+#define DSP_MIXBUFFER_BASE 3072
+#define DSP_MIXBUFFER_SIZE 1024
+#define DSP_MIXBUFFER_READ_BASE 5120
+/* X-space addresses from DSP_PERIPH_BASE up go to the read_peripheral/write_peripheral callbacks */
+#define DSP_PERIPH_BASE 0xFFFF80
+#define DSP_PERIPH_SIZE 128
+/* Values of dsp_core_s.interrupt_state */
+#define DSP_INTERRUPT_NONE      0x0
+#define DSP_INTERRUPT_DISABLED  0x1     /* entry sequence of a fast interrupt is running */
+#define DSP_INTERRUPT_LONG      0x2
+/* Interrupt numbers: indices into interrupt_ipl[], interrupt_is_pending[] and dsp_interrupt[] */
+#define DSP_INTER_RESET         0x0
+#define DSP_INTER_ILLEGAL       0x1
+#define DSP_INTER_STACK_ERROR       0x2
+#define DSP_INTER_TRACE         0x3
+#define DSP_INTER_SWI           0x4
+#define DSP_INTER_HOST_COMMAND      0x5
+#define DSP_INTER_HOST_RCV_DATA     0x6
+#define DSP_INTER_HOST_TRX_DATA     0x7
+#define DSP_INTER_SSI_RCV_DATA_E    0x8
+#define DSP_INTER_SSI_RCV_DATA      0x9
+#define DSP_INTER_SSI_TRX_DATA_E    0xa
+#define DSP_INTER_SSI_TRX_DATA      0xb
+/* Values of dsp_core_s.disasm_mode */
+typedef enum {
+    DSP_TRACE_MODE,
+    DSP_DISASM_MODE
+} dsp_trace_disasm_t;
+/* Description of one interrupt; dsp_interrupt[] entries are read through .vectorAddr and .name */
+typedef struct dsp_interrupt_s {
+    const uint16_t inter;
+    const uint16_t vectorAddr;  /* P-memory address the PC is redirected to when the interrupt is taken */
+    const uint16_t periph;
+    const char *name;           /* printed in the "Dsp interrupt" debug message */
+} dsp_interrupt_t;
+/* Complete state of one emulated DSP core; every function of the emulator takes a pointer to it */
+typedef struct dsp_core_s dsp_core_t;
+
+struct dsp_core_s {
+    /* DSP instruction Cycle counter */
+    uint16_t instr_cycle;
+
+    /* Registers: the PC, then registers[] indexed by the DSP_REG_* numbers */
+    uint32_t pc;
+    uint32_t registers[DSP_REG_MAX];
+
+    /* stack[0=ssh], stack[1=ssl]; the entry in use is selected by the low 4 bits of SP */
+    uint32_t stack[2][16];
+
+    uint32_t xram[DSP_XRAM_SIZE];
+    uint32_t yram[DSP_YRAM_SIZE];
+    uint32_t pram[DSP_PRAM_SIZE];
+
+    uint32_t mixbuffer[DSP_MIXBUFFER_SIZE];     /* X space, at DSP_MIXBUFFER_BASE and DSP_MIXBUFFER_READ_BASE */
+
+    /* peripheral space, x:0xffff80-0xffffff. NOTE(review): memory accesses there go to the callbacks below - confirm this array is still used */
+    uint32_t periph[DSP_PERIPH_SIZE];
+
+    /* Misc */
+    uint32_t loop_rep;      /* executing rep ? */
+    uint32_t pc_on_rep;     /* True if PC is on REP instruction */
+
+    /* Interruptions */
+    uint16_t interrupt_state;        /* DSP_INTERRUPT_NONE, _DISABLED (fast interrupt entry running) or _LONG */
+    uint16_t interrupt_instr_fetch;        /* vector of the current interrupt */
+    uint16_t interrupt_save_pc;        /* save next pc value before interrupt */
+    uint16_t interrupt_counter;        /* count number of pending interrupts */
+    uint16_t interrupt_ipl_to_raise;     /* IPL level written to the SR register by a long interrupt */
+    uint16_t interrupt_pipeline_count; /* used to prefetch correctly the 2 inter instructions; counts 5 down to 0 */
+    int16_t interrupt_ipl[12];     /* store the current IPL for each interrupt, -1 if the interrupt is unused */
+    uint16_t interrupt_is_pending[12];  /* store if interrupt is pending for each interrupt */
+
+    /* callbacks used for X-space addresses >= DSP_PERIPH_BASE; asserted non-NULL when needed */
+    uint32_t (*read_peripheral)(dsp_core_t* core, uint32_t address);
+    void (*write_peripheral)(dsp_core_t* core, uint32_t address, uint32_t value);
+
+    /* runtime data */
+
+    /* Instructions per second */
+#ifdef DSP_COUNT_IPS
+    uint32_t start_time;
+#endif
+    uint32_t num_inst;
+
+    /* Length of current instruction */
+    uint32_t cur_inst_len; /* =0:jump, >0:increment */
+    /* Current instruction */
+    uint32_t cur_inst;
+
+    /* DSP is in disasm mode ? */
+    /* If yes, stack overflow, underflow and illegal instructions messages are not displayed */
+    bool executing_for_disasm;
+
+    char str_disasm_memory[2][50];     /* Buffer for memory change text in disasm mode */
+    uint32_t disasm_memory_ptr;        /* Number of str_disasm_memory[] entries filled so far */
+
+    bool exception_debugging;          /* when set, a stack error hits assert(false) */
+
+
+    /* disasm data */
+
+    /* Previous instruction */
+    uint32_t disasm_prev_inst_pc;
+    bool disasm_is_looping;
+
+    /* Used to display dc instead of unknown instruction for illegal opcodes */
+    dsp_trace_disasm_t disasm_mode;
+
+    uint32_t disasm_cur_inst;
+    uint16_t disasm_cur_inst_len;
+
+    /* Current instruction */
+    char disasm_str_instr[128];
+    char disasm_str_instr2[128];
+    char disasm_parallelmove_name[64];
+
+    /**********************************
+     *  Register change
+     **********************************/
+
+    uint32_t disasm_registers_save[64];    /* 64 entries, same count as registers[] (DSP_REG_MAX is 0x40) */
+#ifdef DSP_DISASM_REG_PC
+    uint32_t pc_save;
+#endif
+
+};
+
+/* Functions: each one works on the core passed as `dsp` */
+void dsp56k_reset_cpu(dsp_core_t* dsp);		/* Reset the given core (NOTE(review): definition not shown here - confirm what it resets) */
+void dsp56k_execute_instruction(dsp_core_t* dsp);	/* Execute 1 instruction */
+uint16_t dsp56k_execute_one_disasm_instruction(dsp_core_t* dsp, FILE *out, uint32_t pc);	/* Execute 1 instruction in disasm mode */
+/* Word access by memory space (DSP_SPACE_X/Y/P); address and value must fit in 24 bits (checked with assert) */
+uint32_t dsp56k_read_memory(dsp_core_t* dsp, int space, uint32_t address);
+void dsp56k_write_memory(dsp_core_t* dsp, int space, uint32_t address, uint32_t value);
+
+/* Interrupt related functions: post interrupt `inter` (a DSP_INTER_* number) as pending */
+void dsp56k_add_interrupt(dsp_core_t* dsp, uint16_t inter);
+
+#endif	/* DSP_CPU_H */
diff --git a/hw/xbox/dsp/dsp_dis.inl b/hw/xbox/dsp/dsp_dis.inl
new file mode 100644
index 0000000000..d8d3797ed2
--- /dev/null
+++ b/hw/xbox/dsp/dsp_dis.inl
@@ -0,0 +1,2217 @@
+/*
+ * DSP56300 disassembly routines
+ *
+ * Copyright (c) 2015 espes
+ *
+ * Adapted from Hatari DSP M56001 emulation
+ * (C) 2003-2008 ARAnyM developer team
+ * Adaption to Hatari (C) 2008 by Thomas Huth
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* Handler signature used by the disassembly dispatch tables (e.g. disasm_opcodes_parmove[]). */
+typedef void (*dis_func_t)(dsp_core_t* dsp);
+
+static const char *registers_name[64]={ /* text per 6-bit register number; "" = no name */
+    "","","","",
+    "x0","x1","y0","y1",
+    "a0","b0","a2","b2",
+    "a1","b1","a","b",
+    /* 0x10 - 0x1f */
+    "r0","r1","r2","r3",
+    "r4","r5","r6","r7",
+    "n0","n1","n2","n3",
+    "n4","n5","n6","n7",
+    /* 0x20 - 0x2f */
+    "m0","m1","m2","m3",
+    "m4","m5","m6","m7",
+    "","","","",
+    "","","","",
+    /* 0x30 - 0x3f */
+    "","","","",
+    "","","","",
+    "","sr","omr","sp",
+    "ssh","ssl","la","lc"
+};
+
+
+static const char* disasm_opcodes_alu[256] = { /* ALU operation text; NOTE(review): presumably indexed by the low opcode byte - lookup site not shown here */
+    /* 0x00 - 0x3f */
+    "move"     , "tfr b,a", "addr b,a", "tst a", "undefined", "cmp b,a"  , "subr b,a", "cmpm b,a",
+    "undefined", "tfr a,b", "addr a,b", "tst b", "undefined", "cmp a,b"  , "subr a,b", "cmpm a,b",
+    "add b,a"  , "rnd a"  , "addl b,a", "clr a", "sub b,a"  , "undefined", "subl b,a", "not a",
+    "add a,b"  , "rnd b"  , "addl a,b", "clr b", "sub a,b"  , "max a,b", "subl a,b", "not b",
+    "add x,a"  , "adc x,a", "asr a" , "lsr a", "sub x,a"  , "sbc x,a"  , "abs a" , "ror a",
+    "add x,b"  , "adc x,b", "asr b" , "lsr b", "sub x,b"  , "sbc x,b"  , "abs b" , "ror b",
+    "add y,a"  , "adc y,a", "asl a" , "lsl a", "sub y,a"  , "sbc y,a"  , "neg a" , "rol a",
+    "add y,b"  , "adc y,b", "asl b" , "lsl b", "sub y,b"  , "sbc y,b"  , "neg b" , "rol b",
+
+    /* 0x40 - 0x7f */
+    "add x0,a", "tfr x0,a", "or x0,a", "eor x0,a", "sub x0,a", "cmp x0,a", "and x0,a", "cmpm x0,a",
+    "add x0,b", "tfr x0,b", "or x0,b", "eor x0,b", "sub x0,b", "cmp x0,b", "and x0,b", "cmpm x0,b",
+    "add y0,a", "tfr y0,a", "or y0,a", "eor y0,a", "sub y0,a", "cmp y0,a", "and y0,a", "cmpm y0,a",
+    "add y0,b", "tfr y0,b", "or y0,b", "eor y0,b", "sub y0,b", "cmp y0,b", "and y0,b", "cmpm y0,b",
+    "add x1,a", "tfr x1,a", "or x1,a", "eor x1,a", "sub x1,a", "cmp x1,a", "and x1,a", "cmpm x1,a",
+    "add x1,b", "tfr x1,b", "or x1,b", "eor x1,b", "sub x1,b", "cmp x1,b", "and x1,b", "cmpm x1,b",
+    "add y1,a", "tfr y1,a", "or y1,a", "eor y1,a", "sub y1,a", "cmp y1,a", "and y1,a", "cmpm y1,a",
+    "add y1,b", "tfr y1,b", "or y1,b", "eor y1,b", "sub y1,b", "cmp y1,b", "and y1,b", "cmpm y1,b",
+
+    /* 0x80 - 0xbf */
+    "mpy +x0,x0,a", "mpyr +x0,x0,a", "mac +x0,x0,a", "macr +x0,x0,a", "mpy -x0,x0,a", "mpyr -x0,x0,a", "mac -x0,x0,a", "macr -x0,x0,a",
+    "mpy +x0,x0,b", "mpyr +x0,x0,b", "mac +x0,x0,b", "macr +x0,x0,b", "mpy -x0,x0,b", "mpyr -x0,x0,b", "mac -x0,x0,b", "macr -x0,x0,b",
+    "mpy +y0,y0,a", "mpyr +y0,y0,a", "mac +y0,y0,a", "macr +y0,y0,a", "mpy -y0,y0,a", "mpyr -y0,y0,a", "mac -y0,y0,a", "macr -y0,y0,a",
+    "mpy +y0,y0,b", "mpyr +y0,y0,b", "mac +y0,y0,b", "macr +y0,y0,b", "mpy -y0,y0,b", "mpyr -y0,y0,b", "mac -y0,y0,b", "macr -y0,y0,b",
+    "mpy +x1,x0,a", "mpyr +x1,x0,a", "mac +x1,x0,a", "macr +x1,x0,a", "mpy -x1,x0,a", "mpyr -x1,x0,a", "mac -x1,x0,a", "macr -x1,x0,a",
+    "mpy +x1,x0,b", "mpyr +x1,x0,b", "mac +x1,x0,b", "macr +x1,x0,b", "mpy -x1,x0,b", "mpyr -x1,x0,b", "mac -x1,x0,b", "macr -x1,x0,b",
+    "mpy +y1,y0,a", "mpyr +y1,y0,a", "mac +y1,y0,a", "macr +y1,y0,a", "mpy -y1,y0,a", "mpyr -y1,y0,a", "mac -y1,y0,a", "macr -y1,y0,a",
+    "mpy +y1,y0,b", "mpyr +y1,y0,b", "mac +y1,y0,b", "macr +y1,y0,b", "mpy -y1,y0,b", "mpyr -y1,y0,b", "mac -y1,y0,b", "macr -y1,y0,b",
+
+    /* 0xc0 - 0xff */
+    "mpy +x0,y1,a", "mpyr +x0,y1,a", "mac +x0,y1,a", "macr +x0,y1,a", "mpy -x0,y1,a", "mpyr -x0,y1,a", "mac -x0,y1,a", "macr -x0,y1,a",
+    "mpy +x0,y1,b", "mpyr +x0,y1,b", "mac +x0,y1,b", "macr +x0,y1,b", "mpy -x0,y1,b", "mpyr -x0,y1,b", "mac -x0,y1,b", "macr -x0,y1,b",
+    "mpy +y0,x0,a", "mpyr +y0,x0,a", "mac +y0,x0,a", "macr +y0,x0,a", "mpy -y0,x0,a", "mpyr -y0,x0,a", "mac -y0,x0,a", "macr -y0,x0,a",
+    "mpy +y0,x0,b", "mpyr +y0,x0,b", "mac +y0,x0,b", "macr +y0,x0,b", "mpy -y0,x0,b", "mpyr -y0,x0,b", "mac -y0,x0,b", "macr -y0,x0,b",
+    "mpy +x1,y0,a", "mpyr +x1,y0,a", "mac +x1,y0,a", "macr +x1,y0,a", "mpy -x1,y0,a", "mpyr -x1,y0,a", "mac -x1,y0,a", "macr -x1,y0,a",
+    "mpy +x1,y0,b", "mpyr +x1,y0,b", "mac +x1,y0,b", "macr +x1,y0,b", "mpy -x1,y0,b", "mpyr -x1,y0,b", "mac -x1,y0,b", "macr -x1,y0,b",
+    "mpy +y1,x1,a", "mpyr +y1,x1,a", "mac +y1,x1,a", "macr +y1,x1,a", "mpy -y1,x1,a", "mpyr -y1,x1,a", "mac -y1,x1,a", "macr -y1,x1,a",
+    "mpy +y1,x1,b", "mpyr +y1,x1,b", "mac +y1,x1,b", "macr +y1,x1,b", "mpy -y1,x1,b", "mpyr -y1,x1,b", "mac -y1,x1,b", "macr -y1,x1,b"
+};
+
+static void dis_pm_0(dsp_core_t* dsp); /* forward declarations for the handlers referenced by the table below */
+static void dis_pm_1(dsp_core_t* dsp);
+static void dis_pm_2(dsp_core_t* dsp);
+static void dis_pm_4(dsp_core_t* dsp);
+static void dis_pm_8(dsp_core_t* dsp);
+/* 16-way dispatch: slots 2-3, 4-7 and 8-15 share a handler. NOTE(review): index is presumably the top opcode nibble - lookup site not shown here */
+static const dis_func_t disasm_opcodes_parmove[16] = {
+    dis_pm_0,
+    dis_pm_1,
+    dis_pm_2,
+    dis_pm_2,
+    dis_pm_4,
+    dis_pm_4,
+    dis_pm_4,
+    dis_pm_4,
+    /* 8 - 15 */
+    dis_pm_8,
+    dis_pm_8,
+    dis_pm_8,
+    dis_pm_8,
+    dis_pm_8,
+    dis_pm_8,
+    dis_pm_8,
+    dis_pm_8
+};
+
+static const char *registers_lmove[8] = { /* NOTE(review): presumably the 3-bit operand selector of long moves - lookup site not shown here */
+    "a10",
+    "b10",
+    "x",
+    "y",
+    "a",
+    "b",
+    "ab",
+    "ba"
+};
+
+static const char *ea_names[9] = { /* sprintf patterns for dis_calc_ea(); 0..7 = ea_mode bits 5..3 */
+    "(r%d)-n%d",    /* 000xxx */
+    "(r%d)+n%d",    /* 001xxx */
+    "(r%d)-",       /* 010xxx */
+    "(r%d)+",       /* 011xxx */
+    "(r%d)",        /* 100xxx */
+    "(r%d+n%d)",    /* 101xxx */
+    "$%04x",        /* 110000: absolute address */
+    "-(r%d)",       /* 111xxx */
+    "$%06x"     /* 110100: immediate value; extra slot that dis_calc_ea() selects explicitly */
+};
+
+static const char *cc_name[16] = { /* condition suffixes, indexed by the 4-bit code given to dis_calc_cc() */
+    "cc",
+    "ge",
+    "ne",
+    "pl",
+    "nn",
+    "ec",
+    "lc",
+    "gt",
+    /* 8 - 15 */
+    "cs",
+    "lt",
+    "eq",
+    "mi",
+    "nr",
+    "es",
+    "ls",
+    "le"
+};
+
+
+/**********************************
+ *  Condition code calculation
+ **********************************/
+
+static void dis_calc_cc(dsp_core_t* dsp, uint32_t cc_mode, char *dest)
+{ /* Copies cc_name[low four bits of cc_mode] into dest with an unbounded strcpy(); dsp is not used. */
+    strcpy(dest, cc_name[cc_mode & BITMASK(4)]);
+}
+
+/**********************************
+ *  Effective address calculation
+ **********************************/
+
+static int dis_calc_ea(dsp_core_t* dsp, uint32_t ea_mode, char *dest)
+{
+    /*
+     * Render the 6-bit effective-address field ea_mode as text in dest.
+     *
+     * Bits 5..3 of ea_mode select the addressing mode (and the matching
+     * ea_names[] pattern); bits 2..0 are the register number.  Mode 6
+     * has no register: it prints the word that read_memory_p() returns
+     * for pc+1 and adds one to disasm_cur_inst_len, with bit 2 of
+     * ea_mode telling an immediate value from an absolute address.
+     *
+     * Returns 1 when an immediate value was printed, 0 otherwise.
+     *
+     * dest is written with sprintf() and no size check, so the caller
+     * must supply a buffer big enough for the longest pattern (the
+     * callers visible in this file pass char[16]).
+     */
+    const int mode = (ea_mode >> 3) & BITMASK(3);
+    const int reg = ea_mode & BITMASK(3);
+    int is_immediate = 0;
+
+    switch (mode) {
+        case 0: /* (Rx)-Nx */
+        case 1: /* (Rx)+Nx */
+        case 5: /* (Rx+Nx) */
+            /* Patterns that name both the address and offset register. */
+            sprintf(dest, ea_names[mode], reg, reg);
+            break;
+
+        case 2: /* (Rx)- */
+        case 3: /* (Rx)+ */
+        case 4: /* (Rx) */
+        case 7: /* -(Rx) */
+            /* Patterns that name the address register only. */
+            sprintf(dest, ea_names[mode], reg);
+            break;
+
+        case 6:
+            /* The operand is the word following the opcode. */
+            dsp->disasm_cur_inst_len++;
+            if ((ea_mode >> 2) & 1) {
+                /* Immediate value: uses the ninth ea_names[] entry. */
+                sprintf(dest, ea_names[8], read_memory_p(dsp, dsp->pc+1));
+                is_immediate = 1;
+            } else {
+                /* Absolute address */
+                sprintf(dest, ea_names[mode], read_memory_p(dsp, dsp->pc+1));
+            }
+            break;
+    }
+
+    return is_immediate;
+}
+
+/**********************************
+ *  Non-parallel-move instructions
+ **********************************/
+
+static void dis_undefined(dsp_core_t* dsp)
+{
+    /* Fallback text for an undefined opcode: "dc <opcode>" in
+     * DSP_DISASM_MODE, an "unknown instruction" note in any other mode. */
+    const char *pattern = "$%06x unknown instruction";
+    if (dsp->disasm_mode == DSP_DISASM_MODE) {
+        pattern = "dc $%06x";
+    }
+    sprintf(dsp->disasm_str_instr, pattern, dsp->disasm_cur_inst);
+}
+
+static void dis_add_imm(dsp_core_t* dsp)
+{
+    /* "add #xx,D": xx = 6-bit field at bit 8; bit 3 set selects DSP_REG_B, clear DSP_REG_A. */
+    const char *acc = registers_name[((dsp->disasm_cur_inst >> 3) & 1) ? DSP_REG_B : DSP_REG_A];
+    sprintf(dsp->disasm_str_instr, "add #$%02x,%s", (dsp->disasm_cur_inst >> 8) & BITMASK(6), acc);
+}
+
+static void dis_add_long(dsp_core_t* dsp)
+{
+    /* "add #xxxx,D": the immediate is the extension word at pc+1; bit 3 set selects DSP_REG_B. */
+    const uint32_t immediate = read_memory_p(dsp, dsp->pc+1);
+    const char *acc = registers_name[((dsp->disasm_cur_inst >> 3) & 1) ? DSP_REG_B : DSP_REG_A];
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr, "add #$%04x,%s", immediate, acc);
+}
+
+static void dis_and_imm(dsp_core_t* dsp)
+{
+    /* "and #xx,D": xx = 6-bit field at bit 8; bit 3 set selects DSP_REG_B, clear DSP_REG_A. */
+    const char *acc = registers_name[((dsp->disasm_cur_inst >> 3) & 1) ? DSP_REG_B : DSP_REG_A];
+    sprintf(dsp->disasm_str_instr, "and #$%02x,%s", (dsp->disasm_cur_inst >> 8) & BITMASK(6), acc);
+}
+
+static void dis_and_long(dsp_core_t* dsp)
+{
+    /* "and #xxxx,D": the immediate is the extension word at pc+1; bit 3 set selects DSP_REG_B. */
+    const char *acc = registers_name[((dsp->disasm_cur_inst >> 3) & 1) ? DSP_REG_B : DSP_REG_A];
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr, "and #$%04x,%s", read_memory_p(dsp, dsp->pc+1), acc);
+}
+
+static void dis_andi(dsp_core_t* dsp)
+{
+    /*
+     * "andi #xx,D": xx is the 8-bit field at bit 8 and the low two opcode
+     * bits pick D (0 = mr, 1 = ccr, 2 = omr).  Encoding 3 has no name here
+     * and goes to dis_undefined() so disasm_str_instr is always written.
+     */
+    static const char *const dest_name[3] = { "mr", "ccr", "omr" };
+    const uint32_t dest = dsp->disasm_cur_inst & BITMASK(2);
+    const uint32_t imm = (dsp->disasm_cur_inst>>8) & BITMASK(8);
+    if (dest < 3) {
+        sprintf(dsp->disasm_str_instr, "andi #$%02x,%s", imm, dest_name[dest]);
+    } else {
+        dis_undefined(dsp);
+    }
+}
+
+static void dis_asl_imm(dsp_core_t* dsp)
+{
+    /* "asl #ii,S,D": ii = 6-bit field at bit 1, S = bit 7, D = bit 0;
+     * a set S/D bit selects DSP_REG_B, a clear one DSP_REG_A. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const char *src = registers_name[((opcode >> 7) & 1) ? DSP_REG_B : DSP_REG_A];
+    const char *dst = registers_name[(opcode & 1) ? DSP_REG_B : DSP_REG_A];
+
+    sprintf(dsp->disasm_str_instr, "asl #$%02x,%s,%s", (opcode >> 1) & BITMASK(6), src, dst);
+}
+
+static void dis_asr_imm(dsp_core_t* dsp)
+{
+    /* "asr #ii,S,D": ii = 6-bit field at bit 1, S = bit 7, D = bit 0;
+     * a set S/D bit selects DSP_REG_B, a clear one DSP_REG_A. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const char *src = registers_name[((opcode >> 7) & 1) ? DSP_REG_B : DSP_REG_A];
+    const char *dst = registers_name[(opcode & 1) ? DSP_REG_B : DSP_REG_A];
+
+    sprintf(dsp->disasm_str_instr, "asr #$%02x,%s,%s", (opcode >> 1) & BITMASK(6), src, dst);
+}
+
+static void dis_bcc_long(dsp_core_t* dsp) {
+    /* Conditional branch, long form: condition = low four opcode bits,
+     * target = pc + extension word at pc+1, masked with BITMASK(24). */
+    char cond_text[16];
+    uint32_t displacement;
+
+    dsp->disasm_cur_inst_len++;
+    displacement = read_memory_p(dsp, dsp->pc+1);
+    dis_calc_cc(dsp, dsp->disasm_cur_inst & BITMASK(4), cond_text);
+    sprintf(dsp->disasm_str_instr, "b%s p:$%06x",
+        cond_text, (dsp->pc + displacement) & BITMASK(24));
+}
+
+static void dis_bcc_imm(dsp_core_t* dsp) {
+    /* Conditional branch, short form: condition = 4-bit field at bit 12;
+     * the 9-bit displacement joins opcode bits 4..0 with bits 9..6. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t low_part = opcode & BITMASK(5);
+    const uint32_t high_part = (opcode & (BITMASK(4) << 6)) >> 1;
+    char cond_text[16];
+
+    dis_calc_cc(dsp, (opcode >> 12) & BITMASK(4), cond_text);
+    sprintf(dsp->disasm_str_instr,"b%s p:$%06x",
+        cond_text, (dsp->pc + dsp_signextend(9, low_part + high_part)) & BITMASK(24) );
+}
+
+static void dis_bchg_aa(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bchg #n,x:aa" / "bchg #n,y:aa" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the short address.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%04x" : "x:$%04x", short_addr);
+
+    sprintf(dsp->disasm_str_instr,"bchg #%d,%s", bit_index, operand);
+}
+
+static void dis_bchg_ea(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bchg #n,x:ea" / "bchg #n,y:ea" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the effective-address mode.  dis_calc_ea() renders that
+     * mode and bumps disasm_cur_inst_len itself when it needs pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const int use_y = (opcode >> 6) & 1;
+    char ea_text[16], operand[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    sprintf(operand, use_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr,"bchg #%d,%s", bit_index, operand);
+}
+
+static void dis_bchg_pp(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bchg #n,x:pp" / "bchg #n,y:pp" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 an offset that is added to 0xffffc0 for display.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t pp_addr = ((opcode >> 8) & BITMASK(6)) + 0xffffc0;
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%06x" : "x:$%06x", pp_addr);
+
+    sprintf(dsp->disasm_str_instr,"bchg #%d,%s", bit_index, operand);
+}
+
+static void dis_bchg_reg(dsp_core_t* dsp)
+{
+    /* Disassemble "bchg #n,R": bits 4..0 give the bit number and the
+     * 6-bit field at bit 8 indexes registers_name[]. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const char *reg_text = registers_name[(opcode >> 8) & BITMASK(6)];
+
+    sprintf(dsp->disasm_str_instr,"bchg #%d,%s", bit_index, reg_text);
+}
+
+static void dis_bclr_aa(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bclr #n,x:aa" / "bclr #n,y:aa" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the short address.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%04x" : "x:$%04x", short_addr);
+
+    sprintf(dsp->disasm_str_instr,"bclr #%d,%s", bit_index, operand);
+}
+
+static void dis_bclr_ea(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bclr #n,x:ea" / "bclr #n,y:ea" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the effective-address mode.  dis_calc_ea() renders that
+     * mode and bumps disasm_cur_inst_len itself when it needs pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const int use_y = (opcode >> 6) & 1;
+    char ea_text[16], operand[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    sprintf(operand, use_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr,"bclr #%d,%s", bit_index, operand);
+}
+
+static void dis_bclr_pp(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bclr #n,x:pp" / "bclr #n,y:pp" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 an offset that is added to 0xffffc0 for display.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t pp_addr = ((opcode >> 8) & BITMASK(6)) + 0xffffc0;
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%06x" : "x:$%06x", pp_addr);
+
+    sprintf(dsp->disasm_str_instr,"bclr #%d,%s", bit_index, operand);
+}
+
+static void dis_bclr_reg(dsp_core_t* dsp)
+{
+    /* Disassemble "bclr #n,R": bits 4..0 give the bit number and the
+     * 6-bit field at bit 8 indexes registers_name[]. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const char *reg_text = registers_name[(opcode >> 8) & BITMASK(6)];
+
+    sprintf(dsp->disasm_str_instr,"bclr #%d,%s", bit_index, reg_text);
+}
+
+static void dis_bra_long(dsp_core_t* dsp)
+{
+    /* "bra", long form: target = pc + extension word at pc+1, masked with BITMASK(24). */
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr, "bra p:$%06x", (dsp->pc + displacement) & BITMASK(24));
+}
+
+static void dis_bra_imm(dsp_core_t* dsp)
+{
+    /* "bra", short form: 9-bit signed displacement built from opcode bits 4..0 and 9..6. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t disp9 = (opcode & BITMASK(5)) + ((opcode & (BITMASK(4) << 6)) >> 1);
+    sprintf(dsp->disasm_str_instr, "bra p:$%04x", (dsp->pc + dsp_signextend(9, disp9)) & BITMASK(24) );
+}
+
+static void dis_brclr_pp(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble brclr on an x:pp / y:pp operand.  Bits 4..0 give the
+     * bit number, bit 6 selects y: over x:, and the 6-bit field at bit 8
+     * is added to 0xffffc0.  The branch target is pc plus the extension
+     * word read at pc+1, masked with BITMASK(24).
+     */
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t pp_addr = ((opcode >> 8) & BITMASK(6)) + 0xffffc0;
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+    dsp->disasm_cur_inst_len++;
+    sprintf(operand, use_y ? "y:$%06x" : "x:$%06x", pp_addr);
+    sprintf(dsp->disasm_str_instr,"brclr #%d,%s,p:$%06x",
+        bit_index, operand, (dsp->pc + displacement) & BITMASK(24) );
+}
+
+static void dis_brclr_reg(dsp_core_t* dsp)
+{
+    /* Disassemble brclr on a register operand (6-bit field at bit 8); the
+     * target is pc plus the extension word at pc+1, masked with BITMASK(24). */
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const char *reg_text = registers_name[(opcode >> 8) & BITMASK(6)];
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr, "brclr #%d,%s,p:$%04x",
+        opcode & BITMASK(5), reg_text, (dsp->pc + displacement) & BITMASK(24));
+}
+
+static void dis_brset_pp(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble brset on an x:pp / y:pp operand.  Bits 4..0 give the
+     * bit number, bit 6 selects y: over x:, and the 6-bit field at bit 8
+     * is added to 0xffffc0.  The branch target is pc plus the extension
+     * word read at pc+1, masked with BITMASK(24).
+     */
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t pp_addr = ((opcode >> 8) & BITMASK(6)) + 0xffffc0;
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+    dsp->disasm_cur_inst_len++;
+    sprintf(operand, use_y ? "y:$%06x" : "x:$%06x", pp_addr);
+    sprintf(dsp->disasm_str_instr,"brset #%d,%s,p:$%06x",
+        bit_index, operand, (dsp->pc + displacement) & BITMASK(24) );
+}
+
+static void dis_brset_reg(dsp_core_t* dsp)
+{
+    /* Disassemble brset on a register operand (6-bit field at bit 8); the
+     * target is pc plus the extension word at pc+1, masked with BITMASK(24). */
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const char *reg_text = registers_name[(opcode >> 8) & BITMASK(6)];
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr, "brset #%d,%s,p:$%04x",
+        opcode & BITMASK(5), reg_text, (dsp->pc + displacement) & BITMASK(24));
+}
+
+static void dis_bset_aa(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bset #n,x:aa" / "bset #n,y:aa" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the short address.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%04x" : "x:$%04x", short_addr);
+
+    sprintf(dsp->disasm_str_instr,"bset #%d,%s", bit_index, operand);
+}
+
+static void dis_bset_ea(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bset #n,x:ea" / "bset #n,y:ea" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the effective-address mode.  dis_calc_ea() renders that
+     * mode and bumps disasm_cur_inst_len itself when it needs pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const int use_y = (opcode >> 6) & 1;
+    char ea_text[16], operand[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    sprintf(operand, use_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr,"bset #%d,%s", bit_index, operand);
+}
+
+static void dis_bset_pp(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "bset #n,x:pp" / "bset #n,y:pp" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 an offset that is added to 0xffffc0 for display.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t pp_addr = ((opcode >> 8) & BITMASK(6)) + 0xffffc0;
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%06x" : "x:$%06x", pp_addr);
+
+    sprintf(dsp->disasm_str_instr,"bset #%d,%s", bit_index, operand);
+}
+
+static void dis_bset_reg(dsp_core_t* dsp)
+{
+    /* Disassemble "bset #n,R": bits 4..0 give the bit number and the
+     * 6-bit field at bit 8 indexes registers_name[]. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const char *reg_text = registers_name[(opcode >> 8) & BITMASK(6)];
+
+    sprintf(dsp->disasm_str_instr,"bset #%d,%s", bit_index, reg_text);
+}
+
+static void dis_bsr_long(dsp_core_t* dsp)
+{
+    /* "bsr", long form: target = pc + extension word at pc+1, masked with BITMASK(24). */
+    dsp->disasm_cur_inst_len++;
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    sprintf(dsp->disasm_str_instr, "bsr p:$%06x", (dsp->pc + displacement) & BITMASK(24));
+}
+
+static void dis_bsr_imm(dsp_core_t* dsp)
+{
+    /* "bsr", short form: 9-bit signed displacement built from opcode bits 4..0 and 9..6. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t disp9 = (opcode & BITMASK(5)) + ((opcode & (BITMASK(4) << 6)) >> 1);
+    sprintf(dsp->disasm_str_instr, "bsr p:$%04x", (dsp->pc + dsp_signextend(9, disp9)) & BITMASK(24) );
+}
+
+static void dis_btst_aa(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "btst #n,x:aa" / "btst #n,y:aa" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the short address.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%04x" : "x:$%04x", short_addr);
+
+    sprintf(dsp->disasm_str_instr,"btst #%d,%s", bit_index, operand);
+}
+
+static void dis_btst_ea(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "btst #n,x:ea" / "btst #n,y:ea" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the effective-address mode.  dis_calc_ea() renders that
+     * mode and bumps disasm_cur_inst_len itself when it needs pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const int use_y = (opcode >> 6) & 1;
+    char ea_text[16], operand[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    sprintf(operand, use_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr,"btst #%d,%s", bit_index, operand);
+}
+
+static void dis_btst_pp(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "btst #n,x:pp" / "btst #n,y:pp" into disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 an offset that is added to 0xffffc0 for display.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t pp_addr = ((opcode >> 8) & BITMASK(6)) + 0xffffc0;
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    sprintf(operand, use_y ? "y:$%06x" : "x:$%06x", pp_addr);
+
+    sprintf(dsp->disasm_str_instr,"btst #%d,%s", bit_index, operand);
+}
+
+static void dis_btst_reg(dsp_core_t* dsp)
+{
+    /* Disassemble "btst #n,R": bits 4..0 give the bit number and the
+     * 6-bit field at bit 8 indexes registers_name[]. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const char *reg_text = registers_name[(opcode >> 8) & BITMASK(6)];
+
+    sprintf(dsp->disasm_str_instr,"btst #%d,%s", bit_index, reg_text);
+}
+
+static void dis_cmp_imm(dsp_core_t* dsp) {
+    /* "cmp #xx,D": xx = 6-bit field at bit 8; bit 3 set selects DSP_REG_B, clear DSP_REG_A. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const char *acc = registers_name[((opcode >> 3) & 1) ? DSP_REG_B : DSP_REG_A];
+
+    sprintf(dsp->disasm_str_instr, "cmp #$%02x,%s", (opcode >> 8) & BITMASK(6), acc);
+}
+
+static void dis_cmp_long(dsp_core_t* dsp) {
+    /* "cmp #xxxx,D": the immediate is the extension word at pc+1; bit 3 set selects DSP_REG_B. */
+    const uint32_t immediate = read_memory_p(dsp, dsp->pc+1);
+    const char *acc = registers_name[((dsp->disasm_cur_inst >> 3) & 1) ? DSP_REG_B : DSP_REG_A];
+
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr, "cmp #$%06x,%s", immediate, acc);
+}
+
+static void dis_cmpu(dsp_core_t* dsp) {
+    /* "cmpu S,D": bit 0 set makes D DSP_REG_B (else DSP_REG_A); the 3-bit
+     * field at bit 1 picks S, and values 1..3 leave it at DSP_REG_NULL. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t use_b = opcode & 1;
+    uint32_t src = DSP_REG_NULL;
+    switch ((opcode >> 1) & BITMASK(3)) {
+    case 0: src = use_b ? DSP_REG_A : DSP_REG_B; break;
+    case 4: src = DSP_REG_X0; break;
+    case 5: src = DSP_REG_Y0; break;
+    case 6: src = DSP_REG_X1; break;
+    case 7: src = DSP_REG_Y1; break;
+    }
+    sprintf(dsp->disasm_str_instr, "cmpu %s,%s",
+        registers_name[src], registers_name[use_b ? DSP_REG_B : DSP_REG_A]);
+}
+
+static void dis_div(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "div S,D": the 2-bit field at bit 4 selects the source
+     * (DSP_REG_X0, _Y0, _X1, _Y1 in encoding order) and bit 3 is added
+     * to DSP_REG_A to select the destination register.
+     */
+    const uint32_t sources[4] = { DSP_REG_X0, DSP_REG_Y0, DSP_REG_X1, DSP_REG_Y1 };
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t src = sources[(opcode >> 4) & BITMASK(2)];
+    const uint32_t dst = DSP_REG_A + ((opcode >> 3) & 1);
+
+    sprintf(dsp->disasm_str_instr,"div %s,%s", registers_name[src], registers_name[dst]);
+}
+
+static void dis_do_aa(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "do" with a short-address operand: bit 6 selects y:
+     * over x:, the 6-bit field at bit 8 is the address, and the p:
+     * operand is the extension word read at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    char operand[16];
+
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(operand, (opcode & (1<<6)) ? "y:$%04x" : "x:$%04x", short_addr);
+    sprintf(dsp->disasm_str_instr,"do %s,p:$%04x",
+        operand, read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_do_imm(dsp_core_t* dsp)
+{
+    /* "do #xxx,p:xx": the 12-bit immediate joins the 8-bit field at bit 8
+     * (low byte) with opcode bits 3..0 (high nibble). */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t immediate = ((opcode >> 8) & BITMASK(8)) | ((opcode & BITMASK(4)) << 8);
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr,"do #$%04x,p:$%04x", immediate, read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_do_ea(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "do" with an effective-address operand: bit 6 selects
+     * y: over x:, the 6-bit field at bit 8 is rendered by dis_calc_ea(),
+     * and the p: operand is the extension word read at pc+1.
+     *
+     * NOTE(review): dis_calc_ea() mode 6 would read that same pc+1 word
+     * and bump the length a second time - confirm the decoder never
+     * routes that mode here.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char ea_text[16], operand[16];
+
+    dsp->disasm_cur_inst_len++;
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+
+    sprintf(operand, (opcode & (1<<6)) ? "y:%s" : "x:%s", ea_text);
+    sprintf(dsp->disasm_str_instr,"do %s,p:$%04x",
+        operand, read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_do_reg(dsp_core_t* dsp)
+{
+    /* "do" with a register operand (6-bit field at bit 8, looked up in
+     * registers_name[]); the p: operand is the extension word at pc+1. */
+    const char *reg_text = registers_name[(dsp->disasm_cur_inst>>8) & BITMASK(6)];
+
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr,"do %s,p:$%04x", reg_text, read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_dor_imm(dsp_core_t* dsp)
+{
+    /* "dor" with a 12-bit immediate (8-bit field at bit 8 plus opcode bits
+     * 3..0 as the high nibble); the p: operand is pc + extension word. */
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t immediate = ((opcode >> 8) & BITMASK(8)) | ((opcode & BITMASK(4)) << 8);
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr,"dor #$%04x,p:$%04x", immediate, (dsp->pc + displacement) & BITMASK(24));
+}
+
+static void dis_dor_reg(dsp_core_t* dsp)
+{
+    /* "dor" with a register operand (6-bit field at bit 8, looked up in
+     * registers_name[]); the p: operand is pc + extension word at pc+1. */
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    const char *reg_text = registers_name[(dsp->disasm_cur_inst >> 8) & BITMASK(6)];
+
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr,"dor %s,p:$%04x", reg_text, (dsp->pc + displacement) & BITMASK(24));
+}
+
+static void dis_jcc_ea(dsp_core_t* dsp)
+{
+    /* "jCC p:ea": condition = low four opcode bits; the target is the
+     * 6-bit effective-address field at bit 8, rendered by dis_calc_ea(). */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char cond_text[16], target_text[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), target_text);
+    dis_calc_cc(dsp, opcode & BITMASK(4), cond_text);
+    sprintf(dsp->disasm_str_instr,"j%s p:%s", cond_text, target_text);
+}
+
+static void dis_jcc_imm(dsp_core_t* dsp)
+{
+    /* "jCC p:xxx": condition = 4-bit field at bit 12; the target is the
+     * low 12 opcode bits. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char cond_text[16], target_text[16];
+
+    sprintf(target_text, "$%04x", opcode & BITMASK(12));
+    dis_calc_cc(dsp, (opcode >> 12) & BITMASK(4), cond_text);
+    sprintf(dsp->disasm_str_instr,"j%s p:%s", cond_text, target_text);
+}
+
+static void dis_jclr_aa(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "jclr #n,x:aa,p:xx" / "jclr #n,y:aa,p:xx" into
+     * disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the short address.  The p: operand is the word read at
+     * pc+1, so the instruction length is bumped by one.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+    uint32_t target;
+
+    dsp->disasm_cur_inst_len++;
+    target = read_memory_p(dsp, dsp->pc+1);
+
+    sprintf(operand, use_y ? "y:$%04x" : "x:$%04x", short_addr);
+    sprintf(dsp->disasm_str_instr,"jclr #%d,%s,p:$%04x",
+        bit_index, operand, target);
+}
+
+static void dis_jclr_ea(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "jclr #n,x:ea,p:xx" / "jclr #n,y:ea,p:xx" into
+     * disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the effective-address mode rendered by dis_calc_ea().  The
+     * p: operand is the word read at pc+1, so the length is bumped by one.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const int use_y = (opcode >> 6) & 1;
+    char ea_text[16], operand[16];
+    uint32_t target;
+
+    dsp->disasm_cur_inst_len++;
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    target = read_memory_p(dsp, dsp->pc+1);
+
+    sprintf(operand, use_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr,"jclr #%d,%s,p:$%04x",
+        bit_index, operand, target);
+}
+
+static void dis_jclr_pp(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "jclr #n,x:pp,p:xx" / "jclr #n,y:pp,p:xx" into
+     * disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 an offset that is added to 0xffffc0.  The p: operand is
+     * the word read at pc+1, so the instruction length is bumped by one.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t pp_addr = ((opcode >> 8) & BITMASK(6)) + 0xffffc0;
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+    uint32_t target;
+
+    dsp->disasm_cur_inst_len++;
+    target = read_memory_p(dsp, dsp->pc+1);
+
+    sprintf(operand, use_y ? "y:$%06x" : "x:$%06x", pp_addr);
+
+    sprintf(dsp->disasm_str_instr,"jclr #%d,%s,p:$%04x",
+        bit_index, operand, target);
+}
+
+static void dis_jclr_reg(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "jclr #n,R,p:xx" into disasm_str_instr: bits 4..0 give
+     * the bit number, the 6-bit field at bit 8 indexes registers_name[],
+     * and the p: operand is the extension word read at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const char *reg_text = registers_name[(opcode >> 8) & BITMASK(6)];
+
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(dsp->disasm_str_instr,"jclr #%d,%s,p:$%04x",
+        bit_index, reg_text, read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jmp_imm(dsp_core_t* dsp)
+{ /* Writes "jmp p:$xxxx" to disasm_str_instr, using the low 12 opcode bits as the address. */
+    sprintf(dsp->disasm_str_instr,"jmp p:$%04x", dsp->disasm_cur_inst & BITMASK(12));
+}
+
+static void dis_jmp_ea(dsp_core_t* dsp)
+{
+    /* "jmp p:ea": the 6-bit effective-address field at bit 8 is rendered
+     * by dis_calc_ea() and printed after the p: prefix. */
+    char target_text[16];
+    dis_calc_ea(dsp, (dsp->disasm_cur_inst >>8) & BITMASK(6), target_text);
+    sprintf(dsp->disasm_str_instr,"jmp p:%s", target_text);
+}
+
+static void dis_jscc_ea(dsp_core_t* dsp)
+{
+    /* "jsCC p:ea": condition = low four opcode bits; the target is the
+     * 6-bit effective-address field at bit 8, rendered by dis_calc_ea(). */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char cond_text[16], target_text[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), target_text);
+    dis_calc_cc(dsp, opcode & BITMASK(4), cond_text);
+    sprintf(dsp->disasm_str_instr,"js%s p:%s", cond_text, target_text);
+}
+    
+static void dis_jscc_imm(dsp_core_t* dsp)
+{
+    /* "jsCC p:xxx": condition = 4-bit field at bit 12; the target is the
+     * low 12 opcode bits. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char cond_text[16], target_text[16];
+
+    sprintf(target_text, "$%04x", opcode & BITMASK(12));
+    dis_calc_cc(dsp, (opcode >> 12) & BITMASK(4), cond_text);
+    sprintf(dsp->disasm_str_instr,"js%s p:%s", cond_text, target_text);
+}
+
+static void dis_jsclr_aa(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "jsclr #n,x:aa,p:xx" / "jsclr #n,y:aa,p:xx" into
+     * disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the short address.  The p: operand is the word read at
+     * pc+1, so the instruction length is bumped by one.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int use_y = (opcode >> 6) & 1;
+    char operand[16];
+    uint32_t target;
+
+    dsp->disasm_cur_inst_len++;
+    target = read_memory_p(dsp, dsp->pc+1);
+
+    sprintf(operand, use_y ? "y:$%04x" : "x:$%04x", short_addr);
+    sprintf(dsp->disasm_str_instr,"jsclr #%d,%s,p:$%04x",
+        bit_index, operand, target);
+}
+
+static void dis_jsclr_ea(dsp_core_t* dsp)
+{
+    /*
+     * Disassemble "jsclr #n,x:ea,p:xx" / "jsclr #n,y:ea,p:xx" into
+     * disasm_str_instr.
+     *
+     * Opcode fields read here: bits 4..0 give the bit number, bit 6 the
+     * memory space (set selects y:, clear selects x:) and the 6-bit field
+     * at bit 8 the effective-address mode rendered by dis_calc_ea().  The
+     * p: operand is the word read at pc+1, so the length is bumped by one.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit_index = opcode & BITMASK(5);
+    const int use_y = (opcode >> 6) & 1;
+    char ea_text[16], operand[16];
+    uint32_t target;
+
+    dsp->disasm_cur_inst_len++;
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    target = read_memory_p(dsp, dsp->pc+1);
+
+    sprintf(operand, use_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr,"jsclr #%d,%s,p:$%04x",
+        bit_index, operand, target);
+}
+
+static void dis_jsclr_pp(dsp_core_t* dsp)
+{
+    /*
+     * Two-word instruction, printed as one of:
+     *   jsclr #n,x:pp,p:xx
+     *   jsclr #n,y:pp,p:xx
+     * The jump target is taken from the program word at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    const int in_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    /* The 6-bit pp field is an offset from $ffffc0. */
+    const uint32_t io_addr = 0xffffc0 + ((opcode >> 8) & BITMASK(6));
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(operand, in_y ? "y:$%06x" : "x:$%06x", io_addr);
+
+    sprintf(dsp->disasm_str_instr, "jsclr #%d,%s,p:$%04x",
+        bit,
+        operand,
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jsclr_reg(dsp_core_t* dsp)
+{
+    /* Two-word form "jsclr #n,R,p:xx"; the target is the word at pc+1. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    /* R is a 6-bit index into registers_name[]. */
+    const uint32_t reg = (opcode >> 8) & BITMASK(6);
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(dsp->disasm_str_instr, "jsclr #%d,%s,p:$%04x",
+        bit,
+        registers_name[reg],
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jset_aa(dsp_core_t* dsp)
+{
+    /*
+     * Two-word instruction, printed as one of:
+     *   jset #n,x:aa,p:xx
+     *   jset #n,y:aa,p:xx
+     * The jump target is taken from the program word at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int in_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    /* Bit 6 chooses between the Y and X memory spaces. */
+    sprintf(operand, in_y ? "y:$%04x" : "x:$%04x", short_addr);
+
+    sprintf(dsp->disasm_str_instr, "jset #%d,%s,p:$%04x",
+        bit,
+        operand,
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jset_ea(dsp_core_t* dsp)
+{
+    /*
+     * Two-word instruction, printed as one of:
+     *   jset #n,x:ea,p:xx
+     *   jset #n,y:ea,p:xx
+     * The jump target is taken from the program word at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    const uint32_t ea_mode = (opcode >> 8) & BITMASK(6);
+    const int in_y = (opcode >> 6) & 1;
+    char ea_text[16];
+    char operand[16];
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    dis_calc_ea(dsp, ea_mode, ea_text);
+    sprintf(operand, in_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr, "jset #%d,%s,p:$%04x",
+        bit,
+        operand,
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jset_pp(dsp_core_t* dsp)
+{
+    /*
+     * Two-word instruction, printed as one of:
+     *   jset #n,x:pp,p:xx
+     *   jset #n,y:pp,p:xx
+     * The jump target is taken from the program word at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    const int in_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    /* The 6-bit pp field is an offset from $ffffc0. */
+    const uint32_t io_addr = 0xffffc0 + ((opcode >> 8) & BITMASK(6));
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(operand, in_y ? "y:$%06x" : "x:$%06x", io_addr);
+
+    sprintf(dsp->disasm_str_instr, "jset #%d,%s,p:$%04x",
+        bit,
+        operand,
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jset_reg(dsp_core_t* dsp)
+{
+    /* Two-word form "jset #n,R,p:xx"; the target is the word at pc+1. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    /* R is a 6-bit index into registers_name[]. */
+    const uint32_t reg = (opcode >> 8) & BITMASK(6);
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(dsp->disasm_str_instr, "jset #%d,%s,p:$%04x",
+        bit,
+        registers_name[reg],
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jsr_imm(dsp_core_t* dsp)
+{   /* Short "jsr": the low 12 opcode bits are printed as the p: target. */
+    sprintf(dsp->disasm_str_instr,"jsr p:$%04x", dsp->disasm_cur_inst & BITMASK(12));
+}
+
+static void dis_jsr_ea(dsp_core_t* dsp)
+{
+    /* jsr ea: bits 8-13 of the opcode select the addressing mode */
+    const uint32_t ea_mode = (dsp->disasm_cur_inst >> 8) & BITMASK(6);
+    char target[16];
+    dis_calc_ea(dsp, ea_mode, target);
+    sprintf(dsp->disasm_str_instr, "jsr p:%s", target);
+}
+
+static void dis_jsset_aa(dsp_core_t* dsp)
+{
+    /*
+     * Two-word instruction, printed as one of:
+     *   jsset #n,x:aa,p:xx
+     *   jsset #n,y:aa,p:xx
+     * The jump target is taken from the program word at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const int in_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    /* Bit 6 chooses between the Y and X memory spaces. */
+    sprintf(operand, in_y ? "y:$%04x" : "x:$%04x", short_addr);
+
+    sprintf(dsp->disasm_str_instr, "jsset #%d,%s,p:$%04x",
+        bit,
+        operand,
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jsset_ea(dsp_core_t* dsp)
+{
+    /*
+     * Two-word instruction, printed as one of:
+     *   jsset #n,x:ea,p:xx
+     *   jsset #n,y:ea,p:xx
+     * The jump target is taken from the program word at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    const uint32_t ea_mode = (opcode >> 8) & BITMASK(6);
+    const int in_y = (opcode >> 6) & 1;
+    char ea_text[16];
+    char operand[16];
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    dis_calc_ea(dsp, ea_mode, ea_text);
+    sprintf(operand, in_y ? "y:%s" : "x:%s", ea_text);
+
+    sprintf(dsp->disasm_str_instr, "jsset #%d,%s,p:$%04x",
+        bit,
+        operand,
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jsset_pp(dsp_core_t* dsp)
+{
+    /*
+     * Two-word instruction, printed as one of:
+     *   jsset #n,x:pp,p:xx
+     *   jsset #n,y:pp,p:xx
+     * The jump target is taken from the program word at pc+1.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    const int in_y = (opcode >> 6) & 1;
+    char operand[16];
+
+    /* The 6-bit pp field is an offset from $ffffc0. */
+    const uint32_t io_addr = 0xffffc0 + ((opcode >> 8) & BITMASK(6));
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(operand, in_y ? "y:$%06x" : "x:$%06x", io_addr);
+
+    sprintf(dsp->disasm_str_instr, "jsset #%d,%s,p:$%04x",
+        bit,
+        operand,
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_jsset_reg(dsp_core_t* dsp)
+{
+    /* Two-word form "jsset #n,r,p:xx"; the target is the word at pc+1. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t bit = opcode & BITMASK(5);
+    /* The register is a 6-bit index into registers_name[]. */
+    const uint32_t reg = (opcode >> 8) & BITMASK(6);
+
+    /* Account for the extension word that carries the target. */
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(dsp->disasm_str_instr, "jsset #%d,%s,p:$%04x",
+        bit,
+        registers_name[reg],
+        read_memory_p(dsp, dsp->pc+1));
+}
+
+static void dis_lua(dsp_core_t* dsp)
+{
+    /* lua ea,D: D is an address register -- bit 3 selects Nn over Rn, bits 0-2 give n. */
+    char addr_name[16];
+    uint32_t numreg = dsp->disasm_cur_inst & BITMASK(3);       /* register number n */
+    char bank = (dsp->disasm_cur_inst & (1<<3)) ? 'n' : 'r';   /* same split as dis_lua_rel */
+    dis_calc_ea(dsp, (dsp->disasm_cur_inst>>8) & BITMASK(5), addr_name);
+    sprintf(dsp->disasm_str_instr,"lua %s,%c%u", addr_name, bank, numreg);
+}
+
+static void dis_lua_rel(dsp_core_t* dsp)
+{
+    /*
+     * lua (Rn + aa),D: the 7-bit displacement is split across the opcode
+     * (bits 4-7 are its low nibble, bits 11-13 its top three bits).
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t disp_lo = (opcode >> 4) & BITMASK(4);
+    const uint32_t disp_hi = (opcode >> 11) & BITMASK(3);
+    const uint32_t base = (opcode >> 8) & BITMASK(3);
+    const uint32_t dest = opcode & BITMASK(3);
+    const int32_t disp = (int32_t)dsp_signextend(7, disp_lo + (disp_hi << 4));
+
+    /* Bit 3 selects an Nn destination instead of Rn. */
+    sprintf(dsp->disasm_str_instr,
+        (opcode & (1<<3)) ? "lua (r%d + %d),n%d" : "lua (r%d + %d),r%d",
+        base, disp, dest);
+}
+
+static void dis_movec_reg(dsp_core_t* dsp)
+{
+    /*
+     * movec between two registers, printed as "movec S,D":
+     *   S1,D2
+     *   S2,D1
+     * Bit 15 set: the register in bits 8-13 is the source and the one in
+     * bits 0-5 the destination; bit 15 clear: the other way round.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t reg_lo = opcode & BITMASK(6);
+    const uint32_t reg_hi = (opcode >> 8) & BITMASK(6);
+    const int to_lo = (opcode & (1<<15)) != 0;
+    const char *src = registers_name[to_lo ? reg_hi : reg_lo];
+    const char *dst = registers_name[to_lo ? reg_lo : reg_hi];
+
+    sprintf(dsp->disasm_str_instr, "movec %s,%s", src, dst);
+}
+
+static void dis_movec_aa(dsp_core_t* dsp)
+{
+    /*
+     * movec with a 6-bit absolute short address, printed as one of:
+     *   x:aa,D1
+     *   S1,x:aa
+     *   y:aa,D1
+     *   S1,y:aa
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t reg = opcode & BITMASK(6);
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    const char *space = (opcode & (1<<6)) ? "y" : "x";
+    char mem_operand[16], reg_operand[16];
+    const char *src, *dst;
+
+    /* Render both operands once; only their order depends on bit 15. */
+    sprintf(mem_operand, "%s:$%04x", space, short_addr);
+    strcpy(reg_operand, registers_name[reg]);
+
+    if (opcode & (1<<15)) {
+        /* Write D1: memory is the source */
+        src = mem_operand;
+        dst = reg_operand;
+    } else {
+        /* Read S1: memory is the destination */
+        src = reg_operand;
+        dst = mem_operand;
+    }
+
+    sprintf(dsp->disasm_str_instr, "movec %s,%s", src, dst);
+}
+
+static void dis_movec_imm(dsp_core_t* dsp)
+{
+    /* movec #xx,D1: 8-bit immediate in bits 8-15, register in bits 0-5 */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t imm = (opcode >> 8) & BITMASK(8);
+    const uint32_t reg = opcode & BITMASK(6);
+
+    sprintf(dsp->disasm_str_instr, "movec #$%02x,%s",
+        imm, registers_name[reg]);
+}
+
+static void dis_movec_ea(dsp_core_t* dsp)
+{
+    /*
+     * movec with an effective address, printed as one of:
+     *   x:ea,D1
+     *   S1,x:ea
+     *   y:ea,D1
+     *   S1,y:ea
+     *   #xxxx,D1
+     * Bit 6 selects the memory space, bit 15 the transfer direction.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t reg = opcode & BITMASK(6);
+    const uint32_t ea_mode = (opcode >> 8) & BITMASK(6);
+    const char *space = (opcode & (1<<6)) ? "y" : "x";
+    char ea_text[16];
+    char src[16];
+    char dst[16];
+    int is_immediate;
+
+    /* A non-zero result is treated as "ea_text is an immediate value". */
+    is_immediate = dis_calc_ea(dsp, ea_mode, ea_text);
+
+    if (!(opcode & (1<<15))) {
+        /* Read S1: register to memory */
+        strcpy(src, registers_name[reg]);
+        sprintf(dst, "%s:%s", space, ea_text);
+    } else {
+        /* Write D1: memory or immediate to register */
+        if (is_immediate) {
+            sprintf(src, "#%s", ea_text);
+        } else {
+            sprintf(src, "%s:%s", space, ea_text);
+        }
+        strcpy(dst, registers_name[reg]);
+    }
+
+    sprintf(dsp->disasm_str_instr, "movec %s,%s", src, dst);
+}
+
+static void dis_movem_aa(dsp_core_t* dsp)
+{
+    /*
+     * movem with a 6-bit absolute short program address:
+     *   S,p:aa
+     *   p:aa,D
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char addr_text[16], mem_operand[16];
+    const char *reg_operand = registers_name[opcode & BITMASK(6)];
+
+    sprintf(addr_text, "$%04x", (opcode >> 8) & BITMASK(6));
+    sprintf(mem_operand, "p:%s", addr_text);
+    if (opcode & (1<<15)) {
+        /* Write D */
+        sprintf(dsp->disasm_str_instr, "movem %s,%s", mem_operand, reg_operand);
+    } else {
+        /* Read S */
+        sprintf(dsp->disasm_str_instr, "movem %s,%s", reg_operand, mem_operand);
+    }
+}
+
+static void dis_movem_ea(dsp_core_t* dsp)
+{
+    /*
+     * movem with an effective address in program memory:
+     *   S,p:ea
+     *   p:ea,D
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char ea_text[16], mem_operand[16];
+    const char *reg_operand = registers_name[opcode & BITMASK(6)];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    sprintf(mem_operand, "p:%s", ea_text);
+
+    if (opcode & (1<<15)) {
+        /* Write D */
+        sprintf(dsp->disasm_str_instr, "movem %s,%s", mem_operand, reg_operand);
+    } else {
+        /* Read S */
+        sprintf(dsp->disasm_str_instr, "movem %s,%s", reg_operand, mem_operand);
+    }
+}
+
+static void dis_movep_0(dsp_core_t* dsp)
+{
+    /*
+     * movep between a register and a peripheral address, printed as one of:
+     *   S,x:pp
+     *   x:pp,D
+     *   S,y:pp
+     *   y:pp,D
+     * Bit 15 set means the peripheral location is the destination.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char periph[16] = "";
+    char src[16] = "";
+    char dst[16] = "";
+
+    /* The 6-bit pp field is an offset from $ffffc0. */
+    const uint32_t io_addr = 0xffffc0 + (opcode & BITMASK(6));
+
+    /* Bits 8-13 index registers_name[]; bit 16 picks Y over X. */
+    const uint32_t reg = (opcode >> 8) & BITMASK(6);
+    const int in_y = (opcode >> 16) & 1;
+
+    sprintf(periph, in_y ? "y:$%06x" : "x:$%06x", io_addr);
+
+    if (opcode & (1<<15)) {
+        /* Write pp */
+
+        strcpy(src, registers_name[reg]);
+        strcpy(dst, periph);
+    } else {
+        /* Read pp */
+
+        strcpy(src, periph);
+        strcpy(dst, registers_name[reg]);
+    }
+
+    sprintf(dsp->disasm_str_instr, "movep %s,%s", src, dst);
+}
+
+static void dis_movep_1(dsp_core_t* dsp)
+{
+    /*
+     * movep between program memory and a peripheral address, one of:
+     *   p:ea,x:pp
+     *   x:pp,p:ea
+     *   p:ea,y:pp
+     *   y:pp,p:ea
+     * Bit 15 set means the peripheral location is the destination.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char ea_text[16] = "";
+    char periph[16] = "";
+    char prog[16] = "";
+    const char *src;
+    const char *dst;
+
+    /* The 6-bit pp field is an offset from $ffffc0; bit 16 picks Y over X. */
+    const uint32_t io_addr = 0xffffc0 + (opcode & BITMASK(6));
+    const int in_y = (opcode >> 16) & 1;
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+
+    sprintf(prog, "p:%s", ea_text);
+    sprintf(periph, in_y ? "y:$%06x" : "x:$%06x", io_addr);
+
+    if (opcode & (1<<15)) {
+        /* Write pp */
+        src = prog;
+        dst = periph;
+    } else {
+        /* Read pp */
+        src = periph;
+        dst = prog;
+    }
+
+    sprintf(dsp->disasm_str_instr, "movep %s,%s", src, dst);
+}
+
+static void dis_movep_23(dsp_core_t* dsp)
+{
+    /*
+     * movep between X/Y memory (or an immediate) and a peripheral address.
+     *
+     * With an X peripheral location:
+     *   x:ea,x:pp
+     *   y:ea,x:pp
+     *   #xxxxxx,x:pp
+     *   x:pp,x:ea
+     *   x:pp,y:ea
+     *
+     * With a Y peripheral location:
+     *   x:ea,y:pp
+     *   y:ea,y:pp
+     *   #xxxxxx,y:pp
+     *   y:pp,y:ea
+     *   y:pp,x:ea
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char ea_text[16] = "";
+    char periph[16] = "";
+    char mem[16] = "";
+    const char *src;
+    const char *dst;
+    uint32_t is_immediate;
+
+    /* The 6-bit pp field is an offset from $ffffc0. */
+    const uint32_t io_addr = 0xffffc0 + (opcode & BITMASK(6));
+
+    /* Bit 16 picks the space of pp, bit 6 the space of ea (1 = Y). */
+    const int pp_in_y = (opcode >> 16) & 1;
+    const int ea_in_y = (opcode >> 6) & 1;
+
+    /* A non-zero result is treated as "ea_text is an immediate value". */
+    is_immediate = dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+
+    /* The peripheral operand is rendered the same way in both directions. */
+    sprintf(periph, pp_in_y ? "y:$%06x" : "x:$%06x", io_addr);
+
+    if (opcode & (1<<15)) {
+        /* Write pp: only this direction checks for an immediate source */
+        if (is_immediate) {
+            sprintf(mem, "#%s", ea_text);
+        } else {
+            sprintf(mem, ea_in_y ? "y:%s" : "x:%s", ea_text);
+        }
+        src = mem;
+        dst = periph;
+    } else {
+        /* Read pp */
+        sprintf(mem, ea_in_y ? "y:%s" : "x:%s", ea_text);
+        src = periph;
+        dst = mem;
+    }
+
+    sprintf(dsp->disasm_str_instr, "movep %s,%s", src, dst);
+}
+
+static void dis_movep_x_qq(dsp_core_t* dsp) {
+    /*
+     * Encoding: 00000111W1MMMRRR0Sqqqqqq
+     * movep between X/Y memory (or an immediate) and x:qq, where the 6-bit
+     * qq field is an offset from $ffff80.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t io_addr = 0xffff80 + (opcode & BITMASK(6));
+    const int ea_in_y = (opcode >> 6) & 1;
+    char ea_text[16] = "";
+    char periph[16] = "";
+    char mem[16] = "";
+    const char *src;
+    const char *dst;
+
+    /* A non-zero result is treated as "ea_text is an immediate value". */
+    int is_immediate = dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+
+    sprintf(periph, "x:$%04x", io_addr);
+
+    if (opcode & (1<<15)) {
+        /* Write qq: only this direction checks for an immediate source */
+        if (is_immediate) {
+            sprintf(mem, "#%s", ea_text);
+        } else {
+            sprintf(mem, ea_in_y ? "y:%s" : "x:%s", ea_text);
+        }
+        src = mem;
+        dst = periph;
+    } else {
+        /* Read qq */
+        sprintf(mem, ea_in_y ? "y:%s" : "x:%s", ea_text);
+        src = periph;
+        dst = mem;
+    }
+
+    sprintf(dsp->disasm_str_instr, "movep %s,%s", src, dst);
+}
+
+
+static void dis_move_x_long(dsp_core_t* dsp) {
+    /* Two-word "move": the displacement is the program word at pc+1. */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t ext_word = read_memory_p(dsp, dsp->pc+1);
+    const int32_t disp = dsp_signextend(24, ext_word);
+    const char *base = registers_name[DSP_REG_R0 + ((opcode >> 8) & BITMASK(3))];
+    const char *reg = registers_name[opcode & BITMASK(6)];
+
+    dsp->disasm_cur_inst_len++;
+    if ((opcode >> 6) & 1) {
+        /* W set: the memory operand is the source */
+        sprintf(dsp->disasm_str_instr, "move x:(%s + %d), %s", base, disp, reg);
+    } else {
+        /* W clear: the memory operand is the destination */
+        sprintf(dsp->disasm_str_instr, "move %s, x:(%s + %d)", reg, base, disp);
+    }
+}
+
+static void dis_move_xy_imm(dsp_core_t* dsp, int space)
+{
+    /*
+     * "move" with a short displacement: bits 11-16 and bit 6 of the opcode
+     * form a 7-bit value that is run through dsp_signextend(7, ...).
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t raw = (((opcode >> 11) & BITMASK(6)) << 1) + ((opcode >> 6) & 1);
+    const int32_t disp = dsp_signextend(7, raw);
+    const char *base = registers_name[DSP_REG_R0 + ((opcode >> 8) & BITMASK(3))];
+    const char *reg = registers_name[opcode & BITMASK(4)];
+    const char letter = (space == DSP_SPACE_X) ? 'x' : 'y';
+
+    if ((opcode >> 4) & 1) {
+        sprintf(dsp->disasm_str_instr, "move %c:(%s + %d), %s", letter, base, disp, reg);
+    } else {
+        sprintf(dsp->disasm_str_instr, "move %s, %c:(%s + %d)", reg, letter, base, disp);
+    }
+}
+
+static void dis_move_x_imm(dsp_core_t* dsp)
+{   /* X-space entry point: forwards to dis_move_xy_imm() with DSP_SPACE_X. */
+    dis_move_xy_imm(dsp, DSP_SPACE_X);
+}
+
+static void dis_move_y_imm(dsp_core_t* dsp) { /* Y-space entry point for dis_move_xy_imm(). */
+    dis_move_xy_imm(dsp, DSP_SPACE_Y);
+}
+
+static void dis_mpyi(dsp_core_t* dsp)
+{
+    /* Two-word "mpyi": the immediate operand is the program word at pc+1. */
+    const unsigned int qq_to_reg[4] = {
+        DSP_REG_X0,
+        DSP_REG_Y0,
+        DSP_REG_X1,
+        DSP_REG_Y1,
+    };
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t imm = read_memory_p(dsp, dsp->pc+1);
+    const int negate = (opcode >> 2) & 1;
+    const int to_b = (opcode >> 3) & 1;
+    const unsigned int src = qq_to_reg[(opcode >> 4) & BITMASK(2)];
+    const unsigned int dst = to_b ? DSP_REG_B : DSP_REG_A;
+
+    /* Account for the extension word that carries the immediate. */
+    dsp->disasm_cur_inst_len++;
+
+    sprintf(dsp->disasm_str_instr, "mpyi %s#$%06x,%s,%s",
+        negate ? "-" : "+", imm,
+        registers_name[src], registers_name[dst]);
+}
+
+static void dis_norm(dsp_core_t* dsp)
+{
+    /* norm Rn,D: bits 8-10 select Rn; bit 3 is added to DSP_REG_A to pick D */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const char *addr_reg = registers_name[DSP_REG_R0 + ((opcode >> 8) & BITMASK(3))];
+    const char *dest_reg = registers_name[DSP_REG_A + ((opcode >> 3) & 1)];
+
+    sprintf(dsp->disasm_str_instr, "norm %s,%s", addr_reg, dest_reg);
+}
+
+static void dis_or_long(dsp_core_t* dsp)
+{
+    const uint32_t imm = read_memory_p(dsp, dsp->pc+1);
+    const uint32_t acc = (dsp->disasm_cur_inst & (1<<3)) ? DSP_REG_B : DSP_REG_A;
+    dsp->disasm_cur_inst_len++;     /* the immediate occupies a second word */
+    sprintf(dsp->disasm_str_instr, "or #$%04x,%s", imm, registers_name[acc]);
+}
+
+static void dis_ori(dsp_core_t* dsp)
+{
+    /*
+     * ori #xx,D: bits 8-15 hold the immediate, bits 0-1 select mr/ccr/omr.
+     * Selector 3 is not handled; the output buffer is left untouched for it.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t selector = opcode & BITMASK(2);
+    const uint32_t imm = (opcode >> 8) & BITMASK(8);
+    if (selector == 0) {
+        sprintf(dsp->disasm_str_instr, "ori #$%02x,mr", imm);
+    } else if (selector == 1) {
+        sprintf(dsp->disasm_str_instr, "ori #$%02x,ccr", imm);
+    } else if (selector == 2) {
+        sprintf(dsp->disasm_str_instr, "ori #$%02x,omr", imm);
+    }
+}
+
+static void dis_rep_aa(dsp_core_t* dsp)
+{
+    /*
+     * rep with a 6-bit absolute short address operand:
+     *   x:aa
+     *   y:aa
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t short_addr = (opcode >> 8) & BITMASK(6);
+    char operand[16];
+
+    /* Bit 6 chooses between the Y and X memory spaces. */
+    sprintf(operand, (opcode & (1<<6)) ? "y:$%04x" : "x:$%04x", short_addr);
+    sprintf(dsp->disasm_str_instr, "rep %s", operand);
+}
+
+static void dis_rep_imm(dsp_core_t* dsp)
+{
+    /* rep #xxx: low byte from opcode bits 8-15, upper nibble from bits 0-3 */
+    const uint32_t op = dsp->disasm_cur_inst;
+    sprintf(dsp->disasm_str_instr, "rep #$%02x", ((op >> 8) & BITMASK(8)) + ((op & BITMASK(4)) << 8));
+}
+
+static void dis_rep_ea(dsp_core_t* dsp)
+{
+    /*
+     * rep with an effective-address operand:
+     *   x:ea
+     *   y:ea
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    char ea_text[16];
+    char operand[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+    /* Bit 6 chooses between the Y and X memory spaces. */
+    sprintf(operand, (opcode & (1<<6)) ? "y:%s" : "x:%s", ea_text);
+    sprintf(dsp->disasm_str_instr, "rep %s", operand);
+}
+
+static void dis_rep_reg(dsp_core_t* dsp)
+{
+    /* rep R: bits 8-13 of the opcode index registers_name[] */
+    const uint32_t reg = (dsp->disasm_cur_inst >> 8) & BITMASK(6);
+    sprintf(dsp->disasm_str_instr, "rep %s", registers_name[reg]);
+}
+
+static void dis_sub_imm(dsp_core_t* dsp)
+{
+    /* sub #xx,D: 6-bit immediate in bits 8-13; bit 3 selects B over A */
+    const uint32_t imm = (dsp->disasm_cur_inst >> 8) & BITMASK(6);
+    const int to_b = (dsp->disasm_cur_inst >> 3) & 1;
+    sprintf(dsp->disasm_str_instr, "sub #$%02x,%s", imm, registers_name[to_b ? DSP_REG_B : DSP_REG_A]);
+}
+
+static void dis_sub_long(dsp_core_t* dsp)
+{
+    /* Two-word "sub": the immediate is the program word at pc+1. */
+    const uint32_t imm = read_memory_p(dsp, dsp->pc+1);
+    const int to_b = (dsp->disasm_cur_inst >> 3) & 1;
+    dsp->disasm_cur_inst_len++;
+    sprintf(dsp->disasm_str_instr, "sub #$%06x,%s", imm, registers_name[to_b ? DSP_REG_B : DSP_REG_A]);
+}
+
+static void dis_tcc(dsp_core_t* dsp)
+{
+    /*
+     * tcc S1,D1 [S2,D2]: the first register pair comes from the
+     * registers_tcc[] table; when bit 16 is set a second transfer between
+     * two DSP_REG_R0-based registers is printed as well.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t pair = (opcode >> 3) & BITMASK(4);
+    const uint32_t src1 = registers_tcc[pair][0];
+    const uint32_t dst1 = registers_tcc[pair][1];
+    char cc[16];
+
+    dis_calc_cc(dsp, (opcode >> 12) & BITMASK(4), cc);
+
+    if (!(opcode & (1<<16))) {
+        sprintf(dsp->disasm_str_instr, "t%s %s,%s",
+            cc, registers_name[src1], registers_name[dst1]);
+        return;
+    }
+
+    /* Second pair: source number in bits 8-10, destination in bits 0-2. */
+    sprintf(dsp->disasm_str_instr, "t%s %s,%s %s,%s",
+        cc,
+        registers_name[src1], registers_name[dst1],
+        registers_name[DSP_REG_R0 + ((opcode >> 8) & BITMASK(3))],
+        registers_name[DSP_REG_R0 + (opcode & BITMASK(3))]);
+}
+
+
+/**********************************
+ *  Parallel moves
+ **********************************/
+
+static void dis_pm(dsp_core_t* dsp)
+{
+    /* Dispatch on opcode bits 20-23 through the parallel-move handler table. */
+    disasm_opcodes_parmove[(dsp->disasm_cur_inst >> 20) & BITMASK(4)](dsp);
+}
+
+static void dis_pm_0(dsp_core_t* dsp)
+{
+    /*
+     * Parallel move encodings handled here:
+     *   0000 100d 00mm mrrr S,x:ea  x0,D
+     *   0000 100d 10mm mrrr S,y:ea  y0,D
+     * The register picked by bit 16 (DSP_REG_A + 0/1) is printed twice: as
+     * the source of the store and as the destination of the register move.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+
+    /* Bit 15 selects the Y space (and y0) over the X space (and x0). */
+    const int in_y = (opcode >> 15) & 1;
+    const char *space = in_y ? "y" : "x";
+    const char *data_reg = registers_name[in_y ? DSP_REG_Y0 : DSP_REG_X0];
+    const char *acc = registers_name[DSP_REG_A + ((opcode >> 16) & 1)];
+    char ea_text[16];
+
+    dis_calc_ea(dsp, (opcode >> 8) & BITMASK(6), ea_text);
+
+    sprintf(dsp->disasm_parallelmove_name,
+        "%s,%s:%s %s,%s",
+        acc,
+        space,
+        ea_text,
+        data_reg,
+        acc
+    );
+}
+
+static void dis_pm_1(dsp_core_t* dsp)
+{
+/* Encodings handled here; each pairs a memory/immediate move with a register move:
+    0001 ffdf w0mm mrrr x:ea,D1     S2,D2
+                        S1,x:ea     S2,D2
+                        #xxxxxx,D1  S2,D2
+    0001 deff w1mm mrrr S1,D1       y:ea,D2
+                        S1,D1       S2,y:ea
+                        S1,D1       #xxxxxx,D2
+*/
+    /* The text is written to dsp->disasm_parallelmove_name, not disasm_str_instr. */
+    char addr_name[16];
+    uint32_t memspace, write_flag, retour, s1reg, s2reg, d1reg, d2reg;
+
+    memspace = (dsp->disasm_cur_inst>>14) & 1;      /* bit 14: compared with DSP_SPACE_Y below */
+    write_flag = (dsp->disasm_cur_inst>>15) & 1;    /* bit 15 (w): register is written when set */
+    retour = dis_calc_ea(dsp, (dsp->disasm_cur_inst>>8) & BITMASK(6), addr_name); /* non-zero: printed as "#" immediate */
+
+    if (memspace==DSP_SPACE_Y) {
+        s2reg = d2reg = DSP_REG_Y0;     /* default; every case below reassigns it */
+        switch((dsp->disasm_cur_inst>>16) & BITMASK(2)) {
+            case 0: s2reg = d2reg = DSP_REG_Y0; break;
+            case 1: s2reg = d2reg = DSP_REG_Y1; break;
+            case 2: s2reg = d2reg = DSP_REG_A;  break;
+            case 3: s2reg = d2reg = DSP_REG_B;  break;
+        }
+
+        s1reg = DSP_REG_A+((dsp->disasm_cur_inst>>19) & 1);     /* bit 19 added to DSP_REG_A */
+        d1reg = DSP_REG_X0+((dsp->disasm_cur_inst>>18) & 1);    /* bit 18 added to DSP_REG_X0 */
+
+        if (write_flag) {
+            /* Write D2 */
+
+            if (retour) {
+                sprintf(dsp->disasm_parallelmove_name,"%s,%s #%s,%s",
+                    registers_name[s1reg],
+                    registers_name[d1reg],
+                    addr_name,
+                    registers_name[d2reg]
+                );
+            } else {
+                sprintf(dsp->disasm_parallelmove_name,"%s,%s y:%s,%s",
+                    registers_name[s1reg],
+                    registers_name[d1reg],
+                    addr_name,
+                    registers_name[d2reg]
+                );
+            }
+        } else {
+            /* Read S2 */
+            sprintf(dsp->disasm_parallelmove_name,"%s,%s %s,y:%s",
+                registers_name[s1reg],
+                registers_name[d1reg],
+                registers_name[s2reg],
+                addr_name
+            );
+        }
+
+    } else {
+        s1reg = d1reg = DSP_REG_X0;     /* default; every case below reassigns it */
+        switch((dsp->disasm_cur_inst>>18) & BITMASK(2)) {
+            case 0: s1reg = d1reg = DSP_REG_X0; break;
+            case 1: s1reg = d1reg = DSP_REG_X1; break;
+            case 2: s1reg = d1reg = DSP_REG_A;  break;
+            case 3: s1reg = d1reg = DSP_REG_B;  break;
+        }
+
+        s2reg = DSP_REG_A+((dsp->disasm_cur_inst>>17) & 1);     /* bit 17 added to DSP_REG_A */
+        d2reg = DSP_REG_Y0+((dsp->disasm_cur_inst>>16) & 1);    /* bit 16 added to DSP_REG_Y0 */
+
+        if (write_flag) {
+            /* Write D1 */
+
+            if (retour) {
+                sprintf(dsp->disasm_parallelmove_name,"#%s,%s %s,%s",
+                    addr_name,
+                    registers_name[d1reg],
+                    registers_name[s2reg],
+                    registers_name[d2reg]
+                );
+            } else {
+                sprintf(dsp->disasm_parallelmove_name,"x:%s,%s %s,%s",
+                    addr_name,
+                    registers_name[d1reg],
+                    registers_name[s2reg],
+                    registers_name[d2reg]
+                );
+            }
+        } else {
+            /* Read S1 */
+            sprintf(dsp->disasm_parallelmove_name,"%s,x:%s %s,%s",
+                registers_name[s1reg],
+                addr_name,
+                registers_name[s2reg],
+                registers_name[d2reg]
+            );
+        }
+
+    }
+}
+
+static void dis_pm_2(dsp_core_t* dsp)
+{
+    /*
+     * Parallel move encodings handled here (upper 16 bits of the opcode):
+     *   0010 0000 0000 0000 nop
+     *   0010 0000 010m mrrr R update
+     *   0010 00ee eeed dddd S,D
+     *   001d dddd iiii iiii #xx,D
+     * The patterns are tested from most to least specific.
+     */
+    const uint32_t opcode = dsp->disasm_cur_inst;
+    const uint32_t upper = (opcode >> 8) & 0xffff;
+    char ea_text[16];
+
+    if (upper == 0x2000) {
+        /* nop: the parallel-move text is left as it was */
+    } else if ((upper & 0xffe0) == 0x2040) {
+        /* R update */
+        dis_calc_ea(dsp, (opcode >> 8) & BITMASK(5), ea_text);
+        sprintf(dsp->disasm_parallelmove_name, "%s,r%d", ea_text, (opcode >> 8) & BITMASK(3));
+    } else if ((upper & 0xfc00) == 0x2000) {
+        /* S,D: source register in bits 13-17, destination in bits 8-12 */
+        sprintf(dsp->disasm_parallelmove_name, "%s,%s",
+            registers_name[(opcode >> 13) & BITMASK(5)], registers_name[(opcode >> 8) & BITMASK(5)]);
+    } else {
+        /* #xx,D: immediate in bits 8-15, destination in bits 16-20 */
+        sprintf(dsp->disasm_parallelmove_name, "#$%02x,%s",
+            (opcode >> 8) & BITMASK(8), registers_name[(opcode >> 16) & BITMASK(5)]);
+    }
+}
+
+static void dis_pm_4(dsp_core_t* dsp)
+{
+    char addr_name[16];
+    uint32_t value, retour, ea_mode, memspace;
+/* Encodings handled here (text goes to dsp->disasm_parallelmove_name):
+    0100 l0ll w0aa aaaa l:aa,D
+                        S,l:aa
+    0100 l0ll w1mm mrrr l:ea,D
+                        S,l:ea
+    01dd 0ddd w0aa aaaa x:aa,D
+                        S,x:aa
+    01dd 0ddd w1mm mrrr x:ea,D
+                        S,x:ea
+                        #xxxxxx,D
+    01dd 1ddd w0aa aaaa y:aa,D
+                        S,y:aa
+    01dd 1ddd w1mm mrrr y:ea,D
+                        S,y:ea
+                        #xxxxxx,D
+*/
+    value = (dsp->disasm_cur_inst>>16) & BITMASK(3);            /* opcode bits 16-18 -> value bits 0-2 */
+    value |= (dsp->disasm_cur_inst>>17) & (BITMASK(2)<<3);      /* opcode bits 20-21 -> value bits 3-4 */
+
+    ea_mode = (dsp->disasm_cur_inst>>8) & BITMASK(6);   /* doubles as the short address when bit 14 is clear */
+
+    if ((value>>2)==0) {
+        /* L: memory move */
+        if (dsp->disasm_cur_inst & (1<<14)) {
+            retour = dis_calc_ea(dsp, ea_mode, addr_name);
+        } else {
+            sprintf(addr_name,"$%04x", ea_mode);
+            retour = 0;
+        }
+
+        value = (dsp->disasm_cur_inst>>16) & BITMASK(2);    /* opcode bits 16-17 -> value bits 0-1 */
+        value |= (dsp->disasm_cur_inst>>17) & (1<<2);       /* opcode bit 19 -> value bit 2; indexes registers_lmove[] */
+
+        if (dsp->disasm_cur_inst & (1<<15)) {
+            /* Write D */
+
+            if (retour) {
+                sprintf(dsp->disasm_parallelmove_name, "#%s,%s", addr_name, registers_lmove[value]);
+            } else {
+                sprintf(dsp->disasm_parallelmove_name, "l:%s,%s", addr_name, registers_lmove[value]);
+            }
+        } else {
+            /* Read S */
+            sprintf(dsp->disasm_parallelmove_name, "%s,l:%s", registers_lmove[value], addr_name);
+        }
+
+        return;
+    }
+
+    memspace = (dsp->disasm_cur_inst>>19) & 1;      /* bit 19: non-zero selects the y: forms */
+    if (dsp->disasm_cur_inst & (1<<14)) {
+        retour = dis_calc_ea(dsp, ea_mode, addr_name);
+    } else {
+        sprintf(addr_name,"$%04x", ea_mode);
+        retour = 0;
+    }
+
+    if (memspace) {
+        /* Y: */
+
+        if (dsp->disasm_cur_inst & (1<<15)) {
+            /* Write D */
+
+            if (retour) {
+                sprintf(dsp->disasm_parallelmove_name, "#%s,%s", addr_name, registers_name[value]);
+            } else {
+                sprintf(dsp->disasm_parallelmove_name, "y:%s,%s", addr_name, registers_name[value]);
+            }
+
+        } else {
+            /* Read S */
+            sprintf(dsp->disasm_parallelmove_name, "%s,y:%s", registers_name[value], addr_name);
+        }
+    } else {
+        /* X: */
+
+        if (dsp->disasm_cur_inst & (1<<15)) {
+            /* Write D */
+
+            if (retour) {
+                sprintf(dsp->disasm_parallelmove_name, "#%s,%s", addr_name, registers_name[value]);
+            } else {
+                sprintf(dsp->disasm_parallelmove_name, "x:%s,%s", addr_name, registers_name[value]);
+            }
+        } else {
+            /* Read S */
+            sprintf(dsp->disasm_parallelmove_name, "%s,x:%s", registers_name[value], addr_name);
+        }
+    }
+}
+
+static void dis_pm_8(dsp_core_t* dsp)
+{
+    char addr1_name[16], addr2_name[16];
+    uint32_t ea_mode1, ea_mode2, numreg1, numreg2;
+/* Dual X/Y memory move; text goes to dsp->disasm_parallelmove_name:
+    1wmm eeff WrrM MRRR x:ea,D1     y:ea,D2 
+                        x:ea,D1     S2,y:ea
+                        S1,x:ea     y:ea,D2
+                        S1,x:ea     S2,y:ea
+*/
+    numreg1 = DSP_REG_X0;       /* default; every case below reassigns it */
+    switch((dsp->disasm_cur_inst>>18) & BITMASK(2)) {
+        case 0: numreg1 = DSP_REG_X0;   break;
+        case 1: numreg1 = DSP_REG_X1;   break;
+        case 2: numreg1 = DSP_REG_A;    break;
+        case 3: numreg1 = DSP_REG_B;    break;
+    }
+
+    numreg2 = DSP_REG_Y0;       /* default; every case below reassigns it */
+    switch((dsp->disasm_cur_inst>>16) & BITMASK(2)) {
+        case 0: numreg2 = DSP_REG_Y0;   break;
+        case 1: numreg2 = DSP_REG_Y1;   break;
+        case 2: numreg2 = DSP_REG_A;    break;
+        case 3: numreg2 = DSP_REG_B;    break;
+    }
+
+    ea_mode1 = (dsp->disasm_cur_inst>>8) & BITMASK(5);      /* opcode bits 8-12 */
+    if ((ea_mode1>>3) == 0) {
+        ea_mode1 |= (1<<5);     /* bits 3-4 both clear: set bit 5 before calling dis_calc_ea() */
+    }
+    ea_mode2 = (dsp->disasm_cur_inst>>13) & BITMASK(2);     /* opcode bits 13-14 -> bits 0-1 */
+    ea_mode2 |= ((dsp->disasm_cur_inst>>20) & BITMASK(2))<<3;   /* opcode bits 20-21 -> bits 3-4 */
+    if ((ea_mode1 & (1<<2))==0) {
+        ea_mode2 |= 1<<2;       /* bit 2 of ea_mode2 is the inverse of bit 2 of ea_mode1 */
+    }
+    if ((ea_mode2>>3) == 0) {
+        ea_mode2 |= (1<<5);     /* same adjustment as for ea_mode1 */
+    }
+
+    dis_calc_ea(dsp, ea_mode1, addr1_name);
+    dis_calc_ea(dsp, ea_mode2, addr2_name);
+    /* Bit 15 set: x:ea is the source of the first move; bit 22 set: y:ea is the source of the second. */
+    if (dsp->disasm_cur_inst & (1<<15)) {
+        if (dsp->disasm_cur_inst & (1<<22)) {
+            sprintf(dsp->disasm_parallelmove_name, "x:%s,%s y:%s,%s",
+                addr1_name,
+                registers_name[numreg1],
+                addr2_name,
+                registers_name[numreg2]
+            );
+        } else {
+            sprintf(dsp->disasm_parallelmove_name, "x:%s,%s %s,y:%s",
+                addr1_name,
+                registers_name[numreg1],
+                registers_name[numreg2],
+                addr2_name
+            );
+        }
+    } else {
+        if (dsp->disasm_cur_inst & (1<<22)) {
+            sprintf(dsp->disasm_parallelmove_name, "%s,x:%s y:%s,%s",
+                registers_name[numreg1],
+                addr1_name,
+                addr2_name,
+                registers_name[numreg2]
+            );
+        } else {
+            sprintf(dsp->disasm_parallelmove_name, "%s,x:%s %s,y:%s",
+                registers_name[numreg1],
+                addr1_name,
+                registers_name[numreg2],
+                addr2_name
+            );
+        }
+    }
+}
diff --git a/hw/xbox/dsp/dsp_dma.c b/hw/xbox/dsp/dsp_dma.c
new file mode 100644
index 0000000000..667134ecb9
--- /dev/null
+++ b/hw/xbox/dsp/dsp_dma.c
@@ -0,0 +1,247 @@
+/*
+ * MCPX DSP DMA
+ *
+ * Copyright (c) 2015 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "dsp_dma.h"
+
+#define DMA_CONFIGURATION_AUTOSTART (1 << 0)
+#define DMA_CONFIGURATION_AUTOREADY (1 << 1)
+#define DMA_CONFIGURATION_IOC_CLEAR (1 << 2)
+#define DMA_CONFIGURATION_EOL_CLEAR (1 << 3)
+#define DMA_CONFIGURATION_ERR_CLEAR (1 << 4)
+
+#define DMA_CONTROL_ACTION 0x7
+#define DMA_CONTROL_ACTION_NOP 0
+#define DMA_CONTROL_ACTION_START 1
+#define DMA_CONTROL_ACTION_STOP 2
+#define DMA_CONTROL_ACTION_FREEZE 3
+#define DMA_CONTROL_ACTION_UNFREEZE 4
+#define DMA_CONTROL_ACTION_ABORT 5
+#define DMA_CONTROL_FROZEN (1 << 3)
+#define DMA_CONTROL_RUNNING (1 << 4)
+#define DMA_CONTROL_STOPPED (1 << 5)
+
+#define NODE_POINTER_VAL 0x3fff
+#define NODE_POINTER_EOL (1 << 14)
+
+#define NODE_CONTROL_DIRECTION (1 << 1)
+
+
+// #define DEBUG
+#ifdef DEBUG
+# define DPRINTF(s, ...) printf(s, ## __VA_ARGS__)
+#else
+# define DPRINTF(s, ...) do { } while (0)
+#endif
+
+/* Walk the DMA block list held in DSP X memory, starting at s->next_block,
+ * copying each node's data between DSP memory and scratch memory (through
+ * s->scratch_rw) until a next-block pointer carries NODE_POINTER_EOL. */
+static void dsp_dma_run(DSPDMAState *s)
+{
+    if (!(s->control & DMA_CONTROL_RUNNING)
+        || (s->control & DMA_CONTROL_FROZEN)) {
+        return;
+    }
+    while (!(s->next_block & NODE_POINTER_EOL)) {
+        uint32_t addr = s->next_block & NODE_POINTER_VAL;
+        /* A node is 7 words; bound by xram's word count, not its byte size. */
+        assert((addr+6) < sizeof(s->core->xram) / sizeof(s->core->xram[0]));
+        uint32_t next_block = dsp56k_read_memory(s->core, DSP_SPACE_X, addr);
+        uint32_t control = dsp56k_read_memory(s->core, DSP_SPACE_X, addr+1);
+        uint32_t count = dsp56k_read_memory(s->core, DSP_SPACE_X, addr+2);
+        uint32_t dsp_offset = dsp56k_read_memory(s->core, DSP_SPACE_X, addr+3);
+        uint32_t scratch_offset = dsp56k_read_memory(s->core, DSP_SPACE_X, addr+4);
+        uint32_t scratch_base = dsp56k_read_memory(s->core, DSP_SPACE_X, addr+5);
+        uint32_t scratch_size = dsp56k_read_memory(s->core, DSP_SPACE_X, addr+6)+1;
+
+        s->next_block = next_block;
+        if (s->next_block & NODE_POINTER_EOL) {
+            s->eol = true;
+        }
+
+        DPRINTF("\n\n\nDMA addr %x, control %x, count %x, "
+                 "dsp_offset %x, scratch_offset %x, base %x, size %x\n\n\n",
+                addr, control, count, dsp_offset,
+                scratch_offset, scratch_base, scratch_size);
+
+        uint32_t format = (control >> 10) & 7;
+        unsigned int item_size;
+        uint32_t item_mask = 0xffffffff;
+        switch(format) {
+        case 1:
+            item_size = 2;
+            break;
+        case 2: //big-endian?
+        case 6:
+            item_size = 4;
+            item_mask = 0x00FFFFFF;
+            break;
+        default:
+            fprintf(stderr, "Unknown dsp dma format: 0x%x\n", format);
+            assert(false);
+            break;
+        }
+
+        uint32_t buf_id = (control >> 5) & 0xf;
+
+        size_t scratch_addr;
+        if (buf_id == 0xe) { // 'circular'?
+            // A transfer that would reach the end of the scratch region is
+            // skipped as a whole; next_block was already advanced above.
+            if (scratch_offset + count * item_size >= scratch_size) {
+                // This happens during the startup sound effect.
+                // I think it might actually be a bug in the code...
+                DPRINTF("skipping bad dma...\n");
+                continue;
+            }
+            scratch_addr = scratch_base + scratch_offset; //??
+        } else {
+            // assert(buf_id == 0xf) // 'offset'
+            scratch_addr = scratch_offset;
+        }
+
+        uint32_t mem_address;
+        int mem_space;
+        if (dsp_offset < 0x1800) {
+            assert(dsp_offset+count < 0x1800);
+            mem_space = DSP_SPACE_X;
+            mem_address = dsp_offset;
+        } else if (dsp_offset >= 0x1800 && dsp_offset < 0x2000) { //?
+            assert(dsp_offset+count < 0x2000);
+            mem_space = DSP_SPACE_Y;
+            mem_address = dsp_offset - 0x1800;
+        } else if (dsp_offset >= 0x2800 && dsp_offset < 0x3800) { //?
+            assert(dsp_offset+count < 0x3800);
+            mem_space = DSP_SPACE_P;
+            mem_address = dsp_offset - 0x2800;
+        } else {
+            assert(false);
+        }
+
+        uint8_t* scratch_buf = calloc(count, item_size);
+        assert(scratch_buf != NULL || count == 0); /* fail loudly on OOM */
+        if (control & NODE_CONTROL_DIRECTION) {
+            uint32_t i;
+            for (i=0; i<count; i++) {
+                uint32_t v = dsp56k_read_memory(s->core,
+                    mem_space, mem_address+i);
+                switch(item_size) {
+                case 2:
+                    *(uint16_t*)(scratch_buf + i*2) = v;
+                    break;
+                case 4:
+                    *(uint32_t*)(scratch_buf + i*4) = v;
+                    break;
+                default:
+                    assert(false);
+                    break;
+                }
+            }
+
+            // write to scratch memory
+            s->scratch_rw(s->scratch_rw_opaque,
+                scratch_buf, scratch_addr, count*item_size, 1);
+        } else {
+            // read from scratch memory
+            s->scratch_rw(s->scratch_rw_opaque,
+                scratch_buf, scratch_addr, count*item_size, 0);
+
+            uint32_t i;
+            for (i=0; i<count; i++) {
+                uint32_t v;
+                switch(item_size) {
+                case 2:
+                    v = *(uint16_t*)(scratch_buf + i*2);
+                    break;
+                case 4:
+                    v = (*(uint32_t*)(scratch_buf + i*4)) & item_mask;
+                    break;
+                default:
+                    assert(false);
+                    break;
+                }
+                dsp56k_write_memory(s->core, mem_space, mem_address+i, v);
+            }
+        }
+
+        free(scratch_buf);
+    }
+}
+
+uint32_t dsp_dma_read(DSPDMAState *s, DSPDMARegister reg)
+{
+    /* Return the stored value of the DMA register selected by reg. */
+    if (reg == DMA_CONFIGURATION) {
+        return s->configuration;
+    } else if (reg == DMA_CONTROL) {
+        return s->control;
+    } else if (reg == DMA_START_BLOCK) {
+        return s->start_block;
+    } else if (reg == DMA_NEXT_BLOCK) {
+        return s->next_block;
+    }
+    /* Any other selector is a caller bug; 0 is returned if asserts are off. */
+    assert(false);
+    return 0;
+}
+
+void dsp_dma_write(DSPDMAState *s, DSPDMARegister reg, uint32_t v)
+{
+    /* Store v into the DMA register selected by reg.  A DMA_CONTROL write is
+     * decoded as an action code and followed by a call to dsp_dma_run(). */
+    switch (reg) {
+    case DMA_CONFIGURATION:
+        s->configuration = v;
+        return;
+    case DMA_START_BLOCK:
+        s->start_block = v;
+        return;
+    case DMA_NEXT_BLOCK:
+        s->next_block = v;
+        return;
+    case DMA_CONTROL:
+        break;
+    default:
+        assert(false);
+        return;
+    }
+
+    const uint32_t action = v & DMA_CONTROL_ACTION;
+    if (action == DMA_CONTROL_ACTION_START) {
+        s->control |= DMA_CONTROL_RUNNING;
+        s->control &= ~DMA_CONTROL_STOPPED;
+    } else if (action == DMA_CONTROL_ACTION_STOP) {
+        s->control |= DMA_CONTROL_STOPPED;
+        s->control &= ~DMA_CONTROL_RUNNING;
+    } else if (action == DMA_CONTROL_ACTION_FREEZE) {
+        s->control |= DMA_CONTROL_FROZEN;
+    } else if (action == DMA_CONTROL_ACTION_UNFREEZE) {
+        s->control &= ~DMA_CONTROL_FROZEN;
+    } else {
+        /* NOP, ABORT and the unassigned codes are not handled. */
+        assert(false);
+    }
+    dsp_dma_run(s);
+}
+
diff --git a/hw/xbox/dsp/dsp_dma.h b/hw/xbox/dsp/dsp_dma.h
new file mode 100644
index 0000000000..1a65e83e2d
--- /dev/null
+++ b/hw/xbox/dsp/dsp_dma.h
@@ -0,0 +1,54 @@
+/*
+ * MCPX DSP DMA
+ *
+ * Copyright (c) 2015 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef DSP_DMA_H
+#define DSP_DMA_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "dsp.h"
+#include "dsp_cpu.h"
+
+typedef enum DSPDMARegister { /* register selector for dsp_dma_read()/dsp_dma_write() */
+    DMA_CONFIGURATION, /* backed by DSPDMAState.configuration */
+    DMA_CONTROL,       /* backed by DSPDMAState.control */
+    DMA_START_BLOCK,   /* backed by DSPDMAState.start_block */
+    DMA_NEXT_BLOCK,    /* backed by DSPDMAState.next_block */
+} DSPDMARegister;
+
+typedef struct DSPDMAState { /* state of one DSP DMA engine */
+    dsp_core_t* core; /* DSP core handed to the DSP memory accessors */
+
+    void* scratch_rw_opaque; /* passed back as the first argument of scratch_rw */
+    dsp_scratch_rw_func scratch_rw; /* callback used to access scratch memory */
+
+    uint32_t configuration; /* value of the DMA_CONFIGURATION register */
+    uint32_t control; /* value of the DMA_CONTROL register */
+    uint32_t start_block; /* value of the DMA_START_BLOCK register */
+    uint32_t next_block; /* value of the DMA_NEXT_BLOCK register */
+
+    bool error; /* NOTE(review): meaning not visible from this header - confirm */
+    bool eol; /* NOTE(review): presumably "end of list was reached" - confirm */
+} DSPDMAState;
+
+uint32_t dsp_dma_read(DSPDMAState *s, DSPDMARegister reg);
+void dsp_dma_write(DSPDMAState *s, DSPDMARegister reg, uint32_t v);
+
+#endif
\ No newline at end of file
diff --git a/hw/xbox/dsp/dsp_emu.inl b/hw/xbox/dsp/dsp_emu.inl
new file mode 100644
index 0000000000..0889633703
--- /dev/null
+++ b/hw/xbox/dsp/dsp_emu.inl
@@ -0,0 +1,8043 @@
+/*
+ * DSP56300 instruction routines
+ *
+ * Copyright (c) 2015 espes
+ *
+ * Adapted from Hatari DSP M56001 emulation
+ * (C) 2003-2008 ARAnyM developer team
+ * Adaption to Hatari (C) 2008 by Thomas Huth
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+typedef void (*emu_func_t)(dsp_core_t* dsp);
+
+
+static void emu_undefined(dsp_core_t* dsp)
+{   /* Fallback for opcodes that are not recognised as an instruction. */
+    if (dsp->executing_for_disasm) {
+        dsp->cur_inst_len = 1;
+        dsp->instr_cycle = 0;
+    } else {
+        dsp->cur_inst_len = 0;
+        printf("Dsp: 0x%04x: 0x%06x Illegal instruction\n",dsp->pc, dsp->cur_inst);
+        /* Charge artificial cycles so we cannot get stuck in an infinite loop */
+        dsp->instr_cycle += 100;
+    }
+    if (dsp->exception_debugging) {
+        assert(false);
+    }
+}
+
+
+/**********************************
+ *  Effective address calculation
+ **********************************/
+
+
+static void emu_update_rn_bitreverse(dsp_core_t* dsp, uint32_t numreg)
+{
+    int revbits, i;
+    uint32_t value, r_reg;
+    /* Step R[numreg] once with reverse-carry arithmetic; the field width is derived from N[numreg]. */
+    /* Check how many bits to reverse: position of N's lowest set bit, plus one */
+    value = dsp->registers[DSP_REG_N0+numreg];
+    for (revbits=0;revbits<16;revbits++) {
+        if (value & (1<<revbits)) {
+            break;
+        }
+    }
+    revbits++;
+    /* NOTE(review): revbits ends up as 17 when none of N's low 16 bits is set - confirm intended */
+    /* Mirror the low revbits bits of Rn into value (assumes BITMASK(n) is the low-n-bits mask) */
+    r_reg = dsp->registers[DSP_REG_R0+numreg];
+    value = r_reg & (BITMASK(16)-BITMASK(revbits));
+    for (i=0;i<revbits;i++) {
+        if (r_reg & (1<<i)) {
+            value |= 1<<(revbits-i-1);
+        }
+    }
+
+    /* Increment the mirrored field; the mask drops everything above it, including any carry out */
+    value++;
+    value &= BITMASK(revbits);
+
+    /* Put the incremented field back under Rn's untouched upper bits */
+    r_reg &= (BITMASK(16)-BITMASK(revbits));
+    r_reg |= value;
+    /* Mirror the field a second time to return to normal bit order */
+    value = r_reg & (BITMASK(16)-BITMASK(revbits));
+    for (i=0;i<revbits;i++) {
+        if (r_reg & (1<<i)) {
+            value |= 1<<(revbits-i-1);
+        }
+    }
+
+    dsp->registers[DSP_REG_R0+numreg] = value;
+}
+
+static void emu_update_rn_modulo(dsp_core_t* dsp, uint32_t numreg, int16_t modifier)
+{
+    uint16_t bufsize, modulo, lobound, hibound, bufmask;
+    int16_t r_reg, orig_modifier=modifier;
+    /* Add modifier to R[numreg], wrapping inside a buffer of M[numreg]+1 words around the current R. */
+    modulo = dsp->registers[DSP_REG_M0+numreg]+1;
+    bufsize = 1; /* grows to the smallest power of two >= modulo */
+    bufmask = BITMASK(16); /* one more low bit is shifted out per doubling of bufsize */
+    while (bufsize < modulo) {
+        bufsize <<= 1;
+        bufmask <<= 1;
+    }
+    /* Buffer bounds: base is R masked by bufmask, top is modulo-1 words above it */
+    lobound = dsp->registers[DSP_REG_R0+numreg] & bufmask;
+    hibound = lobound + modulo - 1;
+
+    r_reg = (int16_t) dsp->registers[DSP_REG_R0+numreg];
+    /* Modifier larger than the modulo: first move in whole bufsize steps */
+    if (orig_modifier>modulo) {
+        while (modifier>bufsize) {
+            r_reg += bufsize;
+            modifier -= bufsize;
+        }
+        while (modifier<-bufsize) {
+            r_reg -= bufsize;
+            modifier += bufsize;
+        }
+    }
+
+    r_reg += modifier;
+    /* Unless the modifier equals the modulo, wrap once if the result left [lobound, hibound] */
+    if (orig_modifier!=modulo) {
+        if (r_reg>hibound) {
+            r_reg -= modulo;
+        } else if (r_reg<lobound) {
+            r_reg += modulo;
+        }
+    }
+
+    dsp->registers[DSP_REG_R0+numreg] = ((uint32_t) r_reg) & BITMASK(16);
+}
+
+static void emu_update_rn(dsp_core_t* dsp, uint32_t numreg, int16_t modifier)
+{
+    /* Update R[numreg] by modifier using the addressing mode held in M[numreg]. */
+    const uint16_t mode = (uint16_t) dsp->registers[DSP_REG_M0+numreg];
+    if (mode == 0) {
+        /* M = 0: bit reversed carry update (modifier is not used) */
+        emu_update_rn_bitreverse(dsp, numreg);
+        return;
+    }
+    if (mode <= 32767) {
+        /* M = 1..32767: modulo update */
+        emu_update_rn_modulo(dsp, numreg, modifier);
+        return;
+    }
+    if (mode == 65535) {
+        /* M = 65535: linear update; every remaining M value leaves Rn untouched */
+        int16_t sum = (int16_t) dsp->registers[DSP_REG_R0+numreg];
+        sum += modifier;
+        dsp->registers[DSP_REG_R0+numreg] = ((uint32_t) sum) & BITMASK(16);
+    }
+}
+
+static int emu_calc_ea(dsp_core_t* dsp, uint32_t ea_mode, uint32_t *dst_addr)
+{
+    /* Decode an effective-address field: bits 5..3 select the mode, bits 2..0
+     * the register number.  The address (or the extension word) is stored in
+     * *dst_addr and any register update is applied.  Returns 1 when *dst_addr
+     * holds an immediate value, 0 when it holds an address. */
+    const uint32_t mode = (ea_mode >> 3) & BITMASK(3);
+    const uint32_t numreg = ea_mode & BITMASK(3);
+    const uint32_t r_idx = DSP_REG_R0+numreg;
+    const uint32_t n_idx = DSP_REG_N0+numreg;
+
+    if (mode == 6) {
+        /* aa: value comes from the word following the opcode */
+        dsp->instr_cycle += 2;
+        *dst_addr = read_memory_p(dsp, dsp->pc+1);
+        dsp->cur_inst_len++;
+        return (numreg != 0) ? 1 : 0;
+    }
+
+    if (mode == 7) {
+        /* -(Rx): decrement first, then use the updated register */
+        dsp->instr_cycle += 2;
+        emu_update_rn(dsp, numreg, -1);
+        *dst_addr = dsp->registers[r_idx];
+        return 0;
+    }
+
+    if (mode == 5) {
+        /* (Rx+Nx): address is Rx advanced by Nx; Rx itself is restored */
+        const uint32_t saved = dsp->registers[r_idx];
+        dsp->instr_cycle += 2;
+        emu_update_rn(dsp, numreg, dsp->registers[n_idx]);
+        *dst_addr = dsp->registers[r_idx];
+        dsp->registers[r_idx] = saved;
+        return 0;
+    }
+
+    /* Modes 0-4 address through the current Rx and then post-update it. */
+    *dst_addr = dsp->registers[r_idx];
+    switch (mode) {
+        case 0: /* (Rx)-Nx */
+            emu_update_rn(dsp, numreg, -dsp->registers[n_idx]);
+            break;
+        case 1: /* (Rx)+Nx */
+            emu_update_rn(dsp, numreg, dsp->registers[n_idx]);
+            break;
+        case 2: /* (Rx)- */
+            emu_update_rn(dsp, numreg, -1);
+            break;
+        case 3: /* (Rx)+ */
+            emu_update_rn(dsp, numreg, +1);
+            break;
+        default: /* (Rx): no update */
+            break;
+    }
+
+    return 0;
+}
+
+/**********************************
+ *  Condition code test
+ **********************************/
+
+static int emu_calc_cc(dsp_core_t* dsp, uint32_t cc_code)
+{
+    uint16_t value1, value2, value3;
+    /* Returns 1 when condition cc_code (0..15) holds for the current SR, else 0; other codes give 0. */
+    switch (cc_code) {
+        case 0:  /* CC (HS): C clear */
+            value1 = dsp->registers[DSP_REG_SR] & (1<<DSP_SR_C);
+            return (value1==0);
+        case 1: /* GE: N xor V is 0 */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_N) & 1;
+            value2 = (dsp->registers[DSP_REG_SR] >> DSP_SR_V) & 1;
+            return ((value1 ^ value2) == 0);
+        case 2: /* NE: Z clear */
+            value1 = dsp->registers[DSP_REG_SR] & (1<<DSP_SR_Z);
+            return (value1==0);
+        case 3: /* PL: N clear */
+            value1 = dsp->registers[DSP_REG_SR] & (1<<DSP_SR_N);
+            return (value1==0);
+        case 4: /* NN: Z | (!U & !E) is 0 */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_Z) & 1;
+            value2 = (~(dsp->registers[DSP_REG_SR] >> DSP_SR_U)) & 1;
+            value3 = (~(dsp->registers[DSP_REG_SR] >> DSP_SR_E)) & 1;
+            return ((value1 | (value2 & value3)) == 0);
+        case 5: /* EC: E clear */
+            value1 = dsp->registers[DSP_REG_SR] & (1<<DSP_SR_E);
+            return (value1==0);
+        case 6: /* LC: L clear */
+            value1 = dsp->registers[DSP_REG_SR] & (1<<DSP_SR_L);
+            return (value1==0);
+        case 7: /* GT: Z | (N xor V) is 0 */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_N) & 1;
+            value2 = (dsp->registers[DSP_REG_SR] >> DSP_SR_V) & 1;
+            value3 = (dsp->registers[DSP_REG_SR] >> DSP_SR_Z) & 1;
+            return ((value3 | (value1 ^ value2)) == 0);
+        case 8: /* CS (LO): C set */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_C) & 1; /* shift form: must not rely on C being bit 0 */
+            return (value1==1);
+        case 9: /* LT: N xor V is 1 */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_N) & 1;
+            value2 = (dsp->registers[DSP_REG_SR] >> DSP_SR_V) & 1;
+            return ((value1 ^ value2) == 1);
+        case 10: /* EQ: Z set */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_Z) & 1;
+            return (value1==1);
+        case 11: /* MI: N set */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_N) & 1;
+            return (value1==1);
+        case 12: /* NR: Z | (!U & !E) is 1 */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_Z) & 1;
+            value2 = (~(dsp->registers[DSP_REG_SR] >> DSP_SR_U)) & 1;
+            value3 = (~(dsp->registers[DSP_REG_SR] >> DSP_SR_E)) & 1;
+            return ((value1 | (value2 & value3)) == 1);
+        case 13: /* ES: E set */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_E) & 1;
+            return (value1==1);
+        case 14: /* LS: L set */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_L) & 1;
+            return (value1==1);
+        case 15: /* LE: Z | (N xor V) is 1 */
+            value1 = (dsp->registers[DSP_REG_SR] >> DSP_SR_N) & 1;
+            value2 = (dsp->registers[DSP_REG_SR] >> DSP_SR_V) & 1;
+            value3 = (dsp->registers[DSP_REG_SR] >> DSP_SR_Z) & 1;
+            return ((value3 | (value1 ^ value2)) == 1);
+    }
+    return 0;
+}
+
+/**********************************
+ *  Set/clear ccr bits
+ **********************************/
+
+/* reg0 has bits 55..48 */
+/* reg1 has bits 47..24 */
+/* reg2 has bits 23..0 */
+
+static void emu_ccr_update_e_u_n_z(dsp_core_t* dsp, uint32_t reg0, uint32_t reg1, uint32_t reg2)
+{
+    /* Recompute the E, U, N and Z bits of SR for a 56-bit value passed as
+     * reg0 (bits 55..48), reg1 (bits 47..24) and reg2 (bits 23..0).  Which
+     * bits E and U inspect depends on the two-bit scaling field at SR.S0. */
+    uint32_t mode, ext_bits, norm_bits;
+    int extension = 0, unnormalized = 0;
+
+    /* Start with all four bits cleared. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_E) | (1<<DSP_SR_U) | (1<<DSP_SR_N) | (1<<DSP_SR_Z));
+
+    mode = (dsp->registers[DSP_REG_SR]>>DSP_SR_S0) & BITMASK(2);
+    if (mode == 0) {
+        /* Scaling field 0: E looks at bits 55..47, U at bits 47..46 */
+        ext_bits = (reg0<<1) + (reg1>>23);
+        extension = (ext_bits != 0) && (ext_bits != BITMASK(9));
+        norm_bits = reg1 & 0xc00000;
+        unnormalized = (norm_bits == 0) || (norm_bits == 0xc00000);
+    } else if (mode == 1) {
+        /* Scaling field 1: E looks at bits 55..48, U at bits 48..47 */
+        extension = (reg0 != 0) && (reg0 != BITMASK(8));
+        norm_bits = ((reg0<<1) + (reg1>>23)) & 3;
+        unnormalized = (norm_bits == 0) || (norm_bits == 3);
+    } else if (mode == 2) {
+        /* Scaling field 2: E looks at bits 55..46, U at bits 46..45 */
+        ext_bits = (reg0<<2) + (reg1>>22);
+        extension = (ext_bits != 0) && (ext_bits != BITMASK(10));
+        norm_bits = reg1 & 0x600000;
+        unnormalized = (norm_bits == 0) || (norm_bits == 0x600000);
+    } else {
+        /* Scaling field 3: nothing is set, Z and N included. */
+        return;
+    }
+
+    /* Extension Bit (E) */
+    if (extension) {
+        dsp->registers[DSP_REG_SR] |= 1 << DSP_SR_E;
+    }
+    /* Unnormalized bit (U) */
+    if (unnormalized) {
+        dsp->registers[DSP_REG_SR] |= 1 << DSP_SR_U;
+    }
+
+    /* Zero Flag (Z) */
+    if ((reg0 == 0) && (reg1 == 0) && (reg2 == 0)) {
+        dsp->registers[DSP_REG_SR] |= 1 << DSP_SR_Z;
+    }
+
+    /* Negative Flag (N): bit 7 of reg0 is moved down to SR bit 3 */
+    dsp->registers[DSP_REG_SR] |= (reg0>>4) & 0x8;
+}
+
+/**********************************
+ *  ALU instructions
+ **********************************/
+
+static void emu_abs_a(dsp_core_t* dsp)
+{
+    /* A = |A|.  V is set (and L latched) only if A held 0x80:000000:000000. */
+    uint32_t acc[3], min_value;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+
+    min_value = ((acc[0]==0x80) && (acc[1]==0) && (acc[2]==0));
+    dsp_abs56(acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+    dsp->registers[DSP_REG_SR] |= (min_value<<DSP_SR_L)|(min_value<<DSP_SR_V);
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+static void emu_abs_b(dsp_core_t* dsp)
+{
+    /* B = |B|.  V is set (and L latched) only if B held 0x80:000000:000000. */
+    uint32_t acc[3], min_value;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+
+    min_value = ((acc[0]==0x80) && (acc[1]==0) && (acc[2]==0));
+    dsp_abs56(acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+    dsp->registers[DSP_REG_SR] |= (min_value<<DSP_SR_L)|(min_value<<DSP_SR_V);
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+static void emu_adc_x_a(dsp_core_t* dsp)
+{
+    /* A = A + X + C, with X1:X0 sign-extended to 56 bits. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+    const uint32_t carry_in = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+
+    addend[1] = dsp->registers[DSP_REG_X1];
+    addend[2] = dsp->registers[DSP_REG_X0];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+
+    flags = dsp_add56(addend, acc);
+    if (carry_in) {
+        /* The incoming carry is folded in as a second 56-bit add of 1. */
+        addend[0] = addend[1] = 0;
+        addend[2] = 1;
+        flags |= dsp_add56(addend, acc);
+    }
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_adc_x_b(dsp_core_t* dsp)
+{
+    /* B = B + X + C, with X1:X0 sign-extended to 56 bits. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+    const uint32_t carry_in = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+
+    addend[1] = dsp->registers[DSP_REG_X1];
+    addend[2] = dsp->registers[DSP_REG_X0];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+
+    flags = dsp_add56(addend, acc);
+    if (carry_in) {
+        /* The incoming carry is folded in as a second 56-bit add of 1. */
+        addend[0] = addend[1] = 0;
+        addend[2] = 1;
+        flags |= dsp_add56(addend, acc);
+    }
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_adc_y_a(dsp_core_t* dsp)
+{
+    /* A = A + Y + C, with Y1:Y0 sign-extended to 56 bits. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+    const uint32_t carry_in = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+
+    addend[1] = dsp->registers[DSP_REG_Y1];
+    addend[2] = dsp->registers[DSP_REG_Y0];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+
+    flags = dsp_add56(addend, acc);
+    if (carry_in) {
+        /* The incoming carry is folded in as a second 56-bit add of 1. */
+        addend[0] = addend[1] = 0;
+        addend[2] = 1;
+        flags |= dsp_add56(addend, acc);
+    }
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_adc_y_b(dsp_core_t* dsp)
+{
+    /* B = B + Y + C, with Y1:Y0 sign-extended to 56 bits. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+    const uint32_t carry_in = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+
+    addend[1] = dsp->registers[DSP_REG_Y1];
+    addend[2] = dsp->registers[DSP_REG_Y0];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+
+    flags = dsp_add56(addend, acc);
+    if (carry_in) {
+        /* The incoming carry is folded in as a second 56-bit add of 1. */
+        addend[0] = addend[1] = 0;
+        addend[2] = 1;
+        flags |= dsp_add56(addend, acc);
+    }
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_b_a(dsp_core_t* dsp)
+{
+    /* A = A + B (56 bits).  E/U/N/Z are recomputed from the result; V and C
+     * are cleared and the bits returned by dsp_add56 are OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[2] = dsp->registers[DSP_REG_B0];
+    addend[1] = dsp->registers[DSP_REG_B1];
+    addend[0] = dsp->registers[DSP_REG_B2];
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_a_b(dsp_core_t* dsp)
+{
+    /* B = B + A (56 bits).  E/U/N/Z are recomputed from the result; V and C
+     * are cleared and the bits returned by dsp_add56 are OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[2] = dsp->registers[DSP_REG_A0];
+    addend[1] = dsp->registers[DSP_REG_A1];
+    addend[0] = dsp->registers[DSP_REG_A2];
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_x_a(dsp_core_t* dsp)
+{
+    /* A = A + X, with X1:X0 sign-extended to 56 bits.  V and C are cleared
+     * and the bits returned by dsp_add56 are OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[2] = dsp->registers[DSP_REG_X0];
+    addend[1] = dsp->registers[DSP_REG_X1];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_x_b(dsp_core_t* dsp)
+{
+    /* B = B + X, with X1:X0 sign-extended to 56 bits.  V and C are cleared
+     * and the bits returned by dsp_add56 are OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[2] = dsp->registers[DSP_REG_X0];
+    addend[1] = dsp->registers[DSP_REG_X1];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_y_a(dsp_core_t* dsp)
+{
+    /* A = A + Y, with Y1:Y0 sign-extended to 56 bits.  V and C are cleared
+     * and the bits returned by dsp_add56 are OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[2] = dsp->registers[DSP_REG_Y0];
+    addend[1] = dsp->registers[DSP_REG_Y1];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_y_b(dsp_core_t* dsp)
+{
+    /* B = B + Y, with Y1:Y0 sign-extended to 56 bits.  V and C are cleared
+     * and the bits returned by dsp_add56 are OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[2] = dsp->registers[DSP_REG_Y0];
+    addend[1] = dsp->registers[DSP_REG_Y1];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_x0_a(dsp_core_t* dsp)
+{
+    /* A = A + X0, with X0 placed in bits 47..24 and sign-extended above.  V
+     * and C are cleared and the bits returned by dsp_add56 OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[1] = dsp->registers[DSP_REG_X0];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+    addend[2] = 0;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_add_x0_b(dsp_core_t* dsp)
+{
+    /* B = B + X0, with X0 placed in bits 47..24 and sign-extended above.  V
+     * and C are cleared and the bits returned by dsp_add56 OR-ed into SR. */
+    uint32_t addend[3], acc[3];
+    uint16_t flags;
+
+    addend[1] = dsp->registers[DSP_REG_X0];
+    addend[0] = (addend[1] & (1<<23)) ? 0xff : 0x0;
+    addend[2] = 0;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+
+    flags = dsp_add56(addend, acc);
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+/*
+ * ADD Y0,A: builds a three-word operand from Y0 (middle word = Y0, low word
+ * = 0, top word = 0xff when bit 23 of Y0 is set, else 0), passes it with a
+ * copy of A2:A1:A0 to dsp_add56(), and stores the copy back into A.
+ */
+static void emu_add_y0_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_add56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADD Y0,B: builds a three-word operand from Y0 (middle word = Y0, low word
+ * = 0, top word = 0xff when bit 23 of Y0 is set, else 0), passes it with a
+ * copy of B2:B1:B0 to dsp_add56(), and stores the copy back into B.
+ */
+static void emu_add_y0_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_add56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADD X1,A: builds a three-word operand from X1 (middle word = X1, low word
+ * = 0, top word = 0xff when bit 23 of X1 is set, else 0), passes it with a
+ * copy of A2:A1:A0 to dsp_add56(), and stores the copy back into A.
+ */
+static void emu_add_x1_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_X1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_add56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADD X1,B: builds a three-word operand from X1 (middle word = X1, low word
+ * = 0, top word = 0xff when bit 23 of X1 is set, else 0), passes it with a
+ * copy of B2:B1:B0 to dsp_add56(), and stores the copy back into B.
+ */
+static void emu_add_x1_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_X1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_add56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADD Y1,A: builds a three-word operand from Y1 (middle word = Y1, low word
+ * = 0, top word = 0xff when bit 23 of Y1 is set, else 0), passes it with a
+ * copy of A2:A1:A0 to dsp_add56(), and stores the copy back into A.
+ */
+static void emu_add_y1_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_add56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADD Y1,B: builds a three-word operand from Y1 (middle word = Y1, low word
+ * = 0, top word = 0xff when bit 23 of Y1 is set, else 0), passes it with a
+ * copy of B2:B1:B0 to dsp_add56(), and stores the copy back into B.
+ */
+static void emu_add_y1_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_add56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADDL B,A: a copy of A2:A1:A0 is first passed through dsp_asl56() with a
+ * count of 1, then a copy of B2:B1:B0 is combined into it by dsp_add56().
+ * The result is stored back into A; the status bits returned by both helpers
+ * are OR'ed together before being merged into SR.
+ */
+static void emu_addl_b_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    flags = dsp_asl56(acc, 1);
+
+    operand[2] = dsp->registers[DSP_REG_B0];
+    operand[1] = dsp->registers[DSP_REG_B1];
+    operand[0] = dsp->registers[DSP_REG_B2];
+    flags |= dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the accumulated status bits. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADDL A,B: a copy of B2:B1:B0 is first passed through dsp_asl56() with a
+ * count of 1, then a copy of A2:A1:A0 is combined into it by dsp_add56().
+ * The result is stored back into B; the status bits returned by both helpers
+ * are OR'ed together before being merged into SR.
+ */
+static void emu_addl_a_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    flags = dsp_asl56(acc, 1);
+
+    operand[2] = dsp->registers[DSP_REG_A0];
+    operand[1] = dsp->registers[DSP_REG_A1];
+    operand[0] = dsp->registers[DSP_REG_A2];
+    flags |= dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the accumulated status bits. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADDR B,A: a copy of A2:A1:A0 is first passed through dsp_asr56() with a
+ * count of 1, then a copy of B2:B1:B0 is combined into it by dsp_add56().
+ * The result is stored back into A.
+ *
+ * NOTE(review): the status bits returned by dsp_asr56() are OR'ed with those
+ * from dsp_add56() before reaching SR - confirm this is the intended C/V
+ * behaviour for ADDR.
+ */
+static void emu_addr_b_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    flags = dsp_asr56(acc, 1);
+
+    operand[2] = dsp->registers[DSP_REG_B0];
+    operand[1] = dsp->registers[DSP_REG_B1];
+    operand[0] = dsp->registers[DSP_REG_B2];
+    flags |= dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the accumulated status bits. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * ADDR A,B: a copy of B2:B1:B0 is first passed through dsp_asr56() with a
+ * count of 1, then a copy of A2:A1:A0 is combined into it by dsp_add56().
+ * The result is stored back into B.
+ *
+ * NOTE(review): the status bits returned by dsp_asr56() are OR'ed with those
+ * from dsp_add56() before reaching SR - confirm this is the intended C/V
+ * behaviour for ADDR.
+ */
+static void emu_addr_a_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    flags = dsp_asr56(acc, 1);
+
+    operand[2] = dsp->registers[DSP_REG_A0];
+    operand[1] = dsp->registers[DSP_REG_A1];
+    operand[0] = dsp->registers[DSP_REG_A2];
+    flags |= dsp_add56(operand, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C in SR, then OR in the accumulated status bits. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * AND X0,A: bitwise-ANDs X0 into A1 (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_and_x0_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_A1] & dsp->registers[DSP_REG_X0];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * AND X0,B: bitwise-ANDs X0 into B1 (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_and_x0_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_B1] & dsp->registers[DSP_REG_X0];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * AND Y0,A: bitwise-ANDs Y0 into A1 (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_and_y0_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_A1] & dsp->registers[DSP_REG_Y0];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * AND Y0,B: bitwise-ANDs Y0 into B1 (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_and_y0_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_B1] & dsp->registers[DSP_REG_Y0];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * AND X1,A: bitwise-ANDs X1 into A1 (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_and_x1_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_A1] & dsp->registers[DSP_REG_X1];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * AND X1,B: bitwise-ANDs X1 into B1 (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_and_x1_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_B1] & dsp->registers[DSP_REG_X1];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * AND Y1,A: bitwise-ANDs Y1 into A1 (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_and_y1_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_A1] & dsp->registers[DSP_REG_Y1];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * AND Y1,B: bitwise-ANDs Y1 into B1 (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_and_y1_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_B1] & dsp->registers[DSP_REG_Y1];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * ASL A: runs a copy of A2:A1:A0 through dsp_asl56() with a count of 1 and
+ * stores the result back into A. C and V in SR are cleared and the status
+ * bits returned by dsp_asl56() OR'ed in, after which
+ * emu_ccr_update_e_u_n_z() is called with the new accumulator words.
+ */
+static void emu_asl_a(dsp_core_t* dsp)
+{
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags = dsp_asl56(acc, 1);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_V)))) | flags;
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+/*
+ * ASL B: runs a copy of B2:B1:B0 through dsp_asl56() with a count of 1 and
+ * stores the result back into B. C and V in SR are cleared and the status
+ * bits returned by dsp_asl56() OR'ed in, after which
+ * emu_ccr_update_e_u_n_z() is called with the new accumulator words.
+ */
+static void emu_asl_b(dsp_core_t* dsp)
+{
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags = dsp_asl56(acc, 1);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_V)))) | flags;
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+/*
+ * ASR A: runs a copy of A2:A1:A0 through dsp_asr56() with a count of 1 and
+ * stores the result back into A. C and V in SR are cleared and the status
+ * bits returned by dsp_asr56() OR'ed in, after which
+ * emu_ccr_update_e_u_n_z() is called with the new accumulator words.
+ */
+static void emu_asr_a(dsp_core_t* dsp)
+{
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags = dsp_asr56(acc, 1);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_V)))) | flags;
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+/*
+ * ASR B: runs a copy of B2:B1:B0 through dsp_asr56() with a count of 1 and
+ * stores the result back into B. C and V in SR are cleared and the status
+ * bits returned by dsp_asr56() OR'ed in, after which
+ * emu_ccr_update_e_u_n_z() is called with the new accumulator words.
+ */
+static void emu_asr_b(dsp_core_t* dsp)
+{
+    uint32_t acc[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags = dsp_asr56(acc, 1);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_V)))) | flags;
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+/*
+ * CLR A: zeroes all three words of accumulator A.
+ * SR: E, N and V end up clear; U and Z end up set.
+ */
+static void emu_clr_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A0] = 0;
+    dsp->registers[DSP_REG_A1] = 0;
+    dsp->registers[DSP_REG_A2] = 0;
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_E)|(1<<DSP_SR_N)|(1<<DSP_SR_V))))
+        | ((1<<DSP_SR_U)|(1<<DSP_SR_Z));
+}
+
+/*
+ * CLR B: zeroes all three words of accumulator B.
+ * SR: E, N and V end up clear; U and Z end up set.
+ */
+static void emu_clr_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B0] = 0;
+    dsp->registers[DSP_REG_B1] = 0;
+    dsp->registers[DSP_REG_B2] = 0;
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_E)|(1<<DSP_SR_N)|(1<<DSP_SR_V))))
+        | ((1<<DSP_SR_U)|(1<<DSP_SR_Z));
+}
+
+/*
+ * CMP B,A: dsp_sub56() is run on a scratch copy of A2:A1:A0 with a copy of
+ * B2:B1:B0 as the source. The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_b_a(dsp_core_t* dsp)
+{
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint32_t operand[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP A,B: dsp_sub56() is run on a scratch copy of B2:B1:B0 with a copy of
+ * A2:A1:A0 as the source. The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_a_b(dsp_core_t* dsp)
+{
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint32_t operand[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP X0,A: dsp_sub56() is run on a scratch copy of A2:A1:A0 with an operand
+ * built from X0 (middle word = X0, low word = 0, top word = 0xff when bit 23
+ * of X0 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_x0_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_X0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP X0,B: dsp_sub56() is run on a scratch copy of B2:B1:B0 with an operand
+ * built from X0 (middle word = X0, low word = 0, top word = 0xff when bit 23
+ * of X0 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_x0_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_X0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP Y0,A: dsp_sub56() is run on a scratch copy of A2:A1:A0 with an operand
+ * built from Y0 (middle word = Y0, low word = 0, top word = 0xff when bit 23
+ * of Y0 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_y0_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP Y0,B: dsp_sub56() is run on a scratch copy of B2:B1:B0 with an operand
+ * built from Y0 (middle word = Y0, low word = 0, top word = 0xff when bit 23
+ * of Y0 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_y0_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+/*
+ * CMP X1,A: dsp_sub56() is run on a scratch copy of A2:A1:A0 with an operand
+ * built from X1 (middle word = X1, low word = 0, top word = 0xff when bit 23
+ * of X1 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_x1_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_X1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP X1,B: dsp_sub56() is run on a scratch copy of B2:B1:B0 with an operand
+ * built from X1 (middle word = X1, low word = 0, top word = 0xff when bit 23
+ * of X1 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_x1_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_X1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP Y1,A: dsp_sub56() is run on a scratch copy of A2:A1:A0 with an operand
+ * built from Y1 (middle word = Y1, low word = 0, top word = 0xff when bit 23
+ * of Y1 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_y1_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMP Y1,B: dsp_sub56() is run on a scratch copy of B2:B1:B0 with an operand
+ * built from Y1 (middle word = Y1, low word = 0, top word = 0xff when bit 23
+ * of Y1 is set, else 0). The scratch result is not stored back; this
+ * function itself only updates SR.
+ */
+static void emu_cmp_y1_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    operand[1] = dsp->registers[DSP_REG_Y1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM B,A: scratch copies of A2:A1:A0 (destination) and B2:B1:B0 (source)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_b_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[2] = dsp->registers[DSP_REG_B0];
+    operand[1] = dsp->registers[DSP_REG_B1];
+    operand[0] = dsp->registers[DSP_REG_B2];
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM A,B: scratch copies of B2:B1:B0 (destination) and A2:A1:A0 (source)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_a_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[2] = dsp->registers[DSP_REG_A0];
+    operand[1] = dsp->registers[DSP_REG_A1];
+    operand[0] = dsp->registers[DSP_REG_A2];
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM X0,A: a scratch copy of A2:A1:A0 and an operand built from X0 (middle
+ * word = X0, low word = 0, top word = 0xff when bit 23 of X0 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_x0_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_X0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM X0,B: a scratch copy of B2:B1:B0 and an operand built from X0 (middle
+ * word = X0, low word = 0, top word = 0xff when bit 23 of X0 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_x0_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_X0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM Y0,A: a scratch copy of A2:A1:A0 and an operand built from Y0 (middle
+ * word = Y0, low word = 0, top word = 0xff when bit 23 of Y0 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_y0_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_Y0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM Y0,B: a scratch copy of B2:B1:B0 and an operand built from Y0 (middle
+ * word = Y0, low word = 0, top word = 0xff when bit 23 of Y0 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_y0_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_Y0];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM X1,A: a scratch copy of A2:A1:A0 and an operand built from X1 (middle
+ * word = X1, low word = 0, top word = 0xff when bit 23 of X1 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_x1_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_X1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM X1,B: a scratch copy of B2:B1:B0 and an operand built from X1 (middle
+ * word = X1, low word = 0, top word = 0xff when bit 23 of X1 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_x1_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_X1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM Y1,A: a scratch copy of A2:A1:A0 and an operand built from Y1 (middle
+ * word = Y1, low word = 0, top word = 0xff when bit 23 of Y1 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_y1_a(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1],
+        dsp->registers[DSP_REG_A0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_Y1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * CMPM Y1,B: a scratch copy of B2:B1:B0 and an operand built from Y1 (middle
+ * word = Y1, low word = 0, top word = 0xff when bit 23 of Y1 is set, else 0)
+ * are each passed through dsp_abs56() and then given to dsp_sub56(). The
+ * scratch result is not stored back; this function itself only updates SR.
+ */
+static void emu_cmpm_y1_b(dsp_core_t* dsp)
+{
+    uint32_t operand[3];
+    uint32_t scratch[3] = {
+        dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1],
+        dsp->registers[DSP_REG_B0],
+    };
+    uint16_t flags;
+
+    dsp_abs56(scratch);
+
+    operand[1] = dsp->registers[DSP_REG_Y1];
+    operand[0] = (operand[1] & (1<<23)) ? 0xff : 0x0;
+    operand[2] = 0;
+    dsp_abs56(operand);
+
+    flags = dsp_sub56(operand, scratch);
+
+    emu_ccr_update_e_u_n_z(dsp, scratch[0], scratch[1], scratch[2]);
+
+    /* Clear V and C in SR, then OR in the status bits from dsp_sub56(). */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C)))) | flags;
+}
+
+/*
+ * EOR X0,A: exclusive-ORs X0 into A1 and masks the result with BITMASK(24)
+ * (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_eor_x0_a(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_A1] =
+        (dsp->registers[DSP_REG_A1] ^ dsp->registers[DSP_REG_X0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * EOR X0,B: exclusive-ORs X0 into B1 and masks the result with BITMASK(24)
+ * (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_eor_x0_b(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_B1] =
+        (dsp->registers[DSP_REG_B1] ^ dsp->registers[DSP_REG_X0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * EOR Y0,A: exclusive-ORs Y0 into A1 and masks the result with BITMASK(24)
+ * (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_eor_y0_a(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_A1] =
+        (dsp->registers[DSP_REG_A1] ^ dsp->registers[DSP_REG_Y0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * EOR Y0,B: exclusive-ORs Y0 into B1 and masks the result with BITMASK(24)
+ * (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_eor_y0_b(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_B1] =
+        (dsp->registers[DSP_REG_B1] ^ dsp->registers[DSP_REG_Y0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * EOR X1,A: exclusive-ORs X1 into A1 and masks the result with BITMASK(24)
+ * (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_eor_x1_a(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_A1] =
+        (dsp->registers[DSP_REG_A1] ^ dsp->registers[DSP_REG_X1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * EOR X1,B: exclusive-ORs X1 into B1 and masks the result with BITMASK(24)
+ * (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_eor_x1_b(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_B1] =
+        (dsp->registers[DSP_REG_B1] ^ dsp->registers[DSP_REG_X1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * EOR Y1,A: exclusive-ORs Y1 into A1 and masks the result with BITMASK(24)
+ * (A2 and A0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new A1 is
+ * set, and Z when the new A1 is zero.
+ */
+static void emu_eor_y1_a(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_A1] =
+        (dsp->registers[DSP_REG_A1] ^ dsp->registers[DSP_REG_Y1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * EOR Y1,B: exclusive-ORs Y1 into B1 and masks the result with BITMASK(24)
+ * (B2 and B0 are not touched here).
+ * SR: N, Z and V are cleared; N is then set when bit 23 of the new B1 is
+ * set, and Z when the new B1 is zero.
+ */
+static void emu_eor_y1_b(dsp_core_t* dsp)
+{
+    /* The original author flagged the 24-bit mask as possibly redundant. */
+    dsp->registers[DSP_REG_B1] =
+        (dsp->registers[DSP_REG_B1] ^ dsp->registers[DSP_REG_Y1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * LSL A: shifts A1 left by one bit and masks it with BITMASK(24); A2 and A0
+ * are not touched here.
+ * SR: C, N, Z and V are cleared. The bit that was in position 23 of A1
+ * before the shift is OR'ed into SR unshifted (bit 0); N is set when bit 23
+ * of the new A1 is set, and Z when the new A1 is zero.
+ */
+static void emu_lsl_a(dsp_core_t* dsp)
+{
+    const uint32_t shifted_out = (dsp->registers[DSP_REG_A1] >> 23) & 1;
+
+    dsp->registers[DSP_REG_A1] = (dsp->registers[DSP_REG_A1] << 1) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V))))
+        | shifted_out;
+    if (dsp->registers[DSP_REG_A1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * LSL B: shifts B1 left by one bit and masks it with BITMASK(24); B2 and B0
+ * are not touched here.
+ * SR: C, N, Z and V are cleared. The bit that was in position 23 of B1
+ * before the shift is OR'ed into SR unshifted (bit 0); N is set when bit 23
+ * of the new B1 is set, and Z when the new B1 is zero.
+ */
+static void emu_lsl_b(dsp_core_t* dsp)
+{
+    const uint32_t shifted_out = (dsp->registers[DSP_REG_B1] >> 23) & 1;
+
+    dsp->registers[DSP_REG_B1] = (dsp->registers[DSP_REG_B1] << 1) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V))))
+        | shifted_out;
+    if (dsp->registers[DSP_REG_B1] & (1<<23)) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_N;
+    }
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * LSR A: shifts A1 right by one bit; A2 and A0 are not touched here.
+ * SR: C, N, Z and V are cleared. The bit that was in position 0 of A1
+ * before the shift is OR'ed into SR unshifted (bit 0), and Z is set when the
+ * new A1 is zero. N is never set by this function.
+ */
+static void emu_lsr_a(dsp_core_t* dsp)
+{
+    const uint32_t shifted_out = dsp->registers[DSP_REG_A1] & 1;
+
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_A1] >> 1;
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V))))
+        | shifted_out;
+    if (dsp->registers[DSP_REG_A1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * LSR B: shifts B1 right by one bit; B2 and B0 are not touched here.
+ * SR: C, N, Z and V are cleared. The bit that was in position 0 of B1
+ * before the shift is OR'ed into SR unshifted (bit 0), and Z is set when the
+ * new B1 is zero. N is never set by this function.
+ */
+static void emu_lsr_b(dsp_core_t* dsp)
+{
+    const uint32_t shifted_out = dsp->registers[DSP_REG_B1] & 1;
+
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_B1] >> 1;
+
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V))))
+        | shifted_out;
+    if (dsp->registers[DSP_REG_B1] == 0) {
+        dsp->registers[DSP_REG_SR] |= 1<<DSP_SR_Z;
+    }
+}
+
+/*
+ * MAC +X0,X0,A: dsp_mul56() is called with X0, X0 and SIGN_PLUS to fill a
+ * three-word temporary, which dsp_add56() then combines into a copy of
+ * A2:A1:A0; the copy is stored back into A.
+ */
+static void emu_mac_p_x0_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3];
+    uint32_t acc[3];
+    uint16_t flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+    flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Only V is cleared in SR, and bit 0 of the returned flags is masked
+     * off (0xfe) before they are OR'ed in. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-(1<<DSP_SR_V))) | (flags & 0xfe);
+}
+
+/*
+ * MAC -X0,X0,A: dsp_mul56() is called with X0, X0 and SIGN_MINUS to fill a
+ * three-word temporary, which dsp_add56() then combines into a copy of
+ * A2:A1:A0; the copy is stored back into A.
+ */
+static void emu_mac_m_x0_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3];
+    uint32_t acc[3];
+    uint16_t flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+    flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Only V is cleared in SR, and bit 0 of the returned flags is masked
+     * off (0xfe) before they are OR'ed in. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR]
+        & (BITMASK(16)-(1<<DSP_SR_V))) | (flags & 0xfe);
+}
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of X0 and X0 to accumulator B2:B1:B0. */
+static void emu_mac_p_x0_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of X0 and X0 to accumulator B2:B1:B0. */
+static void emu_mac_m_x0_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y0 and Y0 to accumulator A2:A1:A0. */
+static void emu_mac_p_y0_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y0 and Y0 to accumulator A2:A1:A0. */
+static void emu_mac_m_y0_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y0 and Y0 to accumulator B2:B1:B0. */
+static void emu_mac_p_y0_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y0 and Y0 to accumulator B2:B1:B0. */
+static void emu_mac_m_y0_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of X1 and X0 to accumulator A2:A1:A0. */
+static void emu_mac_p_x1_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of X1 and X0 to accumulator A2:A1:A0. */
+static void emu_mac_m_x1_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of X1 and X0 to accumulator B2:B1:B0. */
+static void emu_mac_p_x1_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of X1 and X0 to accumulator B2:B1:B0. */
+static void emu_mac_m_x1_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y1 and Y0 to accumulator A2:A1:A0. */
+static void emu_mac_p_y1_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y1 and Y0 to accumulator A2:A1:A0. */
+static void emu_mac_m_y1_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y1 and Y0 to accumulator B2:B1:B0. */
+static void emu_mac_p_y1_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y1 and Y0 to accumulator B2:B1:B0. */
+static void emu_mac_m_y1_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of X0 and Y1 to accumulator A2:A1:A0. */
+static void emu_mac_p_x0_y1_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of X0 and Y1 to accumulator A2:A1:A0. */
+static void emu_mac_m_x0_y1_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of X0 and Y1 to accumulator B2:B1:B0. */
+static void emu_mac_p_x0_y1_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of X0 and Y1 to accumulator B2:B1:B0. */
+static void emu_mac_m_x0_y1_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y0 and X0 to accumulator A2:A1:A0. */
+static void emu_mac_p_y0_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y0 and X0 to accumulator A2:A1:A0. */
+static void emu_mac_m_y0_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y0 and X0 to accumulator B2:B1:B0. */
+static void emu_mac_p_y0_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y0 and X0 to accumulator B2:B1:B0. */
+static void emu_mac_m_y0_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of X1 and Y0 to accumulator A2:A1:A0. */
+static void emu_mac_p_x1_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of X1 and Y0 to accumulator A2:A1:A0. */
+static void emu_mac_m_x1_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of X1 and Y0 to accumulator B2:B1:B0. */
+static void emu_mac_p_x1_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of X1 and Y0 to accumulator B2:B1:B0. */
+static void emu_mac_m_x1_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y1 and X1 to accumulator A2:A1:A0. */
+static void emu_mac_p_y1_x1_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y1 and X1 to accumulator A2:A1:A0. */
+static void emu_mac_m_y1_x1_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_PLUS): add the dsp_mul56 product of Y1 and X1 to accumulator B2:B1:B0. */
+static void emu_mac_p_y1_x1_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MAC (SIGN_MINUS): add the dsp_mul56 product of Y1 and X1 to accumulator B2:B1:B0. */
+static void emu_mac_m_y1_x1_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of X0 and X0 to A2:A1:A0, then round. */
+static void emu_macr_p_x0_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of X0 and X0 to A2:A1:A0, then round. */
+static void emu_macr_m_x0_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of X0 and X0 to B2:B1:B0, then round. */
+static void emu_macr_p_x0_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of X0 and X0 to B2:B1:B0, then round. */
+static void emu_macr_m_x0_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of Y0 and Y0 to A2:A1:A0, then round. */
+static void emu_macr_p_y0_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of Y0 and Y0 to A2:A1:A0, then round. */
+static void emu_macr_m_y0_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of Y0 and Y0 to B2:B1:B0, then round. */
+static void emu_macr_p_y0_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of Y0 and Y0 to B2:B1:B0, then round. */
+static void emu_macr_m_y0_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of X1 and X0 to A2:A1:A0, then round. */
+static void emu_macr_p_x1_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of X1 and X0 to A2:A1:A0, then round. */
+static void emu_macr_m_x1_x0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of X1 and X0 to B2:B1:B0, then round. */
+static void emu_macr_p_x1_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of X1 and X0 to B2:B1:B0, then round. */
+static void emu_macr_m_x1_x0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of Y1 and Y0 to A2:A1:A0, then round. */
+static void emu_macr_p_y1_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of Y1 and Y0 to A2:A1:A0, then round. */
+static void emu_macr_m_y1_y0_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of Y1 and Y0 to B2:B1:B0, then round. */
+static void emu_macr_p_y1_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_MINUS): add the dsp_mul56 product of Y1 and Y0 to B2:B1:B0, then round. */
+static void emu_macr_m_y1_y0_b(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    /* Stage the accumulator as {B2, B1, B0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+/* MACR (SIGN_PLUS): add the dsp_mul56 product of X0 and Y1 to A2:A1:A0, then round. */
+static void emu_macr_p_x0_y1_a(dsp_core_t* dsp)
+{
+    uint32_t product[3], acc[3];
+    uint16_t add_flags;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+    /* Stage the accumulator as {A2, A1, A0}; dsp_add56 leaves the sum in acc. */
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_flags = dsp_add56(product, acc);
+    /* add_flags was captured above, before dsp_rnd56 adjusts the sum. */
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask SR with BITMASK(16) minus V, then OR in bits 1-7 of the dsp_add56 flags. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V))) | (add_flags & 0xfe);
+}
+
+static void emu_macr_m_x0_y1_a(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(X0, Y1, SIGN_MINUS) into accumulator A, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_p_x0_y1_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(X0, Y1, SIGN_PLUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_m_x0_y1_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(X0, Y1, SIGN_MINUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_p_y0_x0_a(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y0, X0, SIGN_PLUS) into accumulator A, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_m_y0_x0_a(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y0, X0, SIGN_MINUS) into accumulator A, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_p_y0_x0_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y0, X0, SIGN_PLUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_m_y0_x0_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y0, X0, SIGN_MINUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_p_x1_y0_a(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(X1, Y0, SIGN_PLUS) into accumulator A, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_m_x1_y0_a(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(X1, Y0, SIGN_MINUS) into accumulator A, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_p_x1_y0_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(X1, Y0, SIGN_PLUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    /* dsp_rnd56 has to run on the summed value: calling it before acc is
+       loaded reads uninitialized words and leaves B unrounded. */
+    dsp_rnd56(dsp, acc);
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+    dsp->registers[DSP_REG_SR] |= add_sr & 0xfe;
+}
+
+static void emu_macr_m_x1_y0_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(X1, Y0, SIGN_MINUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_p_y1_x1_a(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y1, X1, SIGN_PLUS) into accumulator A, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_m_y1_x1_a(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y1, X1, SIGN_MINUS) into accumulator A, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_p_y1_x1_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y1, X1, SIGN_PLUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+static void emu_macr_m_y1_x1_b(dsp_core_t* dsp)
+{
+    /* Adds dsp_mul56(Y1, X1, SIGN_MINUS) into accumulator B, then applies dsp_rnd56. */
+    uint32_t product[3], acc[3];
+    uint16_t add_sr;
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    add_sr = dsp_add56(product, acc);
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Mask bit DSP_SR_V out of SR, then OR in bits 1-7 of dsp_add56's result. */
+    dsp->registers[DSP_REG_SR] = (dsp->registers[DSP_REG_SR] & (BITMASK(16)-(1<<DSP_SR_V)))
+                               | (add_sr & 0xfe);
+}
+
+
+static void emu_move(dsp_core_t* dsp)
+{
+    /*  Intentionally empty: a move instruction inside an ALU opcode
+        is taken care of by the parallel move dispatcher, not here. */
+}
+
+static void emu_mpy_p_x0_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X0, X0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x0_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X0, X0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_x0_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X0, X0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x0_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X0, X0, SIGN_MINUS). */
+    uint32_t product[3];
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y0_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y0, Y0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y0_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y0, Y0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y0_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y0, Y0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y0_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y0, Y0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_x1_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X1, X0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x1_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X1, X0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_x1_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X1, X0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x1_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X1, X0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y1_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y1, Y0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y1_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y1, Y0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y1_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y1, Y0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y1_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y1, Y0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_x0_y1_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X0, Y1, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x0_y1_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X0, Y1, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_x0_y1_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X0, Y1, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x0_y1_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X0, Y1, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y0_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y0, X0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y0_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y0, X0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y0_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y0, X0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y0_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y0, X0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_x1_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X1, Y0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x1_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(X1, Y0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_x1_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X1, Y0, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_x1_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(X1, Y0, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y1_x1_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y1, X1, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y1_x1_a(dsp_core_t* dsp)
+{
+    /* Accumulator A is overwritten with dsp_mul56(Y1, X1, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_p_y1_x1_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y1, X1, SIGN_PLUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpy_m_y1_x1_b(dsp_core_t* dsp)
+{
+    /* Accumulator B is overwritten with dsp_mul56(Y1, X1, SIGN_MINUS). */
+    uint32_t product[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_x0_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(X0, X0, SIGN_PLUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], result, SIGN_PLUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_m_x0_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(X0, X0, SIGN_MINUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], result, SIGN_MINUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_x0_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B receives dsp_mul56(X0, X0, SIGN_PLUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], result, SIGN_PLUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_B0] = result[2];
+    dsp->registers[DSP_REG_B1] = result[1];
+    dsp->registers[DSP_REG_B2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_m_x0_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B receives dsp_mul56(X0, X0, SIGN_MINUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_X0], result, SIGN_MINUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_B0] = result[2];
+    dsp->registers[DSP_REG_B1] = result[1];
+    dsp->registers[DSP_REG_B2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_y0_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(Y0, Y0, SIGN_PLUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], result, SIGN_PLUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_m_y0_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(Y0, Y0, SIGN_MINUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], result, SIGN_MINUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_y0_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B receives dsp_mul56(Y0, Y0, SIGN_PLUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], result, SIGN_PLUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_B0] = result[2];
+    dsp->registers[DSP_REG_B1] = result[1];
+    dsp->registers[DSP_REG_B2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_m_y0_y0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B receives dsp_mul56(Y0, Y0, SIGN_MINUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_Y0], result, SIGN_MINUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_B0] = result[2];
+    dsp->registers[DSP_REG_B1] = result[1];
+    dsp->registers[DSP_REG_B2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_x1_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(X1, X0, SIGN_PLUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], result, SIGN_PLUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_m_x1_x0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(X1, X0, SIGN_MINUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], result, SIGN_MINUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_x1_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B receives dsp_mul56(X1, X0, SIGN_PLUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], result, SIGN_PLUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_B0] = result[2];
+    dsp->registers[DSP_REG_B1] = result[1];
+    dsp->registers[DSP_REG_B2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_m_x1_x0_b(dsp_core_t* dsp)
+{
+    /* Accumulator B receives dsp_mul56(X1, X0, SIGN_MINUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_X0], result, SIGN_MINUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_B0] = result[2];
+    dsp->registers[DSP_REG_B1] = result[1];
+    dsp->registers[DSP_REG_B2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_y1_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(Y1, Y0, SIGN_PLUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], result, SIGN_PLUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_m_y1_y0_a(dsp_core_t* dsp)
+{
+    /* Accumulator A receives dsp_mul56(Y1, Y0, SIGN_MINUS) after a dsp_rnd56 pass. */
+    uint32_t result[3];
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], result, SIGN_MINUS);
+    dsp_rnd56(dsp, result);
+    dsp->registers[DSP_REG_A0] = result[2];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A2] = result[0];
+
+    /* emu_ccr_update_e_u_n_z sees the stored words; bit DSP_SR_V is then masked out of SR. */
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+    dsp->registers[DSP_REG_SR] &= (BITMASK(16) - (1 << DSP_SR_V));
+}
+
+static void emu_mpyr_p_y1_y0_b(dsp_core_t* dsp)
+{
+    /* MPYR +Y1,Y0,B: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_y1_y0_b(dsp_core_t* dsp)
+{
+    /* MPYR -Y1,Y0,B: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_x0_y1_a(dsp_core_t* dsp)
+{
+    /* MPYR +X0,Y1,A: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_x0_y1_a(dsp_core_t* dsp)
+{
+    /* MPYR -X0,Y1,A: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_x0_y1_b(dsp_core_t* dsp)
+{
+    /* MPYR +X0,Y1,B: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_x0_y1_b(dsp_core_t* dsp)
+{
+    /* MPYR -X0,Y1,B: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X0], dsp->registers[DSP_REG_Y1], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_y0_x0_a(dsp_core_t* dsp)
+{
+    /* MPYR +Y0,X0,A: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_y0_x0_a(dsp_core_t* dsp)
+{
+    /* MPYR -Y0,X0,A: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_y0_x0_b(dsp_core_t* dsp)
+{
+    /* MPYR +Y0,X0,B: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_y0_x0_b(dsp_core_t* dsp)
+{
+    /* MPYR -Y0,X0,B: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y0], dsp->registers[DSP_REG_X0], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_x1_y0_a(dsp_core_t* dsp)
+{
+    /* MPYR +X1,Y0,A: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_x1_y0_a(dsp_core_t* dsp)
+{
+    /* MPYR -X1,Y0,A: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_x1_y0_b(dsp_core_t* dsp)
+{
+    /* MPYR +X1,Y0,B: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_x1_y0_b(dsp_core_t* dsp)
+{
+    /* MPYR -X1,Y0,B: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_X1], dsp->registers[DSP_REG_Y0], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_y1_x1_a(dsp_core_t* dsp)
+{
+    /* MPYR +Y1,X1,A: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_y1_x1_a(dsp_core_t* dsp)
+{
+    /* MPYR -Y1,X1,A: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in A. */
+    uint32_t product[3];    /* [0] -> A2, [1] -> A1, [2] -> A0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_A0] = product[2];
+    dsp->registers[DSP_REG_A1] = product[1];
+    dsp->registers[DSP_REG_A2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_p_y1_x1_b(dsp_core_t* dsp)
+{
+    /* MPYR +Y1,X1,B: dsp_mul56 product (SIGN_PLUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_PLUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_mpyr_m_y1_x1_b(dsp_core_t* dsp)
+{
+    /* MPYR -Y1,X1,B: dsp_mul56 product (SIGN_MINUS), rounded by dsp_rnd56, lands in B. */
+    uint32_t product[3];    /* [0] -> B2, [1] -> B1, [2] -> B0 */
+
+    dsp_mul56(dsp->registers[DSP_REG_Y1], dsp->registers[DSP_REG_X1], product, SIGN_MINUS);
+    dsp_rnd56(dsp, product);
+    dsp->registers[DSP_REG_B0] = product[2];
+    dsp->registers[DSP_REG_B1] = product[1];
+    dsp->registers[DSP_REG_B2] = product[0];
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);    /* V is always cleared */
+}
+
+static void emu_neg_a(dsp_core_t* dsp)
+{
+    /* NEG A: dsp_sub56 takes A away from a zeroed buffer; the buffer is written back to A. */
+    uint32_t acc[3], result[3] = {0, 0, 0};
+    uint32_t min_value;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+
+    /* The single input treated as overflowing is $80:000000:000000. */
+    min_value = (acc[0] == 0x80) && (acc[1] == 0) && (acc[2] == 0);
+
+    dsp_sub56(acc, result);
+    dsp->registers[DSP_REG_A2] = result[0];
+    dsp->registers[DSP_REG_A1] = result[1];
+    dsp->registers[DSP_REG_A0] = result[2];
+
+    /* V is cleared first; on overflow both V and L are then set. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+    dsp->registers[DSP_REG_SR] |= (min_value<<DSP_SR_L)|(min_value<<DSP_SR_V);
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+}
+
+static void emu_neg_b(dsp_core_t* dsp)
+{
+    /* NEG B: dsp_sub56 takes B away from a zeroed buffer; the buffer is written back to B. */
+    uint32_t acc[3], result[3] = {0, 0, 0};
+    uint32_t min_value;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+
+    /* The single input treated as overflowing is $80:000000:000000. */
+    min_value = (acc[0] == 0x80) && (acc[1] == 0) && (acc[2] == 0);
+
+    dsp_sub56(acc, result);
+    dsp->registers[DSP_REG_B2] = result[0];
+    dsp->registers[DSP_REG_B1] = result[1];
+    dsp->registers[DSP_REG_B0] = result[2];
+
+    /* V is cleared first; on overflow both V and L are then set. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+    dsp->registers[DSP_REG_SR] |= (min_value<<DSP_SR_L)|(min_value<<DSP_SR_V);
+    emu_ccr_update_e_u_n_z(dsp, result[0], result[1], result[2]);
+}
+
+static void emu_nop(dsp_core_t* dsp)
+{
+    (void)dsp;  /* NOP: nothing in the core state is read or written here. */
+}
+
+static void emu_not_a(dsp_core_t* dsp)
+{
+    /* NOT A: complement A1 only; the 24-bit mask discards whatever ~ sets above bit 23. */
+    dsp->registers[DSP_REG_A1] = ~dsp->registers[DSP_REG_A1] & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_A1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_A1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_not_b(dsp_core_t* dsp)
+{
+    /* NOT B: complement B1 only; the 24-bit mask discards whatever ~ sets above bit 23. */
+    dsp->registers[DSP_REG_B1] = ~dsp->registers[DSP_REG_B1] & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_B1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_B1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_x0_a(dsp_core_t* dsp)
+{
+    /* OR X0,A: A1 |= X0, kept to 24 bits; N and Z mirror the new A1, V is cleared. */
+    dsp->registers[DSP_REG_A1] = (dsp->registers[DSP_REG_A1] | dsp->registers[DSP_REG_X0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_A1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_A1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_x0_b(dsp_core_t* dsp)
+{
+    /* OR X0,B: B1 |= X0, kept to 24 bits; N and Z mirror the new B1, V is cleared. */
+    dsp->registers[DSP_REG_B1] = (dsp->registers[DSP_REG_B1] | dsp->registers[DSP_REG_X0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_B1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_B1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_y0_a(dsp_core_t* dsp)
+{
+    /* OR Y0,A: A1 |= Y0, kept to 24 bits; N and Z mirror the new A1, V is cleared. */
+    dsp->registers[DSP_REG_A1] = (dsp->registers[DSP_REG_A1] | dsp->registers[DSP_REG_Y0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_A1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_A1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_y0_b(dsp_core_t* dsp)
+{
+    /* OR Y0,B: B1 |= Y0, kept to 24 bits; N and Z mirror the new B1, V is cleared. */
+    dsp->registers[DSP_REG_B1] = (dsp->registers[DSP_REG_B1] | dsp->registers[DSP_REG_Y0]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_B1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_B1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_x1_a(dsp_core_t* dsp)
+{
+    /* OR X1,A: A1 |= X1, kept to 24 bits; N and Z mirror the new A1, V is cleared. */
+    dsp->registers[DSP_REG_A1] = (dsp->registers[DSP_REG_A1] | dsp->registers[DSP_REG_X1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_A1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_A1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_x1_b(dsp_core_t* dsp)
+{
+    /* OR X1,B: B1 |= X1, kept to 24 bits; N and Z mirror the new B1, V is cleared. */
+    dsp->registers[DSP_REG_B1] = (dsp->registers[DSP_REG_B1] | dsp->registers[DSP_REG_X1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_B1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_B1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_y1_a(dsp_core_t* dsp)
+{
+    /* OR Y1,A: A1 |= Y1, kept to 24 bits; N and Z mirror the new A1, V is cleared. */
+    dsp->registers[DSP_REG_A1] = (dsp->registers[DSP_REG_A1] | dsp->registers[DSP_REG_Y1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_A1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_A1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_or_y1_b(dsp_core_t* dsp)
+{
+    /* OR Y1,B: B1 |= Y1, kept to 24 bits; N and Z mirror the new B1, V is cleared. */
+    dsp->registers[DSP_REG_B1] = (dsp->registers[DSP_REG_B1] | dsp->registers[DSP_REG_Y1]) & BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_B1]==0)<<DSP_SR_Z;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_B1]>>23) & 1)<<DSP_SR_N;
+}
+
+static void emu_rnd_a(dsp_core_t* dsp)
+{
+    /* RND A: run A2:A1:A0 through dsp_rnd56, store it back, refresh E/U/N/Z. */
+    uint32_t acc[3];
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+static void emu_rnd_b(dsp_core_t* dsp)
+{
+    /* RND B: run B2:B1:B0 through dsp_rnd56, store it back, refresh E/U/N/Z. */
+    uint32_t acc[3];
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+    dsp_rnd56(dsp, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+static void emu_rol_a(dsp_core_t* dsp)
+{
+    /* ROL A: rotate A1 left through carry -- the previous C enters bit 0, old bit 23 becomes C. */
+    uint32_t oldcarry = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+    uint32_t newcarry = (dsp->registers[DSP_REG_A1]>>23) & 1;
+
+    dsp->registers[DSP_REG_A1] <<= 1;
+    dsp->registers[DSP_REG_A1] |= oldcarry;     /* incoming carry, not the bit just shifted out */
+    dsp->registers[DSP_REG_A1] &= BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= newcarry<<DSP_SR_C;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_A1]>>23) & 1)<<DSP_SR_N;
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_A1]==0)<<DSP_SR_Z;
+}
+
+static void emu_rol_b(dsp_core_t* dsp)
+{
+    /* ROL B: rotate B1 left through carry -- the previous C enters bit 0, old bit 23 becomes C. */
+    uint32_t oldcarry = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+    uint32_t newcarry = (dsp->registers[DSP_REG_B1]>>23) & 1;
+
+    dsp->registers[DSP_REG_B1] <<= 1;
+    dsp->registers[DSP_REG_B1] |= oldcarry;     /* incoming carry, not the bit just shifted out */
+    dsp->registers[DSP_REG_B1] &= BITMASK(24);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= newcarry<<DSP_SR_C;
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[DSP_REG_B1]>>23) & 1)<<DSP_SR_N;
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_B1]==0)<<DSP_SR_Z;
+}
+
+static void emu_ror_a(dsp_core_t* dsp)
+{
+    /* ROR A: rotate A1 right through carry -- the previous C enters bit 23, old bit 0 becomes C. */
+    uint32_t oldcarry = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+    uint32_t newcarry = dsp->registers[DSP_REG_A1] & 1;
+
+    dsp->registers[DSP_REG_A1] >>= 1;
+    dsp->registers[DSP_REG_A1] |= oldcarry<<23; /* incoming carry, not the bit just shifted out */
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= newcarry<<DSP_SR_C;
+    dsp->registers[DSP_REG_SR] |= oldcarry<<DSP_SR_N;   /* N follows the new bit 23 of A1 */
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_A1]==0)<<DSP_SR_Z;
+}
+
+static void emu_ror_b(dsp_core_t* dsp)
+{
+    /* ROR B: rotate B1 right through carry -- the previous C enters bit 23, old bit 0 becomes C. */
+    uint32_t oldcarry = (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+    uint32_t newcarry = dsp->registers[DSP_REG_B1] & 1;
+
+    dsp->registers[DSP_REG_B1] >>= 1;
+    dsp->registers[DSP_REG_B1] |= oldcarry<<23; /* incoming carry, not the bit just shifted out */
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= newcarry<<DSP_SR_C;
+    dsp->registers[DSP_REG_SR] |= oldcarry<<DSP_SR_N;   /* N follows the new bit 23 of B1 */
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[DSP_REG_B1]==0)<<DSP_SR_Z;
+}
+
+static void emu_sbc_x_a(dsp_core_t* dsp)
+{
+    /* SBC X,A: A = A - X - C (via dsp_sub56), with X1:X0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint32_t borrow_in = (dsp->registers[DSP_REG_SR]>>(DSP_SR_C)) & 1;
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+
+    subtrahend[1] = dsp->registers[DSP_REG_X1];
+    subtrahend[2] = dsp->registers[DSP_REG_X0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+    if (borrow_in) {
+        /* The incoming carry is removed with a second subtraction of 1. */
+        subtrahend[0] = subtrahend[1] = 0;
+        subtrahend[2] = 1;
+        flags |= dsp_sub56(subtrahend, acc);
+    }
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sbc_x_b(dsp_core_t* dsp)
+{
+    /* SBC X,B: B = B - X - C (via dsp_sub56), with X1:X0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint32_t borrow_in = (dsp->registers[DSP_REG_SR]>>(DSP_SR_C)) & 1;
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+
+    subtrahend[1] = dsp->registers[DSP_REG_X1];
+    subtrahend[2] = dsp->registers[DSP_REG_X0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+    if (borrow_in) {
+        /* The incoming carry is removed with a second subtraction of 1. */
+        subtrahend[0] = subtrahend[1] = 0;
+        subtrahend[2] = 1;
+        flags |= dsp_sub56(subtrahend, acc);
+    }
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sbc_y_a(dsp_core_t* dsp)
+{
+    /* SBC Y,A: A = A - Y - C (via dsp_sub56), with Y1:Y0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint32_t borrow_in = (dsp->registers[DSP_REG_SR]>>(DSP_SR_C)) & 1;
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+
+    subtrahend[1] = dsp->registers[DSP_REG_Y1];
+    subtrahend[2] = dsp->registers[DSP_REG_Y0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+    if (borrow_in) {
+        /* The incoming carry is removed with a second subtraction of 1. */
+        subtrahend[0] = subtrahend[1] = 0;
+        subtrahend[2] = 1;
+        flags |= dsp_sub56(subtrahend, acc);
+    }
+
+    dsp->registers[DSP_REG_A2] = acc[0];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sbc_y_b(dsp_core_t* dsp)
+{
+    /* SBC Y,B: B = B - Y - C (via dsp_sub56), with Y1:Y0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint32_t borrow_in = (dsp->registers[DSP_REG_SR]>>(DSP_SR_C)) & 1;
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+
+    subtrahend[1] = dsp->registers[DSP_REG_Y1];
+    subtrahend[2] = dsp->registers[DSP_REG_Y0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+    if (borrow_in) {
+        /* The incoming carry is removed with a second subtraction of 1. */
+        subtrahend[0] = subtrahend[1] = 0;
+        subtrahend[2] = 1;
+        flags |= dsp_sub56(subtrahend, acc);
+    }
+
+    dsp->registers[DSP_REG_B2] = acc[0];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B0] = acc[2];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_b_a(dsp_core_t* dsp)
+{
+    /* SUB B,A: A = A - B (via dsp_sub56) across all three accumulator words. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    subtrahend[0] = dsp->registers[DSP_REG_B2];
+    subtrahend[1] = dsp->registers[DSP_REG_B1];
+    subtrahend[2] = dsp->registers[DSP_REG_B0];
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_a_b(dsp_core_t* dsp)
+{
+    /* SUB A,B: B = B - A (via dsp_sub56) across all three accumulator words. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[0] = dsp->registers[DSP_REG_A2];
+    subtrahend[1] = dsp->registers[DSP_REG_A1];
+    subtrahend[2] = dsp->registers[DSP_REG_A0];
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_x_a(dsp_core_t* dsp)
+{
+    /* SUB X,A: A = A - X (via dsp_sub56), with X1:X0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    subtrahend[1] = dsp->registers[DSP_REG_X1];
+    subtrahend[2] = dsp->registers[DSP_REG_X0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_x_b(dsp_core_t* dsp)
+{
+    /* SUB X,B: B = B - X (via dsp_sub56), with X1:X0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[1] = dsp->registers[DSP_REG_X1];
+    subtrahend[2] = dsp->registers[DSP_REG_X0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_y_a(dsp_core_t* dsp)
+{
+    /* SUB Y,A: A = A - Y (via dsp_sub56), with Y1:Y0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    subtrahend[1] = dsp->registers[DSP_REG_Y1];
+    subtrahend[2] = dsp->registers[DSP_REG_Y0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_y_b(dsp_core_t* dsp)
+{
+    /* SUB Y,B: B = B - Y (via dsp_sub56), with Y1:Y0 sign-extended into the top byte. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[1] = dsp->registers[DSP_REG_Y1];
+    subtrahend[2] = dsp->registers[DSP_REG_Y0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_x0_a(dsp_core_t* dsp)
+{
+    /* SUB X0,A: A = A - X0 (via dsp_sub56); X0 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    subtrahend[1] = dsp->registers[DSP_REG_X0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_x0_b(dsp_core_t* dsp)
+{
+    /* SUB X0,B: B = B - X0 (via dsp_sub56); X0 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[1] = dsp->registers[DSP_REG_X0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_y0_a(dsp_core_t* dsp)
+{
+    /* SUB Y0,A: A = A - Y0 (via dsp_sub56); Y0 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    subtrahend[1] = dsp->registers[DSP_REG_Y0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_y0_b(dsp_core_t* dsp)
+{
+    /* SUB Y0,B: B = B - Y0 (via dsp_sub56); Y0 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[1] = dsp->registers[DSP_REG_Y0];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_x1_a(dsp_core_t* dsp)
+{
+    /* SUB X1,A: A = A - X1 (via dsp_sub56); X1 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    subtrahend[1] = dsp->registers[DSP_REG_X1];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_x1_b(dsp_core_t* dsp)
+{
+    /* SUB X1,B: B = B - X1 (via dsp_sub56); X1 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[1] = dsp->registers[DSP_REG_X1];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_y1_a(dsp_core_t* dsp)
+{
+    /* SUB Y1,A: A = A - Y1 (via dsp_sub56); Y1 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_A2];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[2] = dsp->registers[DSP_REG_A0];
+    subtrahend[1] = dsp->registers[DSP_REG_Y1];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_sub_y1_b(dsp_core_t* dsp)
+{
+    /* SUB Y1,B: B = B - Y1 (via dsp_sub56); Y1 sits in the middle word, sign-extended, low word 0. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[DSP_REG_B2];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[1] = dsp->registers[DSP_REG_Y1];
+    subtrahend[0] = (subtrahend[1] & (1<<23)) ? 0xff : 0x0;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in whatever dsp_sub56 returned. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+static void emu_subl_a(dsp_core_t* dsp)
+{
+    /* SUBL B,A: A goes through dsp_asl56(.., 1), then B is subtracted with dsp_sub56. */
+    uint32_t subtrahend[3], acc[3];
+    uint16_t flags;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+    flags = dsp_asl56(acc, 1);
+
+    subtrahend[2] = dsp->registers[DSP_REG_B0];
+    subtrahend[1] = dsp->registers[DSP_REG_B1];
+    subtrahend[0] = dsp->registers[DSP_REG_B2];
+    flags |= dsp_sub56(subtrahend, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;    /* bits from both the shift and the subtraction */
+}
+
+/* SUBL A,B: dsp_asl56() B by one bit, then dsp_sub56() with A as source. */
+static void emu_subl_b(dsp_core_t* dsp)
+{
+    uint32_t other[3], acc[3];
+    uint16_t sr_bits;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+    other[2] = dsp->registers[DSP_REG_A0];
+    other[1] = dsp->registers[DSP_REG_A1];
+    other[0] = dsp->registers[DSP_REG_A2];
+
+    sr_bits = dsp_asl56(acc, 1);
+    sr_bits |= dsp_sub56(other, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= sr_bits;
+}
+
+/* SUBR B,A: dsp_asr56() A by one bit, then dsp_sub56() with B as source. */
+static void emu_subr_a(dsp_core_t* dsp)
+{
+    uint32_t other[3], acc[3];
+    uint16_t sr_bits;
+
+    acc[2] = dsp->registers[DSP_REG_A0];
+    acc[1] = dsp->registers[DSP_REG_A1];
+    acc[0] = dsp->registers[DSP_REG_A2];
+    other[2] = dsp->registers[DSP_REG_B0];
+    other[1] = dsp->registers[DSP_REG_B1];
+    other[0] = dsp->registers[DSP_REG_B2];
+
+    /* Both helpers contribute SR bits: dsp_asr56() first, then dsp_sub56() */
+    sr_bits = dsp_asr56(acc, 1);
+    sr_bits |= dsp_sub56(other, acc);
+
+    dsp->registers[DSP_REG_A0] = acc[2];
+    dsp->registers[DSP_REG_A1] = acc[1];
+    dsp->registers[DSP_REG_A2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then merge in the collected SR bits */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= sr_bits;
+}
+
+/* SUBR A,B: dsp_asr56() B by one bit, then dsp_sub56() with A as source. */
+static void emu_subr_b(dsp_core_t* dsp)
+{
+    uint32_t other[3], acc[3];
+    uint16_t sr_bits;
+
+    acc[2] = dsp->registers[DSP_REG_B0];
+    acc[1] = dsp->registers[DSP_REG_B1];
+    acc[0] = dsp->registers[DSP_REG_B2];
+    other[2] = dsp->registers[DSP_REG_A0];
+    other[1] = dsp->registers[DSP_REG_A1];
+    other[0] = dsp->registers[DSP_REG_A2];
+
+    /* Both helpers contribute SR bits: dsp_asr56() first, then dsp_sub56() */
+    sr_bits = dsp_asr56(acc, 1);
+    sr_bits |= dsp_sub56(other, acc);
+
+    dsp->registers[DSP_REG_B0] = acc[2];
+    dsp->registers[DSP_REG_B1] = acc[1];
+    dsp->registers[DSP_REG_B2] = acc[0];
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then merge in the collected SR bits */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= sr_bits;
+}
+
+static void emu_tfr_b_a(dsp_core_t* dsp)  /* TFR B,A: A2:A1:A0 <- B2:B1:B0 */
+{
+    dsp->registers[DSP_REG_A2] = dsp->registers[DSP_REG_B2];
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_B1];
+    dsp->registers[DSP_REG_A0] = dsp->registers[DSP_REG_B0];
+}
+
+static void emu_tfr_a_b(dsp_core_t* dsp)  /* TFR A,B: B2:B1:B0 <- A2:A1:A0 */
+{
+    dsp->registers[DSP_REG_B2] = dsp->registers[DSP_REG_A2];
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_A1];
+    dsp->registers[DSP_REG_B0] = dsp->registers[DSP_REG_A0];
+}
+
+/* TFR X0,A: A1 = X0, A0 = 0, A2 = 0xff when bit 23 of A1 is set, else 0. */
+static void emu_tfr_x0_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_X0];
+    dsp->registers[DSP_REG_A0] = 0;
+
+    dsp->registers[DSP_REG_A2] =
+        (dsp->registers[DSP_REG_A1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TFR X0,B: B1 = X0, B0 = 0, B2 = 0xff when bit 23 of B1 is set, else 0. */
+static void emu_tfr_x0_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_X0];
+    dsp->registers[DSP_REG_B0] = 0;
+
+    dsp->registers[DSP_REG_B2] =
+        (dsp->registers[DSP_REG_B1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TFR Y0,A: A1 = Y0, A0 = 0, A2 = 0xff when bit 23 of A1 is set, else 0. */
+static void emu_tfr_y0_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_Y0];
+    dsp->registers[DSP_REG_A0] = 0;
+
+    dsp->registers[DSP_REG_A2] =
+        (dsp->registers[DSP_REG_A1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TFR Y0,B: B1 = Y0, B0 = 0, B2 = 0xff when bit 23 of B1 is set, else 0. */
+static void emu_tfr_y0_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_Y0];
+    dsp->registers[DSP_REG_B0] = 0;
+
+    dsp->registers[DSP_REG_B2] =
+        (dsp->registers[DSP_REG_B1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TFR X1,A: A1 = X1, A0 = 0, A2 = 0xff when bit 23 of A1 is set, else 0. */
+static void emu_tfr_x1_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_X1];
+    dsp->registers[DSP_REG_A0] = 0;
+
+    dsp->registers[DSP_REG_A2] =
+        (dsp->registers[DSP_REG_A1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TFR X1,B: B1 = X1, B0 = 0, B2 = 0xff when bit 23 of B1 is set, else 0. */
+static void emu_tfr_x1_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_X1];
+    dsp->registers[DSP_REG_B0] = 0;
+
+    dsp->registers[DSP_REG_B2] =
+        (dsp->registers[DSP_REG_B1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TFR Y1,A: A1 = Y1, A0 = 0, A2 = 0xff when bit 23 of A1 is set, else 0. */
+static void emu_tfr_y1_a(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_A1] = dsp->registers[DSP_REG_Y1];
+    dsp->registers[DSP_REG_A0] = 0;
+
+    dsp->registers[DSP_REG_A2] =
+        (dsp->registers[DSP_REG_A1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TFR Y1,B: B1 = Y1, B0 = 0, B2 = 0xff when bit 23 of B1 is set, else 0. */
+static void emu_tfr_y1_b(dsp_core_t* dsp)
+{
+    dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_Y1];
+    dsp->registers[DSP_REG_B0] = 0;
+
+    dsp->registers[DSP_REG_B2] =
+        (dsp->registers[DSP_REG_B1] & (1<<23)) ? 0xff : 0x0;
+}
+
+/* TST A: hand A2:A1:A0 to emu_ccr_update_e_u_n_z(), then clear V in SR. */
+static void emu_tst_a(dsp_core_t* dsp)
+{
+    emu_ccr_update_e_u_n_z(dsp, dsp->registers[DSP_REG_A2],
+        dsp->registers[DSP_REG_A1], dsp->registers[DSP_REG_A0]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+}
+
+/* TST B: hand B2:B1:B0 to emu_ccr_update_e_u_n_z(), then clear V in SR. */
+static void emu_tst_b(dsp_core_t* dsp)
+{
+    emu_ccr_update_e_u_n_z(dsp, dsp->registers[DSP_REG_B2],
+        dsp->registers[DSP_REG_B1], dsp->registers[DSP_REG_B0]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+}
+
+/* MAX: dsp_sub56(B, A); if the result is negative or zero, copy A into B and
+ * set C, otherwise clear C. NOTE(review): condition sense and C polarity are
+ * kept as found - confirm against the DSP56300 MAX description. */
+static void emu_max(dsp_core_t* dsp)
+{
+    uint32_t source[3], dest[3];
+
+    dest[2] = dsp->registers[DSP_REG_A0];
+    dest[1] = dsp->registers[DSP_REG_A1];
+    dest[0] = dsp->registers[DSP_REG_A2];
+    source[2] = dsp->registers[DSP_REG_B0];
+    source[1] = dsp->registers[DSP_REG_B1];
+    source[0] = dsp->registers[DSP_REG_B2];
+    dsp_sub56(source, dest);
+    bool pass = ((dest[0] & (1<<7))
+        || (dest[0] == 0 && dest[1] == 0 && dest[2] == 0));
+    if (pass) {
+        /* Copy part for part: low word to low word, extension to extension */
+        dsp->registers[DSP_REG_B0] = dsp->registers[DSP_REG_A0];
+        dsp->registers[DSP_REG_B1] = dsp->registers[DSP_REG_A1];
+        dsp->registers[DSP_REG_B2] = dsp->registers[DSP_REG_A2];
+    }
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= pass<<DSP_SR_C;
+}
+
+
+static const emu_func_t opcodes_alu[256] = {   /* indexed with cur_inst & BITMASK(8) by the emu_pm_* handlers */
+    /* 0x00 - 0x3f */
+    emu_move     , emu_tfr_b_a, emu_addr_b_a, emu_tst_a, emu_undefined, emu_cmp_b_a, emu_subr_a, emu_cmpm_b_a,
+    emu_undefined, emu_tfr_a_b, emu_addr_a_b, emu_tst_b, emu_undefined, emu_cmp_a_b, emu_subr_b, emu_cmpm_a_b,
+    emu_add_b_a, emu_rnd_a, emu_addl_b_a, emu_clr_a, emu_sub_b_a, emu_undefined, emu_subl_a, emu_not_a,
+    emu_add_a_b, emu_rnd_b, emu_addl_a_b, emu_clr_b, emu_sub_a_b, emu_max, emu_subl_b, emu_not_b,
+    emu_add_x_a, emu_adc_x_a, emu_asr_a, emu_lsr_a, emu_sub_x_a, emu_sbc_x_a, emu_abs_a, emu_ror_a,
+    emu_add_x_b, emu_adc_x_b, emu_asr_b, emu_lsr_b, emu_sub_x_b, emu_sbc_x_b, emu_abs_b, emu_ror_b,
+    emu_add_y_a, emu_adc_y_a, emu_asl_a, emu_lsl_a, emu_sub_y_a, emu_sbc_y_a, emu_neg_a, emu_rol_a,
+    emu_add_y_b, emu_adc_y_b, emu_asl_b, emu_lsl_b, emu_sub_y_b, emu_sbc_y_b, emu_neg_b, emu_rol_b,
+    
+    /* 0x40 - 0x7f */
+    emu_add_x0_a, emu_tfr_x0_a, emu_or_x0_a, emu_eor_x0_a, emu_sub_x0_a, emu_cmp_x0_a, emu_and_x0_a, emu_cmpm_x0_a,
+    emu_add_x0_b, emu_tfr_x0_b, emu_or_x0_b, emu_eor_x0_b, emu_sub_x0_b, emu_cmp_x0_b, emu_and_x0_b, emu_cmpm_x0_b,
+    emu_add_y0_a, emu_tfr_y0_a, emu_or_y0_a, emu_eor_y0_a, emu_sub_y0_a, emu_cmp_y0_a, emu_and_y0_a, emu_cmpm_y0_a,
+    emu_add_y0_b, emu_tfr_y0_b, emu_or_y0_b, emu_eor_y0_b, emu_sub_y0_b, emu_cmp_y0_b, emu_and_y0_b, emu_cmpm_y0_b,
+    emu_add_x1_a, emu_tfr_x1_a, emu_or_x1_a, emu_eor_x1_a, emu_sub_x1_a, emu_cmp_x1_a, emu_and_x1_a, emu_cmpm_x1_a,
+    emu_add_x1_b, emu_tfr_x1_b, emu_or_x1_b, emu_eor_x1_b, emu_sub_x1_b, emu_cmp_x1_b, emu_and_x1_b, emu_cmpm_x1_b,
+    emu_add_y1_a, emu_tfr_y1_a, emu_or_y1_a, emu_eor_y1_a, emu_sub_y1_a, emu_cmp_y1_a, emu_and_y1_a, emu_cmpm_y1_a,
+    emu_add_y1_b, emu_tfr_y1_b, emu_or_y1_b, emu_eor_y1_b, emu_sub_y1_b, emu_cmp_y1_b, emu_and_y1_b, emu_cmpm_y1_b,
+
+    /* 0x80 - 0xbf */
+    emu_mpy_p_x0_x0_a, emu_mpyr_p_x0_x0_a, emu_mac_p_x0_x0_a, emu_macr_p_x0_x0_a, emu_mpy_m_x0_x0_a, emu_mpyr_m_x0_x0_a, emu_mac_m_x0_x0_a, emu_macr_m_x0_x0_a,
+    emu_mpy_p_x0_x0_b, emu_mpyr_p_x0_x0_b, emu_mac_p_x0_x0_b, emu_macr_p_x0_x0_b, emu_mpy_m_x0_x0_b, emu_mpyr_m_x0_x0_b, emu_mac_m_x0_x0_b, emu_macr_m_x0_x0_b,
+    emu_mpy_p_y0_y0_a, emu_mpyr_p_y0_y0_a, emu_mac_p_y0_y0_a, emu_macr_p_y0_y0_a, emu_mpy_m_y0_y0_a, emu_mpyr_m_y0_y0_a, emu_mac_m_y0_y0_a, emu_macr_m_y0_y0_a,
+    emu_mpy_p_y0_y0_b, emu_mpyr_p_y0_y0_b, emu_mac_p_y0_y0_b, emu_macr_p_y0_y0_b, emu_mpy_m_y0_y0_b, emu_mpyr_m_y0_y0_b, emu_mac_m_y0_y0_b, emu_macr_m_y0_y0_b,
+    emu_mpy_p_x1_x0_a, emu_mpyr_p_x1_x0_a, emu_mac_p_x1_x0_a, emu_macr_p_x1_x0_a, emu_mpy_m_x1_x0_a, emu_mpyr_m_x1_x0_a, emu_mac_m_x1_x0_a, emu_macr_m_x1_x0_a,
+    emu_mpy_p_x1_x0_b, emu_mpyr_p_x1_x0_b, emu_mac_p_x1_x0_b, emu_macr_p_x1_x0_b, emu_mpy_m_x1_x0_b, emu_mpyr_m_x1_x0_b, emu_mac_m_x1_x0_b, emu_macr_m_x1_x0_b,
+    emu_mpy_p_y1_y0_a, emu_mpyr_p_y1_y0_a, emu_mac_p_y1_y0_a, emu_macr_p_y1_y0_a, emu_mpy_m_y1_y0_a, emu_mpyr_m_y1_y0_a, emu_mac_m_y1_y0_a, emu_macr_m_y1_y0_a,
+    emu_mpy_p_y1_y0_b, emu_mpyr_p_y1_y0_b, emu_mac_p_y1_y0_b, emu_macr_p_y1_y0_b, emu_mpy_m_y1_y0_b, emu_mpyr_m_y1_y0_b, emu_mac_m_y1_y0_b, emu_macr_m_y1_y0_b,
+
+    /* 0xc0 - 0xff */
+    emu_mpy_p_x0_y1_a, emu_mpyr_p_x0_y1_a, emu_mac_p_x0_y1_a, emu_macr_p_x0_y1_a, emu_mpy_m_x0_y1_a, emu_mpyr_m_x0_y1_a, emu_mac_m_x0_y1_a, emu_macr_m_x0_y1_a,
+    emu_mpy_p_x0_y1_b, emu_mpyr_p_x0_y1_b, emu_mac_p_x0_y1_b, emu_macr_p_x0_y1_b, emu_mpy_m_x0_y1_b, emu_mpyr_m_x0_y1_b, emu_mac_m_x0_y1_b, emu_macr_m_x0_y1_b,
+    emu_mpy_p_y0_x0_a, emu_mpyr_p_y0_x0_a, emu_mac_p_y0_x0_a, emu_macr_p_y0_x0_a, emu_mpy_m_y0_x0_a, emu_mpyr_m_y0_x0_a, emu_mac_m_y0_x0_a, emu_macr_m_y0_x0_a,
+    emu_mpy_p_y0_x0_b, emu_mpyr_p_y0_x0_b, emu_mac_p_y0_x0_b, emu_macr_p_y0_x0_b, emu_mpy_m_y0_x0_b, emu_mpyr_m_y0_x0_b, emu_mac_m_y0_x0_b, emu_macr_m_y0_x0_b,
+    emu_mpy_p_x1_y0_a, emu_mpyr_p_x1_y0_a, emu_mac_p_x1_y0_a, emu_macr_p_x1_y0_a, emu_mpy_m_x1_y0_a, emu_mpyr_m_x1_y0_a, emu_mac_m_x1_y0_a, emu_macr_m_x1_y0_a,
+    emu_mpy_p_x1_y0_b, emu_mpyr_p_x1_y0_b, emu_mac_p_x1_y0_b, emu_macr_p_x1_y0_b, emu_mpy_m_x1_y0_b, emu_mpyr_m_x1_y0_b, emu_mac_m_x1_y0_b, emu_macr_m_x1_y0_b,
+    emu_mpy_p_y1_x1_a, emu_mpyr_p_y1_x1_a, emu_mac_p_y1_x1_a, emu_macr_p_y1_x1_a, emu_mpy_m_y1_x1_a, emu_mpyr_m_y1_x1_a, emu_mac_m_y1_x1_a, emu_macr_m_y1_x1_a,
+    emu_mpy_p_y1_x1_b, emu_mpyr_p_y1_x1_b, emu_mac_p_y1_x1_b, emu_macr_p_y1_x1_b, emu_mpy_m_y1_x1_b, emu_mpyr_m_y1_x1_b, emu_mac_m_y1_x1_b, emu_macr_m_y1_x1_b
+};
+
+
+/**********************************
+ *  Parallel moves instructions
+ **********************************/
+
+static void emu_pm_0(dsp_core_t* dsp);     /* 0000 100d: S,x|y:ea  x0|y0,D */
+static void emu_pm_1(dsp_core_t* dsp);     /* 0001 xxxx: X/Y memory move plus accumulator-to-register move */
+static void emu_pm_2(dsp_core_t* dsp);     /* 0010 xxxx dispatcher: nop, R update, S,D, #xx,D */
+static void emu_pm_2_2(dsp_core_t* dsp);   /* 0010 00ee eeed dddd: S,D */
+static void emu_pm_3(dsp_core_t* dsp);     /* 001d dddd iiii iiii: #xx,R */
+static void emu_pm_4(dsp_core_t* dsp);     /* 0100 xxxx dispatcher: l: moves or x:/y: moves */
+static void emu_pm_4x(dsp_core_t* dsp);    /* 0100 l0ll: l:aa and l:ea moves */
+static void emu_pm_5(dsp_core_t* dsp);     /* 01dd xddd: x:/y: aa, ea or #xxxxxx moves */
+static void emu_pm_8(dsp_core_t* dsp);     /* 1wmm eeff: paired x:ea and y:ea moves */
+
+/*
+ * Read accumulator A or B as a 24-bit word, applying scaling and limiting.
+ *
+ * numreg: only bit 0 is looked at (0 selects A, 1 selects B); callers in
+ *         this file pass either DSP_REG_A/DSP_REG_B or a plain 0/1 flag.
+ * dest:   receives the 24-bit word.
+ *
+ * The x2:x1 pair is adjusted according to the two scaling bits located at
+ * DSP_SR_S0 in SR and truncated to 24 bits. When the extension byte shows
+ * that this word cannot stand for the accumulator, *dest is forced to
+ * 0x800000 (bit 7 of the extension byte set) or 0x7fffff, and DSP_SR_L is
+ * set in SR.
+ *
+ * Returns 1 if the value was limited, 0 otherwise.
+ */
+static int emu_pm_read_accu24(dsp_core_t* dsp, int numreg, uint32_t *dest)
+{
+    const uint32_t sel = numreg & 1;
+    const uint32_t ext = dsp->registers[DSP_REG_A2+sel];
+    const uint32_t mode = (dsp->registers[DSP_REG_SR]>>DSP_SR_S0) & BITMASK(2);
+    uint32_t word;
+
+    /* Start from x2:x1; bits above 23 are dropped once scaling is done */
+    word = ext << 24;
+    word += dsp->registers[DSP_REG_A1+sel];
+
+    switch (mode) {
+        case 1:
+            /* scaling down */
+            word >>= 1;
+            break;
+        case 2:
+            /* scaling up: bit 23 of x0 fills the vacated low bit */
+            word <<= 1;
+            word |= (dsp->registers[DSP_REG_A0+sel]>>23) & 1;
+            break;
+        default:
+            /* 0: no scaling, 3: indeterminate - word is used unchanged */
+            break;
+    }
+    word &= BITMASK(24);
+
+    /* Extension byte clear and word positive: no limiting */
+    if (ext == 0 && word <= 0x007fffff) {
+        *dest = word;
+        return 0;
+    }
+
+    /* Extension byte all ones and word negative: no limiting */
+    if (ext == 0xff && word >= 0x00800000) {
+        *dest = word;
+        return 0;
+    }
+
+    /* Anything else saturates; the L bit records that it happened */
+    if (ext & (1<<7)) {
+        *dest = 0x00800000;
+    } else {
+        *dest = 0x007fffff;
+    }
+    dsp->registers[DSP_REG_SR] |= (1<<DSP_SR_L);
+
+    return 1;
+}
+
+/*
+ * Parallel move: accumulator to memory plus X0/Y0 into that accumulator.
+ *
+ *  0000 100d 00mm mrrr S,x:ea  x0,D
+ *  0000 100d 10mm mrrr S,y:ea  y0,D
+ *
+ * Both sources are sampled before the ALU operation runs; the two stores
+ * happen afterwards.
+ */
+static void emu_pm_0(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>15) & 1;
+    const uint32_t acc = (dsp->cur_inst>>16) & 1;
+    uint32_t ea, acc_word, xy0;
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &ea);
+
+    emu_pm_read_accu24(dsp, acc, &acc_word);
+    xy0 = dsp->registers[DSP_REG_X0+(space<<1)];
+
+    opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+
+    dsp56k_write_memory(dsp, space, ea, acc_word);
+
+    dsp->registers[DSP_REG_A0+acc] = 0;
+    dsp->registers[DSP_REG_A1+acc] = xy0;
+    dsp->registers[DSP_REG_A2+acc] = (xy0 & (1<<23)) ? 0xff : 0x0;
+}
+
+/*
+ * Parallel move pairing one X/Y memory (or immediate) transfer with one
+ * accumulator-to-data-register transfer:
+ *
+ *  0001 ffdf w0mm mrrr x:ea,D1     S2,D2
+ *                      S1,x:ea     S2,D2
+ *                      #xxxxxx,D1  S2,D2
+ *  0001 deff w1mm mrrr S1,D1       y:ea,D2
+ *                      S1,D1       S2,y:ea
+ *                      S1,D1       #xxxxxx,D2
+ *
+ * Every source is sampled before the ALU operation executes and every
+ * destination is written after it.
+ */
+static void emu_pm_1(dsp_core_t* dsp)
+{
+    uint32_t memspace, numreg1, numreg2, value, xy_addr, retour, save_1, save_2;
+
+    value = (dsp->cur_inst>>8) & BITMASK(6);
+    retour = emu_calc_ea(dsp, value, &xy_addr);
+    memspace = (dsp->cur_inst>>14) & 1;
+    numreg1 = numreg2 = DSP_REG_NULL;
+
+    /* Register on the memory side of the move */
+    if (memspace) {
+        /* Y: */
+        switch((dsp->cur_inst>>16) & BITMASK(2)) {
+            case 0: numreg1 = DSP_REG_Y0;   break;
+            case 1: numreg1 = DSP_REG_Y1;   break;
+            case 2: numreg1 = DSP_REG_A;    break;
+            case 3: numreg1 = DSP_REG_B;    break;
+        }
+    } else {
+        /* X: */
+        switch((dsp->cur_inst>>18) & BITMASK(2)) {
+            case 0: numreg1 = DSP_REG_X0;   break;
+            case 1: numreg1 = DSP_REG_X1;   break;
+            case 2: numreg1 = DSP_REG_A;    break;
+            case 3: numreg1 = DSP_REG_B;    break;
+        }
+    }
+
+    if (dsp->cur_inst & (1<<15)) {
+        /* Write D1: when emu_calc_ea() returned non-zero, xy_addr is
+         * used as the value itself instead of as an address */
+        if (retour)
+            save_1 = xy_addr;
+        else
+            save_1 = dsp56k_read_memory(dsp, memspace, xy_addr);
+    } else {
+        /* Read S1 */
+        if ((numreg1==DSP_REG_A) || (numreg1==DSP_REG_B))
+            emu_pm_read_accu24(dsp, numreg1, &save_1);
+        else
+            save_1 = dsp->registers[numreg1];
+    }
+
+    /* S2: accumulator A or B, picked by bit 19 (Y form) or bit 17 (X form) */
+    numreg2 = DSP_REG_A + ((dsp->cur_inst>>(memspace ? 19 : 17)) & 1);
+    emu_pm_read_accu24(dsp, numreg2, &save_2);
+
+    /* Execute parallel instruction */
+    opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+
+    /* Write parallel move values */
+    if (dsp->cur_inst & (1<<15)) {
+        /* Write D1 */
+        if (numreg1 == DSP_REG_A) {
+            dsp->registers[DSP_REG_A0] = 0x0;
+            dsp->registers[DSP_REG_A1] = save_1;
+            dsp->registers[DSP_REG_A2] = save_1 & (1<<23) ? 0xff : 0x0;
+        }
+        else if (numreg1 == DSP_REG_B) {
+            dsp->registers[DSP_REG_B0] = 0x0;
+            dsp->registers[DSP_REG_B1] = save_1;
+            dsp->registers[DSP_REG_B2] = save_1 & (1<<23) ? 0xff : 0x0;
+        }
+        else {
+            /* Plain data register (kept inside the else so A/B are not hit too) */
+            dsp->registers[numreg1] = save_1;
+        }
+    } else {
+        /* Read S1 */
+        dsp56k_write_memory(dsp, memspace, xy_addr, save_1);
+    }
+
+    /* S2 -> D2: DSP_REG_X0 plus bit 18 (Y form), DSP_REG_Y0 plus bit 16 (X form) */
+    if (memspace)
+        numreg2 = DSP_REG_X0 + ((dsp->cur_inst>>18) & 1);
+    else
+        numreg2 = DSP_REG_Y0 + ((dsp->cur_inst>>16) & 1);
+    dsp->registers[numreg2] = save_2;
+}
+
+static void emu_pm_2_2(dsp_core_t* dsp);
+
+/*
+ * Entry point for the 0010 xxxx parallel-move encodings; picks one of:
+ *
+ *  0010 0000 0000 0000 nop
+ *  0010 0000 010m mrrr R update
+ *  0010 00ee eeed dddd S,D
+ *  001d dddd iiii iiii #xx,D
+ *
+ * The patterns are tested in the order listed and the first match wins.
+ */
+static void emu_pm_2(dsp_core_t* dsp)
+{
+    uint32_t unused_addr;
+
+    if ((dsp->cur_inst & 0xffff00) == 0x200000) {
+        /* nop: nothing to move, only the ALU operation runs */
+        opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+    } else if ((dsp->cur_inst & 0xffe000) == 0x204000) {
+        /* R update: evaluate the address mode, discard the address */
+        emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(5), &unused_addr);
+        opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+    } else if ((dsp->cur_inst & 0xfc0000) == 0x200000) {
+        /* S,D */
+        emu_pm_2_2(dsp);
+    } else {
+        /* #xx,D */
+        emu_pm_3(dsp);
+    }
+}
+
+/*
+ * Parallel move S,D (0010 00ee eeed dddd): the source register is sampled
+ * before the ALU operation runs and written to the destination afterwards.
+ */
+static void emu_pm_2_2(dsp_core_t* dsp)
+{
+    const uint32_t src = (dsp->cur_inst >> 13) & BITMASK(5);
+    const uint32_t dst = (dsp->cur_inst >> 8) & BITMASK(5);
+    uint32_t moved;
+
+    if ((src != DSP_REG_A) && (src != DSP_REG_B)) {
+        moved = dsp->registers[src];
+    } else {
+        /* Accumulator as source: limited 24-bit value */
+        emu_pm_read_accu24(dsp, src, &moved);
+    }
+
+    opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+    switch (dst) {
+        case DSP_REG_A:
+            dsp->registers[DSP_REG_A0] = 0x0;
+            dsp->registers[DSP_REG_A1] = moved;
+            dsp->registers[DSP_REG_A2] = (moved & (1<<23)) ? 0xff : 0x0;
+            break;
+        case DSP_REG_B:
+            dsp->registers[DSP_REG_B0] = 0x0;
+            dsp->registers[DSP_REG_B1] = moved;
+            dsp->registers[DSP_REG_B2] = (moved & (1<<23)) ? 0xff : 0x0;
+            break;
+        default:
+            dsp->registers[dst] = moved & BITMASK(registers_mask[dst]);
+            break;
+    }
+}
+
+/*
+ * Parallel move #xx,R (001d dddd iiii iiii), performed after the ALU
+ * operation. X0, X1, Y0, Y1, A and B take the 8-bit immediate in bits
+ * 23..16; every other register takes it in its low byte.
+ */
+static void emu_pm_3(dsp_core_t* dsp)
+{
+    uint32_t target, imm;
+
+    opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+
+    target = (dsp->cur_inst >> 16) & BITMASK(5);
+    imm = (dsp->cur_inst >> 8) & BITMASK(8);
+
+    switch (target) {
+        case DSP_REG_A:
+            imm <<= 16;
+            dsp->registers[DSP_REG_A0] = 0x0;
+            dsp->registers[DSP_REG_A1] = imm;
+            dsp->registers[DSP_REG_A2] = (imm & (1<<23)) ? 0xff : 0x0;
+            break;
+        case DSP_REG_B:
+            imm <<= 16;
+            dsp->registers[DSP_REG_B0] = 0x0;
+            dsp->registers[DSP_REG_B1] = imm;
+            dsp->registers[DSP_REG_B2] = (imm & (1<<23)) ? 0xff : 0x0;
+            break;
+        case DSP_REG_X0:
+        case DSP_REG_X1:
+        case DSP_REG_Y0:
+        case DSP_REG_Y1:
+            imm <<= 16;
+            dsp->registers[target] = imm & BITMASK(registers_mask[target]);
+            break;
+        default:
+            dsp->registers[target] = imm & BITMASK(registers_mask[target]);
+            break;
+    }
+}
+
+/*
+ * Dispatcher: encodings matching 0100 x0xx go to emu_pm_4x() (l: moves),
+ * every other encoding reaching this handler goes to emu_pm_5().
+ *  0100 l0ll w0aa aaaa             l:aa,D
+ *                      S,l:aa
+ *  0100 l0ll w1mm mrrr             l:ea,D
+ *                      S,l:ea
+ *  01dd 0ddd w0aa aaaa             x:aa,D
+ *                      S,x:aa
+ *  01dd 0ddd w1mm mrrr             x:ea,D
+ *                      S,x:ea
+ *                      #xxxxxx,D
+ *  01dd 1ddd w0aa aaaa             y:aa,D
+ *                      S,y:aa
+ *  01dd 1ddd w1mm mrrr             y:ea,D
+ *                      S,y:ea
+ *                      #xxxxxx,D
+ */
+static void emu_pm_4(dsp_core_t* dsp)
+{
+    if ((dsp->cur_inst & 0xf40000) == 0x400000)
+        emu_pm_4x(dsp);
+    else
+        emu_pm_5(dsp);
+}
+
+static void emu_pm_4x(dsp_core_t* dsp)
+{
+    uint32_t value, numreg, l_addr, save_lx, save_ly;
+/*  Long (l:) parallel move: the X: and Y: words at one address move as a pair.
+    0100 l0ll w0aa aaaa         l:aa,D
+                    S,l:aa
+    0100 l0ll w1mm mrrr         l:ea,D
+                    S,l:ea
+*/
+    value = (dsp->cur_inst>>8) & BITMASK(6);
+    if (dsp->cur_inst & (1<<14)) {
+        emu_calc_ea(dsp, value, &l_addr);    
+    } else {
+        l_addr = value; /* bit 14 clear: the field is used as the address itself */
+    }
+
+    numreg = (dsp->cur_inst>>16) & BITMASK(2);  /* selector bits 1..0 from bits 17..16 */
+    numreg |= (dsp->cur_inst>>17) & (1<<2);     /* selector bit 2 from bit 19 */
+
+    if (dsp->cur_inst & (1<<15)) {
+        /* Write D: fetch both memory words before the ALU operation */
+        save_lx = dsp56k_read_memory(dsp, DSP_SPACE_X,l_addr);
+        save_ly = dsp56k_read_memory(dsp, DSP_SPACE_Y,l_addr);
+    }
+    else {
+        /* Read S: sample the register pair before the ALU operation */
+        switch(numreg) {
+            case 0:
+                /* A10 */
+                save_lx = dsp->registers[DSP_REG_A1];
+                save_ly = dsp->registers[DSP_REG_A0];
+                break;
+            case 1:
+                /* B10 */
+                save_lx = dsp->registers[DSP_REG_B1];
+                save_ly = dsp->registers[DSP_REG_B0];
+                break;
+            case 2:
+                /* X */
+                save_lx = dsp->registers[DSP_REG_X1];
+                save_ly = dsp->registers[DSP_REG_X0];
+                break;
+            case 3:
+                /* Y */
+                save_lx = dsp->registers[DSP_REG_Y1];
+                save_ly = dsp->registers[DSP_REG_Y0];
+                break;
+            case 4:
+                /* A */
+                if (emu_pm_read_accu24(dsp, DSP_REG_A, &save_lx)) {
+                    /* Was limited, set lower part: 0 if negative, 0xffffff if positive */
+                    save_ly = (save_lx & (1<<23) ? 0 : 0xffffff);
+                } else {
+                    /* Not limited */
+                    save_ly = dsp->registers[DSP_REG_A0];
+                }
+                break;
+            case 5:
+                /* B */
+                if (emu_pm_read_accu24(dsp, DSP_REG_B, &save_lx)) {
+                    /* Was limited, set lower part: 0 if negative, 0xffffff if positive */
+                    save_ly = (save_lx & (1<<23) ? 0 : 0xffffff);
+                } else {
+                    /* Not limited */
+                    save_ly = dsp->registers[DSP_REG_B0];
+                }
+                break;
+            case 6:
+                /* AB: limited A for X:, limited B for Y: */
+                emu_pm_read_accu24(dsp, DSP_REG_A, &save_lx); 
+                emu_pm_read_accu24(dsp, DSP_REG_B, &save_ly); 
+                break;
+            case 7:
+                /* BA: limited B for X:, limited A for Y: */
+                emu_pm_read_accu24(dsp, DSP_REG_B, &save_lx); 
+                emu_pm_read_accu24(dsp, DSP_REG_A, &save_ly); 
+                break;
+        }
+    }
+
+    /* Execute parallel instruction */
+    opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+
+
+    if (dsp->cur_inst & (1<<15)) {
+        /* Write D: the X: word goes to the upper register, the Y: word to the lower */
+        switch(numreg) {
+            case 0: /* A10 */
+                dsp->registers[DSP_REG_A1] = save_lx;
+                dsp->registers[DSP_REG_A0] = save_ly;
+                break;
+            case 1: /* B10 */
+                dsp->registers[DSP_REG_B1] = save_lx;
+                dsp->registers[DSP_REG_B0] = save_ly;
+                break;
+            case 2: /* X */
+                dsp->registers[DSP_REG_X1] = save_lx;
+                dsp->registers[DSP_REG_X0] = save_ly;
+                break;
+            case 3: /* Y */
+                dsp->registers[DSP_REG_Y1] = save_lx;
+                dsp->registers[DSP_REG_Y0] = save_ly;
+                break;
+            case 4: /* A, with A2 sign-extended from bit 23 of the X: word */
+                dsp->registers[DSP_REG_A0] = save_ly;
+                dsp->registers[DSP_REG_A1] = save_lx;
+                dsp->registers[DSP_REG_A2] = save_lx & (1<<23) ? 0xff : 0;
+                break;
+            case 5: /* B, with B2 sign-extended from bit 23 of the X: word */
+                dsp->registers[DSP_REG_B0] = save_ly;
+                dsp->registers[DSP_REG_B1] = save_lx;
+                dsp->registers[DSP_REG_B2] = save_lx & (1<<23) ? 0xff : 0;
+                break;
+            case 6: /* AB: X: word into A, Y: word into B, low words cleared */
+                dsp->registers[DSP_REG_A0] = 0;
+                dsp->registers[DSP_REG_A1] = save_lx;
+                dsp->registers[DSP_REG_A2] = save_lx & (1<<23) ? 0xff : 0;
+                dsp->registers[DSP_REG_B0] = 0;
+                dsp->registers[DSP_REG_B1] = save_ly;
+                dsp->registers[DSP_REG_B2] = save_ly & (1<<23) ? 0xff : 0;
+                break;
+            case 7: /* BA: X: word into B, Y: word into A, low words cleared */
+                dsp->registers[DSP_REG_B0] = 0;
+                dsp->registers[DSP_REG_B1] = save_lx;
+                dsp->registers[DSP_REG_B2] = save_lx & (1<<23) ? 0xff : 0;
+                dsp->registers[DSP_REG_A0] = 0;
+                dsp->registers[DSP_REG_A1] = save_ly;
+                dsp->registers[DSP_REG_A2] = save_ly & (1<<23) ? 0xff : 0;
+                break;
+        }
+    }
+    else {
+        /* Read S: store both sampled words at the same address */
+        dsp56k_write_memory(dsp, DSP_SPACE_X, l_addr, save_lx);
+        dsp56k_write_memory(dsp, DSP_SPACE_Y, l_addr, save_ly);
+    }
+}
+
+/*
+ * Parallel move between a register and X or Y memory (or an immediate):
+ *
+ *  01dd 0ddd w0aa aaaa             x:aa,D
+ *                      S,x:aa
+ *  01dd 0ddd w1mm mrrr             x:ea,D
+ *                      S,x:ea
+ *                      #xxxxxx,D
+ *  01dd 1ddd w0aa aaaa             y:aa,D
+ *                      S,y:aa
+ *  01dd 1ddd w1mm mrrr             y:ea,D
+ *                      S,y:ea
+ *                      #xxxxxx,D
+ *
+ * Bit 15 (w) set means the register is the destination. The value to move
+ * is fetched before the ALU operation and stored after it.
+ */
+static void emu_pm_5(dsp_core_t* dsp)
+{
+    const uint32_t ea_field = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t space, reg, moved, addr, use_addr_as_value;
+
+    if (dsp->cur_inst & (1<<14)) {
+        use_addr_as_value = emu_calc_ea(dsp, ea_field, &addr);
+    } else {
+        /* bit 14 clear: the field is used as the address itself */
+        addr = ea_field;
+        use_addr_as_value = 0;
+    }
+
+    space = (dsp->cur_inst>>19) & 1;
+    reg = ((dsp->cur_inst>>16) & BITMASK(3))
+        | ((dsp->cur_inst>>17) & (BITMASK(2)<<3));
+
+    if (!(dsp->cur_inst & (1<<15))) {
+        /* Register is the source */
+        if ((reg==DSP_REG_A) || (reg==DSP_REG_B))
+            emu_pm_read_accu24(dsp, reg, &moved);
+        else
+            moved = dsp->registers[reg];
+    } else if (use_addr_as_value) {
+        moved = addr;
+    } else {
+        moved = dsp56k_read_memory(dsp, space, addr);
+    }
+
+    opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+
+    if (!(dsp->cur_inst & (1<<15))) {
+        dsp56k_write_memory(dsp, space, addr, moved);
+        return;
+    }
+
+    switch (reg) {
+        case DSP_REG_A:
+            dsp->registers[DSP_REG_A0] = 0x0;
+            dsp->registers[DSP_REG_A1] = moved;
+            dsp->registers[DSP_REG_A2] = (moved & (1<<23)) ? 0xff : 0x0;
+            break;
+        case DSP_REG_B:
+            dsp->registers[DSP_REG_B0] = 0x0;
+            dsp->registers[DSP_REG_B1] = moved;
+            dsp->registers[DSP_REG_B2] = (moved & (1<<23)) ? 0xff : 0x0;
+            break;
+        default:
+            dsp->registers[reg] = moved & BITMASK(registers_mask[reg]);
+            break;
+    }
+}
+
+static void emu_pm_8(dsp_core_t* dsp)
+{
+    uint32_t ea1, ea2;
+    uint32_t numreg1, numreg2;
+    uint32_t save_reg1, save_reg2, x_addr, y_addr;
+/*  Dual parallel move: one X: transfer and one Y: transfer in the same instruction.
+    1wmm eeff WrrM MRRR             x:ea,D1     y:ea,D2 
+                        x:ea,D1     S2,y:ea
+                        S1,x:ea     y:ea,D2
+                        S1,x:ea     S2,y:ea
+*/
+    numreg1 = numreg2 = DSP_REG_NULL;
+
+    ea1 = (dsp->cur_inst>>8) & BITMASK(5);  /* X-side address field, taken at bit 8 */
+    if ((ea1>>3) == 0) {
+        ea1 |= (1<<5);  /* NOTE(review): presumably remaps mode 0 for emu_calc_ea() - confirm */
+    }
+    ea2 = (dsp->cur_inst>>13) & BITMASK(2); /* Y-side register bits, taken at bit 13 */
+    ea2 |= (dsp->cur_inst>>17) & (BITMASK(2)<<3);   /* Y-side mode bits from bits 21..20 */
+    if ((ea1 & (1<<2))==0) {
+        ea2 |= 1<<2;    /* Y side gets bit 2 set exactly when the X side has it clear */
+    }
+    if ((ea2>>3) == 0) {
+        ea2 |= (1<<5);  /* same adjustment as for ea1 above */
+    }
+
+    emu_calc_ea(dsp, ea1, &x_addr);
+    emu_calc_ea(dsp, ea2, &y_addr);
+
+    switch((dsp->cur_inst>>18) & BITMASK(2)) {  /* register of the X: move */
+        case 0: numreg1=DSP_REG_X0; break;
+        case 1: numreg1=DSP_REG_X1; break;
+        case 2: numreg1=DSP_REG_A;  break;
+        case 3: numreg1=DSP_REG_B;  break;
+    }
+    switch((dsp->cur_inst>>16) & BITMASK(2)) {  /* register of the Y: move */
+        case 0: numreg2=DSP_REG_Y0; break;
+        case 1: numreg2=DSP_REG_Y1; break;
+        case 2: numreg2=DSP_REG_A;  break;
+        case 3: numreg2=DSP_REG_B;  break;
+    }
+    
+    if (dsp->cur_inst & (1<<15)) {  /* bit 15: direction of the X: move */
+        /* Write D1 */
+        save_reg1 = dsp56k_read_memory(dsp, DSP_SPACE_X, x_addr);
+    } else {
+        /* Read S1 */
+        if ((numreg1==DSP_REG_A) || (numreg1==DSP_REG_B))
+            emu_pm_read_accu24(dsp, numreg1, &save_reg1);
+        else
+            save_reg1 = dsp->registers[numreg1];
+    }
+
+    if (dsp->cur_inst & (1<<22)) {  /* bit 22: direction of the Y: move */
+        /* Write D2 */
+        save_reg2 = dsp56k_read_memory(dsp, DSP_SPACE_Y, y_addr);
+    } else {
+        /* Read S2 */
+        if ((numreg2==DSP_REG_A) || (numreg2==DSP_REG_B))
+            emu_pm_read_accu24(dsp, numreg2, &save_reg2);
+        else
+            save_reg2 = dsp->registers[numreg2];
+    }
+
+
+    /* Execute parallel instruction */
+    opcodes_alu[dsp->cur_inst & BITMASK(8)](dsp);
+
+    /* Write first parallel move */
+    if (dsp->cur_inst & (1<<15)) {
+        /* Write D1 */
+        if (numreg1 == DSP_REG_A) {
+            dsp->registers[DSP_REG_A0] = 0x0;
+            dsp->registers[DSP_REG_A1] = save_reg1;
+            dsp->registers[DSP_REG_A2] = save_reg1 & (1<<23) ? 0xff : 0x0;
+        }
+        else if (numreg1 == DSP_REG_B) {
+            dsp->registers[DSP_REG_B0] = 0x0;
+            dsp->registers[DSP_REG_B1] = save_reg1;
+            dsp->registers[DSP_REG_B2] = save_reg1 & (1<<23) ? 0xff : 0x0;
+        }
+        else {
+            dsp->registers[numreg1] = save_reg1;
+        }
+    } else {
+        /* Read S1 */
+        dsp56k_write_memory(dsp, DSP_SPACE_X, x_addr, save_reg1);
+    }
+
+    /* Write second parallel move */
+    if (dsp->cur_inst & (1<<22)) {
+        /* Write D2 */
+        if (numreg2 == DSP_REG_A) {
+            dsp->registers[DSP_REG_A0] = 0x0;
+            dsp->registers[DSP_REG_A1] = save_reg2;
+            dsp->registers[DSP_REG_A2] = save_reg2 & (1<<23) ? 0xff : 0x0;
+        }
+        else if (numreg2 == DSP_REG_B) {
+            dsp->registers[DSP_REG_B0] = 0x0;
+            dsp->registers[DSP_REG_B1] = save_reg2;
+            dsp->registers[DSP_REG_B2] = save_reg2 & (1<<23) ? 0xff : 0x0;
+        }
+        else {
+            dsp->registers[numreg2] = save_reg2;
+        }
+    } else {
+        /* Read S2 */
+        dsp56k_write_memory(dsp, DSP_SPACE_Y, y_addr, save_reg2);
+    }
+}
+
+static const emu_func_t opcodes_parmove[16] = {    /* NOTE(review): presumably indexed by the opcode's top 4 bits - confirm at the call site */
+    emu_pm_0, emu_pm_1, emu_pm_2, emu_pm_3, emu_pm_4, emu_pm_5, emu_pm_5, emu_pm_5,    /* slots 0x0 - 0x7 */
+    emu_pm_8, emu_pm_8, emu_pm_8, emu_pm_8, emu_pm_8, emu_pm_8, emu_pm_8, emu_pm_8     /* slots 0x8 - 0xf */
+};
+
+
+/**********************************
+ *  Non-parallel moves instructions
+ **********************************/
+
+/*
+ * Common body of emu_add_imm() and emu_add_long(): passes x, sign-extended
+ * from bit 23 and placed in the middle word, to dsp_add56() together with
+ * accumulator B (d != 0) or A (d == 0), and writes the result back.
+ */
+static void emu_add_x(dsp_core_t* dsp, uint32_t x, uint32_t d)
+{
+    const int ext_reg = d ? DSP_REG_B2 : DSP_REG_A2;
+    const int mid_reg = d ? DSP_REG_B1 : DSP_REG_A1;
+    const int low_reg = d ? DSP_REG_B0 : DSP_REG_A0;
+    uint32_t operand[3], acc[3];
+    uint16_t sr_bits;
+
+    acc[0] = dsp->registers[ext_reg];
+    acc[1] = dsp->registers[mid_reg];
+    acc[2] = dsp->registers[low_reg];
+
+    /* operand = { sign extension, x, 0 } */
+    operand[0] = (x & (1<<23)) ? 0xff : 0x0;
+    operand[1] = x;
+    operand[2] = 0;
+
+    sr_bits = dsp_add56(operand, acc);
+
+    dsp->registers[ext_reg] = acc[0];
+    dsp->registers[mid_reg] = acc[1];
+    dsp->registers[low_reg] = acc[2];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then merge in the SR bits returned by dsp_add56() */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= sr_bits;
+}
+
+/* ADD #xx,D: 6-bit immediate in bits 8-13, accumulator select in bit 3. */
+static void emu_add_imm(dsp_core_t* dsp)
+{
+    emu_add_x(dsp, (dsp->cur_inst >> 8) & BITMASK(6),
+                   (dsp->cur_inst >> 3) & 1);
+}
+
+static void emu_add_long(dsp_core_t* dsp)
+{
+    /* ADD #xxxx,D: the operand is the program word following the opcode. */
+    const uint32_t operand = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;    /* the extension word belongs to this instruction */
+    emu_add_x(dsp, operand, (dsp->cur_inst >> 3) & 1);
+}
+
+/*
+ * AND x into the middle word of accumulator A (d == 0) or B (d != 0),
+ * then clear V and derive N and Z from that word alone.
+ */
+static void emu_and_x(dsp_core_t* dsp, uint32_t x, uint32_t d)
+{
+    const int dstreg = d ? DSP_REG_B1 : DSP_REG_A1;
+
+    dsp->registers[dstreg] &= x;
+    const uint32_t masked = dsp->registers[dstreg];
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= ((masked>>23) & 1)<<DSP_SR_N;
+    dsp->registers[DSP_REG_SR] |= (masked==0)<<DSP_SR_Z;
+}
+
+/* AND #xx,D: 6-bit immediate in bits 8-13, accumulator select in bit 3. */
+static void emu_and_imm(dsp_core_t* dsp)
+{
+    emu_and_x(dsp, (dsp->cur_inst >> 8) & BITMASK(6),
+                   (dsp->cur_inst >> 3) & 1);
+}
+
+static void emu_and_long(dsp_core_t* dsp)
+{
+    /* AND #xxxx,D: the mask is the program word following the opcode. */
+    const uint32_t mask = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;    /* the extension word belongs to this instruction */
+    emu_and_x(dsp, mask, (dsp->cur_inst >> 3) & 1);
+}
+
+/*
+ * ANDI #xx,D
+ * AND the 8-bit immediate (bits 8-15) into the register picked by the low
+ * two opcode bits: 0 = MR (SR high byte), 1 = CCR (SR low byte), 2 = OMR.
+ * Selector value 3 is not handled and leaves every register untouched.
+ */
+static void emu_andi(dsp_core_t* dsp)
+{
+    const uint32_t imm8 = (dsp->cur_inst >> 8) & BITMASK(8);
+    const uint32_t target = dsp->cur_inst & BITMASK(2);
+
+    if (target == 0) {
+        /* mr: the low byte of SR is kept by the all-ones low mask */
+        dsp->registers[DSP_REG_SR] &= (imm8<<8)|BITMASK(8);
+    } else if (target == 1) {
+        /* ccr: the high byte of SR is kept the same way */
+        dsp->registers[DSP_REG_SR] &= (BITMASK(8)<<8)|imm8;
+    } else if (target == 2) {
+        dsp->registers[DSP_REG_OMR] &= imm8; /* omr */
+    }
+}
+
+/*
+ * ASL #ii,S,D
+ *
+ * Left-shift a 56-bit accumulator by an immediate count. Opcode bit 7
+ * selects the source accumulator, bit 0 the destination (0 = A, 1 = B)
+ * and bits 1-6 hold the shift count.
+ * Flags: C and V come from the shifter, E/U/N/Z from the shifted value.
+ */
+static void emu_asl_imm(dsp_core_t* dsp)
+{
+    const uint32_t src_is_b = (dsp->cur_inst >> 7) & 1;
+    const uint32_t dst_is_b = dsp->cur_inst & 1;
+    const uint32_t count = (dsp->cur_inst >> 1) & BITMASK(6);
+
+    /* Working copy of the source accumulator: [0]=x2, [1]=x1, [2]=x0. */
+    uint32_t acc[3];
+    acc[0] = dsp->registers[src_is_b ? DSP_REG_B2 : DSP_REG_A2];
+    acc[1] = dsp->registers[src_is_b ? DSP_REG_B1 : DSP_REG_A1];
+    acc[2] = dsp->registers[src_is_b ? DSP_REG_B0 : DSP_REG_A0];
+
+    /* acc is shifted in place; the return value holds SR bits to merge. */
+    uint16_t newsr = dsp_asl56(acc, count);
+
+    /* Store the shifted value in the destination accumulator. */
+    dsp->registers[dst_is_b ? DSP_REG_B2 : DSP_REG_A2] = acc[0];
+    dsp->registers[dst_is_b ? DSP_REG_B1 : DSP_REG_A1] = acc[1];
+    dsp->registers[dst_is_b ? DSP_REG_B0 : DSP_REG_A0] = acc[2];
+
+    /* Drop the old C and V before OR-ing in the new status bits. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= newsr;
+
+    /* The remaining flags are recomputed from the result words. */
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+/*
+ * ASR #ii,S,D
+ *
+ * Right-shift a 56-bit accumulator by an immediate count. Opcode bit 7
+ * selects the source accumulator, bit 0 the destination (0 = A, 1 = B)
+ * and bits 1-6 hold the shift count.
+ */
+static void emu_asr_imm(dsp_core_t* dsp)
+{
+    const uint32_t src_is_b = (dsp->cur_inst >> 7) & 1;
+    const uint32_t dst_is_b = dsp->cur_inst & 1;
+    const uint32_t count = (dsp->cur_inst >> 1) & BITMASK(6);
+
+    /* Working copy of the source accumulator: [0]=x2, [1]=x1, [2]=x0. */
+    uint32_t acc[3];
+    acc[0] = dsp->registers[src_is_b ? DSP_REG_B2 : DSP_REG_A2];
+    acc[1] = dsp->registers[src_is_b ? DSP_REG_B1 : DSP_REG_A1];
+    acc[2] = dsp->registers[src_is_b ? DSP_REG_B0 : DSP_REG_A0];
+
+    /* acc is shifted in place; the return value holds SR bits to merge. */
+    uint16_t newsr = dsp_asr56(acc, count);
+
+    /* Store the shifted value in the destination accumulator. */
+    dsp->registers[dst_is_b ? DSP_REG_B2 : DSP_REG_A2] = acc[0];
+    dsp->registers[dst_is_b ? DSP_REG_B1 : DSP_REG_A1] = acc[1];
+    dsp->registers[dst_is_b ? DSP_REG_B0 : DSP_REG_A0] = acc[2];
+
+    /* Drop the old C and V before OR-ing in the new status bits. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= newsr;
+
+    /* The remaining flags are recomputed from the result words. */
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+}
+
+/* Bcc xxxx: PC-relative conditional branch, displacement in the next word. */
+static void emu_bcc_long(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    if (!emu_calc_cc(dsp, dsp->cur_inst & BITMASK(4))) {
+        return;    /* condition false: nothing else to do */
+    }
+
+    dsp->pc = (dsp->pc + displacement) & BITMASK(24);
+    dsp->cur_inst_len = 0;
+    //TODO: cycles?
+}
+
+/* Bcc xxx: conditional branch with a 9-bit displacement held in the opcode. */
+static void emu_bcc_imm(dsp_core_t* dsp)
+{
+    /* Opcode bits 0-4 and 6-9 are packed into one 9-bit field. */
+    const uint32_t low5 = dsp->cur_inst & BITMASK(5);
+    const uint32_t high4 = (dsp->cur_inst & (BITMASK(4) << 6)) >> 1;
+    const uint32_t disp9 = low5 + high4;
+
+    if (emu_calc_cc(dsp, (dsp->cur_inst >> 12) & BITMASK(4))) {
+        dsp->pc += dsp_signextend(9, disp9);
+        dsp->pc &= BITMASK(24);
+        dsp->cur_inst_len = 0;
+    }
+    //TODO: cycles
+}
+
+/*
+ * BCHG, absolute short address form.
+ *
+ * Inverts one bit of a memory word. Opcode bit 6 picks the memory space,
+ * bits 8-13 are the address and bits 0-4 the bit number. The bit's value
+ * before the change is copied to the carry flag.
+ */
+static void emu_bchg_aa(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t ea = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word ^= (1<<bit);    /* same as subtracting a set bit / adding a clear one */
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BCHG, effective-address form: invert one bit of the memory word whose
+ * address emu_calc_ea() derives from opcode bits 8-13. The bit's previous
+ * value is copied to the carry flag.
+ */
+static void emu_bchg_ea(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t mode = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t ea;
+
+    emu_calc_ea(dsp, mode, &ea);
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word ^= (1<<bit);    /* same as subtracting a set bit / adding a clear one */
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BCHG, short I/O address form.
+ *
+ * Inverts one bit of the memory word at 0xffffc0 plus the 6-bit offset in
+ * opcode bits 8-13 (memory space from bit 6, bit number from bits 0-4).
+ * The bit's previous value is copied to the carry flag.
+ */
+static void emu_bchg_pp(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t ea = 0xffffc0 + ((dsp->cur_inst>>8) & BITMASK(6));
+
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word ^= (1<<bit);    /* same as subtracting a set bit / adding a clear one */
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BCHG, register form.
+ *
+ * Inverts one bit of the register numbered by opcode bits 8-13 (bit number
+ * in bits 0-4) and copies the bit's previous value to the carry flag.
+ * Accumulators A and B are read through emu_pm_read_accu24().
+ */
+static void emu_bchg_reg(dsp_core_t* dsp)
+{
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t word;
+
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        word = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &word);
+    }
+
+    const uint32_t old_bit = (word>>bit) & 1;
+    word ^= (1<<bit);    /* same as subtracting a set bit / adding a clear one */
+    dsp_write_reg(dsp, regno, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/* BCLR, absolute short address form: clear one bit of a memory word and
+ * copy the bit's previous value to the carry flag. */
+static void emu_bclr_aa(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t ea = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word &= ~(uint32_t)(1<<bit);
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/* BCLR, effective-address form: clear one bit, old value goes to carry. */
+static void emu_bclr_ea(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t mode = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t ea;
+
+    emu_calc_ea(dsp, mode, &ea);
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word &= ~(uint32_t)(1<<bit);
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BCLR, short I/O address form: clear one bit of the word at 0xffffc0 plus
+ * the 6-bit offset; the bit's previous value goes to the carry flag. */
+static void emu_bclr_pp(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t ea = 0xffffc0 + ((dsp->cur_inst>>8) & BITMASK(6));
+
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word &= ~(uint32_t)(1<<bit);
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BCLR, register form: clear one bit of the register numbered by opcode
+ * bits 8-13; the bit's previous value goes to the carry flag. */
+static void emu_bclr_reg(dsp_core_t* dsp)
+{
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t word;
+
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        word = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &word);
+    }
+
+    const uint32_t old_bit = (word>>bit) & 1;
+    word &= ~(uint32_t)(1<<bit);
+    dsp_write_reg(dsp, regno, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/* BRA xxxx: unconditional PC-relative branch, displacement in the next word. */
+static void emu_bra_long(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+
+    /* The extension word is not counted: the length is reset to 0 below. */
+    dsp->pc = (dsp->pc + displacement) & BITMASK(24);
+    dsp->cur_inst_len = 0;
+}
+
+/* BRA xxx: unconditional branch, 9-bit displacement in opcode bits 0-4, 6-9. */
+static void emu_bra_imm(dsp_core_t* dsp)
+{
+    const uint32_t disp9 = (dsp->cur_inst & BITMASK(5))
+                         + ((dsp->cur_inst & (BITMASK(4) << 6)) >> 1);
+    dsp->pc += dsp_signextend(9, disp9);
+    dsp->pc &= BITMASK(24);
+    dsp->cur_inst_len = 0;
+}
+
+/* BRCLR, short I/O form: branch relative when the tested bit is clear. */
+static void emu_brclr_pp(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t ea = 0xffffc0 + ((dsp->cur_inst>>8) & BITMASK(6));
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+
+    dsp->instr_cycle += 4;
+
+    if (word & (1<<bit)) {
+        return;    /* bit set: fall through */
+    }
+    dsp->pc = (dsp->pc + displacement) & BITMASK(24);
+    dsp->cur_inst_len = 0;
+}
+
+/* BRCLR, register form: branch relative when the tested bit is clear. */
+static void emu_brclr_reg(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    /* A and B are read through emu_pm_read_accu24(). */
+    uint32_t word;
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        word = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &word);
+    }
+
+    dsp->instr_cycle += 4;
+
+    if ((word & (1<<bit)) == 0) {
+        dsp->pc = (dsp->pc + displacement) & BITMASK(24);
+        dsp->cur_inst_len = 0;
+    }
+}
+
+/* BRSET, short I/O form: branch relative when the tested bit is set. */
+static void emu_brset_pp(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t ea = 0xffffc0 + ((dsp->cur_inst>>8) & BITMASK(6));
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+
+    dsp->instr_cycle += 4;
+
+    if ((word & (1<<bit)) == 0) {
+        return;    /* bit clear: fall through */
+    }
+    dsp->pc = (dsp->pc + displacement) & BITMASK(24);
+    dsp->cur_inst_len = 0;
+}
+
+/* BRSET, register form: branch relative when the tested bit is set. */
+static void emu_brset_reg(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    /* A and B are read through emu_pm_read_accu24(). */
+    uint32_t word;
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        word = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &word);
+    }
+
+    dsp->instr_cycle += 4;
+
+    if (word & (1<<bit)) {
+        dsp->pc = (dsp->pc + displacement) & BITMASK(24);
+        dsp->cur_inst_len = 0;
+    }
+}
+
+/*
+ * BSET, absolute short address form: set one bit of a memory word and
+ * copy the bit's previous value to the carry flag. */
+static void emu_bset_aa(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t ea = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word = word | (1<<bit);
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/* BSR xxxx: PC-relative subroutine call, displacement in the next word. */
+static void emu_bsr_long(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    if (dsp->interrupt_state == DSP_INTERRUPT_LONG) {
+        /* No return frame is pushed in this state; only the state changes. */
+        dsp->interrupt_state = DSP_INTERRUPT_DISABLED;
+    } else {
+        dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    }
+
+    dsp->pc = (dsp->pc + displacement) & BITMASK(24);
+    dsp->cur_inst_len = 0;
+    dsp->instr_cycle += 4;
+}
+
+/* BSR xxx: subroutine call, 9-bit displacement in opcode bits 0-4 and 6-9. */
+static void emu_bsr_imm(dsp_core_t* dsp)
+{
+    const uint32_t disp9 = (dsp->cur_inst & BITMASK(5))
+                         + ((dsp->cur_inst & (BITMASK(4) << 6)) >> 1);
+
+    if (dsp->interrupt_state == DSP_INTERRUPT_LONG) {
+        dsp->interrupt_state = DSP_INTERRUPT_DISABLED;
+    } else {
+        dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    }
+
+    dsp->pc += dsp_signextend(9, disp9);
+    dsp->pc &= BITMASK(24);
+    dsp->cur_inst_len = 0;
+    dsp->instr_cycle += 2;
+}
+
+/* BSET, effective-address form: set one bit, old value goes to carry. */
+static void emu_bset_ea(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t mode = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t ea;
+
+    emu_calc_ea(dsp, mode, &ea);
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word = word | (1<<bit);
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/* BSET, short I/O address form: set one bit of the word at 0xffffc0 plus
+ * the 6-bit offset; the bit's previous value goes to the carry flag. */
+static void emu_bset_pp(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t ea = 0xffffc0 + ((dsp->cur_inst>>8) & BITMASK(6));
+
+    uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t old_bit = (word>>bit) & 1;
+
+    word = word | (1<<bit);
+    dsp56k_write_memory(dsp, space, ea, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BSET, register form: set one bit of the register numbered by opcode
+ * bits 8-13; the bit's previous value goes to the carry flag. */
+static void emu_bset_reg(dsp_core_t* dsp)
+{
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t word;
+
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        word = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &word);
+    }
+
+    const uint32_t old_bit = (word>>bit) & 1;
+    word = word | (1<<bit);
+    dsp_write_reg(dsp, regno, word);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= old_bit<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BTST, absolute short address form: copy one bit of a memory word into
+ * the carry flag. Memory is only read, never written back.
+ */
+static void emu_btst_aa(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t ea = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t tested = (word>>bit) & 1;
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= tested<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/* BTST, effective-address form: copy one bit of the word addressed via
+ * emu_calc_ea() into the carry flag. Memory is only read. */
+static void emu_btst_ea(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t mode = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t ea;
+
+    emu_calc_ea(dsp, mode, &ea);
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t tested = (word>>bit) & 1;
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= tested<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * BTST, short I/O address form: copy one bit of the word at 0xffffc0 plus
+ * the 6-bit offset into the carry flag. Memory is only read.
+ */
+static void emu_btst_pp(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t ea = 0xffffc0 + ((dsp->cur_inst>>8) & BITMASK(6));
+
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t tested = (word>>bit) & 1;
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= tested<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/* BTST, register form: copy one bit of the register numbered by opcode
+ * bits 8-13 into the carry flag. The register is not modified. */
+static void emu_btst_reg(dsp_core_t* dsp)
+{
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t word;
+
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        word = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &word);
+    }
+
+    const uint32_t tested = (word>>bit) & 1;
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_C);
+    dsp->registers[DSP_REG_SR] |= tested<<DSP_SR_C;
+
+    dsp->instr_cycle += 2;
+}
+
+/*
+ * CMP #xx,D
+ *
+ * Subtract the zero-extended 6-bit immediate from a scratch copy of the
+ * accumulator and update the flags; the accumulator itself is not written.
+ */
+static void emu_cmp_imm(dsp_core_t* dsp)
+{
+    const uint32_t imm6 = (dsp->cur_inst >> 8) & BITMASK(6);
+    const uint32_t use_b = (dsp->cur_inst >> 3) & 1;
+
+    /* Scratch copy of the accumulator: [0]=x2, [1]=x1, [2]=x0. */
+    uint32_t acc[3] = {
+        dsp->registers[use_b ? DSP_REG_B2 : DSP_REG_A2],
+        dsp->registers[use_b ? DSP_REG_B1 : DSP_REG_A1],
+        dsp->registers[use_b ? DSP_REG_B0 : DSP_REG_A0]
+    };
+    uint32_t subtrahend[3] = { 0x0, imm6, 0 };
+
+    /* acc is updated in place; the return value holds SR bits to merge. */
+    uint16_t newsr = dsp_sub56(subtrahend, acc);
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Drop the old V and C before OR-ing in the new status bits. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= newsr;
+}
+
+/*
+ * CMP #xxxx,D: compare accumulator A or B (opcode bit 3) with the 24-bit
+ * immediate that follows the opcode. Only the status register is written.
+ */
+static void emu_cmp_long(dsp_core_t* dsp)
+{
+    uint32_t xxxx = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    uint32_t d = (dsp->cur_inst >> 3) & 1;
+
+    uint32_t source[3], dest[3];
+    dest[0] = dsp->registers[d ? DSP_REG_B2 : DSP_REG_A2];
+    dest[1] = dsp->registers[d ? DSP_REG_B1 : DSP_REG_A1];
+    dest[2] = dsp->registers[d ? DSP_REG_B0 : DSP_REG_A0];
+
+    /* A 24-bit word operand is sign-extended into the top word, as emu_add_x()
+     * does; zero-extension made negative immediates compare as positive. */
+    source[2] = 0;
+    source[1] = xxxx;
+    source[0] = (source[1] & (1<<23)) ? 0xff : 0x0;
+
+    uint16_t newsr = dsp_sub56(source, dest);
+
+    emu_ccr_update_e_u_n_z(dsp, dest[0], dest[1], dest[2]);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= newsr;
+}
+
+static void emu_cmpu(dsp_core_t* dsp)
+{
+    uint32_t ggg = (dsp->cur_inst >> 1) & BITMASK(3); /* source register selector */
+    uint32_t d = dsp->cur_inst & 1;                   /* 0: compare against A, 1: against B */
+    /* CMPU: subtract the source from a scratch copy of A/B and set C, Z, N; V is cleared. */
+    uint32_t srcreg = DSP_REG_NULL;
+    switch (ggg) {
+    case 0: srcreg = d ? DSP_REG_A : DSP_REG_B; break; /* the accumulator that is not the destination */
+    case 4: srcreg = DSP_REG_X0; break;
+    case 5: srcreg = DSP_REG_Y0; break;
+    case 6: srcreg = DSP_REG_X1; break;
+    case 7: srcreg = DSP_REG_Y1; break;
+    }
+    /* Selector values 1-3 are not decoded, so srcreg stays DSP_REG_NULL for them. */
+    uint32_t source[3], dest[3];
+    if (d) {
+        dest[2] = dsp->registers[DSP_REG_B0];
+        dest[1] = dsp->registers[DSP_REG_B1];
+        dest[0] = dsp->registers[DSP_REG_B2];
+    } else {
+        dest[2] = dsp->registers[DSP_REG_A0];
+        dest[1] = dsp->registers[DSP_REG_A1];
+        dest[0] = dsp->registers[DSP_REG_A2];
+    }
+
+    uint32_t value;
+    if (srcreg == DSP_REG_A || srcreg == DSP_REG_B) {
+        emu_pm_read_accu24(dsp, srcreg, &value);
+    } else {
+        value = dsp->registers[srcreg];
+    }
+
+    source[2] = 0;
+    source[1] = value;
+    source[0] = source[1] & (1<<23) ? 0xff : 0x0; /* replicate bit 23 into the top word */
+    /* NOTE(review): the source is sign-extended although the mnemonic says "unsigned" - confirm against the CMPU definition. */
+    uint16_t newsr = dsp_sub56(source, dest);
+
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(
+        (1<<DSP_SR_V)|(1<<DSP_SR_C)|(1<<DSP_SR_Z)|(1<<DSP_SR_N));
+    dsp->registers[DSP_REG_SR] |= newsr & (1<<DSP_SR_C); /* only the carry is taken from dsp_sub56 */
+
+    /* Zero Flag (Z) */
+    if ((dest[0] == 0) && (dest[2] == 0) && (dest[1] == 0))
+        dsp->registers[DSP_REG_SR] |= 1 << DSP_SR_Z;
+
+    /* Negative Flag (N) */
+    dsp->registers[DSP_REG_SR] |= (dest[0]>>4) & 0x8; /* bit 7 of dest[0] lands in bit 3 of SR */
+}
+
+static void emu_div(dsp_core_t* dsp)
+{
+    uint32_t srcreg, destreg, source[3], dest[3];
+    uint16_t newsr;
+    /* DIV: one shift-then-add/subtract step on accumulator A or B; source is X0/Y0/X1/Y1 (bits 4-5). */
+    srcreg = DSP_REG_NULL;
+    switch((dsp->cur_inst>>4) & BITMASK(2)) {
+    case 0: srcreg = DSP_REG_X0; break;
+    case 1: srcreg = DSP_REG_Y0; break;
+    case 2: srcreg = DSP_REG_X1; break;
+    case 3: srcreg = DSP_REG_Y1; break;
+    }
+    source[2] = 0;
+    source[1] = dsp->registers[srcreg];
+    source[0] = source[1] & (1<<23) ? 0xff : 0x0; /* replicate bit 23 into the top word */
+    /* Opcode bit 3 is added to DSP_REG_A; any value other than DSP_REG_A is handled as B. */
+    destreg = DSP_REG_A + ((dsp->cur_inst>>3) & 1);
+    if (destreg == DSP_REG_A) {
+        dest[0] = dsp->registers[DSP_REG_A2];
+        dest[1] = dsp->registers[DSP_REG_A1];
+        dest[2] = dsp->registers[DSP_REG_A0];
+    } else {
+        dest[0] = dsp->registers[DSP_REG_B2];
+        dest[1] = dsp->registers[DSP_REG_B1];
+        dest[2] = dsp->registers[DSP_REG_B0];
+    }
+    /* Compare bit 7 of the accumulator's top word with bit 23 of the source: differ -> add, equal -> subtract. */
+    if (((dest[0]>>7) & 1) ^ ((source[1]>>23) & 1)) {
+        /* D += S */
+        newsr = dsp_asl56(dest, 1);
+        dsp_add56(source, dest);
+    } else {
+        /* D -= S */
+        newsr = dsp_asl56(dest, 1);
+        dsp_sub56(source, dest);
+    }
+    /* SR has not been written yet, so this ORs the carry from before this step into bit 0. */
+    dest[2] |= (dsp->registers[DSP_REG_SR]>>DSP_SR_C) & 1;
+
+    if (destreg == DSP_REG_A) {
+        dsp->registers[DSP_REG_A2] = dest[0];
+        dsp->registers[DSP_REG_A1] = dest[1];
+        dsp->registers[DSP_REG_A0] = dest[2];
+    } else {
+        dsp->registers[DSP_REG_B2] = dest[0];
+        dsp->registers[DSP_REG_B1] = dest[1];
+        dsp->registers[DSP_REG_B0] = dest[2];
+    }
+    /* New C is the inverse of bit 7 of the result's top word; L and V come from the shift only. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_C)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= (1-((dest[0]>>7) & 1))<<DSP_SR_C;
+    dsp->registers[DSP_REG_SR] |= newsr & (1<<DSP_SR_L);
+    dsp->registers[DSP_REG_SR] |= newsr & (1<<DSP_SR_V);
+}
+
+/*
+    DO instruction parameter encoding
+
+    xxxxxxxx 00xxxxxx 0xxxxxxx  aa
+    xxxxxxxx 01xxxxxx 0xxxxxxx  ea
+    xxxxxxxx YYxxxxxx 1xxxxxxx  imm
+    xxxxxxxx 11xxxxxx 0xxxxxxx  reg
+*/
+
+/* DO x:aa / y:aa: start a hardware loop whose count is read from memory. */
+static void emu_do_aa(dsp_core_t* dsp)
+{
+    /* Save the current LA/LC pair, then latch the new loop address. */
+    dsp_stack_push(dsp, dsp->registers[DSP_REG_LA], dsp->registers[DSP_REG_LC], 0);
+    dsp->registers[DSP_REG_LA] = read_memory_p(dsp, dsp->pc+1) & BITMASK(16);
+    dsp->cur_inst_len++;
+
+    /* Second frame: pc + instruction length together with the current SR. */
+    dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    dsp->registers[DSP_REG_SR] |= (1<<DSP_SR_LF);
+
+    /* Loop count: low 16 bits of the word at the 6-bit absolute address. */
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t count_addr = (dsp->cur_inst>>8) & BITMASK(6);
+    dsp->registers[DSP_REG_LC] = dsp56k_read_memory(dsp, space, count_addr) & BITMASK(16);
+
+    dsp->instr_cycle += 4;
+}
+
+/* DO #xx: start a hardware loop with a 12-bit immediate count. */
+static void emu_do_imm(dsp_core_t* dsp)
+{
+    dsp_stack_push(dsp, dsp->registers[DSP_REG_LA], dsp->registers[DSP_REG_LC], 0);
+    dsp->registers[DSP_REG_LA] = read_memory_p(dsp, dsp->pc+1) & BITMASK(16);
+    dsp->cur_inst_len++;
+    dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    dsp->registers[DSP_REG_SR] |= (1<<DSP_SR_LF);
+
+    /* Count: opcode bits 8-15 give the low byte, bits 0-3 the upper nibble. */
+    const uint32_t count_lo = (dsp->cur_inst>>8) & BITMASK(8);
+    const uint32_t count_hi = (dsp->cur_inst & BITMASK(4))<<8;
+    dsp->registers[DSP_REG_LC] = count_lo + count_hi;
+    dsp->instr_cycle += 4;
+}
+
+/* DO x:ea / y:ea: start a hardware loop, count read via emu_calc_ea(). */
+static void emu_do_ea(dsp_core_t* dsp)
+{
+    /* Save the current LA/LC pair, then latch the new loop address. */
+    dsp_stack_push(dsp, dsp->registers[DSP_REG_LA], dsp->registers[DSP_REG_LC], 0);
+    dsp->registers[DSP_REG_LA] = read_memory_p(dsp, dsp->pc+1) & BITMASK(16);
+    dsp->cur_inst_len++;
+
+    /* Second frame: pc + instruction length together with the current SR. */
+    dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    dsp->registers[DSP_REG_SR] |= (1<<DSP_SR_LF);
+
+    /* Loop count: low 16 bits of the word at the computed address. */
+    uint32_t count_addr;
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &count_addr);
+    dsp->registers[DSP_REG_LC] = dsp56k_read_memory(dsp, space, count_addr) & BITMASK(16);
+
+    dsp->instr_cycle += 4;
+}
+
+/* DO S: start a hardware loop whose count comes from a register. */
+static void emu_do_reg(dsp_core_t* dsp)
+{
+    /* Save the current LA/LC pair, then latch the new loop address. */
+    dsp_stack_push(dsp, dsp->registers[DSP_REG_LA], dsp->registers[DSP_REG_LC], 0);
+    dsp->registers[DSP_REG_LA] = read_memory_p(dsp, dsp->pc+1) & BITMASK(16);
+    dsp->cur_inst_len++;
+
+    /* The count is loaded before the second frame is pushed. */
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        dsp->registers[DSP_REG_LC] = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &dsp->registers[DSP_REG_LC]);
+    }
+    dsp->registers[DSP_REG_LC] &= BITMASK(16);
+
+    /* Second frame: pc + instruction length together with the current SR. */
+    dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    dsp->registers[DSP_REG_SR] |= (1<<DSP_SR_LF);
+
+    dsp->instr_cycle += 4;
+}
+
+/* DOR #xx: like DO #xx, but the loop address is pc plus the next word. */
+static void emu_dor_imm(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    dsp_stack_push(dsp, dsp->registers[DSP_REG_LA], dsp->registers[DSP_REG_LC], 0);
+    dsp->registers[DSP_REG_LA] = (dsp->pc + displacement) & BITMASK(16);
+    dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    dsp->registers[DSP_REG_SR] |= (1<<DSP_SR_LF);
+
+    const uint32_t count_lo = (dsp->cur_inst>>8) & BITMASK(8);
+    const uint32_t count_hi = (dsp->cur_inst & BITMASK(4))<<8;
+    dsp->registers[DSP_REG_LC] = count_lo + count_hi;
+    dsp->instr_cycle += 4;
+}
+
+/* DOR S: PC-relative loop address, loop count taken from a register. */
+static void emu_dor_reg(dsp_core_t* dsp)
+{
+    const uint32_t displacement = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;
+
+    dsp_stack_push(dsp, dsp->registers[DSP_REG_LA], dsp->registers[DSP_REG_LC], 0);
+    dsp->registers[DSP_REG_LA] = (dsp->pc + displacement) & BITMASK(16);
+    dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    dsp->registers[DSP_REG_SR] |= (1<<DSP_SR_LF);
+
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        dsp->registers[DSP_REG_LC] = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &dsp->registers[DSP_REG_LC]);
+    }
+    dsp->registers[DSP_REG_LC] &= BITMASK(16);
+
+    dsp->instr_cycle += 4;
+}
+
+/* ENDDO: pop the PC/SR frame (restoring only LF), then the saved LA/LC. */
+static void emu_enddo(dsp_core_t* dsp)
+{
+    uint32_t saved_pc, saved_sr;
+    dsp_stack_pop(dsp, &saved_pc, &saved_sr);
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_LF); /* was 0x7f, which also wiped bits 7-14 */
+    dsp->registers[DSP_REG_SR] |= saved_sr & (1<<DSP_SR_LF);
+    dsp_stack_pop(dsp, &dsp->registers[DSP_REG_LA], &dsp->registers[DSP_REG_LC]);
+}
+
+static void emu_illegal(dsp_core_t* dsp)
+{
+    /* Illegal opcode: raise interrupt p:0x003e; with exception_debugging set, the assert below then fires. */
+    dsp56k_add_interrupt(dsp, DSP_INTER_ILLEGAL);
+    if (dsp->exception_debugging) {
+        assert(false);
+    }
+}
+
+/* Jcc xxx: conditional jump to the 12-bit absolute address in the opcode. */
+static void emu_jcc_imm(dsp_core_t* dsp)
+{
+    const uint32_t target = dsp->cur_inst & BITMASK(12);
+    const uint32_t condition = (dsp->cur_inst>>12) & BITMASK(4);
+
+    if (emu_calc_cc(dsp, condition)) {
+        dsp->cur_inst_len = 0;
+        dsp->pc = target;
+    }
+
+    dsp->instr_cycle += 2;
+}
+
+/* Jcc ea: conditional jump to the address computed by emu_calc_ea(). */
+static void emu_jcc_ea(dsp_core_t* dsp)
+{
+    uint32_t target;
+
+    /* The address is evaluated even when the jump is not taken. */
+    emu_calc_ea(dsp, (dsp->cur_inst >>8) & BITMASK(6), &target);
+
+    if (emu_calc_cc(dsp, dsp->cur_inst & BITMASK(4))) {
+        dsp->cur_inst_len = 0;
+        dsp->pc = target;
+    }
+    dsp->instr_cycle += 2;
+}
+
+/* JCLR, absolute short form: jump to the next word's address if the bit is 0. */
+static void emu_jclr_aa(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t ea = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    if (word & (1<<bit)) {
+        /* Bit set: count the address word and continue in sequence. */
+        ++dsp->cur_inst_len;
+    } else {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;
+    }
+}
+
+/* JCLR, effective-address form: jump if the tested memory bit is 0. */
+static void emu_jclr_ea(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t mode = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t target = read_memory_p(dsp, dsp->pc+1);
+    uint32_t ea;
+
+    emu_calc_ea(dsp, mode, &ea);
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+
+    dsp->instr_cycle += 4;
+
+    if (word & (1<<bit)) {
+        ++dsp->cur_inst_len;
+    } else {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;
+    }
+}
+
+/* JCLR, short I/O form: jump to the next word's address if the tested bit
+ * of the word at 0xffffc0 plus the 6-bit offset is 0. */
+static void emu_jclr_pp(dsp_core_t* dsp)
+{
+    const uint32_t space = (dsp->cur_inst>>6) & 1;
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t ea = 0xffffc0 + ((dsp->cur_inst>>8) & BITMASK(6));
+    const uint32_t word = dsp56k_read_memory(dsp, space, ea);
+    const uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    if (word & (1<<bit)) {
+        /* Bit set: count the address word and continue in sequence. */
+        ++dsp->cur_inst_len;
+    } else {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;
+    }
+}
+
+/* JCLR, register form: jump to the next word's address if the bit is 0. */
+static void emu_jclr_reg(dsp_core_t* dsp)
+{
+    const uint32_t regno = (dsp->cur_inst>>8) & BITMASK(6);
+    const uint32_t bit = dsp->cur_inst & BITMASK(5);
+    const uint32_t target = read_memory_p(dsp, dsp->pc+1);
+    uint32_t word;
+
+    if ((regno != DSP_REG_A) && (regno != DSP_REG_B)) {
+        word = dsp->registers[regno];
+    } else {
+        emu_pm_read_accu24(dsp, regno, &word);
+    }
+
+    dsp->instr_cycle += 4;
+
+    if (word & (1<<bit)) {
+        ++dsp->cur_inst_len;
+    } else {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;
+    }
+}
+
+/* JMP ea: unconditional jump to the address computed by emu_calc_ea(). */
+static void emu_jmp_ea(dsp_core_t* dsp)
+{
+    uint32_t target;
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &target);
+    dsp->pc = target;
+    dsp->cur_inst_len = 0;
+    dsp->instr_cycle += 2;
+}
+
+/* JMP xxx: unconditional jump to the 12-bit absolute address in the opcode. */
+static void emu_jmp_imm(dsp_core_t* dsp)
+{
+    const uint32_t target = dsp->cur_inst & BITMASK(12);
+
+    dsp->pc = target;
+    dsp->cur_inst_len = 0;
+
+    dsp->instr_cycle += 2;
+}
+
+/* JScc ea: when the condition holds, push pc+cur_inst_len and SR, then
+ * continue at the effective address; otherwise only the cycles are counted. */
+static void emu_jscc_ea(dsp_core_t* dsp)
+{
+    uint32_t target;
+    uint32_t condition = dsp->cur_inst & BITMASK(4);
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &target);
+    if (emu_calc_cc(dsp, condition)) {
+        dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;
+    }
+    dsp->instr_cycle += 2;
+}
+
+/* JScc xxx: conditional subroutine call to the 12-bit address in the opcode. */
+static void emu_jscc_imm(dsp_core_t* dsp)
+{
+    uint32_t target = dsp->cur_inst & BITMASK(12);
+    uint32_t condition = (dsp->cur_inst>>12) & BITMASK(4);
+
+    if (emu_calc_cc(dsp, condition)) {
+        /* Save pc+cur_inst_len together with SR before branching. */
+        dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;
+    }
+    dsp->instr_cycle += 2;
+}
+
+/* JSCLR #n,x/y:aa,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of the operand at address aa is clear. */
+static void emu_jsclr_aa(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t aa = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t operand = dsp56k_read_memory(dsp, space, aa);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if (((operand >> bit) & 1) == 0) {
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSCLR #n,x/y:ea,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of the operand at ea is clear. */
+static void emu_jsclr_ea(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t ea, operand, target;
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &ea);
+    operand = dsp56k_read_memory(dsp, space, ea);
+    target = read_memory_p(dsp, dsp->pc+1);
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if (((operand >> bit) & 1) == 0) {
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSCLR #n,x/y:pp,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of the operand at 0xffffc0+pp is clear. */
+static void emu_jsclr_pp(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t pp = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t operand = dsp56k_read_memory(dsp, space, 0xffffc0 + pp);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if (((operand >> bit) & 1) == 0) {
+        /* Save pc+2 together with SR before branching. */
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSCLR #n,S,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of register S is clear. */
+static void emu_jsclr_reg(dsp_core_t* dsp)
+{
+    uint32_t reg = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+    uint32_t operand;
+
+    if ((reg==DSP_REG_A) || (reg==DSP_REG_B)) {
+        emu_pm_read_accu24(dsp, reg, &operand);
+    } else {
+        operand = dsp->registers[reg];
+    }
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if (((operand >> bit) & 1) == 0) {
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSET #n,x/y:aa,xxxx: jump to the address stored in the extension word
+ * (pc+1) when bit n of the operand at address aa is set. */
+static void emu_jset_aa(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t aa = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t operand = dsp56k_read_memory(dsp, space, aa);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSET #n,x/y:ea,xxxx: jump to the address stored in the extension word
+ * (pc+1) when bit n of the operand at the effective address is set. */
+static void emu_jset_ea(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t ea, operand, target;
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &ea);
+    operand = dsp56k_read_memory(dsp, space, ea);
+    target = read_memory_p(dsp, dsp->pc+1);
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSET #n,x/y:pp,xxxx: jump to the address stored in the extension word
+ * (pc+1) when bit n of the operand at 0xffffc0+pp is set. */
+static void emu_jset_pp(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t pp = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t operand = dsp56k_read_memory(dsp, space, 0xffffc0 + pp);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        /* Not taken: one more word for the extension word. */
+        ++dsp->cur_inst_len;
+    }
+}
+
+/* JSET #n,S,xxxx: jump to the address stored in the extension word (pc+1)
+ * when bit n of register S is set. */
+static void emu_jset_reg(dsp_core_t* dsp)
+{
+    uint32_t reg = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+    uint32_t operand;
+
+    if ((reg==DSP_REG_A) || (reg==DSP_REG_B)) {
+        emu_pm_read_accu24(dsp, reg, &operand);
+    } else {
+        operand = dsp->registers[reg];
+    }
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSR xxx: subroutine call to the 12-bit address held in the opcode. */
+static void emu_jsr_imm(dsp_core_t* dsp)
+{
+    uint32_t target = dsp->cur_inst & BITMASK(12);
+
+    if (dsp->interrupt_state == DSP_INTERRUPT_LONG) {
+        /* No push in the long-interrupt state; the state is switched to
+         * disabled instead. */
+        dsp->interrupt_state = DSP_INTERRUPT_DISABLED;
+    } else {
+        dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    }
+
+    dsp->pc = target;
+    dsp->cur_inst_len = 0;
+
+    dsp->instr_cycle += 2;
+}
+
+/* JSR ea: subroutine call to the computed effective address. */
+static void emu_jsr_ea(dsp_core_t* dsp)
+{
+    uint32_t target;
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &target);
+
+    if (dsp->interrupt_state == DSP_INTERRUPT_LONG) {
+        /* No push in the long-interrupt state; the state is switched to
+         * disabled instead. */
+        dsp->interrupt_state = DSP_INTERRUPT_DISABLED;
+    } else {
+        dsp_stack_push(dsp, dsp->pc+dsp->cur_inst_len, dsp->registers[DSP_REG_SR], 0);
+    }
+    dsp->pc = target;
+    dsp->cur_inst_len = 0;
+    dsp->instr_cycle += 2;
+}
+
+/* JSSET #n,x/y:aa,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of the operand at address aa is set. */
+static void emu_jsset_aa(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t aa = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t operand = dsp56k_read_memory(dsp, space, aa);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSSET #n,x/y:ea,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of the operand at ea is set. */
+static void emu_jsset_ea(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t ea, operand, target;
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &ea);
+    operand = dsp56k_read_memory(dsp, space, ea);
+    target = read_memory_p(dsp, dsp->pc+1);
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSSET #n,x/y:pp,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of the operand at 0xffffc0+pp is set. */
+static void emu_jsset_pp(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t pp = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t operand = dsp56k_read_memory(dsp, space, 0xffffc0 + pp);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        /* Save pc+2 together with SR before branching. */
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* JSSET #n,S,xxxx: call the subroutine whose address is stored in the
+ * extension word (pc+1) when bit n of register S is set. */
+static void emu_jsset_reg(dsp_core_t* dsp)
+{
+    uint32_t reg = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t bit = dsp->cur_inst & BITMASK(5);
+    uint32_t target = read_memory_p(dsp, dsp->pc+1);
+    uint32_t operand;
+
+    if ((reg==DSP_REG_A) || (reg==DSP_REG_B)) {
+        emu_pm_read_accu24(dsp, reg, &operand);
+    } else {
+        operand = dsp->registers[reg];
+    }
+    dsp->instr_cycle += 4;
+
+    /* Shift the operand rather than a signed 1: 1<<31 is undefined in C. */
+    if ((operand >> bit) & 1) {
+        dsp_stack_push(dsp, dsp->pc+2, dsp->registers[DSP_REG_SR], 0);
+        dsp->pc = target;
+        dsp->cur_inst_len = 0;     /* length reset to 0 on a taken jump */
+    } else {
+        ++dsp->cur_inst_len;       /* one more word for the extension word */
+    }
+}
+
+/* LUA: store the address update of Rn in Rd/Nd while leaving Rn unchanged. */
+static void emu_lua(dsp_core_t* dsp)
+{
+    // TODO: I don't think this is right
+    uint32_t src = DSP_REG_R0 + ((dsp->cur_inst>>8) & BITMASK(3));
+    uint32_t dst = dsp->cur_inst & BITMASK(3);
+    uint32_t saved, updated, unused_ea;
+
+    /* Let emu_calc_ea update Rn, capture the result, then restore Rn. */
+    saved = dsp->registers[src];
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(5), &unused_ea);
+    updated = dsp->registers[src];
+    dsp->registers[src] = saved;
+
+    /* Bit 3 of the opcode selects an N register instead of an R register. */
+    if (dsp->cur_inst & (1<<3)) {
+        dst += DSP_REG_N0;
+    } else {
+        dst += DSP_REG_R0;
+    }
+    dsp->registers[dst] = updated;
+    dsp->instr_cycle += 2;
+}
+
+/* LUA (Rn+aa),D: add the sign-extended 7-bit displacement to Rn and store the
+ * result, masked to 24 bits, in Rd or Nd. */
+static void emu_lua_rel(dsp_core_t* dsp)
+{
+    /* The displacement is split: low 4 bits at bit 4, high 3 bits at bit 11. */
+    uint32_t disp_lo = (dsp->cur_inst >> 4) & BITMASK(4);
+    uint32_t disp_hi = (dsp->cur_inst >> 11) & BITMASK(3);
+    uint32_t disp = disp_lo + (disp_hi << 4);
+    uint32_t base = dsp->registers[DSP_REG_R0 + ((dsp->cur_inst>>8) & BITMASK(3))];
+    uint32_t result = (base + dsp_signextend(7, disp)) & BITMASK(24);
+    uint32_t dst = dsp->cur_inst & BITMASK(3);
+
+    /* Bit 3 of the opcode selects an N register instead of an R register. */
+    dst += (dsp->cur_inst & (1<<3)) ? DSP_REG_N0 : DSP_REG_R0;
+    dsp->registers[dst] = result;
+
+    dsp->instr_cycle += 2;
+}
+
+/* MOVEC between the register selected by bits 8-13 (S2/D2) and the register
+ * selected by bits 0-5 (S1/D1). Bit 15 set copies S2 into D1 through
+ * dsp_write_reg; bit 15 clear copies S1 into D2. */
+static void emu_movec_reg(dsp_core_t* dsp)
+{
+    uint32_t reg_hi = (dsp->cur_inst>>8) & BITMASK(6);    /* S2 or D2 */
+    uint32_t reg_lo = dsp->cur_inst & BITMASK(6);         /* S1 or D1 */
+    uint32_t data;
+    uint32_t popped_sr;     /* second output of dsp_stack_pop; unused */
+
+    if (dsp->cur_inst & (1<<15)) {
+        /* S2,D1: accumulators are read through emu_pm_read_accu24. */
+        if ((reg_hi == DSP_REG_A) || (reg_hi == DSP_REG_B)) {
+            emu_pm_read_accu24(dsp, reg_hi, &data);
+        } else {
+            data = dsp->registers[reg_hi];
+        }
+        data &= BITMASK(registers_mask[reg_lo]);
+        dsp_write_reg(dsp, reg_lo, data);
+        return;
+    }
+
+    /* S1,D2: reading SSH pops the stack. */
+    if (reg_lo == DSP_REG_SSH) {
+        dsp_stack_pop(dsp, &data, &popped_sr);
+    } else {
+        data = dsp->registers[reg_lo];
+    }
+
+    if (reg_hi == DSP_REG_A) {
+        /* Load A1, sign-extend bit 23 into A2 and clear A0. */
+        dsp->registers[DSP_REG_A0] = 0;
+        dsp->registers[DSP_REG_A1] = data & BITMASK(24);
+        dsp->registers[DSP_REG_A2] = (data & (1<<23)) ? 0xff : 0x0;
+    } else if (reg_hi == DSP_REG_B) {
+        /* Load B1, sign-extend bit 23 into B2 and clear B0. */
+        dsp->registers[DSP_REG_B0] = 0;
+        dsp->registers[DSP_REG_B1] = data & BITMASK(24);
+        dsp->registers[DSP_REG_B2] = (data & (1<<23)) ? 0xff : 0x0;
+    } else {
+        /* Any other destination is masked per registers_mask. */
+        dsp->registers[reg_hi] = data & BITMASK(registers_mask[reg_hi]);
+    }
+}
+
+/* MOVEC with a 6-bit absolute address aa:
+ *   x:aa,D1   y:aa,D1   (bit 15 set)
+ *   S1,x:aa   S1,y:aa   (bit 15 clear)
+ */
+static void emu_movec_aa(dsp_core_t* dsp)
+{
+    uint32_t reg = dsp->cur_inst & BITMASK(6);
+    uint32_t aa = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t data;
+    uint32_t popped_sr;     /* second output of dsp_stack_pop; unused */
+
+    if (dsp->cur_inst & (1<<15)) {
+        /* Memory to register: mask the word, then store via dsp_write_reg. */
+        data = dsp56k_read_memory(dsp, space, aa);
+        data &= BITMASK(registers_mask[reg]);
+        dsp_write_reg(dsp, reg, data);
+        return;
+    }
+
+    /* Register to memory: reading SSH pops the stack. */
+    if (reg == DSP_REG_SSH) {
+        dsp_stack_pop(dsp, &data, &popped_sr);
+    } else {
+        data = dsp->registers[reg];
+    }
+
+    dsp56k_write_memory(dsp, space, aa, data);
+}
+
+/* MOVEC #xx,D1: load the 8-bit immediate from the opcode into register D1. */
+static void emu_movec_imm(dsp_core_t* dsp)
+{
+    uint32_t dst = dsp->cur_inst & BITMASK(6);
+    uint32_t imm = (dsp->cur_inst>>8) & BITMASK(8);
+
+    /* Mask per registers_mask before handing over to dsp_write_reg. */
+    imm &= BITMASK(registers_mask[dst]);
+    dsp_write_reg(dsp, dst, imm);
+}
+
+/* MOVEC with an effective address:
+ *   x:ea,D1   y:ea,D1   #xxxx,D1   (bit 15 set)
+ *   S1,x:ea   S1,y:ea              (bit 15 clear)
+ */
+static void emu_movec_ea(dsp_core_t* dsp)
+{
+    uint32_t reg = dsp->cur_inst & BITMASK(6);
+    uint32_t ea_mode = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t ea;
+    uint32_t data;
+    uint32_t popped_sr;     /* second output of dsp_stack_pop; unused */
+
+    if (dsp->cur_inst & (1<<15)) {
+        /* When emu_calc_ea returns non-zero its result is used as the value
+         * itself; otherwise the result is an address to read from. */
+        if (emu_calc_ea(dsp, ea_mode, &ea)) {
+            data = ea;
+        } else {
+            data = dsp56k_read_memory(dsp, space, ea);
+        }
+        data &= BITMASK(registers_mask[reg]);
+        dsp_write_reg(dsp, reg, data);
+        return;
+    }
+
+    /* Register to memory: the address is computed before the source is
+     * read, and reading SSH pops the stack. */
+    emu_calc_ea(dsp, ea_mode, &ea);
+    if (reg == DSP_REG_SSH) {
+        dsp_stack_pop(dsp, &data, &popped_sr);
+    } else {
+        data = dsp->registers[reg];
+    }
+
+    dsp56k_write_memory(dsp, space, ea, data);
+}
+
+/* MOVEM p:aa,D and MOVEM S,p:aa: transfer between a register and the program
+ * memory word at the 6-bit absolute address aa. */
+static void emu_movem_aa(dsp_core_t* dsp)
+{
+    uint32_t reg = dsp->cur_inst & BITMASK(6);
+    uint32_t aa = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t data;
+    uint32_t popped_sr;     /* second output of dsp_stack_pop; unused */
+
+    if (!(dsp->cur_inst & (1<<15))) {
+        /* Register to P memory. */
+        if (reg == DSP_REG_SSH) {
+            dsp_stack_pop(dsp, &data, &popped_sr);
+        } else if ((reg == DSP_REG_A) || (reg == DSP_REG_B)) {
+            emu_pm_read_accu24(dsp, reg, &data);
+        } else {
+            data = dsp->registers[reg];
+        }
+        dsp56k_write_memory(dsp, DSP_SPACE_P, aa, data);
+    } else {
+        /* P memory to register, masked per registers_mask. */
+        data = read_memory_p(dsp, aa);
+        data &= BITMASK(registers_mask[reg]);
+        dsp_write_reg(dsp, reg, data);
+    }
+
+    dsp->instr_cycle += 4;
+}
+
+/* MOVEM p:ea,D and MOVEM S,p:ea: transfer between a register and the program
+ * memory word at the computed effective address. */
+static void emu_movem_ea(dsp_core_t* dsp)
+{
+    uint32_t reg = dsp->cur_inst & BITMASK(6);
+    uint32_t ea;
+    uint32_t data;
+    uint32_t popped_sr;     /* second output of dsp_stack_pop; unused */
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &ea);
+    if (!(dsp->cur_inst & (1<<15))) {
+        /* Register to P memory. */
+        if (reg == DSP_REG_SSH) {
+            dsp_stack_pop(dsp, &data, &popped_sr);
+        } else if ((reg == DSP_REG_A) || (reg == DSP_REG_B)) {
+            emu_pm_read_accu24(dsp, reg, &data);
+        } else {
+            data = dsp->registers[reg];
+        }
+        dsp56k_write_memory(dsp, DSP_SPACE_P, ea, data);
+    } else {
+        /* P memory to register, masked per registers_mask. */
+        data = read_memory_p(dsp, ea);
+        data &= BITMASK(registers_mask[reg]);
+        dsp_write_reg(dsp, reg, data);
+    }
+
+    dsp->instr_cycle += 4;
+}
+
+/* MOVEP between a register and the peripheral word at 0xffffc0+pp:
+ *   S,x:pp   S,y:pp   (bit 15 set)
+ *   x:pp,D   y:pp,D   (bit 15 clear)
+ * Bit 16 of the opcode selects the memory space.
+ */
+static void emu_movep_0(dsp_core_t* dsp)
+{
+    uint32_t pp_addr = 0xffffc0 + (dsp->cur_inst & BITMASK(6));
+    uint32_t space = (dsp->cur_inst>>16) & 1;
+    uint32_t reg = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t data;
+    uint32_t popped_sr;     /* second output of dsp_stack_pop; unused */
+
+    if (!(dsp->cur_inst & (1<<15))) {
+        /* Peripheral to register, masked per registers_mask. */
+        data = dsp56k_read_memory(dsp, space, pp_addr);
+        data &= BITMASK(registers_mask[reg]);
+        dsp_write_reg(dsp, reg, data);
+    } else {
+        /* Register to peripheral. Accumulators go through
+         * emu_pm_read_accu24 and reading SSH pops the stack. */
+        if ((reg == DSP_REG_A) || (reg == DSP_REG_B)) {
+            emu_pm_read_accu24(dsp, reg, &data);
+        } else if (reg == DSP_REG_SSH) {
+            dsp_stack_pop(dsp, &data, &popped_sr);
+        } else {
+            data = dsp->registers[reg];
+        }
+
+        dsp56k_write_memory(dsp, space, pp_addr, data);
+    }
+
+    dsp->instr_cycle += 2;
+}
+
+/* MOVEP between program memory and the peripheral word at 0xffffc0+pp:
+ *   p:ea,x:pp   p:ea,y:pp   (bit 15 set)
+ *   x:pp,p:ea   y:pp,p:ea   (bit 15 clear)
+ */
+static void emu_movep_1(dsp_core_t* dsp)
+{
+    uint32_t pp_addr = 0xffffc0 + (dsp->cur_inst & BITMASK(6));
+    uint32_t space = (dsp->cur_inst>>16) & 1;
+    uint32_t p_addr;
+    uint32_t word;
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &p_addr);
+
+    if (dsp->cur_inst & (1<<15)) {
+        word = read_memory_p(dsp, p_addr);
+        dsp56k_write_memory(dsp, space, pp_addr, word);
+    } else {
+        word = dsp56k_read_memory(dsp, space, pp_addr);
+        dsp56k_write_memory(dsp, DSP_SPACE_P, p_addr, word);
+    }
+
+    /* MOVEP is 4 cycles, but according to the Motorola documentation a
+     * move from P memory to X or Y peripheral memory takes 2 more cycles,
+     * so +4 cycles in total. */
+    dsp->instr_cycle += 4;
+}
+
+/* MOVEP between an effective-address operand and the peripheral word at
+ * 0xffffc0+pp. Bit 16 selects the peripheral space, bit 6 the space of the
+ * ea operand, and bit 15 the direction:
+ *   bit 15 set:    x:ea,x:pp   y:ea,x:pp   #xxxxxx,x:pp
+ *                  x:ea,y:pp   y:ea,y:pp   #xxxxxx,y:pp
+ *   bit 15 clear:  x:pp,x:ea   x:pp,y:ea
+ *                  y:pp,x:ea   y:pp,y:ea
+ */
+static void emu_movep_23(dsp_core_t* dsp)
+{
+    uint32_t pp_addr = 0xffffc0 + (dsp->cur_inst & BITMASK(6));
+    uint32_t pp_space = (dsp->cur_inst>>16) & 1;
+    uint32_t ea_mode = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t ea_space = (dsp->cur_inst>>6) & 1;
+
+    uint32_t ea;
+    uint32_t word;
+    /* Computed once up front; both directions use the ea result. */
+    int is_value = emu_calc_ea(dsp, ea_mode, &ea);
+
+    if (!(dsp->cur_inst & (1<<15))) {
+        /* Peripheral to memory. */
+        word = dsp56k_read_memory(dsp, pp_space, pp_addr);
+        dsp56k_write_memory(dsp, ea_space, ea, word);
+    } else {
+        /* To peripheral: when emu_calc_ea returned non-zero its result is
+         * stored as the value itself; otherwise it is an address to read. */
+        if (is_value) {
+            word = ea;
+        } else {
+            word = dsp56k_read_memory(dsp, ea_space, ea);
+        }
+
+        dsp56k_write_memory(dsp, pp_space, pp_addr, word);
+    }
+
+    dsp->instr_cycle += 2;
+}
+
+/* MOVEP between an effective-address operand and the X-space word at
+ * 0xffff80+qq. Opcode layout: 00000111W1MMMRRR0sqqqqqq. */
+static void emu_movep_x_qq(dsp_core_t* dsp)
+{
+    uint32_t qq_addr = 0xffff80 + (dsp->cur_inst & BITMASK(6));
+    uint32_t mode = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t ea, word;
+    int is_value = emu_calc_ea(dsp, mode, &ea);
+
+    if (!(dsp->cur_inst & (1<<15))) {
+        /* qq to memory. */
+        word = dsp56k_read_memory(dsp, DSP_SPACE_X, qq_addr);
+        dsp56k_write_memory(dsp, space, ea, word);
+    } else {
+        /* To qq: when emu_calc_ea returned non-zero its result is stored
+         * as the value itself; otherwise it is an address to read. */
+        if (is_value) {
+            word = ea;
+        } else {
+            word = dsp56k_read_memory(dsp, space, ea);
+        }
+        dsp56k_write_memory(dsp, DSP_SPACE_X, qq_addr, word);
+    }
+
+    dsp->instr_cycle += 2;
+}
+
+/* MOVE X:(Rn+xxxx) <-> register, with xxxx taken from the word at pc+1. */
+static void emu_move_x_long(dsp_core_t* dsp)
+{
+    uint32_t disp = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;    /* one more word for the extension word */
+    uint32_t base = dsp->registers[DSP_REG_R0 + ((dsp->cur_inst >> 8) & BITMASK(3))];
+    uint32_t reg = dsp->cur_inst & BITMASK(6);
+    uint32_t addr = (base + disp) & BITMASK(24);
+    uint32_t data;
+
+    if ((dsp->cur_inst >> 6) & 1) {
+        /* W set: memory to register. */
+        dsp_write_reg(dsp, reg, dsp56k_read_memory(dsp, DSP_SPACE_X, addr));
+    } else {
+        /* W clear: register to memory. */
+        if (reg == DSP_REG_A || reg == DSP_REG_B) {
+            emu_pm_read_accu24(dsp, reg, &data);
+        } else {
+            data = dsp->registers[reg];
+        }
+        dsp56k_write_memory(dsp, DSP_SPACE_X, addr, data);
+    }
+    // TODO: cycles
+}
+
+/* MOVE space:(Rn+xxx) <-> register (bits 0-3), with a signed 7-bit xxx. */
+static void emu_move_xy_imm(dsp_core_t* dsp, int space)
+{
+    /* Displacement: upper 6 bits at bit 11, lowest bit at bit 6. */
+    uint32_t disp = (((dsp->cur_inst >> 11) & BITMASK(6)) << 1)
+                  + ((dsp->cur_inst >> 6) & 1);
+    uint32_t base = dsp->registers[DSP_REG_R0 + ((dsp->cur_inst >> 8) & BITMASK(3))];
+    uint32_t reg = dsp->cur_inst & BITMASK(4);
+    uint32_t addr = (base + dsp_signextend(7, disp)) & BITMASK(24);
+    uint32_t data;
+
+    if ((dsp->cur_inst >> 4) & 1) {
+        dsp_write_reg(dsp, reg, dsp56k_read_memory(dsp, space, addr));
+    } else {
+        if (reg == DSP_REG_A || reg == DSP_REG_B) {
+            emu_pm_read_accu24(dsp, reg, &data);
+        } else {
+            data = dsp->registers[reg];
+        }
+        dsp56k_write_memory(dsp, space, addr, data);
+    }
+    // TODO: cycles
+}
+
+static void emu_move_x_imm(dsp_core_t* dsp)
+{
+    emu_move_xy_imm(dsp, DSP_SPACE_X); /* shared handler, X memory space */
+}
+
+static void emu_move_y_imm(dsp_core_t* dsp)
+{
+    emu_move_xy_imm(dsp, DSP_SPACE_Y); /* shared handler, Y memory space */
+}
+
+/* MPYI #xxxx,S,D: multiply the extension word at pc+1 by X0/Y0/X1/Y1 with
+ * the sign selected by bit 2, and store the dsp_mul56 result in A or B. */
+static void emu_mpyi(dsp_core_t* dsp)
+{
+    /* Source register indexed by the qq field (bits 4-5). */
+    const unsigned int qq_regs[4] = {
+        DSP_REG_X0, DSP_REG_Y0, DSP_REG_X1, DSP_REG_Y1
+    };
+    uint32_t imm = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;    /* one more word for the extension word */
+
+    uint32_t negate = (dsp->cur_inst >> 2) & 1;
+    uint32_t to_b = (dsp->cur_inst >> 3) & 1;
+    unsigned int src = qq_regs[(dsp->cur_inst >> 4) & BITMASK(2)];
+    uint32_t product[3];
+
+    dsp_mul56(imm, dsp->registers[src], product, negate ? SIGN_MINUS : SIGN_PLUS);
+
+    /* product[0..2] map to the 2/1/0 parts of the destination accumulator. */
+    if (to_b) {
+        dsp->registers[DSP_REG_B2] = product[0];
+        dsp->registers[DSP_REG_B1] = product[1];
+        dsp->registers[DSP_REG_B0] = product[2];
+    } else {
+        dsp->registers[DSP_REG_A2] = product[0];
+        dsp->registers[DSP_REG_A1] = product[1];
+        dsp->registers[DSP_REG_A0] = product[2];
+    }
+
+    emu_ccr_update_e_u_n_z(dsp, product[0], product[1], product[2]);
+    /* Clear the V flag in SR. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-(1<<DSP_SR_V);
+}
+
+/* NORM Rn,D: one normalization step on the selected accumulator, driven by
+ * the E, U and Z flags of SR: shift left and decrement Rn when (!E && U && !Z),
+ * shift right and increment Rn when E is set, otherwise leave both untouched. */
+static void emu_norm(dsp_core_t* dsp)
+{
+    uint32_t sr = dsp->registers[DSP_REG_SR];
+    uint32_t flag_e = (sr>>DSP_SR_E) & 1;
+    uint32_t flag_u = (sr>>DSP_SR_U) & 1;
+    uint32_t flag_z = (sr>>DSP_SR_Z) & 1;
+    uint32_t acc = (dsp->cur_inst>>3) & 1;     /* offset added to the A indices */
+    uint32_t rn = DSP_REG_R0+((dsp->cur_inst>>8) & BITMASK(3));
+    uint32_t parts[3];
+    uint16_t shift_flags = 0;
+
+    parts[0] = dsp->registers[DSP_REG_A2+acc];
+    parts[1] = dsp->registers[DSP_REG_A1+acc];
+    parts[2] = dsp->registers[DSP_REG_A0+acc];
+
+    if (!flag_e && flag_u && !flag_z) {
+        /* Shift left by one and count Rn down. */
+        shift_flags = dsp_asl56(parts, 1);
+        dsp->registers[rn] = (dsp->registers[rn] - 1) & BITMASK(16);
+    } else if (flag_e) {
+        /* Shift right by one and count Rn up. */
+        shift_flags = dsp_asr56(parts, 1);
+        dsp->registers[rn] = (dsp->registers[rn] + 1) & BITMASK(16);
+    }
+
+    /* Write the (possibly shifted) accumulator back. */
+    dsp->registers[DSP_REG_A2+acc] = parts[0];
+    dsp->registers[DSP_REG_A1+acc] = parts[1];
+    dsp->registers[DSP_REG_A0+acc] = parts[2];
+
+    emu_ccr_update_e_u_n_z(dsp, parts[0], parts[1], parts[2]);
+
+    /* Clear V and C, then OR in the flags returned by the shift helper. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= shift_flags;
+}
+
+/* OR #xxxx,D: OR the extension word at pc+1 into A1 or B1 and refresh the
+ * N, Z and V flags of SR from the result. */
+static void emu_or_long(dsp_core_t* dsp)
+{
+    uint32_t imm = read_memory_p(dsp, dsp->pc+1);
+    dsp->cur_inst_len++;    /* one more word for the extension word */
+
+    /* Bit 3 of the opcode selects B1 over A1. */
+    int dst = ((dsp->cur_inst >> 3) & 1) ? DSP_REG_B1 : DSP_REG_A1;
+
+    dsp->registers[dst] |= imm;
+
+    /* N, Z and V are cleared first; N then mirrors bit 23 of the result
+     * and Z is set when the result word is zero. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_N)|(1<<DSP_SR_Z)|(1<<DSP_SR_V));
+    dsp->registers[DSP_REG_SR] |= ((dsp->registers[dst]>>23) & 1)<<DSP_SR_N;
+    dsp->registers[DSP_REG_SR] |= (dsp->registers[dst]==0)<<DSP_SR_Z;
+
+    // TODO: cycles?
+}
+
+/* ORI #xx,D: OR the 8-bit immediate into MR (SR bits 8-15), CCR (SR bits
+ * 0-7) or OMR, as selected by the low two opcode bits. Selector value 3 is
+ * ignored. */
+static void emu_ori(dsp_core_t* dsp)
+{
+    uint32_t imm = (dsp->cur_inst >> 8) & BITMASK(8);
+    uint32_t target = dsp->cur_inst & BITMASK(2);
+
+    if (target == 0) {
+        /* mr */
+        dsp->registers[DSP_REG_SR] |= imm<<8;
+    } else if (target == 1) {
+        /* ccr */
+        dsp->registers[DSP_REG_SR] |= imm;
+    } else if (target == 2) {
+        /* omr */
+        dsp->registers[DSP_REG_OMR] |= imm;
+    }
+    /* NOTE(review): selector 3 looks like EOM on the DSP56300 family --
+     * confirm against the manual whether it should be handled here. */
+}
+
+/*
+    REP instruction parameter encoding
+
+    xxxxxxxx 00xxxxxx 0xxxxxxx  aa
+    xxxxxxxx 01xxxxxx 0xxxxxxx  ea
+    xxxxxxxx YYxxxxxx 1xxxxxxx  imm
+    xxxxxxxx 11xxxxxx 0xxxxxxx  reg
+*/
+
+/* REP x:aa / REP y:aa: start a repeat whose count is read from address aa. */
+static void emu_rep_aa(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t aa = (dsp->cur_inst>>8) & BITMASK(6);
+
+    dsp->registers[DSP_REG_LCSAVE] = dsp->registers[DSP_REG_LC];  /* keep old LC */
+    dsp->pc_on_rep = 1;     /* LC is not decremented the first time */
+    dsp->loop_rep = 1;      /* a rep is now running */
+    dsp->registers[DSP_REG_LC] = dsp56k_read_memory(dsp, space, aa);
+    dsp->instr_cycle += 2;
+}
+
+/* REP #xxx: start a repeat with the 12-bit count encoded in the opcode. */
+static void emu_rep_imm(dsp_core_t* dsp)
+{
+    /* Low 8 bits of the count sit at bit 8, the high 4 bits at bit 0. */
+    uint32_t count_lo = (dsp->cur_inst>>8) & BITMASK(8);
+    uint32_t count_hi = dsp->cur_inst & BITMASK(4);
+
+    dsp->registers[DSP_REG_LCSAVE] = dsp->registers[DSP_REG_LC];
+    dsp->pc_on_rep = 1;     /* LC is not decremented the first time */
+    dsp->loop_rep = 1;      /* a rep is now running */
+    dsp->registers[DSP_REG_LC] = count_lo + (count_hi<<8);
+    dsp->instr_cycle += 2;
+}
+
+/* REP x:ea / REP y:ea: start a repeat whose count is read from the computed
+ * effective address. */
+static void emu_rep_ea(dsp_core_t* dsp)
+{
+    uint32_t space = (dsp->cur_inst>>6) & 1;
+    uint32_t ea;
+
+    dsp->registers[DSP_REG_LCSAVE] = dsp->registers[DSP_REG_LC];
+    dsp->pc_on_rep = 1;     /* LC is not decremented the first time */
+    dsp->loop_rep = 1;      /* a rep is now running */
+
+    emu_calc_ea(dsp, (dsp->cur_inst>>8) & BITMASK(6), &ea);
+    dsp->registers[DSP_REG_LC] = dsp56k_read_memory(dsp, space, ea);
+
+    dsp->instr_cycle += 2;
+}
+
+/* REP S: start a repeat whose count is register S masked with BITMASK(16). */
+static void emu_rep_reg(dsp_core_t* dsp)
+{
+    uint32_t src = (dsp->cur_inst>>8) & BITMASK(6);
+    uint32_t *lc = &dsp->registers[DSP_REG_LC];
+
+    dsp->registers[DSP_REG_LCSAVE] = *lc;
+    dsp->pc_on_rep = 1;     /* LC is not decremented the first time */
+    dsp->loop_rep = 1;      /* a rep is now running */
+
+    /* Accumulators are read by emu_pm_read_accu24 straight into LC. */
+    if ((src == DSP_REG_A) || (src == DSP_REG_B)) {
+        emu_pm_read_accu24(dsp, src, lc);
+    } else {
+        *lc = dsp->registers[src];
+    }
+    *lc &= BITMASK(16);
+
+    dsp->instr_cycle += 2;
+}
+
+static void emu_reset(dsp_core_t* dsp)
+{
+    /* Reset external peripherals: nothing is reset here, only the cycles are counted */
+    dsp->instr_cycle += 2;
+}
+
+/* RTI: pop the stack and restore both pc and SR from it. */
+static void emu_rti(dsp_core_t* dsp)
+{
+    uint32_t saved_pc = 0, saved_sr = 0;
+
+    dsp_stack_pop(dsp, &saved_pc, &saved_sr);
+    dsp->registers[DSP_REG_SR] = saved_sr;
+    dsp->pc = saved_pc;
+    dsp->cur_inst_len = 0;
+    dsp->instr_cycle += 2;
+}
+
+/* RTS: pop the stack and restore pc only; the popped SR is discarded. */
+static void emu_rts(dsp_core_t* dsp)
+{
+    uint32_t return_pc = 0, ignored_sr;
+
+    dsp_stack_pop(dsp, &return_pc, &ignored_sr);
+    dsp->cur_inst_len = 0;
+    dsp->pc = return_pc;
+    dsp->instr_cycle += 2;
+}
+
+static void emu_stop(dsp_core_t* dsp)
+{
+    DPRINTF("Dsp: STOP instruction\n"); /* STOP is only traced here */
+}
+
+/* Subtract x from accumulator B (d != 0) or A (d == 0). x supplies the middle
+ * word of the subtrahend, bit 23 is sign-extended into the top word and the
+ * low word is zero; SR is updated from the result. */
+static void emu_sub_x(dsp_core_t* dsp, uint32_t x, uint32_t d)
+{
+    /* Indices of the 2/1/0 parts of the selected accumulator. */
+    const int hi = d ? DSP_REG_B2 : DSP_REG_A2;
+    const int mid = d ? DSP_REG_B1 : DSP_REG_A1;
+    const int lo = d ? DSP_REG_B0 : DSP_REG_A0;
+    uint32_t subtrahend[3];
+    uint32_t acc[3];
+    uint16_t flags;
+
+    acc[0] = dsp->registers[hi];
+    acc[1] = dsp->registers[mid];
+    acc[2] = dsp->registers[lo];
+
+    /* Bit 23 of x is extended into the top part; the low part is zero. */
+    subtrahend[0] = (x & (1<<23)) ? 0xff : 0x0;
+    subtrahend[1] = x;
+    subtrahend[2] = 0;
+
+    flags = dsp_sub56(subtrahend, acc);
+
+    /* dsp_sub56 leaves the difference in acc; write it back. */
+    dsp->registers[hi] = acc[0];
+    dsp->registers[mid] = acc[1];
+    dsp->registers[lo] = acc[2];
+
+    emu_ccr_update_e_u_n_z(dsp, acc[0], acc[1], acc[2]);
+
+    /* Clear V and C, then OR in the flags returned by dsp_sub56. */
+    dsp->registers[DSP_REG_SR] &= BITMASK(16)-((1<<DSP_SR_V)|(1<<DSP_SR_C));
+    dsp->registers[DSP_REG_SR] |= flags;
+}
+
+/* SUB #xx,D: subtract the 6-bit immediate in the opcode from A or B. */
+static void emu_sub_imm(dsp_core_t* dsp)
+{
+    emu_sub_x(dsp, (dsp->cur_inst >> 8) & BITMASK(6),
+              (dsp->cur_inst >> 3) & 1);
+}
+
+/* SUB #xxxx,D: subtract the extension word at pc+1 from A or B. */
+static void emu_sub_long(dsp_core_t* dsp)
+{
+    uint32_t imm = read_memory_p(dsp, dsp->pc+1);
+
+    dsp->cur_inst_len++;    /* one more word for the extension word */
+    emu_sub_x(dsp, imm, (dsp->cur_inst >> 3) & 1);
+}
+
+/* Tcc S1,D1 [S2,D2]: when the condition holds, copy S1 into accumulator D1
+ * and, if bit 16 of the opcode is set, also copy address register S2 to D2. */
+static void emu_tcc(dsp_core_t* dsp)
+{
+    uint32_t pair = (dsp->cur_inst>>3) & BITMASK(4);
+    uint32_t src, dst;
+    uint32_t part0, part1, part2;
+
+    if (!emu_calc_cc(dsp, (dsp->cur_inst>>12) & BITMASK(4))) {
+        return;
+    }
+
+    /* registers_tcc maps the 4-bit field to a {source, destination} pair. */
+    src = registers_tcc[pair][0];
+    dst = registers_tcc[pair][1];
+
+    if (src == DSP_REG_A) {
+        part0 = dsp->registers[DSP_REG_A0];
+        part1 = dsp->registers[DSP_REG_A1];
+        part2 = dsp->registers[DSP_REG_A2];
+    } else if (src == DSP_REG_B) {
+        part0 = dsp->registers[DSP_REG_B0];
+        part1 = dsp->registers[DSP_REG_B1];
+        part2 = dsp->registers[DSP_REG_B2];
+    } else {
+        /* Other sources fill the middle part; bit 23 is sign-extended. */
+        part0 = 0;
+        part1 = dsp->registers[src];
+        part2 = (part1 & (1<<23)) ? 0xff : 0x0;
+    }
+
+    /* Any destination other than A is treated as B. */
+    if (dst == DSP_REG_A) {
+        dsp->registers[DSP_REG_A2] = part2;
+        dsp->registers[DSP_REG_A1] = part1;
+        dsp->registers[DSP_REG_A0] = part0;
+    } else {
+        dsp->registers[DSP_REG_B2] = part2;
+        dsp->registers[DSP_REG_B1] = part1;
+        dsp->registers[DSP_REG_B0] = part0;
+    }
+
+    /* Optional S2,D2 transfer between address registers. */
+    if (dsp->cur_inst & (1<<16)) {
+        uint32_t rs = DSP_REG_R0+((dsp->cur_inst>>8) & BITMASK(3));
+        uint32_t rd = DSP_REG_R0+(dsp->cur_inst & BITMASK(3));
+
+        dsp->registers[rd] = dsp->registers[rs];
+    }
+}
+
+static void emu_wait(dsp_core_t* dsp)
+{
+    DPRINTF("Dsp: WAIT instruction\n"); /* WAIT is only traced here */
+}
+
+
diff --git a/hw/xbox/g-lru-cache.c b/hw/xbox/g-lru-cache.c
new file mode 100644
index 0000000000..95b87b4356
--- /dev/null
+++ b/hw/xbox/g-lru-cache.c
@@ -0,0 +1,338 @@
+/* g-lru-cache.c
+ *
+ * Copyright (C) 2009 - Christian Hergert
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * 
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* 
+ * Ideally, you want to use fast_get. This is because we are using a
+ * GStaticRWLock which is indeed slower than a mutex if you have lots of writer
+ * acquisitions. This doesn't make it a true LRU, though, as the oldest
+ * retrieval from storage is the first item evicted.
+ */
+
+#include "g-lru-cache.h"
+
+// #define DEBUG
+
+#define LRU_CACHE_PRIVATE(object)          \
+    (G_TYPE_INSTANCE_GET_PRIVATE((object), \
+    G_TYPE_LRU_CACHE,                      \
+    GLruCachePrivate))
+
+struct _GLruCachePrivate
+{
+    GStaticRWLock   rw_lock;           /* taken around hash_table and list changes */
+    guint           max_size;          /* entry limit; g_lru_cache_init sets 1024 */
+    gboolean        fast_get;          /* TRUE: a hit does not reorder the list */
+    /* Storage plus the recency list of keys (newest = head, oldest = tail). */
+    GHashTable     *hash_table;        /* key -> value */
+    GEqualFunc      key_equal_func;    /* also used to match keys in the list */
+    GCopyFunc       key_copy_func;     /* optional; copies a key before it is stored */
+    GList          *newest;
+    GList          *oldest;            /* next eviction victim */
+    /* Called on a miss to produce the value for a key. */
+    GLookupFunc     retrieve_func;
+    /* Passed to retrieve_func and key_copy_func; destroyed in finalize. */
+    gpointer        user_data;
+    GDestroyNotify  user_destroy_func;
+};
+
+G_DEFINE_TYPE (GLruCache, g_lru_cache, G_TYPE_OBJECT);
+
+static void
+g_lru_cache_finalize (GObject *object)
+{
+    /* GObject finalize: release everything the cache owns, then chain up. */
+    GLruCachePrivate *priv = LRU_CACHE_PRIVATE (object);
+
+    if (priv->user_data && priv->user_destroy_func)
+        priv->user_destroy_func (priv->user_data);
+    priv->user_data = NULL;
+    priv->user_destroy_func = NULL;
+    /* hash_table is only created by g_lru_cache_new(), so it may be NULL. */
+    if (priv->hash_table)
+        g_hash_table_destroy (priv->hash_table);
+    priv->hash_table = NULL;
+    g_list_free (priv->newest); /* frees the links only, not the keys */
+    priv->newest = priv->oldest = NULL;
+    /* Pairs with g_static_rw_lock_init() in g_lru_cache_init(). */
+    g_static_rw_lock_free (&priv->rw_lock);
+    G_OBJECT_CLASS (g_lru_cache_parent_class)->finalize (object);
+}
+
+static void
+g_lru_cache_class_init (GLruCacheClass *klass)
+{
+    /* Hook the finalizer and reserve per-instance private storage. */
+    GObjectClass *gobject_class = G_OBJECT_CLASS (klass);
+
+    g_type_class_add_private (gobject_class, sizeof (GLruCachePrivate));
+    gobject_class->finalize = g_lru_cache_finalize;
+}
+
+static void
+g_lru_cache_init (GLruCache *self)
+{
+    /* Instance defaults: capacity 1024, hits reorder the list (no fast_get). */
+    GLruCachePrivate *priv = self->priv = LRU_CACHE_PRIVATE (self);
+    g_static_rw_lock_init (&priv->rw_lock);
+    priv->fast_get = FALSE;
+    priv->max_size = 1024;
+}
+
+static void
+g_lru_cache_evict_n_oldest_locked (GLruCache *self, gint n)
+{
+    /* Drop up to n entries from the tail of the recency list together with
+     * their hash table entries. Callers in this file hold the writer lock. */
+    GLruCachePrivate *priv = self->priv;
+    gint done;
+
+    for (done = 0; done < n; done++)
+    {
+        GList *tail = priv->oldest;
+
+        if (tail == NULL)
+            return; /* list ran out before n evictions */
+
+        /* Detach the tail; its predecessor (if any) becomes the new tail. */
+        priv->oldest = tail->prev;
+        if (priv->oldest)
+            priv->oldest->next = NULL;
+        if (priv->newest == tail)
+            priv->newest = NULL;
+
+        g_hash_table_remove (priv->hash_table, tail->data);
+        g_list_free1 (tail); /* tail->data is owned by hashtable */
+    }
+#ifdef DEBUG
+    g_assert (g_hash_table_size (priv->hash_table) == g_list_length (priv->newest));
+#endif
+}
+
+GLruCache*
+g_lru_cache_new (GHashFunc      hash_func,
+                 GEqualFunc     key_equal_func,
+                 GCopyFunc      key_copy_func,
+                 GLookupFunc    retrieve_func,
+                 GDestroyNotify key_destroy_func,
+                 GDestroyNotify value_destroy_func,
+                 gpointer       user_data,
+                 GDestroyNotify user_destroy_func)
+{
+    /* Build a cache: the hash table gets the hash/equal/destroy callbacks,
+     * retrieve_func supplies values on a miss, key_copy_func (may be NULL)
+     * copies keys before storage; user_data is passed to both callbacks. */
+    GLruCache        *cache = g_object_new (G_TYPE_LRU_CACHE, NULL);
+    GLruCachePrivate *priv  = cache->priv;
+
+    priv->user_destroy_func = user_destroy_func;
+    priv->user_data         = user_data;
+    priv->retrieve_func     = retrieve_func;
+    priv->key_copy_func     = key_copy_func;
+    priv->key_equal_func    = key_equal_func;
+    priv->hash_table = g_hash_table_new_full (hash_func, key_equal_func,
+                                              key_destroy_func, value_destroy_func);
+    return cache;
+}
+
+void
+g_lru_cache_set_max_size (GLruCache *self, guint max_size)
+{
+    /* Set the capacity limit, evicting oldest entries only as far as needed. */
+    g_return_if_fail (G_IS_LRU_CACHE (self));
+
+    g_static_rw_lock_writer_lock (&(self->priv->rw_lock));
+
+    self->priv->max_size = max_size;
+    /* Trim by the current overshoot, not by the difference between limits:
+     * a sparsely filled cache must not lose entries that still fit. */
+    guint size = g_hash_table_size (self->priv->hash_table);
+    if (size > max_size)
+        g_lru_cache_evict_n_oldest_locked (self, size - max_size);
+    g_static_rw_lock_writer_unlock (&(self->priv->rw_lock));
+}
+
+guint
+g_lru_cache_get_max_size (GLruCache *self)
+{   /* Current capacity limit; G_MAXUINT if self is not a GLruCache. */
+    g_return_val_if_fail (G_IS_LRU_CACHE (self), G_MAXUINT);
+    return self->priv->max_size;
+}
+
+guint
+g_lru_cache_get_size (GLruCache *self)
+{   /* Number of entries in the hash table; G_MAXUINT if self is invalid. */
+    g_return_val_if_fail (G_IS_LRU_CACHE (self), G_MAXUINT);
+    return g_hash_table_size (self->priv->hash_table);
+}
+
+gpointer
+g_lru_cache_get (GLruCache *self, gpointer key)
+{
+    /* Return the value cached for key; on a miss evict the oldest entry if
+     * the table is full, call retrieve_func and cache its non-NULL result
+     * (NULL results are returned but not cached). Unless fast_get is set, a
+     * hit moves the key to the head of the recency list. Returns NULL when
+     * self is not a GLruCache. */
+    g_return_val_if_fail (G_IS_LRU_CACHE (self), NULL);
+
+    GLruCachePrivate *priv = self->priv;
+    gboolean          is_newest;
+    gpointer          value;
+
+    g_static_rw_lock_reader_lock (&priv->rw_lock);
+    value = g_hash_table_lookup (priv->hash_table, key);
+    /* Inspect the list head while the reader lock is still held, so the
+     * decision below never dereferences a list another thread is changing. */
+    is_newest = value != NULL && priv->newest != NULL &&
+                priv->key_equal_func (key, priv->newest->data);
+#ifdef DEBUG
+    g_debug ("%s", value ? "Cache Hit!" : "Cache miss");
+#endif
+    g_static_rw_lock_reader_unlock (&priv->rw_lock);
+
+    if (!value)
+    {
+        g_static_rw_lock_writer_lock (&priv->rw_lock);
+        /* Another thread may have stored the entry while no lock was held;
+         * in that case return its value instead of retrieving again. */
+        value = g_hash_table_lookup (priv->hash_table, key);
+
+        if (!value)
+        {
+            if (g_hash_table_size (priv->hash_table) >= priv->max_size)
+                g_lru_cache_evict_n_oldest_locked (self, 1);
+#ifdef DEBUG
+            g_debug ("Retrieving value from external resource");
+#endif
+            value = priv->retrieve_func (key, priv->user_data);
+
+            if (value)
+            {
+                /* The list must point at the key the hash table holds, not
+                 * at the caller's key, which may not outlive this call. */
+                gpointer stored_key = key;
+
+                if (priv->key_copy_func)
+                    stored_key = priv->key_copy_func (key, priv->user_data);
+
+                g_hash_table_insert (priv->hash_table, stored_key, value);
+                priv->newest = g_list_prepend (priv->newest, stored_key);
+
+                if (priv->oldest == NULL)
+                    priv->oldest = priv->newest;
+            }
+        }
+#ifdef DEBUG
+        else g_debug ("Lost storage race with another thread");
+#endif
+
+        g_static_rw_lock_writer_unlock (&priv->rw_lock);
+    }
+    else if (!priv->fast_get && !is_newest)
+    {
+        /* fast_get skips this step, which makes the cache LRU by storage
+         * time rather than by last use. */
+        GList *link;
+
+        g_static_rw_lock_writer_lock (&priv->rw_lock);
+
+        for (link = priv->newest; link; link = link->next)
+            if (priv->key_equal_func (key, link->data))
+                break;
+
+        /* link is NULL if the entry was evicted meanwhile; a link already at
+         * the head needs no move either. */
+        if (link && link != priv->newest)
+        {
+            if (link == priv->oldest)
+                priv->oldest = link->prev;
+
+            /* Relink the existing node instead of allocating a new one. */
+            priv->newest = g_list_remove_link (priv->newest, link);
+            priv->newest = g_list_concat (link, priv->newest);
+        }
+
+        g_static_rw_lock_writer_unlock (&priv->rw_lock);
+    }
+
+    return value;
+}
+
+void
+g_lru_cache_evict (GLruCache *self, gpointer key)
+{
+    /* Remove key and its value from the cache, if present. */
+    g_return_if_fail (G_IS_LRU_CACHE (self));
+
+    GLruCachePrivate *priv  = self->priv;
+    GEqualFunc        equal = priv->key_equal_func;
+    GList            *link;
+    g_static_rw_lock_writer_lock (&priv->rw_lock);
+    /* Match on the key held by the tail link (not on the link itself), and
+     * only when there is a tail at all. */
+    if (priv->oldest && equal (key, priv->oldest->data))
+    {
+        g_lru_cache_evict_n_oldest_locked (self, 1);
+    }
+    else
+    {
+        for (link = priv->newest; link; link = link->next)
+        {
+            if (!equal (key, link->data))
+                continue;
+            /* Free just the node; link->data is owned by hashtable. */
+            priv->newest = g_list_remove_link (priv->newest, link);
+            g_list_free1 (link);
+            break;
+        }
+        g_hash_table_remove (priv->hash_table, key);
+    }
+    g_static_rw_lock_writer_unlock (&priv->rw_lock);
+}
+
+void
+g_lru_cache_clear (GLruCache *self)
+{
+    /* Empty the cache: drop every hash table entry and the recency list. */
+    g_return_if_fail (G_IS_LRU_CACHE (self));
+
+    GLruCachePrivate *priv = self->priv;
+
+    g_static_rw_lock_writer_lock (&priv->rw_lock);
+    g_hash_table_remove_all (priv->hash_table);
+    g_list_free (priv->newest);
+    priv->newest = priv->oldest = NULL;
+
+    g_static_rw_lock_writer_unlock (&priv->rw_lock);
+}
+
+void
+g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get)
+{   /* Stores the flag without taking rw_lock. */
+    g_return_if_fail (G_IS_LRU_CACHE (self));
+    self->priv->fast_get = fast_get;
+}
+
+gboolean
+g_lru_cache_get_fast_get (GLruCache *self)
+{   /* FALSE is also returned when self is not a GLruCache. */
+    g_return_val_if_fail (G_IS_LRU_CACHE (self), FALSE);
+    return self->priv->fast_get;
+}
+
diff --git a/hw/xbox/g-lru-cache.h b/hw/xbox/g-lru-cache.h
new file mode 100644
index 0000000000..f55b22ebab
--- /dev/null
+++ b/hw/xbox/g-lru-cache.h
@@ -0,0 +1,80 @@
+/* g-lru-cache.h
+ *
+ * Copyright (C) 2009 - Christian Hergert
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * 
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifndef __G_LRU_CACHE_H__
+#define __G_LRU_CACHE_H__
+
+#include <glib.h>
+#include <glib-object.h>
+
+G_BEGIN_DECLS
+
+#define G_TYPE_LRU_CACHE        (g_lru_cache_get_type ()) /* GType of GLruCache; the macros below are its cast/check helpers */
+#define G_LRU_CACHE(obj)        (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache))
+#define G_LRU_CACHE_CONST(obj)      (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache const))
+#define G_LRU_CACHE_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), G_TYPE_LRU_CACHE, GLruCacheClass))
+#define G_IS_LRU_CACHE(obj)     (G_TYPE_CHECK_INSTANCE_TYPE ((obj), G_TYPE_LRU_CACHE))
+#define G_IS_LRU_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), G_TYPE_LRU_CACHE))
+#define G_LRU_CACHE_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), G_TYPE_LRU_CACHE, GLruCacheClass))
+#define G_LOOKUP_FUNC(func)             ((GLookupFunc) (func)) /* casts the whole argument expression */
+
+typedef struct _GLruCache       GLruCache;
+typedef struct _GLruCacheClass  GLruCacheClass;
+typedef struct _GLruCachePrivate    GLruCachePrivate;
+
+typedef gpointer (*GLookupFunc) (gpointer key, gpointer user_data); /* miss handler: g_lru_cache_get calls it to produce the value for key */
+
+struct _GLruCache
+{
+    GObject parent;
+    /* Private state; assigned in g_lru_cache_init, defined in g-lru-cache.c. */
+    GLruCachePrivate *priv;
+};
+
+struct _GLruCacheClass
+{
+    GObjectClass parent_class; /* the class adds no members of its own */
+};
+
+GType      g_lru_cache_get_type     (void) G_GNUC_CONST;
+/* Create a cache; retrieve_func supplies values on a miss, key_copy_func may be NULL. */
+GLruCache* g_lru_cache_new          (GHashFunc      hash_func,
+                                     GEqualFunc     key_equal_func,
+                                     GCopyFunc      key_copy_func,
+                                     GLookupFunc    retrieve_func,
+                                     GDestroyNotify key_destroy_func,
+                                     GDestroyNotify value_destroy_func,
+                                     gpointer       user_data,
+                                     GDestroyNotify user_destroy_func);
+/* Capacity limit (1024 until changed). */
+void       g_lru_cache_set_max_size (GLruCache *self, guint max_size);
+guint      g_lru_cache_get_max_size (GLruCache *self);
+/* Number of entries currently stored in the hash table. */
+guint      g_lru_cache_get_size     (GLruCache *self);
+/* Lookup (retrieving on a miss), removal of one key, removal of all entries. */
+gpointer   g_lru_cache_get          (GLruCache *self, gpointer key);
+void       g_lru_cache_evict        (GLruCache *self, gpointer key);
+void       g_lru_cache_clear        (GLruCache *self);
+/* fast_get: when TRUE, a hit does not move the key to the front of the list. */
+gboolean   g_lru_cache_get_fast_get (GLruCache *self);
+void       g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get);
+
+G_END_DECLS
+
+#endif /* __G_LRU_CACHE_H__ */
diff --git a/hw/xbox/lpc47m157.c b/hw/xbox/lpc47m157.c
new file mode 100644
index 0000000000..209aa79490
--- /dev/null
+++ b/hw/xbox/lpc47m157.c
@@ -0,0 +1,247 @@
+/*
+ * QEMU SMSC LPC47M157 (Super I/O)
+ *
+ * Copyright (c) 2013 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/isa/isa.h"
+#include "hw/char/serial.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/char.h"
+#include "qapi/qmp/qerror.h"
+
+#define MAX_DEVICE 0xC /* number of logical device slots in device_regs */
+#define DEVICE_FDD              0x0
+#define DEVICE_PARALLEL_PORT    0x3
+#define DEVICE_SERIAL_PORT_1    0x4
+#define DEVICE_SERIAL_PORT_2    0x5
+#define DEVICE_KEYBOARD         0x7
+#define DEVICE_GAME_PORT        0x9
+#define DEVICE_PME              0xA
+#define DEVICE_MPU_401          0xB
+/* values written to the index port to enter/leave configuration mode */
+#define ENTER_CONFIG_KEY    0x55
+#define EXIT_CONFIG_KEY     0xAA
+/* register indices are 8 bits wide, so each device needs 0x100 slots */
+#define MAX_CONFIG_REG  0x30
+#define MAX_DEVICE_REGS 0x100
+/* global configuration registers */
+#define CONFIG_DEVICE_NUMBER    0x07
+#define CONFIG_PORT_LOW         0x26
+#define CONFIG_PORT_HIGH        0x27
+/* per-device registers */
+#define CONFIG_DEVICE_ACTIVATE              0x30
+#define CONFIG_DEVICE_BASE_ADDRESS_HIGH     0x60
+#define CONFIG_DEVICE_BASE_ADDRESS_LOW      0x61
+#define CONFIG_DEVICE_INETRRUPT             0x70
+/* enables the printf tracing in the port handlers */
+#define DEBUG_LPC47M157
+
+typedef struct LPC47M157State {
+    ISADevice dev;
+    /* two-byte index/data port window, created in lpc47m157_realize */
+    MemoryRegion io;
+    /* toggled by ENTER_CONFIG_KEY / EXIT_CONFIG_KEY writes to the index port */
+    bool configuration_mode;
+    uint32_t selected_reg; /* register index last written to the index port */
+    /* indices below MAX_CONFIG_REG are global; the rest are per device */
+    uint8_t config_regs[MAX_CONFIG_REG];
+    uint8_t device_regs[MAX_DEVICE][MAX_DEVICE_REGS];
+    /* the two serial cores; active is set once update_devices has mapped one */
+    struct {
+        bool active;
+        SerialState state;
+    } serial[2];
+} LPC47M157State;
+
+#define LPC47M157_DEVICE(obj) \
+    OBJECT_CHECK(LPC47M157State, (obj), "lpc47m157")
+
+static void update_devices(LPC47M157State *s)
+{
+    /* Register I/O and IRQ for each newly activated serial port (one-way). */
+    ISADevice *isa = ISA_DEVICE(s);
+    int port;
+    for (port = 0; port < 2; port++) {
+        const uint8_t *regs = s->device_regs[DEVICE_SERIAL_PORT_1 + port];
+        SerialState *uart = &s->serial[port].state;
+        uint32_t base, line;
+
+        if (s->serial[port].active || !regs[CONFIG_DEVICE_ACTIVATE]) {
+            continue;
+        }
+        /* I/O base is a 16-bit value split over two registers. */
+        base = (regs[CONFIG_DEVICE_BASE_ADDRESS_HIGH] << 8)
+                | regs[CONFIG_DEVICE_BASE_ADDRESS_LOW];
+        line = regs[CONFIG_DEVICE_INETRRUPT];
+        if (line != 0) { /* interrupt register 0 leaves the IRQ unconnected */
+            isa_init_irq(isa, &uart->irq, line);
+        }
+        isa_register_ioport(isa, &uart->io, base);
+        s->serial[port].active = true;
+    }
+}
+
+static void lpc47m157_io_write(void *opaque, hwaddr addr, uint64_t val,
+                               unsigned int size)
+{
+    /* Port write handler. Offset 0 is the index port (config keys or register
+     * select), offset 1 the data port for the selected register. Guest
+     * controlled values are range-checked instead of asserted or trusted. */
+    LPC47M157State *s = opaque;
+
+#ifdef DEBUG_LPC47M157
+    printf("lpc47m157 io write 0x%llx = 0x%llx\n",
+           (unsigned long long)addr, (unsigned long long)val);
+#endif
+
+    if (addr == 0) { //INDEX_PORT
+        /* A key that does not change the mode is treated as a register index. */
+        if (val == ENTER_CONFIG_KEY && !s->configuration_mode) {
+            s->configuration_mode = true;
+        } else if (val == EXIT_CONFIG_KEY && s->configuration_mode) {
+            s->configuration_mode = false;
+            update_devices(s);
+        } else {
+            s->selected_reg = val;
+        }
+    } else if (addr == 1) { //DATA_PORT
+        uint32_t devnum = s->config_regs[CONFIG_DEVICE_NUMBER];
+        if (s->selected_reg < MAX_CONFIG_REG) {
+            /* global configuration register */
+            s->config_regs[s->selected_reg] = val;
+        } else if (devnum < MAX_DEVICE && s->selected_reg < MAX_DEVICE_REGS) {
+            /* device register; out-of-range selections are silently dropped */
+            s->device_regs[devnum][s->selected_reg] = val;
+#ifdef DEBUG_LPC47M157
+            printf("lpc47m157 dev %x . %x = %llx\n", devnum, s->selected_reg, (unsigned long long)val);
+#endif
+        }
+    } else {
+        assert(false);
+    }
+}
+
+static uint64_t lpc47m157_io_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    /* Port read handler: the index port (offset 0) reads as 0, the data port
+     * (offset 1) returns the selected register. Out-of-range selections read
+     * as 0 instead of asserting or indexing past the register arrays. */
+    LPC47M157State *s = opaque;
+    uint32_t val = 0;
+
+    if (addr == 1) { //DATA_PORT
+        uint32_t devnum = s->config_regs[CONFIG_DEVICE_NUMBER];
+        if (s->selected_reg < MAX_CONFIG_REG) {
+            val = s->config_regs[s->selected_reg];
+        } else if (devnum < MAX_DEVICE && s->selected_reg < MAX_DEVICE_REGS) {
+            val = s->device_regs[devnum][s->selected_reg];
+        }
+    } else if (addr != 0) { //neither INDEX_PORT nor DATA_PORT
+        assert(false);
+    }
+
+#ifdef DEBUG_LPC47M157
+    printf("lpc47m157 io read 0x%llx -> 0x%x\n", (unsigned long long)addr, val);
+#endif
+
+    return val;
+}
+
+static const MemoryRegionOps lpc47m157_io_ops = { /* handlers for the index/data port pair */
+    .read  = lpc47m157_io_read,
+    .write = lpc47m157_io_write,
+    .valid = { /* single-byte accesses only */
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+};
+
+static void lpc47m157_realize(DeviceState *dev, Error **errp)
+{
+    /* Realize: map the two-byte index/data window at 0x2e and prepare both
+     * serial cores; update_devices() attaches them to ISA I/O and IRQs later.
+     * A serial core failure is reported to the caller through errp. */
+    LPC47M157State *s = LPC47M157_DEVICE(dev);
+    ISADevice *isa = ISA_DEVICE(dev);
+
+    const uint32_t iobase = 0x2e; //0x4e if SYSOPT pin, make it a property 
+    s->config_regs[CONFIG_PORT_LOW] = iobase & 0xFF;
+    s->config_regs[CONFIG_PORT_HIGH] = iobase >> 8;
+
+    memory_region_init_io(&s->io, OBJECT(s),
+                          &lpc47m157_io_ops, s, "lpc47m157", 2);
+    isa_register_ioport(isa, &s->io, iobase);
+
+    /* init serial cores */
+    int i;
+    for (i=0; i<2; i++) {
+        CharDriverState *chr = serial_hds[i];
+        if (chr == NULL) {
+            char name[5];
+            snprintf(name, sizeof(name), "ser%d", i);
+            chr = qemu_chr_new(name, "null", NULL);
+        }
+        SerialState *ss = &s->serial[i].state;
+        ss->chr = chr;
+        ss->baudbase = 115200;
+
+        Error *err = NULL;
+        serial_realize_core(ss, &err);
+        if (err != NULL) {
+            error_propagate(errp, err); /* takes ownership of err */
+            return;
+        }
+        memory_region_init_io(&ss->io, OBJECT(s),
+                              &serial_io_ops, ss, "serial", 8);
+    }
+}
+
+static const VMStateDescription vmstate_lpc47m157= { /* migration description */
+    .name = "lpc47m157",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) { /* lists only the two serial cores; the register arrays and mode flag are not included */
+        VMSTATE_STRUCT(serial[0].state, LPC47M157State, 0,
+                       vmstate_serial, SerialState),
+        VMSTATE_STRUCT(serial[1].state, LPC47M157State, 0,
+                       vmstate_serial, SerialState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void lpc47m157_class_init(ObjectClass *klass, void *data)
+{
+    /* Install the realize hook and the migration description. */
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->vmsd = &vmstate_lpc47m157;
+    device_class->realize = lpc47m157_realize;
+    /* TODO: no reset handler or properties yet (cf. the pc87312 device). */
+}
+
+static const TypeInfo lpc47m157_type_info = { /* QOM type "lpc47m157", derived from the ISA device type */
+    .name          = "lpc47m157",
+    .parent        = TYPE_ISA_DEVICE,
+    .instance_size = sizeof(LPC47M157State),
+    .class_init    = lpc47m157_class_init,
+};
+
+static void lpc47m157_register_types(void)
+{
+    type_register_static(&lpc47m157_type_info);
+}
+/* hand lpc47m157_register_types to QEMU's type_init hook */
+type_init(lpc47m157_register_types)
\ No newline at end of file
diff --git a/hw/xbox/mcpx_aci.c b/hw/xbox/mcpx_aci.c
new file mode 100644
index 0000000000..850182824c
--- /dev/null
+++ b/hw/xbox/mcpx_aci.c
@@ -0,0 +1,99 @@
+/*
+ * QEMU MCPX Audio Codec Interface implementation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+#include "hw/audio/ac97_int.h"
+
+typedef struct MCPXACIState {
+    PCIDevice dev;
+    /* state handed to ac97_common_init; opaque for both register regions */
+    AC97LinkState ac97;
+    /* register regions: io_nam is 0x100 bytes, io_nabm is 0x80 bytes */
+    /* (both are placed inside mmio by mcpx_aci_initfn) */
+    MemoryRegion io_nam, io_nabm;
+    /* 0x1000-byte container registered as BAR 2 */
+    MemoryRegion mmio;
+    MemoryRegion nam_mmio, nabm_mmio; /* referenced only by commented-out alias code */
+} MCPXACIState;
+
+
+#define MCPX_ACI_DEVICE(obj) \
+    OBJECT_CHECK(MCPXACIState, (obj), "mcpx-aci")
+
+
+static int mcpx_aci_initfn(PCIDevice *dev)
+{
+    /* PCI init: expose the AC97 NAM and NABM register blocks through a
+     * single memory BAR (BAR 2). Always returns 0. */
+    MCPXACIState *aci = MCPX_ACI_DEVICE(dev);
+    Object *owner = OBJECT(dev);
+
+    dev->config[PCI_INTERRUPT_PIN] = 0x01;
+
+    /* Register blocks; both use the shared AC97 link state as opaque. */
+    memory_region_init_io(&aci->io_nam, owner, &ac97_io_nam_ops, &aci->ac97,
+                          "mcpx-aci-nam", 0x100);
+    memory_region_init_io(&aci->io_nabm, owner, &ac97_io_nabm_ops, &aci->ac97,
+                          "mcpx-aci-nabm", 0x80);
+
+    /* 0x1000-byte container: NAM at offset 0x0, NABM at offset 0x100.
+     *
+     * Alternative kept as a note from the original source: register the two
+     * blocks as I/O BARs 0 and 1 and map aliases of them (nam_mmio and
+     * nabm_mmio) into the container. With the blocks added directly, as
+     * done here, nam_mmio and nabm_mmio stay unused. */
+    memory_region_init(&aci->mmio, owner, "mcpx-aci-mmio", 0x1000);
+    memory_region_add_subregion(&aci->mmio, 0x0, &aci->io_nam);
+    memory_region_add_subregion(&aci->mmio, 0x100, &aci->io_nabm);
+
+    pci_register_bar(&aci->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &aci->mmio);
+
+    /* Hand the device and its address space to the common AC97 code. */
+    ac97_common_init(&aci->ac97, &aci->dev, pci_get_address_space(&aci->dev));
+
+    return 0;
+}
+
+static void mcpx_aci_class_init(ObjectClass *klass, void *data)
+{
+    /* Fill in the PCI identity and the init callback for "mcpx-aci". */
+    PCIDeviceClass *pci = PCI_DEVICE_CLASS(klass);
+    DeviceClass *device = DEVICE_CLASS(klass);
+
+    device->desc = "MCPX Audio Codec Interface";
+    pci->init = mcpx_aci_initfn;
+    pci->class_id = PCI_CLASS_MULTIMEDIA_AUDIO;
+    pci->revision = 210;
+    pci->device_id = PCI_DEVICE_ID_NVIDIA_MCPX_ACI;
+    pci->vendor_id = PCI_VENDOR_ID_NVIDIA;
+}
+
+static const TypeInfo mcpx_aci_info = { /* QOM type "mcpx-aci", derived from the PCI device type */
+    .name          = "mcpx-aci",
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(MCPXACIState),
+    .class_init    = mcpx_aci_class_init,
+};
+
+static void mcpx_aci_register(void)
+{   /* passes mcpx_aci_info to the QOM type registry */
+    type_register_static(&mcpx_aci_info);
+}
+type_init(mcpx_aci_register);
\ No newline at end of file
diff --git a/hw/xbox/mcpx_apu.c b/hw/xbox/mcpx_apu.c
new file mode 100644
index 0000000000..5117ad11db
--- /dev/null
+++ b/hw/xbox/mcpx_apu.c
@@ -0,0 +1,651 @@
+/*
+ * QEMU MCPX Audio Processing Unit implementation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+
+#include "hw/xbox/dsp/dsp.h"
+
+#define NV_PAPU_ISTS                                     0x00001000
+#   define NV_PAPU_ISTS_GINTSTS                               (1 << 0)
+#   define NV_PAPU_ISTS_FETINTSTS                             (1 << 4)
+#define NV_PAPU_IEN                                      0x00001004
+#define NV_PAPU_FECTL                                    0x00001100
+#   define NV_PAPU_FECTL_FEMETHMODE                         0x000000E0
+#       define NV_PAPU_FECTL_FEMETHMODE_FREE_RUNNING            0x00000000
+#       define NV_PAPU_FECTL_FEMETHMODE_HALTED                  0x00000080
+#       define NV_PAPU_FECTL_FEMETHMODE_TRAPPED                 0x000000E0
+#   define NV_PAPU_FECTL_FETRAPREASON                       0x00000F00
+#       define NV_PAPU_FECTL_FETRAPREASON_REQUESTED             0x00000F00
+#define NV_PAPU_FECV                                     0x00001110
+#define NV_PAPU_FEAV                                     0x00001118
+#   define NV_PAPU_FEAV_VALUE                               0x0000FFFF
+#   define NV_PAPU_FEAV_LST                                 0x00030000
+#define NV_PAPU_FEDECMETH                                0x00001300
+#define NV_PAPU_FEDECPARAM                               0x00001304
+#define NV_PAPU_FEMEMADDR                                0x00001324
+#define NV_PAPU_FEMEMDATA                                0x00001334
+#define NV_PAPU_FETFORCE0                                0x00001500
+#define NV_PAPU_FETFORCE1                                0x00001504
+#   define NV_PAPU_FETFORCE1_SE2FE_IDLE_VOICE               (1 << 15)
+#define NV_PAPU_SECTL                                    0x00002000
+#   define NV_PAPU_SECTL_XCNTMODE                           0x00000018
+#       define NV_PAPU_SECTL_XCNTMODE_OFF                       0
+#define NV_PAPU_XGSCNT                                   0x0000200C
+#define NV_PAPU_VPVADDR                                  0x0000202C
+#define NV_PAPU_GPSADDR                                  0x00002040
+#define NV_PAPU_EPSADDR                                  0x00002048
+#define NV_PAPU_TVL2D                                    0x00002054
+#define NV_PAPU_CVL2D                                    0x00002058
+#define NV_PAPU_NVL2D                                    0x0000205C
+#define NV_PAPU_TVL3D                                    0x00002060
+#define NV_PAPU_CVL3D                                    0x00002064
+#define NV_PAPU_NVL3D                                    0x00002068
+#define NV_PAPU_TVLMP                                    0x0000206C
+#define NV_PAPU_CVLMP                                    0x00002070
+#define NV_PAPU_NVLMP                                    0x00002074
+#define NV_PAPU_GPSMAXSGE                                0x000020D4
+#define NV_PAPU_EPSMAXSGE                                0x000020DC
+
+#define NV_PAPU_GPRST                                    0x0000FFFC
+#define NV_PAPU_GPRST_GPRST                                 (1 << 0)
+#define NV_PAPU_GPRST_GPDSPRST                              (1 << 1)
+#define NV_PAPU_GPRST_GPNMI                                 (1 << 2)
+#define NV_PAPU_GPRST_GPABORT                               (1 << 3)
+
+#define NV_PAPU_EPXMEM                                   0x00000000
+#define NV_PAPU_EPYMEM                                   0x00006000
+#define NV_PAPU_EPPMEM                                   0x0000A000
+#define NV_PAPU_EPRST                                    0x0000FFFC
+
+static const struct {
+    hwaddr top, current, next;
+} voice_list_regs[] = {
+    {NV_PAPU_TVL2D, NV_PAPU_CVL2D, NV_PAPU_NVL2D}, //2D
+    {NV_PAPU_TVL3D, NV_PAPU_CVL3D, NV_PAPU_NVL3D}, //3D
+    {NV_PAPU_TVLMP, NV_PAPU_CVLMP, NV_PAPU_NVLMP}, //MP
+};
+
+
+/* audio processor object / front-end messages */
+#define NV1BA0_PIO_FREE                                  0x00000010
+#define NV1BA0_PIO_SET_ANTECEDENT_VOICE                  0x00000120
+#   define NV1BA0_PIO_SET_ANTECEDENT_VOICE_HANDLE           0x0000FFFF
+#   define NV1BA0_PIO_SET_ANTECEDENT_VOICE_LIST             0x00030000
+#       define NV1BA0_PIO_SET_ANTECEDENT_VOICE_LIST_INHERIT     0
+#       define NV1BA0_PIO_SET_ANTECEDENT_VOICE_LIST_2D_TOP      1
+#       define NV1BA0_PIO_SET_ANTECEDENT_VOICE_LIST_3D_TOP      2
+#       define NV1BA0_PIO_SET_ANTECEDENT_VOICE_LIST_MP_TOP      3
+#define NV1BA0_PIO_VOICE_ON                              0x00000124
+#   define NV1BA0_PIO_VOICE_ON_HANDLE                       0x0000FFFF
+#define NV1BA0_PIO_VOICE_OFF                             0x00000128
+#define NV1BA0_PIO_VOICE_PAUSE                           0x00000140
+#   define NV1BA0_PIO_VOICE_PAUSE_HANDLE                    0x0000FFFF
+#   define NV1BA0_PIO_VOICE_PAUSE_ACTION                    (1 << 18)
+#define NV1BA0_PIO_SET_CURRENT_VOICE                     0x000002F8
+
+#define SE2FE_IDLE_VOICE                                 0x00008000
+
+
+/* voice structure */
+#define NV_PAVS_SIZE                                     0x00000080
+#define NV_PAVS_VOICE_PAR_STATE                          0x00000054
+#   define NV_PAVS_VOICE_PAR_STATE_PAUSED                   (1 << 18)
+#   define NV_PAVS_VOICE_PAR_STATE_ACTIVE_VOICE             (1 << 21)
+#define NV_PAVS_VOICE_TAR_PITCH_LINK                     0x0000007c
+#   define NV_PAVS_VOICE_TAR_PITCH_LINK_NEXT_VOICE_HANDLE   0x0000FFFF
+
+
+
+#define MCPX_HW_MAX_VOICES 256
+
+
+#define GET_MASK(v, mask) (((v) & (mask)) >> (ffs(mask)-1))
+
+#define SET_MASK(v, mask, val)                                       \
+    do {                                                             \
+        (v) &= ~(mask);                                              \
+        (v) |= ((val) << (ffs(mask)-1)) & (mask);                    \
+    } while (0)
+
+
+
+// #define MCPX_DEBUG
+#ifdef MCPX_DEBUG
+# define MCPX_DPRINTF(format, ...)       printf(format, ## __VA_ARGS__)
+#else
+# define MCPX_DPRINTF(format, ...)       do { } while (0)
+#endif
+
+
+/* Device state of the MCPX APU PCI function. */
+typedef struct MCPXAPUState {
+    PCIDevice dev;
+    MemoryRegion mmio; /* registered as BAR 0; contains the regions below */
+
+    /* Setup Engine */
+    struct {
+        QEMUTimer *frame_timer; /* fires se_frame() */
+    } se;
+
+    /* Voice Processor */
+    struct {
+        MemoryRegion mmio;
+    } vp;
+
+    /* Global Processor */
+    struct {
+        MemoryRegion mmio;
+        DSPState *dsp;
+        uint32_t regs[0x10000]; /* indexed by byte offset into gp.mmio */
+    } gp;
+
+    /* Encode Processor */
+    struct {
+        MemoryRegion mmio;
+        DSPState *dsp;
+        uint32_t regs[0x10000]; /* indexed by byte offset into ep.mmio */
+    } ep;
+
+    /* main register file, indexed by byte offset into mmio */
+    uint32_t regs[0x20000];
+} MCPXAPUState;
+
+
+#define MCPX_APU_DEVICE(obj) \
+    OBJECT_CHECK(MCPXAPUState, (obj), "mcpx-apu")
+
+/* Fetch one bit-field from a voice structure in guest memory: loads the
+ * 32-bit word at `offset` inside voice `voice_handle` and extracts `mask`. */
+static uint32_t voice_get_mask(MCPXAPUState *d, unsigned int voice_handle,
+                               hwaddr offset, uint32_t mask)
+{
+    assert(voice_handle < 0xFFFF);
+    hwaddr voice = d->regs[NV_PAPU_VPVADDR] + voice_handle * NV_PAVS_SIZE;
+    uint32_t word = ldl_le_phys(voice + offset);
+    return GET_MASK(word, mask);
+}
+/* Update one bit-field of a voice structure in guest memory: read the 32-bit
+ * word at `offset` inside voice `voice_handle`, replace the bits selected by
+ * `mask` with `val` (shifted into place), and write the word back. */
+static void voice_set_mask(MCPXAPUState *d, unsigned int voice_handle,
+                           hwaddr offset, uint32_t mask, uint32_t val)
+{
+    assert(voice_handle < 0xFFFF);
+    hwaddr field = d->regs[NV_PAPU_VPVADDR] + voice_handle * NV_PAVS_SIZE
+                   + offset;
+    uint32_t word = ldl_le_phys(field);
+    SET_MASK(word, mask, val);
+    stl_le_phys(field, word);
+}
+
+
+
+/* Recompute the global interrupt status bit and drive the PCI IRQ line. */
+static void update_irq(MCPXAPUState *d)
+{
+    uint32_t enabled = d->regs[NV_PAPU_IEN];
+    uint32_t pending = d->regs[NV_PAPU_ISTS] & ~NV_PAPU_ISTS_GINTSTS & enabled;
+    if (!(enabled & NV_PAPU_ISTS_GINTSTS) || !pending) {
+        d->regs[NV_PAPU_ISTS] &= ~NV_PAPU_ISTS_GINTSTS;
+        MCPX_DPRINTF("mcpx irq lower\n");
+        pci_irq_deassert(&d->dev);
+        return;
+    }
+    d->regs[NV_PAPU_ISTS] |= NV_PAPU_ISTS_GINTSTS;
+    MCPX_DPRINTF("mcpx irq raise\n");
+    pci_irq_assert(&d->dev);
+}
+
+/* MMIO read handler for the main APU register window.  XGSCNT is derived
+ * from the virtual clock; any other offset below 0x20000 returns the value
+ * held in regs[] (indexed by byte offset) and higher offsets read as zero. */
+static uint64_t mcpx_apu_read(void *opaque,
+                              hwaddr addr, unsigned int size)
+{
+    MCPXAPUState *d = opaque;
+    uint64_t r;
+
+    if (addr == NV_PAPU_XGSCNT) {
+        r = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) / 100; //???
+    } else if (addr < 0x20000) {
+        r = d->regs[addr];
+    } else {
+        r = 0;
+    }
+
+    MCPX_DPRINTF("mcpx apu: read [0x%llx] -> 0x%llx\n", addr, r);
+    return r;
+}
+/* MMIO write handler for the main APU register window (see regs[]). */
+static void mcpx_apu_write(void *opaque, hwaddr addr,
+                           uint64_t val, unsigned int size)
+{
+    MCPXAPUState *d = opaque;
+    MCPX_DPRINTF("mcpx apu: [0x%llx] = 0x%llx\n", addr, val);
+
+    switch (addr) {
+    case NV_PAPU_ISTS:
+        /* the bits of the interrupts to clear are written */
+        d->regs[NV_PAPU_ISTS] &= ~val;
+        update_irq(d);
+        break;
+    case NV_PAPU_SECTL: /* XCNTMODE off stops the frame timer, else re-arms */
+        if ( ((val & NV_PAPU_SECTL_XCNTMODE) >> 3)
+                == NV_PAPU_SECTL_XCNTMODE_OFF) {
+            timer_del(d->se.frame_timer);
+        } else {
+            timer_mod(d->se.frame_timer,
+                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10);
+        }
+        d->regs[addr] = val;
+        break;
+    case NV_PAPU_FEMEMDATA:
+        /* 'magic write'
+         * This value is expected to be written to FEMEMADDR on completion of
+         * something to do with notifies. Just do it now :/ */
+        stl_le_phys(d->regs[NV_PAPU_FEMEMADDR], val);
+        d->regs[addr] = val;
+        break;
+    default: /* plain storage; offsets at or above 0x20000 are dropped */
+        if (addr < 0x20000) {
+            d->regs[addr] = val;
+        }
+        break;
+    }
+}
+static const MemoryRegionOps mcpx_apu_mmio_ops = {
+    .read = mcpx_apu_read,
+    .write = mcpx_apu_write,
+};
+
+
+/* Execute one front-end method immediately.  The method and argument are
+ * latched in FEDECMETH/FEDECPARAM before dispatch; unknown methods abort. */
+static void fe_method(MCPXAPUState *d, uint32_t method, uint32_t argument)
+{
+    MCPX_DPRINTF("mcpx fe_method 0x%x 0x%x\n", method, argument);
+    //assert((d->regs[NV_PAPU_FECTL] & NV_PAPU_FECTL_FEMETHMODE) == 0);
+
+    d->regs[NV_PAPU_FEDECMETH] = method;
+    d->regs[NV_PAPU_FEDECPARAM] = argument;
+    unsigned int selected_handle, list;
+    switch (method) {
+    case NV1BA0_PIO_SET_ANTECEDENT_VOICE:
+        d->regs[NV_PAPU_FEAV] = argument;
+        break;
+    case NV1BA0_PIO_VOICE_ON:
+        selected_handle = argument & NV1BA0_PIO_VOICE_ON_HANDLE;
+        list = GET_MASK(d->regs[NV_PAPU_FEAV], NV_PAPU_FEAV_LST);
+        if (list != NV1BA0_PIO_SET_ANTECEDENT_VOICE_LIST_INHERIT) {
+            /* voice is added to the top of the selected list */
+            unsigned int top_reg = voice_list_regs[list-1].top;
+            voice_set_mask(d, selected_handle,
+                NV_PAVS_VOICE_TAR_PITCH_LINK,
+                NV_PAVS_VOICE_TAR_PITCH_LINK_NEXT_VOICE_HANDLE,
+                d->regs[top_reg]);
+            d->regs[top_reg] = selected_handle;
+        } else {
+            unsigned int antecedent_voice =
+                GET_MASK(d->regs[NV_PAPU_FEAV], NV_PAPU_FEAV_VALUE);
+            /* voice is added after the antecedent voice */
+            assert(antecedent_voice != 0xFFFF);
+
+            uint32_t next_handle = voice_get_mask(d, antecedent_voice,
+                NV_PAVS_VOICE_TAR_PITCH_LINK,
+                NV_PAVS_VOICE_TAR_PITCH_LINK_NEXT_VOICE_HANDLE);
+            voice_set_mask(d, selected_handle,
+                NV_PAVS_VOICE_TAR_PITCH_LINK,
+                NV_PAVS_VOICE_TAR_PITCH_LINK_NEXT_VOICE_HANDLE,
+                next_handle);
+            voice_set_mask(d, antecedent_voice,
+                NV_PAVS_VOICE_TAR_PITCH_LINK,
+                NV_PAVS_VOICE_TAR_PITCH_LINK_NEXT_VOICE_HANDLE,
+                selected_handle);
+        }
+        /* a voice becomes active whichever way it was linked into a list */
+        voice_set_mask(d, selected_handle,
+                NV_PAVS_VOICE_PAR_STATE,
+                NV_PAVS_VOICE_PAR_STATE_ACTIVE_VOICE,
+                1);
+        break;
+    case NV1BA0_PIO_VOICE_OFF:
+        /* the handle is the low 16 bits, as for VOICE_ON and VOICE_PAUSE */
+        voice_set_mask(d, argument & NV1BA0_PIO_VOICE_ON_HANDLE,
+                NV_PAVS_VOICE_PAR_STATE,
+                NV_PAVS_VOICE_PAR_STATE_ACTIVE_VOICE,
+                0);
+        break;
+    case NV1BA0_PIO_VOICE_PAUSE:
+        voice_set_mask(d, argument & NV1BA0_PIO_VOICE_PAUSE_HANDLE,
+                NV_PAVS_VOICE_PAR_STATE,
+                NV_PAVS_VOICE_PAR_STATE_PAUSED,
+                (argument & NV1BA0_PIO_VOICE_PAUSE_ACTION) != 0);
+        break;
+    case NV1BA0_PIO_SET_CURRENT_VOICE:
+        d->regs[NV_PAPU_FECV] = argument;
+        break;
+    case SE2FE_IDLE_VOICE:
+        if (d->regs[NV_PAPU_FETFORCE1] & NV_PAPU_FETFORCE1_SE2FE_IDLE_VOICE) {
+            d->regs[NV_PAPU_FECTL] &= ~NV_PAPU_FECTL_FEMETHMODE;
+            d->regs[NV_PAPU_FECTL] |= NV_PAPU_FECTL_FEMETHMODE_TRAPPED;
+
+            d->regs[NV_PAPU_FECTL] &= ~NV_PAPU_FECTL_FETRAPREASON;
+            d->regs[NV_PAPU_FECTL] |= NV_PAPU_FECTL_FETRAPREASON_REQUESTED;
+
+            d->regs[NV_PAPU_ISTS] |= NV_PAPU_ISTS_FETINTSTS;
+            update_irq(d);
+        } else {
+            assert(false);
+        }
+        break;
+    default:
+        assert(false);
+        break;
+    }
+}
+
+
+/* Voice Processor PIO read.  Only NV1BA0_PIO_FREE is modelled: the command
+ * queue is not simulated, so that register always returns 0x80.
+ * Every other offset reads as zero. */
+static uint64_t vp_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    MCPX_DPRINTF("mcpx apu VP: read [0x%llx]\n", addr);
+
+    if (addr == NV1BA0_PIO_FREE) {
+        /* we don't simulate the queue for now,
+         * pretend to always be empty */
+        return 0x80;
+    }
+    return 0;
+}
+/* Voice Processor PIO write: known methods run at once via fe_method(). */
+static void vp_write(void *opaque, hwaddr addr,
+                     uint64_t val, unsigned int size)
+{
+    MCPXAPUState *d = opaque;
+    MCPX_DPRINTF("mcpx apu VP: [0x%llx] = 0x%llx\n", addr, val);
+
+    switch (addr) {
+    case NV1BA0_PIO_SET_ANTECEDENT_VOICE:
+    case NV1BA0_PIO_VOICE_ON:
+    case NV1BA0_PIO_VOICE_OFF:
+    case NV1BA0_PIO_VOICE_PAUSE:
+    case NV1BA0_PIO_SET_CURRENT_VOICE:
+        /* TODO: these should instead be queueing up fe commands */
+        fe_method(d, addr, val);
+        break;
+    default: /* writes to any other offset are ignored */
+        break;
+    }
+}
+static const MemoryRegionOps vp_ops = {
+    .read = vp_read,
+    .write = vp_write,
+};
+
+/* Copy len bytes between ptr and DSP scratch space at byte offset addr, using
+ * the guest page table at sge_base; a true dir stores to the guest. */
+static void scratch_rw(hwaddr sge_base, unsigned int max_sge,
+                       uint8_t* ptr, uint32_t addr, size_t len, bool dir)
+{
+    size_t i;
+    for (i=0; i<len; i++) {
+        unsigned int entry = (addr + i) / TARGET_PAGE_SIZE;
+        assert(entry < max_sge);
+        /* entries are 8 bytes: address word, then a control word (unused) */
+        uint32_t prd_address = ldl_le_phys(sge_base + entry*4*2);
+        hwaddr paddr = prd_address + (addr + i) % TARGET_PAGE_SIZE;
+        if (dir) {
+            stb_phys(paddr, ptr[i]);
+        } else {
+            ptr[i] = ldub_phys(paddr);
+        }
+    }
+}
+
+static void gp_scratch_rw(void *opaque, uint8_t* ptr, uint32_t addr, size_t len, bool dir)
+{
+    MCPXAPUState *d = opaque;
+    scratch_rw(d->regs[NV_PAPU_GPSADDR], d->regs[NV_PAPU_GPSMAXSGE],
+               ptr, addr, len, dir);
+}
+
+static void ep_scratch_rw(void *opaque, uint8_t* ptr, uint32_t addr, size_t len, bool dir)
+{
+    MCPXAPUState *d = opaque;
+    scratch_rw(d->regs[NV_PAPU_EPSADDR], d->regs[NV_PAPU_EPSMAXSGE],
+               ptr, addr, len, dir);
+}
+
+/* Reset-register write: reset the DSP, or bootstrap it on leaving reset. */
+static void proc_rst_write(DSPState *dsp, uint32_t oldval, uint32_t val)
+{
+    const uint32_t run = NV_PAPU_GPRST_GPRST | NV_PAPU_GPRST_GPDSPRST;
+    if ((val & run) != run) {
+        dsp_reset(dsp);
+    } else if ((oldval & run) != run) {
+        dsp_bootstrap(dsp);
+    }
+}
+
+/* Global Processor - programmable DSP */
+/* GP MMIO read: returns the gp.regs[] entry at the accessed byte offset. */
+static uint64_t gp_read(void *opaque,
+                        hwaddr addr, unsigned int size)
+{
+    MCPXAPUState *d = opaque;
+    uint64_t r = d->gp.regs[addr];
+    MCPX_DPRINTF("mcpx apu GP: read [0x%llx] -> 0x%llx\n", addr, r);
+    return r;
+}
+/* GP MMIO write: the value is stored in gp.regs[] at the written offset.
+ * A write to GPRST is first passed, together with the previous register
+ * value, to proc_rst_write() so the GP DSP can be reset or bootstrapped. */
+static void gp_write(void *opaque, hwaddr addr,
+                     uint64_t val, unsigned int size)
+{
+    MCPXAPUState *d = opaque;
+
+    MCPX_DPRINTF("mcpx apu GP: [0x%llx] = 0x%llx\n", addr, val);
+
+    if (addr == NV_PAPU_GPRST) {
+        /* must run before the store: it needs the old register value */
+        proc_rst_write(d->gp.dsp, d->gp.regs[NV_PAPU_GPRST], val);
+    }
+
+    d->gp.regs[addr] = val;
+}
+static const MemoryRegionOps gp_ops = {
+    .read = gp_read,
+    .write = gp_write,
+};
+
+
+/* Encode Processor - encoding DSP */
+/* EP MMIO read: the X, Y and P windows map one DSP word per 4-byte slot and
+ * are read through dsp_read_memory(); other offsets come from ep.regs[]. */
+static uint64_t ep_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    MCPXAPUState *d = opaque;
+    uint64_t r = 0;
+    switch (addr) { /* case ranges are inclusive: end on the last byte */
+    case NV_PAPU_EPXMEM ... NV_PAPU_EPXMEM + 0xC00*4 - 1: {
+        uint32_t xaddr = (addr - NV_PAPU_EPXMEM) / 4;
+        r = dsp_read_memory(d->ep.dsp, 'X', xaddr);
+        break;
+    }
+    case NV_PAPU_EPYMEM ... NV_PAPU_EPYMEM + 0x100*4 - 1: {
+        uint32_t yaddr = (addr - NV_PAPU_EPYMEM) / 4;
+        r = dsp_read_memory(d->ep.dsp, 'Y', yaddr);
+        break;
+    }
+    case NV_PAPU_EPPMEM ... NV_PAPU_EPPMEM + 0x1000*4 - 1: {
+        uint32_t paddr = (addr - NV_PAPU_EPPMEM) / 4;
+        r = dsp_read_memory(d->ep.dsp, 'P', paddr);
+        break;
+    }
+    default:
+        r = d->ep.regs[addr];
+        break;
+    }
+    MCPX_DPRINTF("mcpx apu EP: read [0x%llx] -> 0x%llx\n", addr, r);
+    return r;
+}
+/* EP MMIO write: X/Y/P window writes assert (unimplemented); EPRST resets. */
+static void ep_write(void *opaque, hwaddr addr,
+                     uint64_t val, unsigned int size)
+{
+    MCPXAPUState *d = opaque;
+
+    MCPX_DPRINTF("mcpx apu EP: [0x%llx] = 0x%llx\n", addr, val);
+    switch (addr) { /* case ranges are inclusive: end on the last byte */
+    case NV_PAPU_EPXMEM ... NV_PAPU_EPXMEM + 0xC00*4 - 1: {
+        assert(false);
+        break;
+    }
+    case NV_PAPU_EPYMEM ... NV_PAPU_EPYMEM + 0x100*4 - 1: {
+        assert(false);
+        break;
+    }
+    case NV_PAPU_EPPMEM ... NV_PAPU_EPPMEM + 0x1000*4 - 1: {
+        assert(false);
+        break;
+    }
+    case NV_PAPU_EPRST:
+        proc_rst_write(d->ep.dsp, d->ep.regs[NV_PAPU_EPRST], val);
+        d->ep.regs[NV_PAPU_EPRST] = val;
+        break;
+    default:
+        d->ep.regs[addr] = val;
+        break;
+    }
+}
+static const MemoryRegionOps ep_ops = {
+    .read = ep_read,
+    .write = ep_write,
+};
+
+
+/* Setup Engine frame tick (re-armed every 10 ms of virtual time): walks the
+ * 2D/3D/MP voice lists, then starts a frame on each DSP that is out of reset.
+ * TODO: this should be on a thread so it waits on the voice lock */
+static void se_frame(void *opaque)
+{
+    MCPXAPUState *d = opaque;
+    timer_mod(d->se.frame_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 10);
+    MCPX_DPRINTF("mcpx frame ping\n");
+    int list;
+    for (list=0; list < 3; list++) {
+        hwaddr top, current, next;
+        top = voice_list_regs[list].top;
+        current = voice_list_regs[list].current;
+        next = voice_list_regs[list].next;
+        /* NOTE(review): walk ends only at 0xFFFF; a looping list would spin */
+        d->regs[current] = d->regs[top];
+        MCPX_DPRINTF("list %d current voice %d\n", list, d->regs[current]);
+        while (d->regs[current] != 0xFFFF) {
+            d->regs[next] = voice_get_mask(d, d->regs[current],
+                NV_PAVS_VOICE_TAR_PITCH_LINK,
+                NV_PAVS_VOICE_TAR_PITCH_LINK_NEXT_VOICE_HANDLE);
+            if (!voice_get_mask(d, d->regs[current],
+                    NV_PAVS_VOICE_PAR_STATE,
+                    NV_PAVS_VOICE_PAR_STATE_ACTIVE_VOICE)) {
+                MCPX_DPRINTF("voice %d not active...!\n", d->regs[current]);
+                fe_method(d, SE2FE_IDLE_VOICE, d->regs[current]);
+            }
+            MCPX_DPRINTF("next voice %d\n", d->regs[next]);
+            d->regs[current] = d->regs[next];
+        }
+    }
+
+    if ((d->gp.regs[NV_PAPU_GPRST] & NV_PAPU_GPRST_GPRST)
+        && (d->gp.regs[NV_PAPU_GPRST] & NV_PAPU_GPRST_GPDSPRST)) {
+        dsp_start_frame(d->gp.dsp);
+        // hax
+        dsp_run(d->gp.dsp, 1000);
+    }
+    if ((d->ep.regs[NV_PAPU_EPRST] & NV_PAPU_GPRST_GPRST)
+        && (d->ep.regs[NV_PAPU_EPRST] & NV_PAPU_GPRST_GPDSPRST)) {
+        dsp_start_frame(d->ep.dsp);
+        // hax
+        // dsp_run(d->ep.dsp, 1000);
+    }
+}
+
+
+/* PCI init: builds the MMIO layout, the frame timer and both DSP cores. */
+static int mcpx_apu_initfn(PCIDevice *dev)
+{
+    MCPXAPUState *d = MCPX_APU_DEVICE(dev);
+
+    dev->config[PCI_INTERRUPT_PIN] = 0x01;
+
+    memory_region_init_io(&d->mmio, OBJECT(dev), &mcpx_apu_mmio_ops, d,
+                          "mcpx-apu-mmio", 0x80000);
+    /* VP, GP and EP windows are subregions of the main register window */
+    memory_region_init_io(&d->vp.mmio, OBJECT(dev), &vp_ops, d,
+                          "mcpx-apu-vp", 0x10000);
+    memory_region_add_subregion(&d->mmio, 0x20000, &d->vp.mmio);
+
+    memory_region_init_io(&d->gp.mmio, OBJECT(dev), &gp_ops, d,
+                          "mcpx-apu-gp", 0x10000);
+    memory_region_add_subregion(&d->mmio, 0x30000, &d->gp.mmio);
+
+    memory_region_init_io(&d->ep.mmio, OBJECT(dev), &ep_ops, d,
+                          "mcpx-apu-ep", 0x10000);
+    memory_region_add_subregion(&d->mmio, 0x50000, &d->ep.mmio);
+
+    pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
+
+    d->se.frame_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, se_frame, d);
+
+    d->gp.dsp = dsp_init(d, gp_scratch_rw);
+    d->ep.dsp = dsp_init(d, ep_scratch_rw);
+
+    return 0;
+}
+
+/* Class setup: PCI identity, init callback and description of the APU. */
+static void mcpx_apu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    k->device_id = PCI_DEVICE_ID_NVIDIA_MCPX_APU;
+    k->revision = 210;
+    k->class_id = PCI_CLASS_MULTIMEDIA_AUDIO;
+    k->init = mcpx_apu_initfn;
+    dc->desc = "MCPX Audio Processing Unit";
+}
+
+static const TypeInfo mcpx_apu_info = {
+    .name          = "mcpx-apu",
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(MCPXAPUState),
+    .class_init    = mcpx_apu_class_init,
+};
+
+static void mcpx_apu_register(void)
+{
+    type_register_static(&mcpx_apu_info);
+}
+type_init(mcpx_apu_register);
\ No newline at end of file
diff --git a/hw/xbox/nv2a.c b/hw/xbox/nv2a.c
new file mode 100644
index 0000000000..b444931bbb
--- /dev/null
+++ b/hw/xbox/nv2a.c
@@ -0,0 +1,6439 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "ui/console.h"
+#include "hw/pci/pci.h"
+#include "ui/console.h"
+#include "hw/display/vga.h"
+#include "hw/display/vga_int.h"
+#include "qemu/queue.h"
+#include "qemu/thread.h"
+#include "qapi/qmp/qstring.h"
+#include "gl/gloffscreen.h"
+#include "gl/glextensions.h"
+
+#include "hw/xbox/g-lru-cache.h"
+#include "hw/xbox/swizzle.h"
+#include "hw/xbox/nv2a_shaders.h"
+#include "hw/xbox/nv2a_debug.h"
+
+#include "hw/xbox/nv2a.h"
+#include "hw/xbox/nv2a_int.h"
+
+#define USE_TEXTURE_CACHE
+
+static const GLenum pgraph_texture_min_filter_map[] = {
+    0,
+    GL_NEAREST,
+    GL_LINEAR,
+    GL_NEAREST_MIPMAP_NEAREST,
+    GL_LINEAR_MIPMAP_NEAREST,
+    GL_NEAREST_MIPMAP_LINEAR,
+    GL_LINEAR_MIPMAP_LINEAR,
+    GL_LINEAR, /* TODO: Convolution filter... */
+};
+
+static const GLenum pgraph_texture_mag_filter_map[] = {
+    0,
+    GL_NEAREST,
+    GL_LINEAR,
+    0,
+    GL_LINEAR /* TODO: Convolution filter... */
+};
+
+static const GLenum pgraph_texture_addr_map[] = {
+    0,
+    GL_REPEAT,
+    GL_MIRRORED_REPEAT,
+    GL_CLAMP_TO_EDGE,
+    GL_CLAMP_TO_BORDER,
+    // GL_CLAMP
+};
+
+static const GLenum pgraph_blend_factor_map[] = {
+    GL_ZERO,
+    GL_ONE,
+    GL_SRC_COLOR,
+    GL_ONE_MINUS_SRC_COLOR,
+    GL_SRC_ALPHA,
+    GL_ONE_MINUS_SRC_ALPHA,
+    GL_DST_ALPHA,
+    GL_ONE_MINUS_DST_ALPHA,
+    GL_DST_COLOR,
+    GL_ONE_MINUS_DST_COLOR,
+    GL_SRC_ALPHA_SATURATE,
+    0,
+    GL_CONSTANT_COLOR,
+    GL_ONE_MINUS_CONSTANT_COLOR,
+    GL_CONSTANT_ALPHA,
+    GL_ONE_MINUS_CONSTANT_ALPHA,
+};
+
+static const GLenum pgraph_blend_equation_map[] = {
+    GL_FUNC_SUBTRACT,
+    GL_FUNC_REVERSE_SUBTRACT,
+    GL_FUNC_ADD,
+    GL_MIN,
+    GL_MAX,
+    GL_FUNC_REVERSE_SUBTRACT,
+    GL_FUNC_ADD,
+};
+
+static const GLenum pgraph_blend_logicop_map[] = {
+    GL_CLEAR,
+    GL_AND,
+    GL_AND_REVERSE,
+    GL_COPY,
+    GL_AND_INVERTED,
+    GL_NOOP,
+    GL_XOR,
+    GL_OR,
+    GL_NOR,
+    GL_EQUIV,
+    GL_INVERT,
+    GL_OR_REVERSE,
+    GL_COPY_INVERTED,
+    GL_OR_INVERTED,
+    GL_NAND,
+    GL_SET,
+};
+
+static const GLenum pgraph_cull_face_map[] = {
+    0,
+    GL_FRONT,
+    GL_BACK,
+    GL_FRONT_AND_BACK
+};
+
+static const GLenum pgraph_depth_func_map[] = {
+    GL_NEVER,
+    GL_LESS,
+    GL_EQUAL,
+    GL_LEQUAL,
+    GL_GREATER,
+    GL_NOTEQUAL,
+    GL_GEQUAL,
+    GL_ALWAYS,
+};
+
+static const GLenum pgraph_stencil_func_map[] = {
+    GL_NEVER,
+    GL_LESS,
+    GL_EQUAL,
+    GL_LEQUAL,
+    GL_GREATER,
+    GL_NOTEQUAL,
+    GL_GEQUAL,
+    GL_ALWAYS,
+};
+
+static const GLenum pgraph_stencil_op_map[] = {
+    0,
+    GL_KEEP,
+    GL_ZERO,
+    GL_REPLACE,
+    GL_INCR,
+    GL_DECR,
+    GL_INVERT,
+    GL_INCR_WRAP,
+    GL_DECR_WRAP,
+};
+
+typedef struct ColorFormatInfo { /* GL mapping of one texture format */
+    unsigned int bytes_per_pixel;
+    bool linear; /* NOTE(review): presumably unswizzled layout - confirm */
+    GLint gl_internal_format;
+    GLenum gl_format;
+    GLenum gl_type;
+    GLenum gl_swizzle_mask[4]; /* left zeroed by entries that omit it */
+} ColorFormatInfo;
+
+/* Texture format table indexed by NV097_SET_TEXTURE_FORMAT_COLOR_* value. */
+static const ColorFormatInfo kelvin_color_format_map[66] = {
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] =
+        {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+         {GL_RED, GL_RED, GL_RED, GL_ONE}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] =
+        {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+         {GL_RED, GL_RED, GL_RED, GL_RED}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] =
+        {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] =
+        {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] =
+        {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] =
+        {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] =
+        {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] =
+        {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+
+    /* paletted texture */
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] =
+        {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+
+    [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] =
+        {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] =
+        {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] =
+        {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] =
+        {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] =
+        {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] =
+        {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] =
+        {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+         {GL_RED, GL_RED, GL_RED, GL_ONE}},
+
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] =
+        {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+         {GL_ONE, GL_ONE, GL_ONE, GL_RED}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] =
+        {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
+         {GL_GREEN, GL_GREEN, GL_GREEN, GL_RED}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] =
+        {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+         {GL_RED, GL_RED, GL_RED, GL_RED}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] =
+        {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] =
+        {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] =
+        {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] =
+        {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE,
+         {GL_ONE, GL_ONE, GL_ONE, GL_RED}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] =
+        {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE,
+         {GL_GREEN, GL_GREEN, GL_GREEN, GL_RED}},
+
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] =
+        {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] =
+        {2, false, GL_RG8_SNORM, GL_RG, GL_BYTE, /* FIXME: This might be signed */
+         {GL_ZERO, GL_RED, GL_GREEN, GL_ONE}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] =
+        {2, false, GL_RG8_SNORM, GL_RG, GL_BYTE, /* FIXME: This might be signed */
+         {GL_RED, GL_ZERO, GL_GREEN, GL_ONE}},
+
+    /* TODO: format conversion */
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] =
+        {2, true, GL_RGBA8,  GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] =
+        {4, true, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] =
+        {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] =
+        {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT,
+         {GL_RED, GL_RED, GL_RED, GL_ONE}},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] =
+        {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
+
+    [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] =
+        {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8},
+
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] =
+        {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] =
+        {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8},
+    [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] =
+        {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}
+};
+
+typedef struct SurfaceColorFormatInfo {
+    unsigned int bytes_per_pixel;
+    GLint gl_internal_format;
+    GLenum gl_format;
+    GLenum gl_type;
+} SurfaceColorFormatInfo;
+
+static const SurfaceColorFormatInfo kelvin_surface_color_format_map[] = { /* indexed by surface color format value */
+    [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] =
+        {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, /* {bytes_per_pixel, internal format, format, type} */
+    [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] =
+        {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},
+    [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] =
+        {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV},
+    [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] =
+        {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, /* same GL layout as X8R8G8B8 above */
+};
+
+#define GET_MASK(v, mask) (((v) & (mask)) >> (ffs(mask)-1)) /* extract the field selected by mask, shifted down to bit 0; mask is evaluated twice */
+
+#define SET_MASK(v, mask, val) ({ /* write val into the field of v selected by mask */ \
+        const unsigned int __val = (val);                             \
+        const unsigned int __mask = (mask);                          \
+        (v) &= ~(__mask);  /* clear the field first */               \
+        (v) |= ((__val) << (ffs(__mask)-1)) & (__mask);              \
+    })
+
+#define CASE_4(v, step) /* four case labels: v, v+step, v+2*step, v+3*step */ \
+    case (v):                                                        \
+    case (v)+(step):                                                 \
+    case (v)+(step)*2:                                               \
+    case (v)+(step)*3 /* no ':' here; the user of the macro must add it */
+
+
+enum FIFOEngine { /* values decoded from the NV_RAMHT_ENGINE field in ramht_lookup() */
+    ENGINE_SOFTWARE = 0,
+    ENGINE_GRAPHICS = 1,
+    ENGINE_DVD = 2,
+};
+
+typedef struct RAMHTEntry { /* decoded RAMHT hash-table entry, filled by ramht_lookup() */
+    uint32_t handle;             /* first 32-bit word of the entry */
+    hwaddr instance;             /* (context & NV_RAMHT_INSTANCE) << 4 */
+    enum FIFOEngine engine;      /* (context & NV_RAMHT_ENGINE) >> 16 */
+    unsigned int channel_id : 5; /* (context & NV_RAMHT_CHID) >> 24 */
+    bool valid;                  /* context & NV_RAMHT_STATUS */
+} RAMHTEntry;
+
+typedef struct DMAObject { /* decoded DMA object, filled by nv_dma_load() */
+    unsigned int dma_class;   /* NV_DMA_CLASS field of the flags word */
+    unsigned int dma_target;  /* NV_DMA_TARGET field of the flags word */
+    hwaddr address;           /* (frame & NV_DMA_ADDRESS) | NV_DMA_ADJUST field of flags */
+    hwaddr limit;             /* second 32-bit word of the object */
+} DMAObject;
+
+typedef struct VertexAttribute { /* per-attribute vertex fetch state */
+    bool dma_select; /* true: fetch via dma_vertex_b, false: via dma_vertex_a */
+    hwaddr offset;   /* byte offset added to the mapped DMA object */
+
+    /* inline arrays are packed in order?
+     * Need to pass the offset to converted attributes */
+    unsigned int inline_array_offset;
+
+    float inline_value[4]; /* constant passed to glVertexAttrib4fv when count == 0 */
+
+    unsigned int format;
+    unsigned int size; /* size of the data type */
+    unsigned int count; /* number of components */
+    uint32_t stride;
+
+    bool needs_conversion; /* convert on the CPU into converted_buffer before upload */
+    uint8_t *converted_buffer;
+    unsigned int converted_elements; /* element count last uploaded from converted_buffer */
+    unsigned int converted_size;     /* output stride is converted_size * converted_count */
+    unsigned int converted_count;
+
+    float *inline_buffer;
+
+    GLint gl_count;
+    GLenum gl_type;
+    GLboolean gl_normalize;
+
+    GLuint gl_converted_buffer; /* GL buffer receiving converted_buffer's contents */
+    GLuint gl_inline_buffer;
+} VertexAttribute;
+
+typedef struct Surface { /* one render surface; PGRAPHState holds a color and a zeta instance */
+    bool draw_dirty;
+    bool buffer_dirty;
+    bool write_enabled_cache;
+    unsigned int pitch;
+    /* NOTE(review): presumably a byte offset inside the surface's DMA object - confirm */
+    hwaddr offset;
+} Surface;
+
+typedef struct SurfaceShape { /* surface format and geometry; PGRAPHState keeps a current and a last copy */
+    unsigned int z_format;
+    unsigned int color_format;
+    unsigned int zeta_format;
+    unsigned int log_width, log_height; /* NOTE(review): presumably log2 of the dimensions - confirm */
+    unsigned int clip_x, clip_y;
+    unsigned int clip_width, clip_height;
+    unsigned int anti_aliasing;
+} SurfaceShape;
+
+typedef struct TextureShape { /* hashed byte-wise (padding included) by texture_key_hash() */
+    bool cubemap;                 /* upload six faces as GL_TEXTURE_CUBE_MAP */
+    unsigned int dimensionality;  /* 1, 2 or 3; selects the GL texture target */
+    unsigned int color_format;    /* index into kelvin_color_format_map */
+    unsigned int levels;          /* number of mipmap levels uploaded */
+    unsigned int width, height, depth;
+    /* min_mipmap_level becomes GL_TEXTURE_BASE_LEVEL for non-linear formats */
+    unsigned int min_mipmap_level, max_mipmap_level;
+    unsigned int pitch;           /* row pitch in bytes, used by the linear (rectangle) upload */
+} TextureShape;
+
+typedef struct TextureKey { /* key hashed by texture_key_hash() */
+    TextureShape state;
+    uint64_t data_hash; /* XORed with the hash of state in texture_key_hash() */
+    uint8_t* texture_data;
+    uint8_t* palette_data;
+} TextureKey;
+
+typedef struct TextureBinding { /* GL texture created by generate_texture() */
+    GLenum gl_target;
+    GLuint gl_texture;
+    unsigned int refcnt; /* set to 1 by generate_texture() */
+} TextureBinding;
+
+
+typedef struct KelvinState { /* embedded in PGRAPHState as .kelvin */
+    hwaddr object_instance; /* NOTE(review): presumably the RAMIN instance address of the bound object - confirm */
+} KelvinState;
+
+typedef struct ContextSurfaces2DState { /* embedded in PGRAPHState as .context_surfaces_2d */
+    hwaddr object_instance;
+    hwaddr dma_image_source; /* NOTE(review): presumably DMA object addresses usable with nv_dma_map() - confirm */
+    hwaddr dma_image_dest;
+    unsigned int color_format;
+    unsigned int source_pitch, dest_pitch;
+    hwaddr source_offset, dest_offset;
+} ContextSurfaces2DState;
+
+typedef struct ImageBlitState { /* embedded in PGRAPHState as .image_blit */
+    hwaddr object_instance;
+    hwaddr context_surfaces; /* NOTE(review): presumably the instance of the ContextSurfaces2D object to use - confirm */
+    unsigned int operation;
+    unsigned int in_x, in_y;
+    unsigned int out_x, out_y;
+    unsigned int width, height;
+} ImageBlitState;
+
+
+typedef struct PGRAPHState { /* graphics engine state, embedded in NV2AState as .pgraph */
+    QemuMutex lock;
+
+    uint32_t pending_interrupts; /* ANDed with enabled_interrupts in update_irq() */
+    uint32_t enabled_interrupts;
+    QemuCond interrupt_cond;
+
+    /* subchannels state we're not sure the location of... */
+    ContextSurfaces2DState context_surfaces_2d;
+    ImageBlitState image_blit;
+    KelvinState kelvin;
+
+    QemuCond fifo_access_cond;
+    QemuCond flip_3d;
+
+    hwaddr dma_color, dma_zeta;
+    Surface surface_color, surface_zeta;
+    unsigned int surface_type;
+    SurfaceShape surface_shape;
+    SurfaceShape last_surface_shape;
+
+    hwaddr dma_a, dma_b;
+    GLruCache *texture_cache;
+    bool texture_dirty[NV2A_MAX_TEXTURES];
+    TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
+
+    GHashTable *shader_cache;
+    ShaderBinding *shader_binding;
+
+    bool texture_matrix_enable[NV2A_MAX_TEXTURES];
+
+    /* FIXME: Move to NV_PGRAPH_BUMPMAT... */
+    float bump_env_matrix[NV2A_MAX_TEXTURES-1][4]; /* 3 allowed stages with 2x2 matrix each */
+
+    GloContext *gl_context;
+    GLuint gl_framebuffer;
+    GLuint gl_color_buffer, gl_zeta_buffer;
+
+    hwaddr dma_state;
+    hwaddr dma_notifies;
+    hwaddr dma_semaphore;
+
+    hwaddr dma_report;
+    hwaddr report_offset;
+    bool zpass_pixel_count_enable;
+    unsigned int zpass_pixel_count_result;
+    unsigned int gl_zpass_pixel_count_query_count;
+    GLuint* gl_zpass_pixel_count_queries;
+
+    hwaddr dma_vertex_a, dma_vertex_b; /* vertex DMA objects; VertexAttribute.dma_select picks b */
+
+    unsigned int primitive_mode;
+
+    bool enable_vertex_program_write;
+
+    uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
+
+    uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
+    bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
+
+    /* lighting constant arrays */
+    uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
+    bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
+    uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
+    bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
+    uint32_t ltc1[NV2A_LTC1_COUNT][4];
+    bool ltc1_dirty[NV2A_LTC1_COUNT];
+
+    // should figure out where these are in lighting context
+    float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
+    float light_infinite_direction[NV2A_MAX_LIGHTS][3];
+    float light_local_position[NV2A_MAX_LIGHTS][3];
+    float light_local_attenuation[NV2A_MAX_LIGHTS][3];
+
+    VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
+
+    unsigned int inline_array_length; /* in 32-bit words; byte size is inline_array_length*4 */
+    uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
+    GLuint gl_inline_array_buffer; /* filled from inline_array by pgraph_bind_inline_array() */
+
+    unsigned int inline_elements_length;
+    uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
+
+    unsigned int inline_buffer_length;
+
+    unsigned int draw_arrays_length;
+    unsigned int draw_arrays_max_count;
+    /* FIXME: Unknown size, possibly endless, 1000 will do for now */
+    GLint gl_draw_arrays_start[1000];
+    GLsizei gl_draw_arrays_count[1000];
+
+    GLuint gl_element_buffer;
+    GLuint gl_memory_buffer; /* GL buffer refreshed from VRAM by pgraph_update_memory_buffer() */
+
+    GLuint gl_vertex_array;
+
+    uint32_t regs[0x2000];
+} PGRAPHState;
+
+
+typedef struct NV2AState { /* device state of the "nv2a" PCI device, see NV2A_DEVICE() */
+    PCIDevice dev;
+    qemu_irq irq;
+
+    bool exiting;
+
+    VGACommonState vga;
+    GraphicHwOps hw_ops;
+
+    QEMUTimer *vblank_timer;
+
+    MemoryRegion *vram;
+    MemoryRegion vram_pci;
+    uint8_t *vram_ptr; /* host pointer used to read VRAM contents (see nv_dma_map) */
+    MemoryRegion ramin;
+    uint8_t *ramin_ptr; /* host pointer used to read RAMHT entries and DMA objects */
+
+    MemoryRegion mmio;
+
+    MemoryRegion block_mmio[NV_NUM_BLOCKS];
+
+    struct {
+        uint32_t pending_interrupts; /* NV_PMC_INTR_0_* bits recomputed by update_irq() */
+        uint32_t enabled_interrupts;
+    } pmc;
+
+    struct {
+        uint32_t pending_interrupts;
+        uint32_t enabled_interrupts;
+
+        QemuMutex lock;
+        QemuThread puller_thread;
+        QemuCond puller_cond;
+        QemuThread pusher_thread;
+        QemuCond pusher_cond;
+
+        uint32_t regs[0x2000];
+    } pfifo;
+
+    struct {
+        uint32_t regs[0x1000];
+    } pvideo;
+
+    struct {
+        uint32_t pending_interrupts;
+        uint32_t enabled_interrupts;
+
+        uint32_t numerator;
+        uint32_t denominator;
+
+        uint32_t alarm_time;
+    } ptimer;
+
+    struct {
+        uint32_t regs[0x1000];
+    } pfb;
+
+    struct PGRAPHState pgraph;
+
+    struct {
+        uint32_t pending_interrupts;
+        uint32_t enabled_interrupts;
+
+        hwaddr start;
+    } pcrtc;
+
+    struct {
+        uint32_t core_clock_coeff;
+        uint64_t core_clock_freq;
+        uint32_t memory_clock_coeff;
+        uint32_t video_clock_coeff;
+    } pramdac;
+
+} NV2AState;
+
+
+#define NV2A_DEVICE(obj) /* OBJECT_CHECK cast of obj to NV2AState, type name "nv2a" */ \
+    OBJECT_CHECK(NV2AState, (obj), "nv2a")
+
+static void reg_log_read(int block, hwaddr addr, uint64_t val); /* forward declarations of static logging helpers */
+static void reg_log_write(int block, hwaddr addr, uint64_t val);
+static void pgraph_method_log(unsigned int subchannel,
+                              unsigned int graphics_class,
+                              unsigned int method, uint32_t parameter);
+
+static uint64_t fnv_hash(const uint8_t *data, size_t len)
+{
+    /* 64 bit Fowler/Noll/Vo FNV-1a hash code over len bytes of data.
+     * Returns all 64 bits; callers needing a guint truncate it themselves. */
+    uint64_t hval = 0xcbf29ce484222325ULL; /* FNV-1a offset basis */
+    const uint8_t *dp = data;
+    const uint8_t *de = data + len;
+    while (dp < de) {
+        hval ^= (uint64_t) *dp++;
+        hval += (hval << 1) + (hval << 4) + (hval << 5) +
+            (hval << 7) + (hval << 8) + (hval << 40); /* hval *= 0x100000001b3 */
+    }
+    return hval;
+}
+
+static uint64_t fast_hash(const uint8_t *data, size_t len, unsigned int samples)
+{ /* Sampled CRC32-based hash on SSE4.2 builds; plain fnv_hash() otherwise. */
+#ifdef __SSE4_2__
+    uint64_t h[4] = {len, 0, 0, 0};
+    assert(samples > 0);
+    /* Lengths that are not a positive multiple of 8 use the byte-wise hash. */
+    if (len < 8 || len % 8) {
+        return fnv_hash(data, len);
+    }
+    /* From here on the data is read as 64-bit words. */
+    assert(len >= 8 && len % 8 == 0);
+    const uint64_t *dp = (const uint64_t*)data;
+    const uint64_t *de = dp + (len / 8);
+    size_t step = len / 8 / samples; /* stride, in words, between sampled words */
+    if (step == 0) step = 1;
+    /* NOTE(review): de - step * 3 can point before data for short inputs (formally UB). */
+    while (dp < de - step * 3) { /* four lanes; lane k reads dp[step * k] */
+        h[0] = __builtin_ia32_crc32di(h[0], dp[step * 0]);
+        h[1] = __builtin_ia32_crc32di(h[1], dp[step * 1]);
+        h[2] = __builtin_ia32_crc32di(h[2], dp[step * 2]);
+        h[3] = __builtin_ia32_crc32di(h[3], dp[step * 3]);
+        dp += step * 4;
+    }
+    if (dp < de - step * 0) /* tail: up to three remaining samples */
+        h[0] = __builtin_ia32_crc32di(h[0], dp[step * 0]);
+    if (dp < de - step * 1)
+        h[1] = __builtin_ia32_crc32di(h[1], dp[step * 1]);
+    if (dp < de - step * 2)
+        h[2] = __builtin_ia32_crc32di(h[2], dp[step * 2]);
+    /* Combine the four lanes into one 64-bit value. */
+    return h[0] + (h[1] << 10) + (h[2] << 21) + (h[3] << 32);
+#else /* no SSE4.2: every byte is hashed and samples is ignored */
+    return fnv_hash(data, len);
+#endif
+}
+
+static void update_irq(NV2AState *d)
+{ /* Recompute the PMC pending bits from PFIFO/PCRTC/PGRAPH, then drive the PCI IRQ. */
+    /* PFIFO */
+    if (d->pfifo.pending_interrupts & d->pfifo.enabled_interrupts) {
+        d->pmc.pending_interrupts |= NV_PMC_INTR_0_PFIFO;
+    } else {
+        d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PFIFO;
+    }
+
+    /* PCRTC */
+    if (d->pcrtc.pending_interrupts & d->pcrtc.enabled_interrupts) {
+        d->pmc.pending_interrupts |= NV_PMC_INTR_0_PCRTC;
+    } else {
+        d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PCRTC;
+    }
+
+    /* PGRAPH */
+    if (d->pgraph.pending_interrupts & d->pgraph.enabled_interrupts) {
+        d->pmc.pending_interrupts |= NV_PMC_INTR_0_PGRAPH;
+    } else {
+        d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PGRAPH;
+    }
+    /* NOTE(review): logical &&, not bitwise & - any pending bit with any enable bit asserts; confirm intended */
+    if (d->pmc.pending_interrupts && d->pmc.enabled_interrupts) {
+        NV2A_DPRINTF("raise irq\n");
+        pci_irq_assert(&d->dev);
+    } else {
+        pci_irq_deassert(&d->dev);
+    }
+}
+
+static uint32_t ramht_hash(NV2AState *d, uint32_t handle)
+{
+    /* Table size in bytes: 1 << (NV_PFIFO_RAMHT_SIZE field + 12). */
+    const unsigned int table_bytes =
+        1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12);
+
+    /* XXX: Think this is different to what nouveau calculates... */
+    const unsigned int hash_bits = ffs(table_bytes)-2;
+
+    /* XOR-fold the handle, hash_bits bits at a time. */
+    uint32_t folded = 0;
+    for (; handle != 0; handle >>= hash_bits) {
+        folded ^= handle & ((1 << hash_bits) - 1);
+    }
+
+    /* Fold in the channel id from CACHE1_PUSH1, shifted by hash_bits - 4. */
+    const unsigned int chid = GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1],
+                                       NV_PFIFO_CACHE1_PUSH1_CHID);
+    return folded ^ (chid << (hash_bits - 4));
+}
+
+
+static RAMHTEntry ramht_lookup(NV2AState *d, uint32_t handle)
+{ /* Read and decode the RAMHT entry stored at the hash slot of handle. */
+    hwaddr ramht_size =
+        1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12);
+    /* Each entry is 8 bytes: a handle word followed by a context word. */
+    uint32_t hash = ramht_hash(d, handle);
+    assert(hash * 8 < ramht_size);
+
+    hwaddr ramht_address =
+        GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT],
+                 NV_PFIFO_RAMHT_BASE_ADDRESS) << 12;
+    /* The table lives in RAMIN; the entry must start inside that region. */
+    assert(ramht_address + hash * 8 < memory_region_size(&d->ramin));
+
+    uint8_t *entry_ptr = d->ramin_ptr + ramht_address + hash * 8;
+
+    uint32_t entry_handle = ldl_le_p((uint32_t*)entry_ptr);
+    uint32_t entry_context = ldl_le_p((uint32_t*)(entry_ptr + 4));
+    /* The entry is returned as found; its handle is not compared with the requested one here. */
+    return (RAMHTEntry){
+        .handle = entry_handle,
+        .instance = (entry_context & NV_RAMHT_INSTANCE) << 4,
+        .engine = (entry_context & NV_RAMHT_ENGINE) >> 16,
+        .channel_id = (entry_context & NV_RAMHT_CHID) >> 24,
+        .valid = entry_context & NV_RAMHT_STATUS,
+    };
+}
+
+static DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address)
+{
+    /* Decode the three little-endian words of a DMA object stored in RAMIN. */
+    assert(dma_obj_address < memory_region_size(&d->ramin));
+    uint32_t *words = (uint32_t*)(d->ramin_ptr + dma_obj_address);
+    const uint32_t word_flags = ldl_le_p(&words[0]);
+    const uint32_t word_limit = ldl_le_p(&words[1]);
+    const uint32_t word_frame = ldl_le_p(&words[2]);
+
+    DMAObject obj = {0};
+    obj.dma_class = GET_MASK(word_flags, NV_DMA_CLASS);
+    obj.dma_target = GET_MASK(word_flags, NV_DMA_TARGET);
+    obj.address = (word_frame & NV_DMA_ADDRESS) | GET_MASK(word_flags, NV_DMA_ADJUST);
+    obj.limit = word_limit;
+    return obj;
+}
+
+static void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len)
+{ /* Return a host pointer into VRAM for the DMA object; *len receives its limit. */
+    DMAObject dma = nv_dma_load(d, dma_obj_address);
+
+    /* TODO: Handle targets and classes properly */
+    NV2A_DPRINTF("dma_map %" HWADDR_PRIx " - %x, %x, %" HWADDR_PRIx " %" HWADDR_PRIx "\n",
+                 dma_obj_address,
+                 dma.dma_class, dma.dma_target, dma.address, dma.limit);
+    /* Keep only the low 27 address bits (128 MiB). NOTE(review): presumably the VRAM aperture - confirm */
+    dma.address &= 0x07FFFFFF;
+    /* Only the start address is checked; the end-of-range check below is disabled. */
+    assert(dma.address < memory_region_size(d->vram));
+    // assert(dma.address + dma.limit < memory_region_size(d->vram));
+    *len = dma.limit;
+    return d->vram_ptr + dma.address;
+}
+
+/* 16 bit to [0.0, F16_MAX = 511.9375]; the union pun avoids aliasing UB */
+static float convert_f16_to_float(uint16_t f16) {
+    if (f16 == 0x0000) { return 0.0; }
+    union { uint32_t i; float f; } u = { .i = (f16 << 11) + 0x3C000000 };
+    return u.f;
+}
+
+/* 24 bit to [0.0, F24_MAX]; the union pun avoids strict-aliasing UB */
+static float convert_f24_to_float(uint32_t f24) {
+    assert(!(f24 >> 24));
+    f24 &= 0xFFFFFF;
+    if (f24 == 0x000000) { return 0.0; }
+    union { uint32_t i; float f; } u = { .i = f24 << 7 };
+    return u.f;
+}
+
+static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size,
+                                        bool f)
+{ /* Copy the page-aligned VRAM range covering [addr, addr+size) into gl_memory_buffer. */
+    glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); /* stays bound on return */
+    /* f forces the copy; otherwise it happens only if the range was dirty (dirty bits get cleared). */
+    hwaddr end = TARGET_PAGE_ALIGN(addr + size);
+    addr &= TARGET_PAGE_MASK;
+    assert(end < memory_region_size(d->vram)); /* NOTE(review): rejects a range ending exactly at the end of VRAM */
+    if (f || memory_region_test_and_clear_dirty(d->vram,
+                                                addr,
+                                                end - addr,
+                                                DIRTY_MEMORY_NV2A)) {
+        glBufferSubData(GL_ARRAY_BUFFER, addr, end - addr, d->vram_ptr + addr);
+    }
+}
+
+static void pgraph_bind_vertex_attributes(NV2AState *d,
+                                          unsigned int num_elements,
+                                          bool inline_data,
+                                          unsigned int inline_stride)
+{
+    int i, j;
+    PGRAPHState *pg = &d->pgraph;
+    /* Point every GL vertex attribute at its data source for num_elements vertices. */
+    if (inline_data) {
+        NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)",
+                             __func__, num_elements, inline_stride);
+    } else {
+        NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements);
+    }
+    /* count == 0 marks a disabled attribute, which gets its constant inline_value instead. */
+    for (i=0; i<NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        VertexAttribute *attribute = &pg->vertex_attributes[i];
+        if (attribute->count) {
+            uint8_t *data;
+            unsigned int in_stride;
+            if (inline_data && attribute->needs_conversion) {
+                data = (uint8_t*)pg->inline_array
+                        + attribute->inline_array_offset;
+                in_stride = inline_stride;
+            } else {
+                hwaddr dma_len;
+                if (attribute->dma_select) {
+                    data = nv_dma_map(d, pg->dma_vertex_b, &dma_len);
+                } else {
+                    data = nv_dma_map(d, pg->dma_vertex_a, &dma_len);
+                }
+
+                assert(attribute->offset < dma_len);
+                data += attribute->offset;
+
+                in_stride = attribute->stride;
+            }
+            /* Three sources: CPU-converted copy, the inline array buffer, or the VRAM mirror. */
+            if (attribute->needs_conversion) {
+                NV2A_DPRINTF("converted %d\n", i);
+
+                unsigned int out_stride = attribute->converted_size
+                                        * attribute->converted_count;
+
+                if (num_elements > attribute->converted_elements) {
+                    attribute->converted_buffer = g_realloc(
+                        attribute->converted_buffer,
+                        num_elements * out_stride);
+                }
+                /* Only elements from converted_elements up to num_elements are converted. */
+                for (j=attribute->converted_elements; j<num_elements; j++) {
+                    uint8_t *in = data + j * in_stride;
+                    uint8_t *out = attribute->converted_buffer + j * out_stride;
+
+                    switch (attribute->format) {
+                    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { /* packed signed 11/11/10-bit fields -> 3 floats */
+                        uint32_t p = ldl_le_p((uint32_t*)in);
+                        float *xyz = (float*)out;
+                        xyz[0] = ((int32_t)(((p >>  0) & 0x7FF) << 21) >> 21)
+                                                                      / 1023.0f;
+                        xyz[1] = ((int32_t)(((p >> 11) & 0x7FF) << 21) >> 21)
+                                                                      / 1023.0f;
+                        xyz[2] = ((int32_t)(((p >> 22) & 0x3FF) << 22) >> 22)
+                                                                       / 511.0f;
+                        break;
+                    }
+                    default:
+                        assert(false);
+                        break;
+                    }
+                }
+
+                /* The GL buffer is re-uploaded only when the element count changed. */
+                glBindBuffer(GL_ARRAY_BUFFER, attribute->gl_converted_buffer);
+                if (num_elements != attribute->converted_elements) {
+                    glBufferData(GL_ARRAY_BUFFER,
+                                 num_elements * out_stride,
+                                 attribute->converted_buffer,
+                                 GL_DYNAMIC_DRAW);
+                    attribute->converted_elements = num_elements;
+                }
+
+
+                glVertexAttribPointer(i,
+                    attribute->converted_count,
+                    attribute->gl_type,
+                    attribute->gl_normalize,
+                    out_stride,
+                    0);
+            } else if (inline_data) {
+                glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer);
+                glVertexAttribPointer(i,
+                                      attribute->gl_count,
+                                      attribute->gl_type,
+                                      attribute->gl_normalize,
+                                      inline_stride,
+                                      (void*)(uintptr_t)attribute->inline_array_offset);
+            } else {
+                hwaddr addr = data - d->vram_ptr; /* offset of the data within VRAM */
+                pgraph_update_memory_buffer(d, addr,
+                                            num_elements * attribute->stride,
+                                            false);
+                glVertexAttribPointer(i,
+                    attribute->gl_count,
+                    attribute->gl_type,
+                    attribute->gl_normalize,
+                    attribute->stride,
+                    (void*)addr);
+            }
+            glEnableVertexAttribArray(i);
+        } else {
+            glDisableVertexAttribArray(i);
+
+            glVertexAttrib4fv(i, attribute->inline_value);
+        }
+    }
+    NV2A_GL_DGROUP_END();
+}
+
+static unsigned int pgraph_bind_inline_array(NV2AState *d)
+{
+    int i;
+    /* Lay out the enabled attributes back to back, upload the inline array, bind it. */
+    PGRAPHState *pg = &d->pgraph;
+
+    unsigned int offset = 0;
+    for (i=0; i<NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        VertexAttribute *attribute = &pg->vertex_attributes[i];
+        if (attribute->count) {
+            attribute->inline_array_offset = offset;
+
+            NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n",
+                i, attribute->size, attribute->count);
+            offset += attribute->size * attribute->count;
+            assert(offset % 4 == 0);
+        }
+    }
+
+    unsigned int vertex_size = offset;
+    /* inline_array_length is in 32-bit words (inline_array is uint32_t[]), hence the *4. */
+    /* NOTE(review): divides by zero if no attribute is enabled (vertex_size == 0). */
+    unsigned int index_count = pg->inline_array_length*4 / vertex_size;
+
+    NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count);
+
+    glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer);
+    glBufferData(GL_ARRAY_BUFFER, pg->inline_array_length*4, pg->inline_array,
+                 GL_DYNAMIC_DRAW);
+
+    pgraph_bind_vertex_attributes(d, index_count, true, vertex_size);
+
+    return index_count; /* number of whole vertices in the inline array */
+}
+
+static uint8_t cliptobyte(int x)
+{ /* Saturate x into the 0..255 range of a byte. */
+    return (x <= 0) ? 0 : (uint8_t)((x >= 255) ? 255 : x);
+}
+
+static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix,
+                                uint8_t *r, uint8_t *g, uint8_t* b) {
+    /* Pixels are stored in 4-byte groups of two: Y0 U Y1 V.  Both pixels
+     * of a group share its U and V bytes, so find the group holding ix.
+     * The result is written as 8-bit R, G, B using fixed-point
+     * coefficients scaled by 256. */
+    const uint8_t *group = line + (ix - ix % 2) * 2;
+    const int luma = (int)line[ix * 2] - 16;
+    const int cb = (int)group[1] - 128;
+    const int cr = (int)group[3] - 128;
+
+    *r = cliptobyte((298 * luma + 409 * cr + 128) >> 8);
+    *g = cliptobyte((298 * luma - 100 * cb - 208 * cr + 128) >> 8);
+    *b = cliptobyte((298 * luma + 516 * cb + 128) >> 8);
+}
+
+/* Convert one texture level into a tightly packed layout GL can consume.
+ * Returns a g_malloc'd buffer that the caller must g_free, or NULL when
+ * s.color_format needs no conversion.  Source rows are row_pitch bytes
+ * apart; only depth == 1 is handled and slice_pitch is currently unused. */
+static uint8_t* convert_texture_data(const TextureShape s,
+        const uint8_t *data, const uint8_t *palette_data,
+        unsigned int width, unsigned int height, unsigned int depth,
+        unsigned int row_pitch, unsigned int slice_pitch)
+{
+    if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { /* 8-bit palette index -> 32-bit color */
+        assert(depth == 1); /* FIXME */
+        uint8_t* converted_data = g_malloc(width * height * 4);
+        int x, y;
+        for (y = 0; y < height; y++) {
+            for (x = 0; x < width; x++) {
+                uint8_t index = data[y * row_pitch + x];
+                uint32_t color = *(uint32_t*)(palette_data + index * 4);
+                *(uint32_t*)(converted_data + y * width * 4 + x * 4) = color;
+            }
+        }
+        return converted_data;
+    } else if (s.color_format
+                   == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) {
+        assert(depth == 1); /* FIXME */
+        uint8_t* converted_data = g_malloc(width * height * 4);
+        int x, y;
+        for (y = 0; y < height; y++) {
+            const uint8_t* line = &data[y * row_pitch]; /* source rows follow the pitch */
+            for (x = 0; x < width; x++) {
+                uint8_t* pixel = &converted_data[(y * width + x) * 4]; /* matches the allocation */
+                /* FIXME: Actually needs uyvy? */
+                convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]);
+                pixel[3] = 255;
+            }
+        }
+        return converted_data;
+    } else if (s.color_format
+                   == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) {
+        assert(depth == 1); /* FIXME */
+        uint8_t *converted_data = g_malloc(width * height * 3);
+        int x, y;
+        for (y = 0; y < height; y++) {
+            for (x = 0; x < width; x++) {
+                uint16_t rgb655 = *(uint16_t*)(data + y * row_pitch + x * 2);
+                int8_t *pixel = (int8_t*)&converted_data[(y * width + x) * 3];
+                /* Maps 5 bit G and B signed value range to 8 bit
+                 * signed values. R is probably unsigned.
+                 */
+                rgb655 ^= (1 << 9) | (1 << 4);
+                pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F;
+                pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80;
+                pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80;
+            }
+        }
+        return converted_data;
+    } else {
+        return NULL;
+    }
+}
+
+/* Upload the mip chain in texture_data to the texture bound to gl_target.
+ * Linear (rectangle) data is uploaded with the guest pitch, swizzled data
+ * is unswizzled first, and formats GL cannot read directly go through
+ * convert_texture_data().  The caller must have bound the texture. */
+static void upload_gl_texture(GLenum gl_target,
+                              const TextureShape s,
+                              const uint8_t *texture_data,
+                              const uint8_t *palette_data)
+{
+    ColorFormatInfo f = kelvin_color_format_map[s.color_format];
+
+    switch(gl_target) {
+    case GL_TEXTURE_1D:
+        assert(false);
+        break;
+    case GL_TEXTURE_RECTANGLE: {
+        /* Can't handle strides unaligned to pixels */
+        assert(s.pitch % f.bytes_per_pixel == 0);
+
+        uint8_t *converted = convert_texture_data(s, texture_data,
+                                                  palette_data,
+                                                  s.width, s.height, 1,
+                                                  s.pitch, 0);
+
+        /* Converted data is tightly packed (row length == width); only the
+         * raw guest data is laid out with the guest pitch. */
+        glPixelStorei(GL_UNPACK_ROW_LENGTH,
+                      converted ? 0 : s.pitch / f.bytes_per_pixel);
+        glTexImage2D(gl_target, 0, f.gl_internal_format,
+                     s.width, s.height, 0,
+                     f.gl_format, f.gl_type,
+                     converted ? converted : texture_data);
+
+        g_free(converted); /* g_free(NULL) is a no-op */
+
+        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+        break;
+    }
+    case GL_TEXTURE_2D:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: {
+        unsigned int width = s.width, height = s.height;
+
+        int level;
+        for (level = 0; level < s.levels; level++) {
+            if (f.gl_format == 0) { /* compressed */
+
+                width = MAX(width, 4); height = MAX(height, 4);
+
+                unsigned int block_size;
+                if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
+                    block_size = 8;
+                } else {
+                    block_size = 16;
+                }
+
+                glCompressedTexImage2D(gl_target, level, f.gl_internal_format,
+                                       width, height, 0,
+                                       width/4 * height/4 * block_size,
+                                       texture_data);
+
+                texture_data += width/4 * height/4 * block_size;
+            } else {
+
+                width = MAX(width, 1); height = MAX(height, 1);
+
+                unsigned int pitch = width * f.bytes_per_pixel;
+                uint8_t *unswizzled = g_malloc(height * pitch);
+                unswizzle_rect(texture_data, width, height,
+                               unswizzled, pitch, f.bytes_per_pixel);
+
+                uint8_t *converted = convert_texture_data(s, unswizzled,
+                                                          palette_data,
+                                                          width, height, 1,
+                                                          pitch, 0);
+
+                glTexImage2D(gl_target, level, f.gl_internal_format,
+                             width, height, 0,
+                             f.gl_format, f.gl_type,
+                             converted ? converted : unswizzled);
+
+                g_free(converted);
+                g_free(unswizzled);
+
+                texture_data += width * height * f.bytes_per_pixel;
+            }
+
+            width /= 2;
+            height /= 2;
+        }
+
+        break;
+    }
+    case GL_TEXTURE_3D: {
+        unsigned int width = s.width, height = s.height, depth = s.depth;
+
+        assert(f.gl_format != 0); /* FIXME: compressed not supported yet */
+        assert(f.linear == false);
+
+        int level;
+        for (level = 0; level < s.levels; level++) {
+            /* Clamp like the 2D path so small levels never reach size 0. */
+            width = MAX(width, 1); height = MAX(height, 1);
+            depth = MAX(depth, 1);
+
+            unsigned int row_pitch = width * f.bytes_per_pixel;
+            unsigned int slice_pitch = row_pitch * height;
+            uint8_t *unswizzled = g_malloc(slice_pitch * depth);
+            unswizzle_box(texture_data, width, height, depth, unswizzled,
+                           row_pitch, slice_pitch, f.bytes_per_pixel);
+
+            uint8_t *converted = convert_texture_data(s, unswizzled,
+                                                      palette_data,
+                                                      width, height, depth,
+                                                      row_pitch, slice_pitch);
+
+            glTexImage3D(gl_target, level, f.gl_internal_format,
+                         width, height, depth, 0,
+                         f.gl_format, f.gl_type,
+                         converted ? converted : unswizzled);
+
+            g_free(converted);
+            g_free(unswizzled);
+
+            texture_data += width * height * depth * f.bytes_per_pixel;
+            width /= 2;
+            height /= 2;
+            depth /= 2;
+        }
+        break;
+    }
+    default:
+        assert(false);
+        break;
+    }
+}
+
+static TextureBinding* generate_texture(const TextureShape s,
+                                        const uint8_t *texture_data,
+                                        const uint8_t *palette_data)
+{
+    ColorFormatInfo f = kelvin_color_format_map[s.color_format];
+    /* Builds a GL texture for shape s and returns a g_malloc'd binding with refcnt 1. */
+    /* Create a new opengl texture */
+    GLuint gl_texture;
+    glGenTextures(1, &gl_texture);
+    /* Pick the GL target: cube map, rectangle (linear formats) or 1D/2D/3D. */
+    GLenum gl_target;
+    if (s.cubemap) {
+        assert(f.linear == false);
+        assert(s.dimensionality == 2);
+        gl_target = GL_TEXTURE_CUBE_MAP;
+    } else {
+        if (f.linear) {
+            /* linear textures use unnormalised texcoords.
+             * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but
+             * does not allow repeat and mirror wrap modes.
+             *  (or mipmapping, but xbox d3d says 'Non swizzled and non
+             *   compressed textures cannot be mip mapped.')
+             * Not sure if that'll be an issue. */
+
+            /* FIXME: GLSL 330 provides us with textureSize()! Use that? */
+            gl_target = GL_TEXTURE_RECTANGLE;
+            assert(s.dimensionality == 2);
+        } else {
+            switch(s.dimensionality) {
+            case 1: gl_target = GL_TEXTURE_1D; break;
+            case 2: gl_target = GL_TEXTURE_2D; break;
+            case 3: gl_target = GL_TEXTURE_3D; break;
+            default: /* gl_target stays unset here if asserts are compiled out */
+                assert(false);
+                break;
+            }
+        }
+    }
+
+    glBindTexture(gl_target, gl_texture);
+
+    NV2A_GL_DLABEL(GL_TEXTURE, gl_texture,
+                   "format: 0x%02X%s, %d dimensions%s, width: %d, height: %d, depth: %d",
+                   s.color_format, f.linear ? "" : " (SZ)",
+                   s.dimensionality, s.cubemap ? " (Cubemap)" : "",
+                   s.width, s.height, s.depth);
+
+    if (gl_target == GL_TEXTURE_CUBE_MAP) {
+        /* length: size in bytes of one face's mip chain, used to step between the six faces. */
+        size_t length = 0;
+        unsigned int w = s.width, h = s.height;
+        int level;
+        for (level = 0; level < s.levels; level++) {
+            /* FIXME: This is wrong for compressed textures and textures with 1x? non-square mipmaps */
+            length += w * h * f.bytes_per_pixel;
+            w /= 2;
+            h /= 2;
+        }
+
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X,
+                          s, texture_data + 0 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
+                          s, texture_data + 1 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
+                          s, texture_data + 2 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
+                          s, texture_data + 3 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
+                          s, texture_data + 4 * length, palette_data);
+        upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z,
+                          s, texture_data + 5 * length, palette_data);
+    } else {
+        upload_gl_texture(gl_target, s, texture_data, palette_data);
+    }
+
+    /* Linear textures don't support mipmapping */
+    if (!f.linear) {
+        glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL,
+            s.min_mipmap_level);
+        glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL,
+            s.levels - 1);
+    }
+    /* An all-zero swizzle mask means the format table entry did not specify one. */
+    if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0
+        || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) {
+        glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA,
+                         (const GLint *)f.gl_swizzle_mask);
+    }
+
+    TextureBinding* ret = g_malloc(sizeof(TextureBinding));
+    ret->gl_target = gl_target;
+    ret->gl_texture = gl_texture;
+    ret->refcnt = 1;
+    return ret;
+}
+
+/* Texture LRU cache callbacks. Hash: fnv_hash of the shape bytes ^ data_hash */
+static guint texture_key_hash(gconstpointer key)
+{
+    const TextureKey *tk = key;
+    const uint8_t *shape_bytes = (const uint8_t *)&tk->state;
+    uint64_t shape_hash = fnv_hash(shape_bytes, sizeof(TextureShape));
+    return shape_hash ^ tk->data_hash;
+}
+static gboolean texture_key_equal(gconstpointer a, gconstpointer b)
+{
+    const TextureKey *lhs = a, *rhs = b; /* equal: same hash + shape bytes */
+    return lhs->data_hash == rhs->data_hash
+            && !memcmp(&lhs->state, &rhs->state, sizeof(TextureShape));
+}
+static gpointer texture_key_retrieve(gpointer key, gpointer user_data)
+{
+    /* Retrieve callback of the texture LRU cache: generates the binding
+     * described by the key. user_data is not used. */
+    const TextureKey *tk = key;
+
+    return generate_texture(tk->state, tk->texture_data, tk->palette_data);
+}
+static void texture_key_destroy(gpointer data)
+{
+    g_free(data); /* key destroy callback: releases the key's heap block */
+}
+static void texture_binding_destroy(gpointer data)
+{
+    /* Drop one reference; releasing the last one deletes the GL texture. */
+    TextureBinding *tb = data;
+
+    assert(tb->refcnt > 0);
+    if (--tb->refcnt != 0) { return; }
+    glDeleteTextures(1, &tb->gl_texture);
+    g_free(tb);
+}
+
+static void pgraph_bind_textures(NV2AState *d)
+{
+    /* For every texture stage, decode its registers and bind either nothing
+     * (stage disabled), the current binding (stage not dirty) or a new one. */
+    int i;
+    PGRAPHState *pg = &d->pgraph;
+
+    NV2A_GL_DGROUP_BEGIN("%s", __func__);
+    for (i=0; i<NV2A_MAX_TEXTURES; i++) {
+        uint32_t ctl_0 = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4];
+        uint32_t ctl_1 = pg->regs[NV_PGRAPH_TEXCTL1_0 + i*4];
+        uint32_t fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4];
+        uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4];
+        uint32_t address =  pg->regs[NV_PGRAPH_TEXADDRESS0 + i*4];
+        uint32_t palette =  pg->regs[NV_PGRAPH_TEXPALETTE0 + i*4];
+
+        bool enabled = GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE);
+        unsigned int min_mipmap_level =
+            GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP);
+        unsigned int max_mipmap_level =
+            GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP);
+
+        unsigned int pitch =
+            GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH);
+
+        unsigned int dma_select =
+            GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA);
+        bool cubemap =
+            GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE);
+        unsigned int dimensionality =
+            GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY);
+        unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR);
+        unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS);
+        unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U);
+        unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V);
+        unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P);
+
+        unsigned int rect_width =
+            GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4],
+                     NV_PGRAPH_TEXIMAGERECT0_WIDTH);
+        unsigned int rect_height =
+            GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4],
+                     NV_PGRAPH_TEXIMAGERECT0_HEIGHT);
+
+        unsigned int lod_bias =
+            GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS);
+        unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN);
+        unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG);
+
+        unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU);
+        unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV);
+        unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP);
+
+        unsigned int border_source = GET_MASK(fmt,
+                                              NV_PGRAPH_TEXFMT0_BORDER_SOURCE);
+        uint32_t border_color = pg->regs[NV_PGRAPH_BORDERCOLOR0 + i*4];
+
+        unsigned int offset = pg->regs[NV_PGRAPH_TEXOFFSET0 + i*4];
+
+        bool palette_dma_select =
+            GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA);
+        unsigned int palette_length_index =
+            GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH);
+        unsigned int palette_offset =
+            palette & NV_PGRAPH_TEXPALETTE0_OFFSET;
+
+        unsigned int palette_length = 0;
+        switch (palette_length_index) {
+        case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break;
+        case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break;
+        case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break;
+        case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break;
+        default: assert(false); break;
+        }
+
+        /* Check for unsupported features (also runs for disabled stages) */
+        assert(!(filter & NV_PGRAPH_TEXFILTER0_ASIGNED));
+        assert(!(filter & NV_PGRAPH_TEXFILTER0_RSIGNED));
+        assert(!(filter & NV_PGRAPH_TEXFILTER0_GSIGNED));
+        assert(!(filter & NV_PGRAPH_TEXFILTER0_BSIGNED));
+
+        glActiveTexture(GL_TEXTURE0 + i);
+        if (!enabled) {
+            glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
+            glBindTexture(GL_TEXTURE_RECTANGLE, 0);
+            glBindTexture(GL_TEXTURE_1D, 0);
+            glBindTexture(GL_TEXTURE_2D, 0);
+            glBindTexture(GL_TEXTURE_3D, 0);
+            continue;
+        }
+
+        if (!pg->texture_dirty[i] && pg->texture_binding[i]) {
+            glBindTexture(pg->texture_binding[i]->gl_target,
+                          pg->texture_binding[i]->gl_texture);
+            continue;
+        }
+
+        NV2A_DPRINTF(" texture %d is format 0x%x, off 0x%x (r %d, %d or %d, %d, %d; %d%s),"
+                        " filter %x %x, levels %d-%d %d bias %d\n",
+                     i, color_format, offset,
+                     rect_width, rect_height,
+                     1 << log_width, 1 << log_height, 1 << log_depth,
+                     pitch,
+                     cubemap ? "; cubemap" : "",
+                     min_filter, mag_filter,
+                     min_mipmap_level, max_mipmap_level, levels,
+                     lod_bias);
+
+        assert(color_format < ARRAY_SIZE(kelvin_color_format_map));
+        ColorFormatInfo f = kelvin_color_format_map[color_format];
+        if (f.bytes_per_pixel == 0) {
+            fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n",
+                    color_format);
+            abort();
+        }
+
+        unsigned int width, height, depth;
+        if (f.linear) {
+            assert(dimensionality == 2);
+            width = rect_width;
+            height = rect_height;
+            depth = 1;
+        } else {
+            width = 1 << log_width;
+            height = 1 << log_height;
+            depth = 1 << log_depth;
+
+            /* FIXME: What about 3D mipmaps? */
+            levels = MIN(levels, max_mipmap_level + 1);
+            if (f.gl_format != 0) {
+                /* Discard mipmap levels that would be smaller than 1x1.
+                 * FIXME: Is this actually needed?
+                 *
+                 * >> Level 0: 32 x 4
+                 *    Level 1: 16 x 2
+                 *    Level 2: 8 x 1
+                 *    Level 3: 4 x 1
+                 *    Level 4: 2 x 1
+                 *    Level 5: 1 x 1
+                 */
+                levels = MIN(levels, MAX(log_width, log_height) + 1);
+            } else {
+                /* OpenGL requires DXT textures to always have a width and
+                 * height a multiple of 4. The Xbox and DirectX handles DXT
+                 * textures smaller than 4 by padding the rest of the block.
+                 *
+                 * See:
+                 * https://msdn.microsoft.com/en-us/library/windows/desktop/bb204843(v=vs.85).aspx
+                 * https://msdn.microsoft.com/en-us/library/windows/desktop/bb694531%28v=vs.85%29.aspx#Virtual_Size
+                 *
+                 * Work around this for now by discarding mipmap levels that
+                 * would result in too-small textures. A correct solution
+                 * will be to decompress these levels manually, or add texture
+                 * sampling logic.
+                 *
+                 * >> Level 0: 64 x 8
+                 *    Level 1: 32 x 4
+                 *    Level 2: 16 x 2 << Ignored
+                 * >> Level 0: 16 x 16
+                 *    Level 1: 8 x 8
+                 *    Level 2: 4 x 4 << OK!
+                 */
+                if (log_width < 2 || log_height < 2) {
+                    /* Base level is smaller than 4x4... */
+                    levels = 1;
+                } else {
+                    levels = MIN(levels, MIN(log_width, log_height) - 1);
+                }
+            }
+            assert(levels > 0);
+        }
+
+        hwaddr dma_len;
+        uint8_t *texture_data;
+        if (dma_select) {
+            texture_data = nv_dma_map(d, pg->dma_b, &dma_len);
+        } else {
+            texture_data = nv_dma_map(d, pg->dma_a, &dma_len);
+        }
+        assert(offset < dma_len);
+        texture_data += offset;
+
+        hwaddr palette_dma_len;
+        uint8_t *palette_data;
+        if (palette_dma_select) {
+            palette_data = nv_dma_map(d, pg->dma_b, &palette_dma_len);
+        } else {
+            palette_data = nv_dma_map(d, pg->dma_a, &palette_dma_len);
+        }
+        assert(palette_offset < palette_dma_len);
+        palette_data += palette_offset;
+
+        NV2A_DPRINTF(" - 0x%tx\n", texture_data - d->vram_ptr);
+        /* NOTE(review): offset + length is never checked against dma_len. */
+        size_t length = 0;
+        if (f.linear) {
+            assert(cubemap == false);
+            assert(dimensionality == 2);
+            length = height * pitch;
+        } else {
+            if (dimensionality >= 2) {
+                unsigned int w = width, h = height;
+                int level;
+                if (f.gl_format != 0) {
+                    for (level = 0; level < levels; level++) {
+                        w = MAX(w, 1); h = MAX(h, 1);
+                        length += w * h * f.bytes_per_pixel;
+                        w /= 2;
+                        h /= 2;
+                    }
+                } else {
+                    /* Compressed textures are a bit different */
+                    unsigned int block_size;
+                    if (f.gl_internal_format ==
+                            GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
+                        block_size = 8;
+                    } else {
+                        block_size = 16;
+                    }
+
+                    for (level = 0; level < levels; level++) {
+                        w = MAX(w, 4); h = MAX(h, 4);
+                        length += w/4 * h/4 * block_size;
+                        w /= 2; h /= 2;
+                    }
+                }
+                if (cubemap) {
+                    assert(dimensionality == 2);
+                    length *= 6;
+                }
+                if (dimensionality >= 3) {
+                    length *= depth;
+                }
+            }
+        }
+        /* The shape is hashed and compared as raw bytes: zero padding too. */
+        TextureShape state;
+        memset(&state, 0, sizeof(state));
+        state.cubemap = cubemap;
+        state.dimensionality = dimensionality;
+        state.color_format = color_format;
+        state.levels = levels;
+        state.width = width;
+        state.height = height;
+        state.depth = depth;
+        state.min_mipmap_level = min_mipmap_level;
+        state.max_mipmap_level = max_mipmap_level;
+        state.pitch = pitch;
+
+#ifdef USE_TEXTURE_CACHE
+        /* Build the key in zeroed heap memory and copy the shape bytewise so
+         * texture_key_hash/texture_key_equal only ever see defined bytes. */
+        TextureKey *cache_key = g_malloc0(sizeof(TextureKey));
+        memcpy(&cache_key->state, &state, sizeof(state));
+        cache_key->data_hash = fast_hash(texture_data, length, 5003)
+                                ^ fnv_hash(palette_data, palette_length);
+        cache_key->texture_data = texture_data;
+        cache_key->palette_data = palette_data;
+
+        /* generate_texture starts refcnt at 1; add a reference for this
+         * stage's slot (dropped again via texture_binding_destroy). */
+        TextureBinding *binding = g_lru_cache_get(pg->texture_cache, cache_key);
+        assert(binding);
+        binding->refcnt++;
+#else
+        TextureBinding *binding = generate_texture(state,
+                                                   texture_data, palette_data);
+#endif
+
+        glBindTexture(binding->gl_target, binding->gl_texture);
+
+
+        if (f.linear) {
+            /* sometimes games try to set mipmap min filters on linear textures.
+             * this could indicate a bug... */
+            switch (min_filter) {
+            case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD:
+            case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD:
+                min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0;
+                break;
+            case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD:
+            case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD:
+                min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0;
+                break;
+            }
+        }
+
+        glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER,
+            pgraph_texture_min_filter_map[min_filter]);
+        glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER,
+            pgraph_texture_mag_filter_map[mag_filter]);
+
+        /* Texture wrapping */
+        assert(addru < ARRAY_SIZE(pgraph_texture_addr_map));
+        glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S,
+            pgraph_texture_addr_map[addru]);
+        if (dimensionality > 1) {
+            assert(addrv < ARRAY_SIZE(pgraph_texture_addr_map));
+            glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T,
+                pgraph_texture_addr_map[addrv]);
+        }
+        if (dimensionality > 2) {
+            assert(addrp < ARRAY_SIZE(pgraph_texture_addr_map));
+            glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R,
+                pgraph_texture_addr_map[addrp]);
+        }
+
+        /* FIXME: Only upload if necessary? [s, t or r = GL_CLAMP_TO_BORDER] */
+        if (border_source == NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) {
+            GLfloat gl_border_color[] = {
+                /* FIXME: Color channels might be wrong order */
+                ((border_color >> 16) & 0xFF) / 255.0f, /* red */
+                ((border_color >> 8) & 0xFF) / 255.0f,  /* green */
+                (border_color & 0xFF) / 255.0f,         /* blue */
+                ((border_color >> 24) & 0xFF) / 255.0f  /* alpha */
+            };
+            glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR,
+                gl_border_color);
+        }
+
+        if (pg->texture_binding[i]) {
+            texture_binding_destroy(pg->texture_binding[i]);
+        }
+        pg->texture_binding[i] = binding;
+        pg->texture_dirty[i] = false;
+    }
+    NV2A_GL_DGROUP_END();
+}
+
+static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg,
+                                              unsigned int *width,
+                                              unsigned int *height)
+{
+    /* Scale the dimensions by the surface's anti-aliasing mode: CENTER_1
+     * leaves them alone, CENTER_CORNER_2 doubles the width, SQUARE_OFFSET_4
+     * doubles both. NULL pointers are skipped; unknown modes assert. */
+    const unsigned int aa = pg->surface_shape.anti_aliasing;
+
+    if (aa == NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4) {
+        if (width) { *width *= 2; }
+        if (height) { *height *= 2; }
+    } else if (aa == NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2) {
+        if (width) { *width *= 2; }
+    /* CENTER_1 needs no scaling; anything else is unexpected. */
+    } else if (aa != NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1) {
+        assert(false);
+    }
+}
+
+static void pgraph_get_surface_dimensions(PGRAPHState *pg,
+                                          unsigned int *width,
+                                          unsigned int *height)
+{
+    /* Swizzled surfaces are sized by log2 fields, all others by clip size. */
+    if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) {
+        *width = pg->surface_shape.clip_width;
+        *height = pg->surface_shape.clip_height;
+        return;
+    }
+    *width = 1 << pg->surface_shape.log_width;
+    *height = 1 << pg->surface_shape.log_height;
+}
+
+/* hash and equality for shader cache hash table (raw ShaderState bytes) */
+static guint shader_hash(gconstpointer key)
+{
+    return fnv_hash(key, sizeof(ShaderState)); /* any padding is hashed too */
+}
+static gboolean shader_equal(gconstpointer a, gconstpointer b)
+{
+    /* Two shader states are equal only if every byte of them matches. */
+    return !memcmp(a, b, sizeof(ShaderState));
+}
+
+static void pgraph_shader_update_constants(PGRAPHState *pg,
+                                           ShaderBinding *binding,
+                                           bool binding_changed,
+                                           bool vertex_program,
+                                           bool fixed_function)
+{
+    int i, j;
+    /* Uploads binding's uniforms from pg; vertex_program is not used here. */
+    /* update combiner constants */
+    for (i = 0; i<= 8; i++) { /* 0-7: combiner stages, 8: final combiner */
+        uint32_t constant[2];
+        if (i == 8) {
+            /* final combiner */
+            constant[0] = pg->regs[NV_PGRAPH_SPECFOGFACTOR0];
+            constant[1] = pg->regs[NV_PGRAPH_SPECFOGFACTOR1];
+        } else {
+            constant[0] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4];
+            constant[1] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4];
+        }
+
+        for (j = 0; j < 2; j++) {
+            GLint loc = binding->psh_constant_loc[i][j];
+            if (loc != -1) {
+                float value[4]; /* bits 16-23, 8-15, 0-7, 24-31 over 255 */
+                value[0] = (float) ((constant[j] >> 16) & 0xFF) / 255.0f;
+                value[1] = (float) ((constant[j] >> 8) & 0xFF) / 255.0f;
+                value[2] = (float) (constant[j] & 0xFF) / 255.0f;
+                value[3] = (float) ((constant[j] >> 24) & 0xFF) / 255.0f;
+
+                glUniform4fv(loc, 1, value);
+            }
+        }
+    }
+    if (binding->alpha_ref_loc != -1) {
+        float alpha_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                                   NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0;
+        glUniform1f(binding->alpha_ref_loc, alpha_ref);
+    }
+
+
+    /* For each texture stage */
+    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        /* bump_env_matrix and the BUMP* registers are indexed by i - 1 */
+        GLint loc;
+
+        /* Bump luminance only during stages 1 - 3 */
+        if (i > 0) {
+            loc = binding->bump_mat_loc[i];
+            if (loc != -1) {
+                glUniformMatrix2fv(loc, 1, GL_FALSE, pg->bump_env_matrix[i - 1]);
+            }
+            loc = binding->bump_scale_loc[i];
+            if (loc != -1) {
+                glUniform1f(loc, *(float*)&pg->regs[
+                                NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4]);
+            }
+            loc = binding->bump_offset_loc[i];
+            if (loc != -1) {
+                glUniform1f(loc, *(float*)&pg->regs[
+                            NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4]);
+            }
+        }
+
+    }
+
+    if (binding->fog_color_loc != -1) {
+        uint32_t fog_color = pg->regs[NV_PGRAPH_FOGCOLOR];
+        glUniform4f(binding->fog_color_loc,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0,
+                    GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0);
+    }
+    if (binding->fog_param_loc[0] != -1) {
+        glUniform1f(binding->fog_param_loc[0],
+                    *(float*)&pg->regs[NV_PGRAPH_FOGPARAM0]);
+    }
+    if (binding->fog_param_loc[1] != -1) {
+        glUniform1f(binding->fog_param_loc[1],
+                    *(float*)&pg->regs[NV_PGRAPH_FOGPARAM1]);
+    }
+
+    /* The z clip registers are reinterpreted as floats via a pointer cast. */
+    float zclip_max = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMAX];
+    float zclip_min = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMIN];
+
+    if (fixed_function) {
+        /* update lighting constants */
+        struct {
+            uint32_t* v;
+            bool* dirty;
+            GLint* locs;
+            size_t len;
+        } lighting_arrays[] = {
+            {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT},
+            {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT},
+            {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT},
+        };
+
+        for (i=0; i<ARRAY_SIZE(lighting_arrays); i++) {
+            uint32_t *lighting_v = lighting_arrays[i].v;
+            bool *lighting_dirty = lighting_arrays[i].dirty;
+            GLint *lighting_locs = lighting_arrays[i].locs;
+            size_t lighting_len = lighting_arrays[i].len;
+            for (j=0; j<lighting_len; j++) {
+                if (!lighting_dirty[j] && !binding_changed) continue;
+                GLint loc = lighting_locs[j];
+                if (loc != -1) {
+                    glUniform4fv(loc, 1, (const GLfloat*)&lighting_v[j*4]);
+                }
+                lighting_dirty[j] = false;
+            }
+        }
+
+        /* Per-light vectors have no dirty flag here: uploaded every call. */
+        for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+            GLint loc;
+            loc = binding->light_infinite_half_vector_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]);
+            }
+            loc = binding->light_infinite_direction_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_infinite_direction[i]);
+            }
+
+            loc = binding->light_local_position_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_local_position[i]);
+            }
+            loc = binding->light_local_attenuation_loc[i];
+            if (loc != -1) {
+                glUniform3fv(loc, 1, pg->light_local_attenuation[i]);
+            }
+        }
+
+        /* estimate the viewport by assuming it matches the surface ... */
+        //FIXME: Get surface dimensions?
+        float m11 = 0.5 * pg->surface_shape.clip_width;
+        float m22 = -0.5 * pg->surface_shape.clip_height;
+        float m33 = zclip_max - zclip_min;
+        //float m41 = m11;
+        //float m42 = -m22;
+        float m43 = zclip_min;
+        //float m44 = 1.0;
+
+        if (m33 == 0.0) {
+            m33 = 1.0;
+        }
+        float invViewport[16] = {
+            1.0/m11, 0, 0, 0,
+            0, 1.0/m22, 0, 0,
+            0, 0, 1.0/m33, 0,
+            -1.0, 1.0, -m43/m33, 1.0
+        };
+
+        if (binding->inv_viewport_loc != -1) {
+            glUniformMatrix4fv(binding->inv_viewport_loc,
+                               1, GL_FALSE, &invViewport[0]);
+        }
+
+    }
+
+    /* update vertex program constants */
+    for (i=0; i<NV2A_VERTEXSHADER_CONSTANTS; i++) {
+        if (!pg->vsh_constants_dirty[i] && !binding_changed) continue;
+
+        GLint loc = binding->vsh_constant_loc[i];
+        //assert(loc != -1);
+        if (loc != -1) {
+            glUniform4fv(loc, 1, (const GLfloat*)pg->vsh_constants[i]);
+        }
+        pg->vsh_constants_dirty[i] = false;
+    }
+
+    if (binding->surface_size_loc != -1) {
+        glUniform2f(binding->surface_size_loc, pg->surface_shape.clip_width,
+                    pg->surface_shape.clip_height);
+    }
+
+    if (binding->clip_range_loc != -1) {
+        glUniform2f(binding->clip_range_loc, zclip_min, zclip_max);
+    }
+
+}
+
+static void pgraph_bind_shaders(PGRAPHState *pg)
+{
+    /* Collect the pipeline state that selects a shader into a ShaderState,
+     * look it up in pg->shader_cache (generating and inserting the program on
+     * a miss), make the program current and refresh its uniforms. */
+    int i, j;
+
+    bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
+                                   NV_PGRAPH_CSV0_D_MODE) == 2;
+    bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
+                                   NV_PGRAPH_CSV0_D_MODE) == 0;
+    int program_start = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
+                                 NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START);
+
+    NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__,
+                         vertex_program ? "yes" : "no",
+                         fixed_function ? "yes" : "no");
+
+    ShaderBinding* old_binding = pg->shader_binding;
+
+    /* The state is hashed and compared as raw bytes (shader_hash and
+     * shader_equal), so zero the whole object, padding included: a designated
+     * initializer does not guarantee zeroed padding and identical states could
+     * then miss the cache. This also clears program_length and program_data. */
+    ShaderState state;
+    memset(&state, 0, sizeof(state));
+
+    /* register combiner stuff */
+    state.psh.combiner_control = pg->regs[NV_PGRAPH_COMBINECTL];
+    state.psh.shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG];
+    state.psh.other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL];
+    state.psh.final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0];
+    state.psh.final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1];
+
+    state.psh.alpha_test = pg->regs[NV_PGRAPH_CONTROL_0]
+                             & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE;
+    state.psh.alpha_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                                    NV_PGRAPH_CONTROL_0_ALPHAFUNC);
+
+    /* fixed function stuff */
+    state.skinning = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
+                              NV_PGRAPH_CSV0_D_SKIN);
+    state.lighting = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
+                              NV_PGRAPH_CSV0_C_LIGHTING);
+    state.normalization = pg->regs[NV_PGRAPH_CSV0_C]
+                            & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE;
+    state.fixed_function = fixed_function;
+
+    /* vertex program stuff */
+    state.vertex_program = vertex_program;
+    state.z_perspective = pg->regs[NV_PGRAPH_CONTROL_0]
+                            & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE;
+
+    /* geometry shader stuff */
+    state.primitive_mode = pg->primitive_mode;
+    state.polygon_front_mode = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                                        NV_PGRAPH_SETUPRASTER_FRONTFACEMODE);
+    state.polygon_back_mode = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                                       NV_PGRAPH_SETUPRASTER_BACKFACEMODE);
+
+    if (vertex_program) {
+        // copy in vertex program tokens
+        for (i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; i++) {
+            uint32_t *cur_token = (uint32_t*)&pg->program_data[i];
+            memcpy(&state.program_data[state.program_length],
+                   cur_token,
+                   VSH_TOKEN_SIZE * sizeof(uint32_t));
+            state.program_length++;
+
+            if (vsh_get_field(cur_token, FLD_FINAL)) {
+                break;
+            }
+        }
+    }
+
+    /* Texgen */
+    for (i = 0; i < 4; i++) {
+        unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B;
+        for (j = 0; j < 4; j++) {
+            unsigned int masks[] = {
+                (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S,
+                (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T,
+                (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R,
+                (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q
+            };
+            state.texgen[i][j] = GET_MASK(pg->regs[reg], masks[j]);
+        }
+    }
+
+    /* Fog */
+    state.fog_enable = pg->regs[NV_PGRAPH_CONTROL_3]
+                           & NV_PGRAPH_CONTROL_3_FOGENABLE;
+    if (state.fog_enable) {
+        /*FIXME: Use CSV0_D? */
+        state.fog_mode = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3],
+                                  NV_PGRAPH_CONTROL_3_FOG_MODE);
+        state.foggen = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
+                                NV_PGRAPH_CSV0_D_FOGGENMODE);
+    } else {
+        /* FIXME: Do we still pass the fogmode? */
+        state.fog_mode = 0;
+        state.foggen = 0;
+    }
+
+    /* Texture matrices */
+    for (i = 0; i < 4; i++) {
+        state.texture_matrix_enable[i] = pg->texture_matrix_enable[i];
+    }
+
+    /* Lighting */
+    if (state.lighting) {
+        for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+            state.light[i] = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D],
+                                      NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2));
+        }
+    }
+
+    for (i = 0; i < 8; i++) {
+        state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4];
+        state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4];
+        state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4];
+        state.psh.alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4];
+    }
+
+    for (i = 0; i < 4; i++) {
+        bool enabled = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4]
+                         & NV_PGRAPH_TEXCTL0_0_ENABLE;
+        unsigned int color_format = GET_MASK(pg->regs[NV_PGRAPH_TEXFMT0 + i*4],
+                                             NV_PGRAPH_TEXFMT0_COLOR);
+
+        if (enabled) {
+            /* Bound the index like pgraph_bind_textures; rect_tex starts 0. */
+            assert(color_format < ARRAY_SIZE(kelvin_color_format_map));
+            if (kelvin_color_format_map[color_format].linear) {
+                state.psh.rect_tex[i] = true;
+            }
+        }
+
+        for (j = 0; j < 4; j++) {
+            state.psh.compare_mode[i][j] =
+                (pg->regs[NV_PGRAPH_SHADERCLIPMODE] >> (4 * i + j)) & 1;
+        }
+        state.psh.alphakill[i] = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4]
+                               & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN;
+    }
+
+    ShaderBinding* cached_shader = g_hash_table_lookup(pg->shader_cache, &state);
+    if (cached_shader) {
+        pg->shader_binding = cached_shader;
+    } else {
+        pg->shader_binding = generate_shaders(state);
+
+        /* cache it */
+        ShaderState *cache_state = g_malloc(sizeof(*cache_state));
+        memcpy(cache_state, &state, sizeof(*cache_state));
+        g_hash_table_insert(pg->shader_cache, cache_state,
+                            (gpointer)pg->shader_binding);
+    }
+
+    bool binding_changed = (pg->shader_binding != old_binding);
+
+    glUseProgram(pg->shader_binding->gl_program);
+
+    pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed,
+                                   vertex_program, fixed_function);
+
+    NV2A_GL_DGROUP_END();
+}
+
+static bool pgraph_framebuffer_dirty(PGRAPHState *pg)
+{
+    /* Dirty means: the shape differs from last_surface_shape and at least
+     * one of the color/zeta formats is non-zero. */
+    if (memcmp(&pg->surface_shape, &pg->last_surface_shape,
+               sizeof(SurfaceShape)) == 0) {
+        return false;
+    }
+    return pg->surface_shape.color_format || pg->surface_shape.zeta_format;
+}
+
+static bool pgraph_color_write_enabled(PGRAPHState *pg)
+{
+    uint32_t ctl = pg->regs[NV_PGRAPH_CONTROL_0]; /* any ARGB write enable? */
+    return (ctl & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE)
+        || (ctl & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE)
+        || (ctl & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE)
+        || (ctl & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE);
+}
+
+static bool pgraph_zeta_write_enabled(PGRAPHState *pg)
+{
+    return pg->regs[NV_PGRAPH_CONTROL_0] & ( /* depth or stencil writes */
+        NV_PGRAPH_CONTROL_0_ZWRITEENABLE
+        | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE);
+}
+
+static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta)
+{
+    NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n",
+                 color, zeta,
+                 pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg));
+    /* Flag a requested surface draw-dirty only while its write-enable bits
+     * are set. The flags are OR-ed in, so this never clears them.
+     * FIXME: Does this apply to CLEARs too? */
+    pg->surface_color.draw_dirty |= color && pgraph_color_write_enabled(pg);
+    pg->surface_zeta.draw_dirty |= zeta && pgraph_zeta_write_enabled(pg);
+}
+
+static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) {
+    PGRAPHState *pg = &d->pgraph;
+    /* Sync the color or zeta surface between VRAM and its GL texture. */
+    unsigned int width, height;
+    pgraph_get_surface_dimensions(pg, &width, &height);
+    pgraph_apply_anti_aliasing_factor(pg, &width, &height);
+
+    Surface *surface;
+    hwaddr dma_address;
+    GLuint *gl_buffer;
+    unsigned int bytes_per_pixel;
+    GLenum gl_internal_format, gl_format, gl_type, gl_attachment;
+    /* Select the surface and the GL formats describing its pixels. */
+    if (color) {
+        surface = &pg->surface_color;
+        dma_address = pg->dma_color;
+        gl_buffer = &pg->gl_color_buffer;
+
+        assert(pg->surface_shape.color_format != 0);
+        assert(pg->surface_shape.color_format
+                < ARRAY_SIZE(kelvin_surface_color_format_map));
+        SurfaceColorFormatInfo f =
+            kelvin_surface_color_format_map[pg->surface_shape.color_format];
+        if (f.bytes_per_pixel == 0) {
+            fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n",
+                    pg->surface_shape.color_format);
+            abort();
+        }
+
+        bytes_per_pixel = f.bytes_per_pixel;
+        gl_internal_format = f.gl_internal_format;
+        gl_format = f.gl_format;
+        gl_type = f.gl_type;
+        gl_attachment = GL_COLOR_ATTACHMENT0;
+
+    } else {
+        surface = &pg->surface_zeta;
+        dma_address = pg->dma_zeta;
+        gl_buffer = &pg->gl_zeta_buffer;
+
+        assert(pg->surface_shape.zeta_format != 0);
+        switch (pg->surface_shape.zeta_format) {
+        case NV097_SET_SURFACE_FORMAT_ZETA_Z16:
+            bytes_per_pixel = 2;
+            gl_format = GL_DEPTH_COMPONENT;
+            gl_attachment = GL_DEPTH_ATTACHMENT;
+            if (pg->surface_shape.z_format) {
+                gl_type = GL_HALF_FLOAT;
+                gl_internal_format = GL_DEPTH_COMPONENT32F;
+            } else {
+                gl_type = GL_UNSIGNED_SHORT;
+                gl_internal_format = GL_DEPTH_COMPONENT16;
+            }
+            break;
+        case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8:
+            bytes_per_pixel = 4;
+            gl_format = GL_DEPTH_STENCIL;
+            gl_attachment = GL_DEPTH_STENCIL_ATTACHMENT;
+            if (pg->surface_shape.z_format) {
+                assert(false);
+                gl_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
+                gl_internal_format = GL_DEPTH32F_STENCIL8;
+            } else {
+                gl_type = GL_UNSIGNED_INT_24_8;
+                gl_internal_format = GL_DEPTH24_STENCIL8;
+            }
+            break;
+        default:
+            assert(false);
+            break;
+        }
+    }
+
+
+    DMAObject dma = nv_dma_load(d, dma_address);
+    /* There's a bunch of bugs that could cause us to hit this function
+     * at the wrong time and get an invalid dma object.
+     * Check that it's sane. */
+    assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS);
+
+    assert(dma.address + surface->offset != 0);
+    assert(surface->offset <= dma.limit);
+    assert(surface->offset + surface->pitch * height <= dma.limit + 1);
+
+    hwaddr data_len;
+    uint8_t *data = nv_dma_map(d, dma_address, &data_len);
+
+    /* TODO */
+    // assert(pg->surface_clip_x == 0 && pg->surface_clip_y == 0);
+
+    bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE);
+    /* Swizzled surfaces are (un)swizzled through a linear scratch buffer. */
+    uint8_t *buf = data + surface->offset;
+    if (swizzle) {
+        buf = g_malloc(height * surface->pitch);
+    }
+    /* Color surfaces also count as dirty if their VRAM range was flagged. */
+    bool dirty = surface->buffer_dirty;
+    if (color) {
+        dirty |= memory_region_test_and_clear_dirty(d->vram,
+                                               dma.address + surface->offset,
+                                               surface->pitch * height,
+                                               DIRTY_MEMORY_NV2A);
+    }
+    if (upload && dirty) {
+        /* surface modified (or moved) by the cpu.
+         * copy it into a freshly created opengl texture */
+        assert(!surface->draw_dirty);
+
+        assert(surface->pitch % bytes_per_pixel == 0);
+
+        if (swizzle) {
+            unswizzle_rect(data + surface->offset,
+                           width, height,
+                           buf,
+                           surface->pitch,
+                           bytes_per_pixel);
+        }
+
+        if (!color) {
+            /* need to clear the depth_stencil and depth attachment for zeta */
+            glFramebufferTexture2D(GL_FRAMEBUFFER,
+                                   GL_DEPTH_ATTACHMENT,
+                                   GL_TEXTURE_2D,
+                                   0, 0);
+            glFramebufferTexture2D(GL_FRAMEBUFFER,
+                                   GL_DEPTH_STENCIL_ATTACHMENT,
+                                   GL_TEXTURE_2D,
+                                   0, 0);
+        }
+
+        glFramebufferTexture2D(GL_FRAMEBUFFER,
+                               gl_attachment,
+                               GL_TEXTURE_2D,
+                               0, 0);
+
+        if (*gl_buffer) {
+            glDeleteTextures(1, gl_buffer);
+            *gl_buffer = 0;
+        }
+
+        glGenTextures(1, gl_buffer);
+        glBindTexture(GL_TEXTURE_2D, *gl_buffer);
+
+        /* Flip rows vertically. This is VRAM so we can't do it in place! */
+        uint8_t *flipped_buf = g_malloc(width * height * bytes_per_pixel);
+        unsigned int irow;
+        for (irow = 0; irow < height; irow++) {
+            memcpy(&flipped_buf[width * (height - irow - 1)
+                                     * bytes_per_pixel],
+                   &buf[surface->pitch * irow],
+                   width * bytes_per_pixel);
+        }
+
+        glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format,
+                     width, height, 0,
+                     gl_format, gl_type,
+                     flipped_buf);
+
+        g_free(flipped_buf);
+
+        glFramebufferTexture2D(GL_FRAMEBUFFER,
+                               gl_attachment,
+                               GL_TEXTURE_2D,
+                               *gl_buffer, 0);
+
+        assert(glCheckFramebufferStatus(GL_FRAMEBUFFER)
+            == GL_FRAMEBUFFER_COMPLETE);
+
+        if (color) {
+            pgraph_update_memory_buffer(d, dma.address + surface->offset,
+                                        surface->pitch * height, true);
+        }
+        surface->buffer_dirty = false;
+
+        /* debug: log the surface's VRAM range and 4 bytes at offset 64 */
+        uint8_t *out = data + surface->offset + 64;
+        NV2A_DPRINTF("upload_surface %s 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", "
+                      "(0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", "
+                        "%d %d, %d %d, %d) - %x %x %x %x\n",
+            color ? "color" : "zeta",
+            dma.address, dma.address + dma.limit,
+            dma.address + surface->offset,
+            dma.address + surface->offset + surface->pitch * height,
+            pg->surface_shape.clip_x, pg->surface_shape.clip_y,
+            pg->surface_shape.clip_width,
+            pg->surface_shape.clip_height,
+            surface->pitch,
+            out[0], out[1], out[2], out[3]);
+
+    }
+
+    if (!upload && surface->draw_dirty) {
+        /* drawn to since the last sync: read the opengl framebuffer back */
+
+        glo_readpixels(gl_format, gl_type,
+                       bytes_per_pixel, surface->pitch,
+                       width, height,
+                       buf);
+        assert(glGetError() == GL_NO_ERROR);
+
+        if (swizzle) {
+            swizzle_rect(buf,
+                         width, height,
+                         data + surface->offset,
+                         surface->pitch,
+                         bytes_per_pixel);
+        }
+        /* flag the range we just wrote for the DIRTY_MEMORY_VGA client */
+        memory_region_set_client_dirty(d->vram,
+                                       dma.address + surface->offset,
+                                       surface->pitch * height,
+                                       DIRTY_MEMORY_VGA);
+
+        if (color) {
+            pgraph_update_memory_buffer(d, dma.address + surface->offset,
+                                        surface->pitch * height, true);
+        }
+
+        surface->draw_dirty = false;
+        surface->write_enabled_cache = false;
+        /* debug: log the surface's VRAM range and 4 bytes at offset 64 */
+        uint8_t *out = data + surface->offset + 64;
+        NV2A_DPRINTF("read_surface %s 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", "
+                      "(0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", "
+                        "%d %d, %d %d, %d) - %x %x %x %x\n",
+            color ? "color" : "zeta",
+            dma.address, dma.address + dma.limit,
+            dma.address + surface->offset,
+            dma.address + surface->offset + surface->pitch * height,
+            pg->surface_shape.clip_x, pg->surface_shape.clip_y,
+            pg->surface_shape.clip_width, pg->surface_shape.clip_height,
+            surface->pitch,
+            out[0], out[1], out[2], out[3]);
+
+    }
+
+    if (swizzle) {
+        g_free(buf);
+    }
+}
+
+static void pgraph_update_surface(NV2AState *d, bool upload,
+                                  bool color_write, bool zeta_write)
+{
+    PGRAPHState *pg = &d->pgraph;
+    /* Sync both surfaces: upload (VRAM -> GL) or download (GL -> VRAM). */
+    pg->surface_shape.z_format = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                                          NV_PGRAPH_SETUPRASTER_Z_FORMAT);
+
+    /* FIXME: Does this apply to CLEARs too? */
+    color_write = color_write && pgraph_color_write_enabled(pg);
+    zeta_write = zeta_write && pgraph_zeta_write_enabled(pg);
+
+    if (upload && pgraph_framebuffer_dirty(pg)) {
+        assert(!pg->surface_color.draw_dirty);
+        assert(!pg->surface_zeta.draw_dirty);
+        /* shape changed: drop the GL textures, mark both for re-upload */
+        pg->surface_color.buffer_dirty = true;
+        pg->surface_zeta.buffer_dirty = true;
+
+        glFramebufferTexture2D(GL_FRAMEBUFFER,
+                               GL_COLOR_ATTACHMENT0,
+                               GL_TEXTURE_2D,
+                               0, 0);
+
+        if (pg->gl_color_buffer) {
+            glDeleteTextures(1, &pg->gl_color_buffer);
+            pg->gl_color_buffer = 0;
+        }
+
+        glFramebufferTexture2D(GL_FRAMEBUFFER,
+                               GL_DEPTH_ATTACHMENT,
+                               GL_TEXTURE_2D,
+                               0, 0);
+        glFramebufferTexture2D(GL_FRAMEBUFFER,
+                               GL_DEPTH_STENCIL_ATTACHMENT,
+                               GL_TEXTURE_2D,
+                               0, 0);
+
+        if (pg->gl_zeta_buffer) {
+            glDeleteTextures(1, &pg->gl_zeta_buffer);
+            pg->gl_zeta_buffer = 0;
+        }
+
+        memcpy(&pg->last_surface_shape, &pg->surface_shape,
+               sizeof(SurfaceShape));
+    }
+    /* upload: needs write; download: needs draw_dirty and write-or-cached */
+    if ((color_write || (!upload && pg->surface_color.write_enabled_cache))
+        && (upload || pg->surface_color.draw_dirty)) {
+        pgraph_update_surface_part(d, upload, true);
+    }
+
+    /* ...and the same rule for the zeta surface */
+    if ((zeta_write || (!upload && pg->surface_zeta.write_enabled_cache))
+        && (upload || pg->surface_zeta.draw_dirty)) {
+        pgraph_update_surface_part(d, upload, false);
+    }
+}
+
+
+static void pgraph_init(NV2AState *d)
+{
+    int i;
+    /* Set up PGRAPH's lock/condvars and the GL objects it works with. */
+    PGRAPHState *pg = &d->pgraph;
+
+    qemu_mutex_init(&pg->lock);
+    qemu_cond_init(&pg->interrupt_cond);
+    qemu_cond_init(&pg->fifo_access_cond);
+    qemu_cond_init(&pg->flip_3d);
+
+    /* fire up opengl */
+
+    pg->gl_context = glo_context_create();
+    assert(pg->gl_context);
+
+#ifdef DEBUG_NV2A_GL
+    glEnable(GL_DEBUG_OUTPUT);
+#endif
+
+    glextensions_init();
+
+    /* DXT textures */
+    assert(glo_check_extension("GL_EXT_texture_compression_s3tc"));
+    /*  Internal RGB565 texture format */
+    assert(glo_check_extension("GL_ARB_ES2_compatibility"));
+
+    GLint max_vertex_attributes;
+    glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes);
+    assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES);
+
+    /* create and bind our framebuffer object */
+    glGenFramebuffers(1, &pg->gl_framebuffer);
+    glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer);
+
+    /* need a valid framebuffer to start with */
+    glGenTextures(1, &pg->gl_color_buffer);
+    glBindTexture(GL_TEXTURE_2D, pg->gl_color_buffer);
+    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 640, 480,
+                 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
+    glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                           GL_TEXTURE_2D, pg->gl_color_buffer, 0);
+
+    assert(glCheckFramebufferStatus(GL_FRAMEBUFFER)
+            == GL_FRAMEBUFFER_COMPLETE);
+
+    //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE );
+
+    pg->texture_cache = g_lru_cache_new(
+        texture_key_hash, texture_key_equal,
+        NULL, texture_key_retrieve,
+        texture_key_destroy, texture_binding_destroy,
+        NULL, NULL);
+    g_lru_cache_set_max_size(pg->texture_cache, 512);
+
+    pg->shader_cache = g_hash_table_new(shader_hash, shader_equal);
+
+    /* two GL buffer objects per vertex attribute */
+    for (i=0; i<NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        glGenBuffers(1, &pg->vertex_attributes[i].gl_converted_buffer);
+        glGenBuffers(1, &pg->vertex_attributes[i].gl_inline_buffer);
+    }
+    glGenBuffers(1, &pg->gl_inline_array_buffer);
+    glGenBuffers(1, &pg->gl_element_buffer);
+    /* GL buffer sized to match all of VRAM; no initial data is supplied */
+    glGenBuffers(1, &pg->gl_memory_buffer);
+    glBindBuffer(GL_ARRAY_BUFFER, pg->gl_memory_buffer);
+    glBufferData(GL_ARRAY_BUFFER,
+                 memory_region_size(d->vram),
+                 NULL,
+                 GL_DYNAMIC_DRAW);
+
+    glGenVertexArrays(1, &pg->gl_vertex_array);
+    glBindVertexArray(pg->gl_vertex_array);
+
+    assert(glGetError() == GL_NO_ERROR);
+    /* presumably un-binds the context so another thread can take it */
+    glo_set_current(NULL);
+}
+
+static void pgraph_destroy(PGRAPHState *pg)
+{
+    qemu_mutex_destroy(&pg->lock); /* tear down what pgraph_init set up */
+    qemu_cond_destroy(&pg->interrupt_cond);
+    qemu_cond_destroy(&pg->fifo_access_cond);
+    qemu_cond_destroy(&pg->flip_3d);
+    /* make our GL context current for the deletions below */
+    glo_set_current(pg->gl_context);
+
+    if (pg->gl_color_buffer) {
+        glDeleteTextures(1, &pg->gl_color_buffer);
+    }
+    if (pg->gl_zeta_buffer) {
+        glDeleteTextures(1, &pg->gl_zeta_buffer);
+    }
+    glDeleteFramebuffers(1, &pg->gl_framebuffer);
+    // NOTE(review): GL buffers/VAO from pgraph_init are not deleted here
+    // TODO: clear out shader cache
+    // TODO: clear out texture cache
+
+    glo_set_current(NULL);
+
+    glo_context_destroy(pg->gl_context);
+}
+
+static unsigned int kelvin_map_stencil_op(uint32_t parameter)
+{
+    /* Map a NV097_SET_STENCIL_OP_V_* method value onto the matching
+     * NV_PGRAPH_CONTROL_2_STENCIL_OP_V_* register encoding. */
+    switch (parameter) {
+    case NV097_SET_STENCIL_OP_V_KEEP:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP;
+    case NV097_SET_STENCIL_OP_V_ZERO:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO;
+    case NV097_SET_STENCIL_OP_V_REPLACE:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE;
+    case NV097_SET_STENCIL_OP_V_INCRSAT:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT;
+    case NV097_SET_STENCIL_OP_V_DECRSAT:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT;
+    case NV097_SET_STENCIL_OP_V_INVERT:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT;
+    case NV097_SET_STENCIL_OP_V_INCR:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR;
+    case NV097_SET_STENCIL_OP_V_DECR:
+        return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR;
+    default:
+        assert(false);
+    }
+    return NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; /* fallback if NDEBUG */
+}
+
+static unsigned int kelvin_map_polygon_mode(uint32_t parameter)
+{
+    /* Map a NV097_SET_FRONT_POLYGON_MODE_V_* method value onto the matching
+     * NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_* register encoding. */
+    switch (parameter) {
+    case NV097_SET_FRONT_POLYGON_MODE_V_POINT:
+        return NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT;
+    case NV097_SET_FRONT_POLYGON_MODE_V_LINE:
+        return NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE;
+    case NV097_SET_FRONT_POLYGON_MODE_V_FILL:
+        return NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL;
+    default:
+        assert(false);
+    }
+    return NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; /* fallback if NDEBUG */
+}
+
+static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel)
+{
+    /* Map a NV097_SET_TEXGEN_S_* method value onto the matching
+     * NV_PGRAPH_CSV1_A_T0_S_* encoding; some modes restrict the channel. */
+    assert(channel < 4);
+    switch (parameter) {
+    case NV097_SET_TEXGEN_S_DISABLE:
+        return NV_PGRAPH_CSV1_A_T0_S_DISABLE;
+    case NV097_SET_TEXGEN_S_EYE_LINEAR:
+        return NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR;
+    case NV097_SET_TEXGEN_S_OBJECT_LINEAR:
+        return NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR;
+    case NV097_SET_TEXGEN_S_SPHERE_MAP:
+        assert(channel < 2);
+        return NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP;
+    case NV097_SET_TEXGEN_S_REFLECTION_MAP:
+        assert(channel < 3);
+        return NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP;
+    case NV097_SET_TEXGEN_S_NORMAL_MAP:
+        assert(channel < 3);
+        return NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP;
+    default:
+        assert(false);
+    }
+    return NV_PGRAPH_CSV1_A_T0_S_DISABLE; /* fallback if NDEBUG */
+}
+
+static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg,
+                                                   unsigned int attr)
+{
+    VertexAttribute *va = &pg->vertex_attributes[attr];
+
+    /* Nothing to do if the attribute already has an inline buffer, or if
+     * no vertices have been recorded yet (nothing to back-fill). */
+    if (va->inline_buffer || pg->inline_buffer_length == 0) {
+        return;
+    }
+
+    /* Allocate room for a full batch of 4-float entries, then give every
+     * vertex recorded so far the attribute's current inline_value. */
+    va->inline_buffer = g_malloc(NV2A_MAX_BATCH_LENGTH * sizeof(float) * 4);
+    for (int i = 0; i < pg->inline_buffer_length; i++) {
+        memcpy(&va->inline_buffer[i * 4], va->inline_value,
+               sizeof(float) * 4);
+    }
+}
+
+static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg)
+{
+    /* Append every attribute's current inline_value as the next vertex of
+     * its inline buffer (where one is allocated), then count the vertex. */
+    assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH);
+
+    for (int attr = 0; attr < NV2A_VERTEXSHADER_ATTRIBUTES; attr++) {
+        VertexAttribute *va = &pg->vertex_attributes[attr];
+        if (!va->inline_buffer) {
+            continue;
+        }
+        memcpy(&va->inline_buffer[pg->inline_buffer_length * 4],
+               va->inline_value,
+               sizeof(float) * 4);
+    }
+
+    pg->inline_buffer_length++;
+}
+
+static void pgraph_method(NV2AState *d,
+                          unsigned int subchannel,
+                          unsigned int method,
+                          uint32_t parameter)
+{
+    int i;
+    unsigned int slot;
+
+    PGRAPHState *pg = &d->pgraph;
+
+    bool channel_valid =
+        d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID;
+    assert(channel_valid);
+
+    unsigned channel_id = GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID);
+
+    ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d;
+    ImageBlitState *image_blit = &pg->image_blit;
+    KelvinState *kelvin = &pg->kelvin;
+
+    assert(subchannel < 8);
+
+    if (method == NV_SET_OBJECT) {
+        assert(parameter < memory_region_size(&d->ramin));
+        uint8_t *obj_ptr = d->ramin_ptr + parameter;
+
+        uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr);
+        uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4));
+        uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8));
+        uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12));
+        uint32_t ctx_5 = parameter;
+
+        pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4] = ctx_1;
+        pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4] = ctx_2;
+        pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4] = ctx_3;
+        pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4] = ctx_4;
+        pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4] = ctx_5;
+    }
+
+    // is this right?
+    pg->regs[NV_PGRAPH_CTX_SWITCH1] = pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4];
+    pg->regs[NV_PGRAPH_CTX_SWITCH2] = pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4];
+    pg->regs[NV_PGRAPH_CTX_SWITCH3] = pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4];
+    pg->regs[NV_PGRAPH_CTX_SWITCH4] = pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4];
+    pg->regs[NV_PGRAPH_CTX_SWITCH5] = pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4];
+
+    uint32_t graphics_class = GET_MASK(pg->regs[NV_PGRAPH_CTX_SWITCH1],
+                                       NV_PGRAPH_CTX_SWITCH1_GRCLASS);
+
+    // NV2A_DPRINTF("graphics_class %d 0x%x\n", subchannel, graphics_class);
+    pgraph_method_log(subchannel, graphics_class, method, parameter);
+
+    if (subchannel != 0) {
+        // catches context switching issues on xbox d3d
+        assert(graphics_class != 0x97);
+    }
+
+    /* ugly switch for now */
+    switch (graphics_class) {
+
+    case NV_CONTEXT_SURFACES_2D: { switch (method) {
+    case NV062_SET_OBJECT:
+        context_surfaces_2d->object_instance = parameter;
+        break;
+
+    case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE:
+        context_surfaces_2d->dma_image_source = parameter;
+        break;
+    case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN:
+        context_surfaces_2d->dma_image_dest = parameter;
+        break;
+    case NV062_SET_COLOR_FORMAT:
+        context_surfaces_2d->color_format = parameter;
+        break;
+    case NV062_SET_PITCH:
+        context_surfaces_2d->source_pitch = parameter & 0xFFFF;
+        context_surfaces_2d->dest_pitch = parameter >> 16;
+        break;
+    case NV062_SET_OFFSET_SOURCE:
+        context_surfaces_2d->source_offset = parameter & 0x07FFFFFF;
+        break;
+    case NV062_SET_OFFSET_DESTIN:
+        context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF;
+        break;
+    } break; }
+
+    case NV_IMAGE_BLIT: { switch (method) {
+    case NV09F_SET_OBJECT:
+        image_blit->object_instance = parameter;
+        break;
+
+    case NV09F_SET_CONTEXT_SURFACES:
+        image_blit->context_surfaces = parameter;
+        break;
+    case NV09F_SET_OPERATION:
+        image_blit->operation = parameter;
+        break;
+    case NV09F_CONTROL_POINT_IN:
+        image_blit->in_x = parameter & 0xFFFF;
+        image_blit->in_y = parameter >> 16;
+        break;
+    case NV09F_CONTROL_POINT_OUT:
+        image_blit->out_x = parameter & 0xFFFF;
+        image_blit->out_y = parameter >> 16;
+        break;
+    case NV09F_SIZE:
+        image_blit->width = parameter & 0xFFFF;
+        image_blit->height = parameter >> 16;
+
+        /* I guess this kicks it off? */
+        if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) {
+
+            NV2A_GL_DPRINTF(true, "NV09F_SET_OPERATION_SRCCOPY");
+
+            ContextSurfaces2DState *context_surfaces = context_surfaces_2d;
+            assert(context_surfaces->object_instance
+                    == image_blit->context_surfaces);
+
+            unsigned int bytes_per_pixel;
+            switch (context_surfaces->color_format) {
+            case NV062_SET_COLOR_FORMAT_LE_Y8:
+                bytes_per_pixel = 1;
+                break;
+            case NV062_SET_COLOR_FORMAT_LE_R5G6B5:
+                bytes_per_pixel = 2;
+                break;
+            case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8:
+                bytes_per_pixel = 4;
+                break;
+            default:
+                fprintf(stderr, "Unknown blit surface format: 0x%x\n", context_surfaces->color_format);
+                assert(false);
+                break;
+            }
+
+            hwaddr source_dma_len, dest_dma_len;
+            uint8_t *source, *dest;
+
+            source = nv_dma_map(d, context_surfaces->dma_image_source,
+                                &source_dma_len);
+            assert(context_surfaces->source_offset < source_dma_len);
+            source += context_surfaces->source_offset;
+
+            dest = nv_dma_map(d, context_surfaces->dma_image_dest,
+                              &dest_dma_len);
+            assert(context_surfaces->dest_offset < dest_dma_len);
+            dest += context_surfaces->dest_offset;
+
+            NV2A_DPRINTF("  - 0x%tx -> 0x%tx\n", source - d->vram_ptr,
+                                                 dest - d->vram_ptr);
+
+            int y;
+            for (y=0; y<image_blit->height; y++) {
+                uint8_t *source_row = source
+                    + (image_blit->in_y + y) * context_surfaces->source_pitch
+                    + image_blit->in_x * bytes_per_pixel;
+
+                uint8_t *dest_row = dest
+                    + (image_blit->out_y + y) * context_surfaces->dest_pitch
+                    + image_blit->out_x * bytes_per_pixel;
+
+                memmove(dest_row, source_row,
+                        image_blit->width * bytes_per_pixel);
+            }
+
+        } else {
+            assert(false);
+        }
+
+        break;
+    } break; }
+
+
+    case NV_KELVIN_PRIMITIVE: { switch (method) {
+    case NV097_SET_OBJECT:
+        kelvin->object_instance = parameter;
+        break;
+
+    case NV097_NO_OPERATION:
+        /* The bios uses nop as a software method call -
+         * it seems to expect a notify interrupt if the parameter isn't 0.
+         * According to a nouveau guy it should still be a nop regardless
+         * of the parameter. It's possible a debug register enables this,
+         * but nothing obvious sticks out. Weird.
+         */
+        if (parameter != 0) {
+            assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR));
+
+            SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR],
+                NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id);
+            SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR],
+                NV_PGRAPH_TRAPPED_ADDR_SUBCH, subchannel);
+            SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR],
+                NV_PGRAPH_TRAPPED_ADDR_MTHD, method);
+            pg->regs[NV_PGRAPH_TRAPPED_DATA_LOW] = parameter;
+            pg->regs[NV_PGRAPH_NSOURCE] = NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */
+            pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR;
+
+            qemu_mutex_unlock(&pg->lock);
+            qemu_mutex_lock_iothread();
+            update_irq(d);
+            qemu_mutex_lock(&pg->lock);
+            qemu_mutex_unlock_iothread();
+
+            while (pg->pending_interrupts & NV_PGRAPH_INTR_ERROR) {
+                qemu_cond_wait(&pg->interrupt_cond, &pg->lock);
+            }
+        }
+        break;
+
+    case NV097_WAIT_FOR_IDLE:
+        pgraph_update_surface(d, false, true, true);
+        break;
+
+
+    case NV097_SET_FLIP_READ:
+        SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D,
+                 parameter);
+        break;
+    case NV097_SET_FLIP_WRITE:
+        SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D,
+                 parameter);
+        break;
+    case NV097_SET_FLIP_MODULO:
+        SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D,
+                 parameter);
+        break;
+    case NV097_FLIP_INCREMENT_WRITE: {
+        NV2A_DPRINTF("flip increment write %d -> ",
+            GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
+                          NV_PGRAPH_SURFACE_WRITE_3D));
+        SET_MASK(pg->regs[NV_PGRAPH_SURFACE],
+                 NV_PGRAPH_SURFACE_WRITE_3D,
+                 (GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
+                          NV_PGRAPH_SURFACE_WRITE_3D)+1)
+                    % GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
+                               NV_PGRAPH_SURFACE_MODULO_3D) );
+        NV2A_DPRINTF("%d\n",
+            GET_MASK(pg->regs[NV_PGRAPH_SURFACE],
+                          NV_PGRAPH_SURFACE_WRITE_3D));
+
+        if (glFrameTerminatorGREMEDY) {
+            glFrameTerminatorGREMEDY();
+        }
+
+        break;
+    }
+    case NV097_FLIP_STALL:
+        pgraph_update_surface(d, false, true, true);
+
+        while (true) {
+            NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n",
+                GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D),
+                GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D),
+                GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D));
+
+            uint32_t s = pg->regs[NV_PGRAPH_SURFACE];
+            if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D)
+                != GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) {
+                break;
+            }
+            qemu_cond_wait(&pg->flip_3d, &pg->lock);
+        }
+        NV2A_DPRINTF("flip stall done\n");
+        break;
+
+    // TODO: these should be loading the dma objects from ramin here?
+    case NV097_SET_CONTEXT_DMA_NOTIFIES:
+        pg->dma_notifies = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_A:
+        pg->dma_a = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_B:
+        pg->dma_b = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_STATE:
+        pg->dma_state = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_COLOR:
+        /* try to get any straggling draws in before the surface's changed :/ */
+        pgraph_update_surface(d, false, true, true);
+
+        pg->dma_color = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_ZETA:
+        pg->dma_zeta = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_VERTEX_A:
+        pg->dma_vertex_a = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_VERTEX_B:
+        pg->dma_vertex_b = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_SEMAPHORE:
+        pg->dma_semaphore = parameter;
+        break;
+    case NV097_SET_CONTEXT_DMA_REPORT:
+        pg->dma_report = parameter;
+        break;
+
+    case NV097_SET_SURFACE_CLIP_HORIZONTAL:
+        pgraph_update_surface(d, false, true, true);
+
+        pg->surface_shape.clip_x =
+            GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X);
+        pg->surface_shape.clip_width =
+            GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH);
+        break;
+    case NV097_SET_SURFACE_CLIP_VERTICAL:
+        pgraph_update_surface(d, false, true, true);
+
+        pg->surface_shape.clip_y =
+            GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y);
+        pg->surface_shape.clip_height =
+            GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT);
+        break;
+    case NV097_SET_SURFACE_FORMAT:
+        pgraph_update_surface(d, false, true, true);
+
+        pg->surface_shape.color_format =
+            GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR);
+        pg->surface_shape.zeta_format =
+            GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA);
+        pg->surface_type =
+            GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE);
+        pg->surface_shape.anti_aliasing =
+            GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING);
+        pg->surface_shape.log_width =
+            GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH);
+        pg->surface_shape.log_height =
+            GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT);
+        break;
+    case NV097_SET_SURFACE_PITCH:
+        pgraph_update_surface(d, false, true, true);
+
+        pg->surface_color.pitch =
+            GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR);
+        pg->surface_zeta.pitch =
+            GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA);
+        break;
+    case NV097_SET_SURFACE_COLOR_OFFSET:
+        pgraph_update_surface(d, false, true, true);
+
+        pg->surface_color.offset = parameter;
+        break;
+    case NV097_SET_SURFACE_ZETA_OFFSET:
+        pgraph_update_surface(d, false, true, true);
+
+        pg->surface_zeta.offset = parameter;
+        break;
+
+    case NV097_SET_COMBINER_ALPHA_ICW ...
+            NV097_SET_COMBINER_ALPHA_ICW + 28:
+        slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4;
+        pg->regs[NV_PGRAPH_COMBINEALPHAI0 + slot*4] = parameter;
+        break;
+
+    case NV097_SET_COMBINER_SPECULAR_FOG_CW0:
+        pg->regs[NV_PGRAPH_COMBINESPECFOG0] = parameter;
+        break;
+
+    case NV097_SET_COMBINER_SPECULAR_FOG_CW1:
+        pg->regs[NV_PGRAPH_COMBINESPECFOG1] = parameter;
+        break;
+
+    CASE_4(NV097_SET_TEXTURE_ADDRESS, 64):
+        slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64;
+        pg->regs[NV_PGRAPH_TEXADDRESS0 + slot * 4] = parameter;
+        break;
+    case NV097_SET_CONTROL0: {
+        pgraph_update_surface(d, false, true, true);
+
+        bool stencil_write_enable =
+            parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE;
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE,
+                 stencil_write_enable);
+
+        uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT);
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format);
+
+        bool z_perspective =
+            parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE;
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE,
+                 z_perspective);
+        break;
+    }
+
+    case NV097_SET_FOG_MODE: {
+        /* FIXME: NV_PGRAPH_CSV0_D_FOG_MODE also exists; it is not set here */
+        unsigned int mode;
+        switch (parameter) {
+        case NV097_SET_FOG_MODE_V_LINEAR:
+            mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break;
+        case NV097_SET_FOG_MODE_V_EXP:
+            mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break;
+        case NV097_SET_FOG_MODE_V_EXP2:
+            mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break;
+        case NV097_SET_FOG_MODE_V_EXP_ABS:
+            mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break;
+        case NV097_SET_FOG_MODE_V_EXP2_ABS:
+            mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break;
+        case NV097_SET_FOG_MODE_V_LINEAR_ABS:
+            mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break;
+        default:
+            assert(false);
+            break;
+        }
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOG_MODE,
+                 mode);
+        break;
+    }
+    case NV097_SET_FOG_GEN_MODE: {
+        unsigned int mode;
+        switch (parameter) {
+        case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA:
+            mode = NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break;
+        case NV097_SET_FOG_GEN_MODE_V_RADIAL:
+            mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break;
+        case NV097_SET_FOG_GEN_MODE_V_PLANAR:
+            mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break;
+        case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR:
+            mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break;
+        case NV097_SET_FOG_GEN_MODE_V_FOG_X:
+            mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break;
+        default:
+            assert(false);
+            break;
+        }
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGGENMODE, mode);
+        break;
+    }
+    case NV097_SET_FOG_ENABLE:
+/*
+      FIXME: NV_PGRAPH_CSV0_D has a fog enable bit too; it is not set here:
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGENABLE,
+             parameter);
+*/
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOGENABLE,
+             parameter);
+        break;
+    case NV097_SET_FOG_COLOR: {
+        /* PGRAPH channels are ARGB, parameter channels are ABGR */
+        uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED);
+        uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN);
+        uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE);
+        uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA);
+        SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_RED, red);
+        SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_GREEN, green);
+        SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_BLUE, blue);
+        SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_ALPHA, alpha);
+        break;
+    }
+    case NV097_SET_ALPHA_TEST_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter);
+        break;
+    case NV097_SET_BLEND_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EN, parameter);
+        break;
+    case NV097_SET_CULL_FACE_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_CULLENABLE,
+                 parameter);
+        break;
+    case NV097_SET_DEPTH_TEST_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZENABLE,
+                 parameter);
+        break;
+    case NV097_SET_DITHER_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter);
+        break;
+    case NV097_SET_LIGHTING_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_LIGHTING,
+                 parameter);
+        break;
+    case NV097_SET_SKIN_MODE:
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_SKIN,
+                 parameter);
+        break;
+    case NV097_SET_STENCIL_TEST_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                 NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter);
+        break;
+    case NV097_SET_POLY_OFFSET_POINT_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter);
+        break;
+    case NV097_SET_POLY_OFFSET_LINE_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, parameter);
+        break;
+    case NV097_SET_POLY_OFFSET_FILL_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter);
+        break;
+    case NV097_SET_ALPHA_FUNC:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF);
+        break;
+    case NV097_SET_ALPHA_REF:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_ALPHAREF, parameter);
+        break;
+    case NV097_SET_BLEND_FUNC_SFACTOR: {
+        unsigned int factor;
+        switch (parameter) {
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR:
+            factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA:
+            factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA:
+            factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR:
+            factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE:
+            factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR:
+            factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA:
+            factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA:
+            factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break;
+        default:
+            fprintf(stderr, "Unknown blend source factor: 0x%x\n", parameter);
+            assert(false);
+            break;
+        }
+        SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_SFACTOR, factor);
+
+        break;
+    }
+
+    case NV097_SET_BLEND_FUNC_DFACTOR: {
+        unsigned int factor;
+        switch (parameter) {
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR:
+            factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA:
+            factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA:
+            factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR:
+            factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE:
+            factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR:
+            factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA:
+            factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break;
+        case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA:
+            factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break;
+        default:
+            fprintf(stderr, "Unknown blend destination factor: 0x%x\n", parameter);
+            assert(false);
+            break;
+        }
+        SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_DFACTOR, factor);
+
+        break;
+    }
+
+    case NV097_SET_BLEND_COLOR:
+        pg->regs[NV_PGRAPH_BLENDCOLOR] = parameter;
+        break;
+
+    case NV097_SET_BLEND_EQUATION: {
+        unsigned int equation;
+        switch (parameter) {
+        case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT:
+            equation = 0; break;
+        case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT:
+            equation = 1; break;
+        case NV097_SET_BLEND_EQUATION_V_FUNC_ADD:
+            equation = 2; break;
+        case NV097_SET_BLEND_EQUATION_V_MIN:
+            equation = 3; break;
+        case NV097_SET_BLEND_EQUATION_V_MAX:
+            equation = 4; break;
+        case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED:
+            equation = 5; break;
+        case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED:
+            equation = 6; break;
+        default:
+            assert(false);
+            break;
+        }
+        SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EQN, equation);
+
+        break;
+    }
+
+    case NV097_SET_DEPTH_FUNC:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZFUNC,
+                 parameter & 0xF);
+        break;
+
+    case NV097_SET_COLOR_MASK: {
+        pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg);
+
+        bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE;
+        bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE;
+        bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE;
+        bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE;
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha);
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red);
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green);
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue);
+        break;
+    }
+    case NV097_SET_DEPTH_MASK:
+        pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg);
+
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                 NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter);
+        break;
+    case NV097_SET_STENCIL_MASK:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                 NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter);
+        break;
+    case NV097_SET_STENCIL_FUNC:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                 NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF);
+        break;
+    case NV097_SET_STENCIL_FUNC_REF:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                 NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter);
+        break;
+    case NV097_SET_STENCIL_FUNC_MASK:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                 NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter);
+        break;
+    case NV097_SET_STENCIL_OP_FAIL:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
+                 NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL,
+                 kelvin_map_stencil_op(parameter));
+        break;
+    case NV097_SET_STENCIL_OP_ZFAIL:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
+                 NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL,
+                 kelvin_map_stencil_op(parameter));
+        break;
+    case NV097_SET_STENCIL_OP_ZPASS:
+        SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
+                 NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS,
+                 kelvin_map_stencil_op(parameter));
+        break;
+
+    case NV097_SET_POLYGON_OFFSET_SCALE_FACTOR:
+        pg->regs[NV_PGRAPH_ZOFFSETFACTOR] = parameter;
+        break;
+    case NV097_SET_POLYGON_OFFSET_BIAS:
+        pg->regs[NV_PGRAPH_ZOFFSETBIAS] = parameter;
+        break;
+    case NV097_SET_FRONT_POLYGON_MODE:
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_FRONTFACEMODE,
+                 kelvin_map_polygon_mode(parameter));
+        break;
+    case NV097_SET_BACK_POLYGON_MODE:
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_BACKFACEMODE,
+                 kelvin_map_polygon_mode(parameter));
+        break;
+    case NV097_SET_CLIP_MIN:
+        pg->regs[NV_PGRAPH_ZCLIPMIN] = parameter;
+        break;
+    case NV097_SET_CLIP_MAX:
+        pg->regs[NV_PGRAPH_ZCLIPMAX] = parameter;
+        break;
+    case NV097_SET_CULL_FACE: {
+        unsigned int face;
+        switch (parameter) {
+        case NV097_SET_CULL_FACE_V_FRONT:
+            face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break;
+        case NV097_SET_CULL_FACE_V_BACK:
+            face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break;
+        case NV097_SET_CULL_FACE_V_FRONT_AND_BACK:
+            face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break;
+        default:
+            assert(false);
+            break;
+        }
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_CULLCTRL,
+                 face);
+        break;
+    }
+    case NV097_SET_FRONT_FACE: {
+        bool ccw;
+        switch (parameter) {
+        case NV097_SET_FRONT_FACE_V_CW:
+            ccw = false; break;
+        case NV097_SET_FRONT_FACE_V_CCW:
+            ccw = true; break;
+        default:
+            fprintf(stderr, "Unknown front face: 0x%x\n", parameter);
+            assert(false);
+            break;
+        }
+        SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                 NV_PGRAPH_SETUPRASTER_FRONTFACE,
+                 ccw ? 1 : 0);
+        break;
+    }
+    case NV097_SET_NORMALIZATION_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
+                 NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE,
+                 parameter);
+        break;
+
+    case NV097_SET_LIGHT_ENABLE_MASK:
+        SET_MASK(d->pgraph.regs[NV_PGRAPH_CSV0_D],
+                 NV_PGRAPH_CSV0_D_LIGHTS,
+                 parameter);
+        break;
+
+    CASE_4(NV097_SET_TEXGEN_S, 16): {
+        slot = (method - NV097_SET_TEXGEN_S) / 16;
+        unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                      : NV_PGRAPH_CSV1_B;
+        unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S
+                                       : NV_PGRAPH_CSV1_A_T0_S;
+        SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 0));
+        break;
+    }
+    CASE_4(NV097_SET_TEXGEN_T, 16): {
+        slot = (method - NV097_SET_TEXGEN_T) / 16;
+        unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                      : NV_PGRAPH_CSV1_B;
+        unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T
+                                       : NV_PGRAPH_CSV1_A_T0_T;
+        SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 1));
+        break;
+    }
+    CASE_4(NV097_SET_TEXGEN_R, 16): {
+        slot = (method - NV097_SET_TEXGEN_R) / 16;
+        unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                      : NV_PGRAPH_CSV1_B;
+        unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R
+                                       : NV_PGRAPH_CSV1_A_T0_R;
+        SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 2));
+        break;
+    }
+    CASE_4(NV097_SET_TEXGEN_Q, 16): {
+        slot = (method - NV097_SET_TEXGEN_Q) / 16;
+        unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A
+                                      : NV_PGRAPH_CSV1_B;
+        unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_Q
+                                       : NV_PGRAPH_CSV1_A_T0_Q;
+        SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 3));
+        break;
+    }
+    CASE_4(NV097_SET_TEXTURE_MATRIX_ENABLE,4):
+        slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4;
+        pg->texture_matrix_enable[slot] = parameter;
+        break;
+
+    case NV097_SET_PROJECTION_MATRIX ...
+            NV097_SET_PROJECTION_MATRIX + 0x3c: {
+        slot = (method - NV097_SET_PROJECTION_MATRIX) / 4;
+        // pg->projection_matrix[slot] = *(float*)&parameter;
+        unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4;
+        pg->vsh_constants[row][slot%4] = parameter;
+        pg->vsh_constants_dirty[row] = true;
+        break;
+    }
+
+    case NV097_SET_MODEL_VIEW_MATRIX ...
+            NV097_SET_MODEL_VIEW_MATRIX + 0xfc: {
+        slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4;
+        unsigned int matnum = slot / 16;
+        unsigned int entry = slot % 16;
+        unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4;
+        pg->vsh_constants[row][entry % 4] = parameter;
+        pg->vsh_constants_dirty[row] = true;
+        break;
+    }
+
+    case NV097_SET_INVERSE_MODEL_VIEW_MATRIX ...
+            NV097_SET_INVERSE_MODEL_VIEW_MATRIX + 0xfc: {
+        slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4;
+        unsigned int matnum = slot / 16;
+        unsigned int entry = slot % 16;
+        unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4;
+        pg->vsh_constants[row][entry % 4] = parameter;
+        pg->vsh_constants_dirty[row] = true;
+        break;
+    }
+
+    case NV097_SET_COMPOSITE_MATRIX ...
+            NV097_SET_COMPOSITE_MATRIX + 0x3c: {
+        slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4;
+        unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4;
+        pg->vsh_constants[row][slot%4] = parameter;
+        pg->vsh_constants_dirty[row] = true;
+        break;
+    }
+
+    case NV097_SET_TEXTURE_MATRIX ...
+            NV097_SET_TEXTURE_MATRIX + 0xfc: {
+        slot = (method - NV097_SET_TEXTURE_MATRIX) / 4;
+        unsigned int tex = slot / 16;
+        unsigned int entry = slot % 16;
+        unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4;
+        pg->vsh_constants[row][entry%4] = parameter;
+        pg->vsh_constants_dirty[row] = true;
+        break;
+    }
+
+    case NV097_SET_FOG_PARAMS ...
+            NV097_SET_FOG_PARAMS + 8:
+        slot = (method - NV097_SET_FOG_PARAMS) / 4;
+        if (slot < 2) {
+            pg->regs[NV_PGRAPH_FOGPARAM0 + slot*4] = parameter;
+        } else {
+            /* FIXME: unknown which PGRAPH register holds slot 2 (if any) */
+        }
+
+        pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter;
+        pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true;
+        break;
+
+    /* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */
+    case NV097_SET_TEXGEN_PLANE_S ...
+            NV097_SET_TEXGEN_PLANE_S + 0xfc: {
+        slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4;
+        unsigned int tex = slot / 16;
+        unsigned int entry = slot % 16;
+        unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4;
+        pg->vsh_constants[row][entry%4] = parameter;
+        pg->vsh_constants_dirty[row] = true;
+        break;
+    }
+
+    case NV097_SET_TEXGEN_VIEW_MODEL:
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_TEXGEN_REF,
+                 parameter);
+        break;
+
+    case NV097_SET_FOG_PLANE ...
+            NV097_SET_FOG_PLANE + 12:
+        slot = (method - NV097_SET_FOG_PLANE) / 4;
+        pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter;
+        pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true;
+        break;
+
+    case NV097_SET_SCENE_AMBIENT_COLOR ...
+            NV097_SET_SCENE_AMBIENT_COLOR + 8:
+        slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4;
+        // FIXME: unverified ("??" in original): is LTCTXA_FR_AMB the target?
+        pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter;
+        pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true;
+        break;
+
+    case NV097_SET_VIEWPORT_OFFSET ...
+            NV097_SET_VIEWPORT_OFFSET + 12:
+        slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4;
+        pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter;
+        pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true;
+        break;
+
+    case NV097_SET_EYE_POSITION ...
+            NV097_SET_EYE_POSITION + 12:
+        slot = (method - NV097_SET_EYE_POSITION) / 4;
+        pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter;
+        pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true;
+        break;
+    case NV097_SET_COMBINER_FACTOR0 ...
+            NV097_SET_COMBINER_FACTOR0 + 28:
+        slot = (method - NV097_SET_COMBINER_FACTOR0) / 4;
+        pg->regs[NV_PGRAPH_COMBINEFACTOR0 + slot*4] = parameter;
+        break;
+
+    case NV097_SET_COMBINER_FACTOR1 ...
+            NV097_SET_COMBINER_FACTOR1 + 28:
+        slot = (method - NV097_SET_COMBINER_FACTOR1) / 4;
+        pg->regs[NV_PGRAPH_COMBINEFACTOR1 + slot*4] = parameter;
+        break;
+
+    case NV097_SET_COMBINER_ALPHA_OCW ...
+            NV097_SET_COMBINER_ALPHA_OCW + 28:
+        slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4;
+        pg->regs[NV_PGRAPH_COMBINEALPHAO0 + slot*4] = parameter;
+        break;
+
+    case NV097_SET_COMBINER_COLOR_ICW ...
+            NV097_SET_COMBINER_COLOR_ICW + 28:
+        slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4;
+        pg->regs[NV_PGRAPH_COMBINECOLORI0 + slot*4] = parameter;
+        break;
+
+    case NV097_SET_VIEWPORT_SCALE ...
+            NV097_SET_VIEWPORT_SCALE + 12:
+        slot = (method - NV097_SET_VIEWPORT_SCALE) / 4;
+        pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter;
+        pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true;
+        break;
+
+    case NV097_SET_TRANSFORM_PROGRAM ...
+            NV097_SET_TRANSFORM_PROGRAM + 0x7c: {
+
+        slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4;
+
+        int program_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
+                                    NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR);
+
+        assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
+        pg->program_data[program_load][slot%4] = parameter;
+
+        if (slot % 4 == 3) {
+            SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
+                     NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1);
+        }
+
+        break;
+    }
+
+    case NV097_SET_TRANSFORM_CONSTANT ...
+            NV097_SET_TRANSFORM_CONSTANT + 0x7c: {
+
+        slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4;
+
+        int const_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
+                                  NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR);
+
+        assert(const_load < NV2A_VERTEXSHADER_CONSTANTS);
+        // VertexShaderConstant *constant = &pg->constants[const_load];
+        pg->vsh_constants_dirty[const_load] |=
+            (parameter != pg->vsh_constants[const_load][slot%4]);
+        pg->vsh_constants[const_load][slot%4] = parameter;
+
+        if (slot % 4 == 3) {
+            SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
+                     NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1);
+        }
+        break;
+    }
+
+    case NV097_SET_VERTEX3F ...
+            NV097_SET_VERTEX3F + 8: {
+        slot = (method - NV097_SET_VERTEX3F) / 4;
+        VertexAttribute *attribute =
+            &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION];
+        pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION);
+        attribute->inline_value[slot] = *(float*)&parameter;
+        attribute->inline_value[3] = 1.0f;
+        if (slot == 2) {
+            pgraph_finish_inline_buffer_vertex(pg);
+        }
+        break;
+    }
+
+    /* Handles NV097_SET_BACK_LIGHT_* */
+    case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ...
+            NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 0x1C8: {
+        slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4;
+        unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16;
+        slot /= 16; /* [Light index] */
+        assert(slot < 8);
+        switch(part * 4) {
+        case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ...
+                NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8:
+            part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4;
+            pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter;
+            pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true;
+            break;
+        case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ...
+                NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8:
+            part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4;
+            pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter;
+            pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true;
+            break;
+        case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ...
+                NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8:
+            part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4;
+            pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter;
+            pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true;
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+    }
+    /* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */
+    case NV097_SET_LIGHT_AMBIENT_COLOR ...
+            NV097_SET_LIGHT_LOCAL_ATTENUATION + 0x38C: {
+        slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4;
+        unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32;
+        slot /= 32; /* [Light index] */
+        assert(slot < 8);
+        switch(part * 4) {
+        case NV097_SET_LIGHT_AMBIENT_COLOR ...
+                NV097_SET_LIGHT_AMBIENT_COLOR + 8:
+            part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4;
+            pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter;
+            pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true;
+            break;
+        case NV097_SET_LIGHT_DIFFUSE_COLOR ...
+               NV097_SET_LIGHT_DIFFUSE_COLOR + 8:
+            part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4;
+            pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter;
+            pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true;
+            break;
+        case NV097_SET_LIGHT_SPECULAR_COLOR ...
+                NV097_SET_LIGHT_SPECULAR_COLOR + 8:
+            part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4;
+            pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter;
+            pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true;
+            break;
+        case NV097_SET_LIGHT_LOCAL_RANGE:
+            pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter;
+            pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true;
+            break;
+        case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ...
+                NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8:
+            part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4;
+            pg->light_infinite_half_vector[slot][part] = *(float*)&parameter;
+            break;
+        case NV097_SET_LIGHT_INFINITE_DIRECTION ...
+                NV097_SET_LIGHT_INFINITE_DIRECTION + 8:
+            part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4;
+            pg->light_infinite_direction[slot][part] = *(float*)&parameter;
+            break;
+        case NV097_SET_LIGHT_SPOT_FALLOFF ...
+                NV097_SET_LIGHT_SPOT_FALLOFF + 8:
+            part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4;
+            pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter;
+            pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true;
+            break;
+        case NV097_SET_LIGHT_SPOT_DIRECTION ...
+                NV097_SET_LIGHT_SPOT_DIRECTION + 12:
+            part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4;
+            pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter;
+            pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true;
+            break;
+        case NV097_SET_LIGHT_LOCAL_POSITION ...
+                NV097_SET_LIGHT_LOCAL_POSITION + 8:
+            part -= NV097_SET_LIGHT_LOCAL_POSITION / 4;
+            pg->light_local_position[slot][part] = *(float*)&parameter;
+            break;
+        case NV097_SET_LIGHT_LOCAL_ATTENUATION ...
+                NV097_SET_LIGHT_LOCAL_ATTENUATION + 8:
+            part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4;
+            pg->light_local_attenuation[slot][part] = *(float*)&parameter;
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        break;
+    }
+
+    case NV097_SET_VERTEX4F ...
+            NV097_SET_VERTEX4F + 12: {
+        slot = (method - NV097_SET_VERTEX4F) / 4;
+        VertexAttribute *attribute =
+            &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION];
+        pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION);
+        attribute->inline_value[slot] = *(float*)&parameter;
+        if (slot == 3) {
+            pgraph_finish_inline_buffer_vertex(pg);
+        }
+        break;
+    }
+
+    case NV097_SET_VERTEX_DATA_ARRAY_FORMAT ...
+            NV097_SET_VERTEX_DATA_ARRAY_FORMAT + 0x3c: {
+
+        slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4;
+        VertexAttribute *vertex_attribute = &pg->vertex_attributes[slot];
+
+        vertex_attribute->format =
+            GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE);
+        vertex_attribute->count =
+            GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE);
+        vertex_attribute->stride =
+            GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE);
+
+        NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n",
+            vertex_attribute->format,
+            vertex_attribute->count,
+            vertex_attribute->stride);
+
+        vertex_attribute->gl_count = vertex_attribute->count;
+
+        switch (vertex_attribute->format) {
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D:
+            vertex_attribute->gl_type = GL_UNSIGNED_BYTE;
+            vertex_attribute->gl_normalize = GL_TRUE;
+            vertex_attribute->size = 1;
+            assert(vertex_attribute->count == 4);
+            // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt
+            vertex_attribute->gl_count = GL_BGRA;
+            vertex_attribute->needs_conversion = false;
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL:
+            vertex_attribute->gl_type = GL_UNSIGNED_BYTE;
+            vertex_attribute->gl_normalize = GL_TRUE;
+            vertex_attribute->size = 1;
+            vertex_attribute->needs_conversion = false;
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1:
+            vertex_attribute->gl_type = GL_SHORT;
+            vertex_attribute->gl_normalize = GL_TRUE;
+            vertex_attribute->size = 2;
+            vertex_attribute->needs_conversion = false;
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F:
+            vertex_attribute->gl_type = GL_FLOAT;
+            vertex_attribute->gl_normalize = GL_FALSE;
+            vertex_attribute->size = 4;
+            vertex_attribute->needs_conversion = false;
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K:
+            vertex_attribute->gl_type = GL_SHORT;
+            vertex_attribute->gl_normalize = GL_FALSE;
+            vertex_attribute->size = 2;
+            vertex_attribute->needs_conversion = false;
+            break;
+        case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP:
+            /* 3 signed, normalized components packed in 32-bits. (11,11,10) */
+            vertex_attribute->size = 4;
+            vertex_attribute->gl_type = GL_FLOAT;
+            vertex_attribute->gl_normalize = GL_FALSE;
+            vertex_attribute->needs_conversion = true;
+            vertex_attribute->converted_size = sizeof(float);
+            vertex_attribute->converted_count = 3 * vertex_attribute->count;
+            break;
+        default:
+            fprintf(stderr, "Unknown vertex type: 0x%x\n", vertex_attribute->format);
+            assert(false);
+            break;
+        }
+
+        if (vertex_attribute->needs_conversion) {
+            vertex_attribute->converted_elements = 0;
+        } else {
+            if (vertex_attribute->converted_buffer) {
+                g_free(vertex_attribute->converted_buffer);
+                vertex_attribute->converted_buffer = NULL;
+            }
+        }
+
+        break;
+    }
+
+    case NV097_SET_VERTEX_DATA_ARRAY_OFFSET ...
+            NV097_SET_VERTEX_DATA_ARRAY_OFFSET + 0x3c:
+
+        slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4;
+
+        pg->vertex_attributes[slot].dma_select =
+            parameter & 0x80000000;
+        pg->vertex_attributes[slot].offset =
+            parameter & 0x7fffffff;
+
+        pg->vertex_attributes[slot].converted_elements = 0;
+
+        break;
+
+    case NV097_SET_LOGIC_OP_ENABLE:
+        SET_MASK(pg->regs[NV_PGRAPH_BLEND],
+                 NV_PGRAPH_BLEND_LOGICOP_ENABLE, parameter);
+        break;
+
+    case NV097_SET_LOGIC_OP:
+        SET_MASK(pg->regs[NV_PGRAPH_BLEND],
+                 NV_PGRAPH_BLEND_LOGICOP, parameter & 0xF);
+        break;
+
+    case NV097_CLEAR_REPORT_VALUE:
+        /* FIXME: Does this have a value in parameter? Also does this (also?) modify
+         *        the report memory block?
+         */
+        if (pg->gl_zpass_pixel_count_query_count) {
+            glDeleteQueries(pg->gl_zpass_pixel_count_query_count,
+                            pg->gl_zpass_pixel_count_queries);
+            pg->gl_zpass_pixel_count_query_count = 0;
+        }
+        pg->zpass_pixel_count_result = 0;
+        break;
+
+    case NV097_SET_ZPASS_PIXEL_COUNT_ENABLE:
+        pg->zpass_pixel_count_enable = parameter;
+        break;
+
+    case NV097_GET_REPORT: {
+        /* FIXME: This was first intended to be watchpoint-based. However,
+         *        qemu / kvm only supports virtual-address watchpoints.
+         *        This'll do for now, but accuracy and performance with other
+         *        approaches could be better
+         */
+        uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE);
+        assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT);
+        hwaddr offset = GET_MASK(parameter, NV097_GET_REPORT_OFFSET);
+
+        uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! */
+        uint32_t done = 0;
+
+        /* FIXME: Multisampling affects this (both: OGL and Xbox GPU),
+         *        not sure if CLEARs also count
+         */
+        /* FIXME: What about clipping regions etc? */
+        for(i = 0; i < pg->gl_zpass_pixel_count_query_count; i++) {
+            GLuint gl_query_result;
+            glGetQueryObjectuiv(pg->gl_zpass_pixel_count_queries[i],
+                                GL_QUERY_RESULT,
+                                &gl_query_result);
+            pg->zpass_pixel_count_result += gl_query_result;
+        }
+        if (pg->gl_zpass_pixel_count_query_count) {
+            glDeleteQueries(pg->gl_zpass_pixel_count_query_count,
+                            pg->gl_zpass_pixel_count_queries);
+        }
+        pg->gl_zpass_pixel_count_query_count = 0;
+
+        hwaddr report_dma_len;
+        uint8_t *report_data = nv_dma_map(d, pg->dma_report,
+                                             &report_dma_len);
+        assert(offset < report_dma_len);
+        report_data += offset;
+
+        stq_le_p((uint64_t*)&report_data[0], timestamp);
+        stl_le_p((uint32_t*)&report_data[8], pg->zpass_pixel_count_result);
+        stl_le_p((uint32_t*)&report_data[12], done);
+
+        break;
+    }
+
+    case NV097_SET_EYE_DIRECTION ...
+            NV097_SET_EYE_DIRECTION + 8:
+        slot = (method - NV097_SET_EYE_DIRECTION) / 4;
+        pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] = parameter;
+        pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true;
+        break;
+
+    case NV097_SET_BEGIN_END: {
+        bool depth_test =
+            pg->regs[NV_PGRAPH_CONTROL_0] & NV_PGRAPH_CONTROL_0_ZENABLE;
+        bool stencil_test = pg->regs[NV_PGRAPH_CONTROL_1]
+                                & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;
+
+        if (parameter == NV097_SET_BEGIN_END_OP_END) {
+
+            assert(pg->shader_binding);
+
+            if (pg->draw_arrays_length) {
+
+                NV2A_GL_DPRINTF(false, "Draw Arrays");
+
+                assert(pg->inline_buffer_length == 0);
+                assert(pg->inline_array_length == 0);
+                assert(pg->inline_elements_length == 0);
+
+                pgraph_bind_vertex_attributes(d, pg->draw_arrays_max_count,
+                                              false, 0);
+                glMultiDrawArrays(pg->shader_binding->gl_primitive_mode,
+                                  pg->gl_draw_arrays_start,
+                                  pg->gl_draw_arrays_count,
+                                  pg->draw_arrays_length);
+            } else if (pg->inline_buffer_length) {
+
+                NV2A_GL_DPRINTF(false, "Inline Buffer");
+
+                assert(pg->draw_arrays_length == 0);
+                assert(pg->inline_array_length == 0);
+                assert(pg->inline_elements_length == 0);
+
+                for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+                    VertexAttribute *attribute = &pg->vertex_attributes[i];
+
+                    if (attribute->inline_buffer) {
+
+                        glBindBuffer(GL_ARRAY_BUFFER,
+                                     attribute->gl_inline_buffer);
+                        glBufferData(GL_ARRAY_BUFFER,
+                                     pg->inline_buffer_length
+                                        * sizeof(float) * 4,
+                                     attribute->inline_buffer,
+                                     GL_DYNAMIC_DRAW);
+
+                        /* Clear buffer for next batch */
+                        g_free(attribute->inline_buffer);
+                        attribute->inline_buffer = NULL;
+
+                        glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0);
+                        glEnableVertexAttribArray(i);
+                    } else {
+                        glDisableVertexAttribArray(i);
+
+                        glVertexAttrib4fv(i, attribute->inline_value);
+                    }
+
+                }
+
+                glDrawArrays(pg->shader_binding->gl_primitive_mode,
+                             0, pg->inline_buffer_length);
+            } else if (pg->inline_array_length) {
+
+                NV2A_GL_DPRINTF(false, "Inline Array");
+
+                assert(pg->draw_arrays_length == 0);
+                assert(pg->inline_buffer_length == 0);
+                assert(pg->inline_elements_length == 0);
+
+                unsigned int index_count = pgraph_bind_inline_array(d);
+                glDrawArrays(pg->shader_binding->gl_primitive_mode,
+                             0, index_count);
+            } else if (pg->inline_elements_length) {
+
+                NV2A_GL_DPRINTF(false, "Inline Elements");
+
+                assert(pg->draw_arrays_length == 0);
+                assert(pg->inline_buffer_length == 0);
+                assert(pg->inline_array_length == 0);
+
+                uint32_t max_element = 0;
+                uint32_t min_element = (uint32_t)-1;
+                for (i=0; i<pg->inline_elements_length; i++) {
+                    max_element = MAX(pg->inline_elements[i], max_element);
+                    min_element = MIN(pg->inline_elements[i], min_element);
+                }
+
+                pgraph_bind_vertex_attributes(d, max_element+1, false, 0);
+
+                glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, pg->gl_element_buffer);
+                glBufferData(GL_ELEMENT_ARRAY_BUFFER,
+                             pg->inline_elements_length*4,
+                             pg->inline_elements,
+                             GL_DYNAMIC_DRAW);
+
+                glDrawRangeElements(pg->shader_binding->gl_primitive_mode,
+                                    min_element, max_element,
+                                    pg->inline_elements_length,
+                                    GL_UNSIGNED_INT,
+                                    (void*)0);
+
+            } else {
+                NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END");
+                assert(false);
+            }
+
+            /* End of visibility testing */
+            if (pg->zpass_pixel_count_enable) {
+                glEndQuery(GL_SAMPLES_PASSED);
+            }
+
+            NV2A_GL_DGROUP_END();
+        } else {
+            NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", parameter);
+            assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON);
+
+            pgraph_update_surface(d, true, true, depth_test || stencil_test);
+
+            pg->primitive_mode = parameter;
+
+            uint32_t control_0 = pg->regs[NV_PGRAPH_CONTROL_0];
+
+            bool alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE;
+            bool red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE;
+            bool green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE;
+            bool blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE;
+            glColorMask(red, green, blue, alpha);
+            glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE));
+            glStencilMask(GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                                   NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE));
+
+            if (pg->regs[NV_PGRAPH_BLEND] & NV_PGRAPH_BLEND_EN) {
+                glEnable(GL_BLEND);
+                uint32_t sfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND],
+                                            NV_PGRAPH_BLEND_SFACTOR);
+                uint32_t dfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND],
+                                            NV_PGRAPH_BLEND_DFACTOR);
+                assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_map));
+                assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_map));
+                glBlendFunc(pgraph_blend_factor_map[sfactor],
+                            pgraph_blend_factor_map[dfactor]);
+
+                uint32_t equation = GET_MASK(pg->regs[NV_PGRAPH_BLEND],
+                                             NV_PGRAPH_BLEND_EQN);
+                assert(equation < ARRAY_SIZE(pgraph_blend_equation_map));
+                glBlendEquation(pgraph_blend_equation_map[equation]);
+
+                uint32_t blend_color = pg->regs[NV_PGRAPH_BLENDCOLOR];
+                glBlendColor( ((blend_color >> 16) & 0xFF) / 255.0f, /* red */
+                              ((blend_color >> 8) & 0xFF) / 255.0f,  /* green */
+                              (blend_color & 0xFF) / 255.0f,         /* blue */
+                              ((blend_color >> 24) & 0xFF) / 255.0f);/* alpha */
+            } else {
+                glDisable(GL_BLEND);
+            }
+
+            /* Face culling */
+            if (pg->regs[NV_PGRAPH_SETUPRASTER]
+                    & NV_PGRAPH_SETUPRASTER_CULLENABLE) {
+                uint32_t cull_face = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER],
+                                              NV_PGRAPH_SETUPRASTER_CULLCTRL);
+                assert(cull_face < ARRAY_SIZE(pgraph_cull_face_map));
+                glCullFace(pgraph_cull_face_map[cull_face]);
+                glEnable(GL_CULL_FACE);
+            } else {
+                glDisable(GL_CULL_FACE);
+            }
+
+            /* Front-face select */
+            glFrontFace(pg->regs[NV_PGRAPH_SETUPRASTER]
+                            & NV_PGRAPH_SETUPRASTER_FRONTFACE
+                                ? GL_CCW : GL_CW);
+
+            /* Polygon offset */
+            /* FIXME: GL implementation-specific, maybe do this in VS? */
+            if (pg->regs[NV_PGRAPH_SETUPRASTER] &
+                    NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) {
+                glEnable(GL_POLYGON_OFFSET_FILL);
+            } else {
+                glDisable(GL_POLYGON_OFFSET_FILL);
+            }
+            if (pg->regs[NV_PGRAPH_SETUPRASTER] &
+                    NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) {
+                glEnable(GL_POLYGON_OFFSET_LINE);
+            } else {
+                glDisable(GL_POLYGON_OFFSET_LINE);
+            }
+            if (pg->regs[NV_PGRAPH_SETUPRASTER] &
+                    NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) {
+                glEnable(GL_POLYGON_OFFSET_POINT);
+            } else {
+                glDisable(GL_POLYGON_OFFSET_POINT);
+            }
+            if (pg->regs[NV_PGRAPH_SETUPRASTER] &
+                    (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
+                     NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
+                     NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
+                GLfloat zfactor = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETFACTOR];
+                GLfloat zbias = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETBIAS];
+                glPolygonOffset(zfactor, zbias);
+            }
+
+            /* Depth testing */
+            if (depth_test) {
+                glEnable(GL_DEPTH_TEST);
+
+                uint32_t depth_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0],
+                                               NV_PGRAPH_CONTROL_0_ZFUNC);
+                assert(depth_func < ARRAY_SIZE(pgraph_depth_func_map));
+                glDepthFunc(pgraph_depth_func_map[depth_func]);
+            } else {
+                glDisable(GL_DEPTH_TEST);
+            }
+
+            if (stencil_test) {
+                glEnable(GL_STENCIL_TEST);
+
+                uint32_t stencil_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                                            NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
+                uint32_t stencil_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                                            NV_PGRAPH_CONTROL_1_STENCIL_REF);
+                uint32_t func_mask = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1],
+                                        NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
+                uint32_t op_fail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
+                                        NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
+                uint32_t op_zfail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
+                                        NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
+                uint32_t op_zpass = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2],
+                                        NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);
+
+                assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_map));
+                assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_map));
+                assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_map));
+                assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_map));
+
+                glStencilFunc(
+                    pgraph_stencil_func_map[stencil_func],
+                    stencil_ref,
+                    func_mask);
+
+                glStencilOp(
+                    pgraph_stencil_op_map[op_fail],
+                    pgraph_stencil_op_map[op_zfail],
+                    pgraph_stencil_op_map[op_zpass]);
+
+            } else {
+                glDisable(GL_STENCIL_TEST);
+            }
+
+            /* Dither */
+            /* FIXME: GL implementation dependent */
+            if (pg->regs[NV_PGRAPH_CONTROL_0] &
+                    NV_PGRAPH_CONTROL_0_DITHERENABLE) {
+                glEnable(GL_DITHER);
+            } else {
+                glDisable(GL_DITHER);
+            }
+
+            pgraph_bind_shaders(pg);
+            pgraph_bind_textures(d);
+
+            //glDisableVertexAttribArray(NV2A_VERTEX_ATTR_DIFFUSE);
+            //glVertexAttrib4f(NV2A_VERTEX_ATTR_DIFFUSE, 1.0, 1.0, 1.0, 1.0);
+
+
+            unsigned int width, height;
+            pgraph_get_surface_dimensions(pg, &width, &height);
+            pgraph_apply_anti_aliasing_factor(pg, &width, &height);
+            glViewport(0, 0, width, height);
+
+            pg->inline_elements_length = 0;
+            pg->inline_array_length = 0;
+            pg->inline_buffer_length = 0;
+            pg->draw_arrays_length = 0;
+            pg->draw_arrays_max_count = 0;
+
+            /* Visibility testing */
+            if (pg->zpass_pixel_count_enable) {
+                GLuint gl_query;
+                glGenQueries(1, &gl_query);
+                pg->gl_zpass_pixel_count_query_count++;
+                pg->gl_zpass_pixel_count_queries = g_realloc(
+                    pg->gl_zpass_pixel_count_queries,
+                    sizeof(GLuint) * pg->gl_zpass_pixel_count_query_count);
+                pg->gl_zpass_pixel_count_queries[
+                    pg->gl_zpass_pixel_count_query_count - 1] = gl_query;
+                glBeginQuery(GL_SAMPLES_PASSED, gl_query);
+            }
+
+        }
+
+        pgraph_set_surface_dirty(pg, true, depth_test || stencil_test);
+        break;
+    }
+    CASE_4(NV097_SET_TEXTURE_OFFSET, 64):
+        slot = (method - NV097_SET_TEXTURE_OFFSET) / 64;
+        pg->regs[NV_PGRAPH_TEXOFFSET0 + slot * 4] = parameter;
+        pg->texture_dirty[slot] = true;
+        break;
+    CASE_4(NV097_SET_TEXTURE_FORMAT, 64): {
+        slot = (method - NV097_SET_TEXTURE_FORMAT) / 64;
+
+        bool dma_select =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2;
+        bool cubemap =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE);
+        unsigned int border_source =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE);
+        unsigned int dimensionality =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY);
+        unsigned int color_format =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR);
+        unsigned int levels =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS);
+        unsigned int log_width =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U);
+        unsigned int log_height =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V);
+        unsigned int log_depth =
+            GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P);
+
+        uint32_t *reg = &pg->regs[NV_PGRAPH_TEXFMT0 + slot * 4];
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_COLOR, color_format);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height);
+        SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth);
+
+        pg->texture_dirty[slot] = true;
+        break;
+    }
+    CASE_4(NV097_SET_TEXTURE_CONTROL0, 64):
+        slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64;
+        pg->regs[NV_PGRAPH_TEXCTL0_0 + slot*4] = parameter;
+        break;
+    CASE_4(NV097_SET_TEXTURE_CONTROL1, 64):
+        slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64;
+        pg->regs[NV_PGRAPH_TEXCTL1_0 + slot*4] = parameter;
+        break;
+    CASE_4(NV097_SET_TEXTURE_FILTER, 64):
+        slot = (method - NV097_SET_TEXTURE_FILTER) / 64;
+        pg->regs[NV_PGRAPH_TEXFILTER0 + slot * 4] = parameter;
+        break;
+    CASE_4(NV097_SET_TEXTURE_IMAGE_RECT, 64):
+        slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64;
+        pg->regs[NV_PGRAPH_TEXIMAGERECT0 + slot * 4] = parameter;
+        pg->texture_dirty[slot] = true;
+        break;
+    CASE_4(NV097_SET_TEXTURE_PALETTE, 64): {
+        slot = (method - NV097_SET_TEXTURE_PALETTE) / 64;
+
+        bool dma_select =
+            GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1;
+        unsigned int length =
+            GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH);
+        unsigned int offset =
+            GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET);
+
+        uint32_t *reg = &pg->regs[NV_PGRAPH_TEXPALETTE0 + slot * 4];
+        SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select);
+        SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length);
+        SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset);
+
+        pg->texture_dirty[slot] = true;
+        break;
+    }
+
+    CASE_4(NV097_SET_TEXTURE_BORDER_COLOR, 64):
+        slot = (method - NV097_SET_TEXTURE_BORDER_COLOR) / 64;
+        pg->regs[NV_PGRAPH_BORDERCOLOR0 + slot * 4] = parameter;
+        break;
+    CASE_4(NV097_SET_TEXTURE_SET_BUMP_ENV_MAT + 0x0, 64):
+    CASE_4(NV097_SET_TEXTURE_SET_BUMP_ENV_MAT + 0x4, 64):
+    CASE_4(NV097_SET_TEXTURE_SET_BUMP_ENV_MAT + 0x8, 64):
+    CASE_4(NV097_SET_TEXTURE_SET_BUMP_ENV_MAT + 0xc, 64):
+        slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4;
+        assert((slot / 16) > 0);
+        slot -= 16;
+        pg->bump_env_matrix[slot / 16][slot % 4] = *(float*)&parameter;
+        break;
+
+    CASE_4(NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE, 64):
+        slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64;
+        assert(slot > 0);
+        slot--;
+        pg->regs[NV_PGRAPH_BUMPSCALE1 + slot * 4] = parameter;
+        break;
+    CASE_4(NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET, 64):
+        slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64;
+        assert(slot > 0);
+        slot--;
+        pg->regs[NV_PGRAPH_BUMPOFFSET1 + slot * 4] = parameter;
+        break;
+
+    case NV097_ARRAY_ELEMENT16:
+        assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
+        pg->inline_elements[
+            pg->inline_elements_length++] = parameter & 0xFFFF;
+        pg->inline_elements[
+            pg->inline_elements_length++] = parameter >> 16;
+        break;
+    case NV097_ARRAY_ELEMENT32:
+        assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH);
+        pg->inline_elements[
+            pg->inline_elements_length++] = parameter;
+        break;
+    case NV097_DRAW_ARRAYS: {
+
+        unsigned int start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX);
+        unsigned int count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT)+1;
+
+        pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count);
+
+        assert(pg->draw_arrays_length < ARRAY_SIZE(pg->gl_draw_arrays_start));
+
+        /* Attempt to connect primitives */
+        if (pg->draw_arrays_length > 0) {
+            unsigned int last_start =
+                pg->gl_draw_arrays_start[pg->draw_arrays_length - 1];
+            GLsizei* last_count =
+                &pg->gl_draw_arrays_count[pg->draw_arrays_length - 1];
+            if (start == (last_start + *last_count)) {
+                *last_count += count;
+                break;
+            }
+        }
+
+        pg->gl_draw_arrays_start[pg->draw_arrays_length] = start;
+        pg->gl_draw_arrays_count[pg->draw_arrays_length] = count;
+        pg->draw_arrays_length++;
+        break;
+    }
+    case NV097_INLINE_ARRAY:
+        assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH);
+        pg->inline_array[
+            pg->inline_array_length++] = parameter;
+        break;
+    case NV097_SET_EYE_VECTOR ...
+            NV097_SET_EYE_VECTOR + 8:
+        slot = (method - NV097_SET_EYE_VECTOR) / 4;
+        pg->regs[NV_PGRAPH_EYEVEC0 + slot * 4] = parameter;
+        break;
+
+    case NV097_SET_VERTEX_DATA2F_M ...
+            NV097_SET_VERTEX_DATA2F_M + 0x7c: {
+        slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4;
+        unsigned int part = slot % 2;
+        slot /= 2;
+        VertexAttribute *attribute = &pg->vertex_attributes[slot];
+        pgraph_allocate_inline_buffer_vertices(pg, slot);
+        attribute->inline_value[part] = *(float*)&parameter;
+        /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? */
+        attribute->inline_value[2] = 0.0;
+        attribute->inline_value[3] = 1.0;
+        if ((slot == 0) && (part == 1)) {
+            pgraph_finish_inline_buffer_vertex(pg);
+        }
+        break;
+    }
+    case NV097_SET_VERTEX_DATA4F_M ...
+            NV097_SET_VERTEX_DATA4F_M + 0xfc: {
+        slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4;
+        unsigned int part = slot % 4;
+        slot /= 4;
+        VertexAttribute *attribute = &pg->vertex_attributes[slot];
+        pgraph_allocate_inline_buffer_vertices(pg, slot);
+        attribute->inline_value[part] = *(float*)&parameter;
+        if ((slot == 0) && (part == 3)) {
+            pgraph_finish_inline_buffer_vertex(pg);
+        }
+        break;
+    }
+    case NV097_SET_VERTEX_DATA2S ...
+            NV097_SET_VERTEX_DATA2S + 0x3c: {
+        slot = (method - NV097_SET_VERTEX_DATA2S) / 4;
+        assert(false); /* FIXME: Untested! */
+        VertexAttribute *attribute = &pg->vertex_attributes[slot];
+        pgraph_allocate_inline_buffer_vertices(pg, slot);
+        /* FIXME: Is mapping to [-1,+1] correct? */
+        attribute->inline_value[0] = ((int16_t)(parameter & 0xFFFF) * 2.0 + 1)
+                                         / 65535.0;
+        attribute->inline_value[1] = ((int16_t)(parameter >> 16) * 2.0 + 1)
+                                         / 65535.0;
+        /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? */
+        attribute->inline_value[2] = 0.0;
+        attribute->inline_value[3] = 1.0;
+        if (slot == 0) {
+            pgraph_finish_inline_buffer_vertex(pg);
+            assert(false); /* FIXME: Untested */
+        }
+        break;
+    }
+    case NV097_SET_VERTEX_DATA4UB ...
+            NV097_SET_VERTEX_DATA4UB + 0x3c: {
+        slot = (method - NV097_SET_VERTEX_DATA4UB) / 4;
+        VertexAttribute *attribute = &pg->vertex_attributes[slot];
+        pgraph_allocate_inline_buffer_vertices(pg, slot);
+        attribute->inline_value[0] = (parameter & 0xFF) / 255.0;
+        attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0;
+        attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0;
+        attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0;
+        if (slot == 0) {
+            pgraph_finish_inline_buffer_vertex(pg);
+            assert(false); /* FIXME: Untested */
+        }
+        break;
+    }
+    case NV097_SET_VERTEX_DATA4S_M ...
+            NV097_SET_VERTEX_DATA4S_M + 0x7c: {
+        slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4;
+        unsigned int part = slot % 2;
+        slot /= 2;
+        assert(false); /* FIXME: Untested! */
+        VertexAttribute *attribute = &pg->vertex_attributes[slot];
+        pgraph_allocate_inline_buffer_vertices(pg, slot);
+        /* FIXME: Is mapping to [-1,+1] correct? */
+        attribute->inline_value[part * 2 + 0] = ((int16_t)(parameter & 0xFFFF)
+                                                     * 2.0 + 1) / 65535.0;
+        attribute->inline_value[part * 2 + 1] = ((int16_t)(parameter >> 16)
+                                                     * 2.0 + 1) / 65535.0;
+        if ((slot == 0) && (part == 1)) {
+            pgraph_finish_inline_buffer_vertex(pg);
+            assert(false); /* FIXME: Untested */
+        }
+        break;
+    }
+
+    case NV097_SET_SEMAPHORE_OFFSET:
+        pg->regs[NV_PGRAPH_SEMAPHOREOFFSET] = parameter;
+        break;
+    case NV097_BACK_END_WRITE_SEMAPHORE_RELEASE: {
+
+        pgraph_update_surface(d, false, true, true);
+
+        //qemu_mutex_unlock(&d->pgraph.lock);
+        //qemu_mutex_lock_iothread();
+
+        uint32_t semaphore_offset = pg->regs[NV_PGRAPH_SEMAPHOREOFFSET];
+
+        hwaddr semaphore_dma_len;
+        uint8_t *semaphore_data = nv_dma_map(d, pg->dma_semaphore,
+                                             &semaphore_dma_len);
+        assert(semaphore_offset < semaphore_dma_len);
+        semaphore_data += semaphore_offset;
+
+        stl_le_p((uint32_t*)semaphore_data, parameter);
+
+        //qemu_mutex_lock(&d->pgraph.lock);
+        //qemu_mutex_unlock_iothread();
+
+        break;
+    }
+    case NV097_SET_ZSTENCIL_CLEAR_VALUE:
+        pg->regs[NV_PGRAPH_ZSTENCILCLEARVALUE] = parameter;
+        break;
+
+    case NV097_SET_COLOR_CLEAR_VALUE:
+        pg->regs[NV_PGRAPH_COLORCLEARVALUE] = parameter;
+        break;
+
+    case NV097_CLEAR_SURFACE: {
+        NV2A_DPRINTF("---------PRE CLEAR ------\n");
+        GLbitfield gl_mask = 0;
+
+        bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR);
+        bool write_zeta =
+            (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL));
+
+        if (write_zeta) {
+            uint32_t clear_zstencil =
+                d->pgraph.regs[NV_PGRAPH_ZSTENCILCLEARVALUE];
+            GLint gl_clear_stencil;
+            GLfloat gl_clear_depth;
+
+            /* FIXME: Put these in some lookup table */
+            const float f16_max = 511.9375f;
+            /* FIXME: 7 bits of mantissa unused. maybe use full buffer? */
+            const float f24_max = 3.4027977E38;
+
+            switch(pg->surface_shape.zeta_format) {
+            case NV097_SET_SURFACE_FORMAT_ZETA_Z16: {
+                uint16_t z = clear_zstencil & 0xFFFF;
+                /* FIXME: Remove bit for stencil clear? */
+                if (pg->surface_shape.z_format) {
+                    gl_clear_depth = convert_f16_to_float(z) / f16_max;
+                    assert(false); /* FIXME: Untested */
+                } else {
+                    gl_clear_depth = z / (float)0xFFFF;
+                }
+                break;
+            }
+            case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: {
+                gl_clear_stencil = clear_zstencil & 0xFF;
+                uint32_t z = clear_zstencil >> 8;
+                if (pg->surface_shape.z_format) {
+                    gl_clear_depth = convert_f24_to_float(z) / f24_max;
+                    assert(false); /* FIXME: Untested */
+                } else {
+                    gl_clear_depth = z / (float)0xFFFFFF;
+                }
+                break;
+            }
+            default:
+                fprintf(stderr, "Unknown zeta surface format: 0x%x\n", pg->surface_shape.zeta_format);
+                assert(false);
+                break;
+            }
+            if (parameter & NV097_CLEAR_SURFACE_Z) {
+                gl_mask |= GL_DEPTH_BUFFER_BIT;
+                glDepthMask(GL_TRUE);
+                glClearDepth(gl_clear_depth);
+            }
+            if (parameter & NV097_CLEAR_SURFACE_STENCIL) {
+                gl_mask |= GL_STENCIL_BUFFER_BIT;
+                glStencilMask(0xff);
+                glClearStencil(gl_clear_stencil);            
+            }
+        }
+        if (write_color) {
+            gl_mask |= GL_COLOR_BUFFER_BIT;
+            glColorMask((parameter & NV097_CLEAR_SURFACE_R)
+                             ? GL_TRUE : GL_FALSE,
+                        (parameter & NV097_CLEAR_SURFACE_G)
+                             ? GL_TRUE : GL_FALSE,
+                        (parameter & NV097_CLEAR_SURFACE_B)
+                             ? GL_TRUE : GL_FALSE,
+                        (parameter & NV097_CLEAR_SURFACE_A)
+                             ? GL_TRUE : GL_FALSE);
+            uint32_t clear_color = d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE];
+
+            /* Handle RGB */
+            GLfloat red, green, blue;
+            switch(pg->surface_shape.color_format) {
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5:
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5:
+                red = ((clear_color >> 10) & 0x1F) / 31.0f;
+                green = ((clear_color >> 5) & 0x1F) / 31.0f;
+                blue = (clear_color & 0x1F) / 31.0f;
+                assert(false); /* Untested */
+                break;
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5:
+                red = ((clear_color >> 11) & 0x1F) / 31.0f;
+                green = ((clear_color >> 5) & 0x3F) / 63.0f;
+                blue = (clear_color & 0x1F) / 31.0f;
+                break;
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8:
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8:
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8:
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8:
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
+                red = ((clear_color >> 16) & 0xFF) / 255.0f;
+                green = ((clear_color >> 8) & 0xFF) / 255.0f;
+                blue = (clear_color & 0xFF) / 255.0f;
+                break;
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8:
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8:
+                /* Xbox D3D doesn't support clearing those */
+            default:
+                red = 1.0f;
+                green = 0.0f;
+                blue = 1.0f;
+                fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported",
+                        pg->surface_shape.color_format);
+                assert(false);
+                break;
+            }
+
+            /* Handle alpha */
+            GLfloat alpha;
+            switch(pg->surface_shape.color_format) {
+            /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we
+             *        also have to clear non-alpha bits with alpha value?
+             *        As GL doesn't own those pixels we'd have to do this on
+             *        our own in xbox memory.
+             */
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8:
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8:
+                alpha = ((clear_color >> 24) & 0x7F) / 127.0f;
+                assert(false); /* Untested */
+                break;
+            case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8:
+                alpha = ((clear_color >> 24) & 0xFF) / 255.0f;
+                break;
+            default:
+                alpha = 1.0f;
+                break;
+            }
+
+            glClearColor(red, green, blue, alpha);
+        }
+        pgraph_update_surface(d, true, write_color, write_zeta);
+
+        glEnable(GL_SCISSOR_TEST);
+
+        unsigned int xmin = GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX],
+                NV_PGRAPH_CLEARRECTX_XMIN);
+        unsigned int xmax = GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX],
+                NV_PGRAPH_CLEARRECTX_XMAX);
+        unsigned int ymin = GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY],
+                NV_PGRAPH_CLEARRECTY_YMIN);
+        unsigned int ymax = GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY],
+                NV_PGRAPH_CLEARRECTY_YMAX);
+
+        unsigned int scissor_x = xmin;
+        unsigned int scissor_y = pg->surface_shape.clip_height - ymax - 1;
+
+        unsigned int scissor_width = xmax - xmin + 1;
+        unsigned int scissor_height = ymax - ymin + 1;
+
+        pgraph_apply_anti_aliasing_factor(pg, &scissor_x, &scissor_y);
+        pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height);
+
+        /* FIXME: Should this really be inverted instead of ymin? */
+        glScissor(scissor_x, scissor_y, scissor_width, scissor_height);
+
+        NV2A_DPRINTF("------------------CLEAR 0x%x %d,%d - %d,%d  %x---------------\n",
+            parameter, xmin, ymin, xmax, ymax, d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]);
+
+        /* Dither */
+        /* FIXME: Maybe also disable it here? + GL implementation dependent */
+        if (pg->regs[NV_PGRAPH_CONTROL_0] &
+                NV_PGRAPH_CONTROL_0_DITHERENABLE) {
+            glEnable(GL_DITHER);
+        } else {
+            glDisable(GL_DITHER);
+        }
+
+        glClear(gl_mask);
+
+        glDisable(GL_SCISSOR_TEST);
+
+        pgraph_set_surface_dirty(pg, write_color, write_zeta);
+        break;
+    }
+
+    case NV097_SET_CLEAR_RECT_HORIZONTAL:
+        pg->regs[NV_PGRAPH_CLEARRECTX] = parameter;
+        break;
+    case NV097_SET_CLEAR_RECT_VERTICAL:
+        pg->regs[NV_PGRAPH_CLEARRECTY] = parameter;
+        break;
+
+    case NV097_SET_SPECULAR_FOG_FACTOR ...
+            NV097_SET_SPECULAR_FOG_FACTOR + 4:
+        slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4;
+        pg->regs[NV_PGRAPH_SPECFOGFACTOR0 + slot*4] = parameter;
+        break;
+
+    case NV097_SET_SHADER_CLIP_PLANE_MODE:
+        pg->regs[NV_PGRAPH_SHADERCLIPMODE] = parameter;
+        break;
+
+    case NV097_SET_COMBINER_COLOR_OCW ...
+            NV097_SET_COMBINER_COLOR_OCW + 28:
+        slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4;
+        pg->regs[NV_PGRAPH_COMBINECOLORO0 + slot*4] = parameter;
+        break;
+
+    case NV097_SET_COMBINER_CONTROL:
+        pg->regs[NV_PGRAPH_COMBINECTL] = parameter;
+        break;
+
+    case NV097_SET_SHADOW_ZSLOPE_THRESHOLD:
+        pg->regs[NV_PGRAPH_SHADOWZSLOPETHRESHOLD] = parameter;
+        assert(parameter == 0x7F800000); /* FIXME: Unimplemented */
+        break;
+
+    case NV097_SET_SHADER_STAGE_PROGRAM:
+        pg->regs[NV_PGRAPH_SHADERPROG] = parameter;
+        break;
+
+    case NV097_SET_SHADER_OTHER_STAGE_INPUT:
+        pg->regs[NV_PGRAPH_SHADERCTL] = parameter;
+        break;
+
+    case NV097_SET_TRANSFORM_EXECUTION_MODE:
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_MODE,
+                 GET_MASK(parameter,
+                          NV097_SET_TRANSFORM_EXECUTION_MODE_MODE));
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_RANGE_MODE,
+                 GET_MASK(parameter,
+                          NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE));
+        break;
+    case NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN:
+        pg->enable_vertex_program_write = parameter;
+        break;
+    case NV097_SET_TRANSFORM_PROGRAM_LOAD:
+        assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
+        SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
+                 NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter);
+        break;
+    case NV097_SET_TRANSFORM_PROGRAM_START:
+        assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH);
+        SET_MASK(pg->regs[NV_PGRAPH_CSV0_C],
+                 NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter);
+        break;
+    case NV097_SET_TRANSFORM_CONSTANT_LOAD:
+        assert(parameter < NV2A_VERTEXSHADER_CONSTANTS);
+        SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET],
+                 NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter);
+        NV2A_DPRINTF("load to %d\n", parameter);
+        break;
+
+    default:
+        NV2A_GL_DPRINTF(true, "    unhandled  (0x%02x 0x%08x)",
+                        graphics_class, method);
+        break;
+    } break; }
+
+    default:
+        NV2A_GL_DPRINTF(true, "    unhandled  (0x%02x 0x%08x)",
+                        graphics_class, method);
+        break;
+
+    }
+}
+
+/* If PGRAPH is not on channel_id, raise CONTEXT_SWITCH and wait for it. */
+static void pgraph_context_switch(NV2AState *d, unsigned int channel_id)
+{
+    bool channel_valid =
+        d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID;
+    unsigned pgraph_channel_id = GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID);
+    /* Nothing to do when CTX_CONTROL flags the channel id valid and it matches */
+    bool valid = channel_valid && pgraph_channel_id == channel_id;
+    if (!valid) {
+        SET_MASK(d->pgraph.regs[NV_PGRAPH_TRAPPED_ADDR],
+                 NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id);
+
+        NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id);
+
+        /* TODO: hardware context switching */
+        assert(!(d->pgraph.regs[NV_PGRAPH_DEBUG_3]
+                & NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH));
+        /* Entered with pgraph.lock held: trade it for the iothread lock */
+        qemu_mutex_unlock(&d->pgraph.lock);
+        qemu_mutex_lock_iothread();
+        d->pgraph.pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH;
+        update_irq(d);
+        /* Re-take pgraph.lock before letting go of the iothread lock */
+        qemu_mutex_lock(&d->pgraph.lock);
+        qemu_mutex_unlock_iothread();
+
+        // sleep until the CONTEXT_SWITCH pending bit has been cleared
+        while (d->pgraph.pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH) {
+            qemu_cond_wait(&d->pgraph.interrupt_cond, &d->pgraph.lock);
+        }
+    }
+}
+/* Sleep on fifo_access_cond until NV_PGRAPH_FIFO has its ACCESS bit set. */
+static void pgraph_wait_fifo_access(NV2AState *d) {
+    while ((d->pgraph.regs[NV_PGRAPH_FIFO] & NV_PGRAPH_FIFO_ACCESS) == 0) {
+        qemu_cond_wait(&d->pgraph.fifo_access_cond, &d->pgraph.lock);
+    }
+}
+/* Puller: run each queued CACHE1 entry on PGRAPH. Needs pfifo.lock held. */
+static void pfifo_run_puller(NV2AState *d)
+{
+    uint32_t *pull0 = &d->pfifo.regs[NV_PFIFO_CACHE1_PULL0];
+    uint32_t *pull1 = &d->pfifo.regs[NV_PFIFO_CACHE1_PULL1];
+    uint32_t *engine_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_ENGINE];
+
+    uint32_t *status = &d->pfifo.regs[NV_PFIFO_CACHE1_STATUS];
+    uint32_t *get_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_GET];
+    uint32_t *put_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_PUT];
+
+    // TODO: pull everything into a private working queue first, e.g.
+    //     CacheEntry working_cache[NV2A_CACHE1_SIZE];
+    //     int working_cache_size = 0;
+    // and run the methods from that copy.
+
+    // TODO think more about locking (pfifo.lock is dropped per method below)
+
+    while (true) {
+        if (!GET_MASK(*pull0, NV_PFIFO_CACHE1_PULL0_ACCESS)) return;
+
+        /* CACHE1 is empty: nothing left to pull */
+        if (*status & NV_PFIFO_CACHE1_STATUS_LOW_MARK) break;
+
+        uint32_t get = *get_reg;
+        uint32_t put = *put_reg;
+        /* GET must be a multiple of 4 below 128*4 */
+        assert(get < 128*4 && (get % 4) == 0);
+        uint32_t method_entry = d->pfifo.regs[NV_PFIFO_CACHE1_METHOD + get*2];
+        uint32_t parameter = d->pfifo.regs[NV_PFIFO_CACHE1_DATA + get*2];
+        /* Consume the entry: advance GET, wrapping within 0x1fc */
+        uint32_t new_get = (get+4) & 0x1fc;
+        *get_reg = new_get;
+
+        if (new_get == put) {
+            // that was the last entry: set low mark
+            *status |= NV_PFIFO_CACHE1_STATUS_LOW_MARK;
+        }
+        if (*status & NV_PFIFO_CACHE1_STATUS_HIGH_MARK) {
+            // unset high mark
+            *status &= ~NV_PFIFO_CACHE1_STATUS_HIGH_MARK;
+            // room was freed: signal pusher
+            qemu_cond_signal(&d->pfifo.pusher_cond);            
+        }
+
+        /* Split the entry into method offset and subchannel */
+        uint32_t method = method_entry & 0x1FFC;
+        uint32_t subchannel = GET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_SUBCHANNEL);
+
+        // NV2A_DPRINTF("pull %d 0x%x 0x%x - subch %d\n", get/4, method_entry, parameter, subchannel);
+        /* Method 0 carries an object handle, resolved through RAMHT */
+        if (method == 0) {
+            RAMHTEntry entry = ramht_lookup(d, parameter);
+            assert(entry.valid);
+
+            // assert(entry.channel_id == state->channel_id);
+
+            assert(entry.engine == ENGINE_GRAPHICS);
+
+
+            /* the engine is bound to the subchannel */
+            assert(subchannel < 8);
+            SET_MASK(*engine_reg, 3 << (4*subchannel), entry.engine);
+            SET_MASK(*pull1, NV_PFIFO_CACHE1_PULL1_ENGINE, entry.engine);
+            // NV2A_DPRINTF("engine_reg1 %d 0x%x\n", subchannel, *engine_reg);
+
+
+            // TODO: fragile hand-off, pgraph.lock taken before pfifo.lock dropped
+            qemu_mutex_lock(&d->pgraph.lock);
+            //make pgraph busy
+            qemu_mutex_unlock(&d->pfifo.lock);
+
+            pgraph_context_switch(d, entry.channel_id);
+            pgraph_wait_fifo_access(d);
+            pgraph_method(d, subchannel, 0, entry.instance);
+
+            // make pgraph not busy
+            qemu_mutex_unlock(&d->pgraph.lock);
+            qemu_mutex_lock(&d->pfifo.lock);
+
+        } else if (method >= 0x100) {
+            // method passed to engine
+
+            /* methods that take objects.
+             * TODO: Check this range is correct for the nv2a */
+            if (method >= 0x180 && method < 0x200) {
+                //qemu_mutex_lock_iothread();
+                RAMHTEntry entry = ramht_lookup(d, parameter);
+                assert(entry.valid);
+                // assert(entry.channel_id == state->channel_id);
+                parameter = entry.instance;
+                //qemu_mutex_unlock_iothread();
+            }
+            /* Look up which engine this subchannel was bound to */
+            enum FIFOEngine engine = GET_MASK(*engine_reg, 3 << (4*subchannel));
+            // NV2A_DPRINTF("engine_reg2 %d 0x%x\n", subchannel, *engine_reg);
+            assert(engine == ENGINE_GRAPHICS);
+            SET_MASK(*pull1, NV_PFIFO_CACHE1_PULL1_ENGINE, engine);
+
+            // TODO: fragile hand-off, pgraph.lock taken before pfifo.lock dropped
+            qemu_mutex_lock(&d->pgraph.lock);
+            //make pgraph busy
+            qemu_mutex_unlock(&d->pfifo.lock);
+
+            pgraph_wait_fifo_access(d);
+            pgraph_method(d, subchannel, method, parameter);
+
+            // make pgraph not busy
+            qemu_mutex_unlock(&d->pgraph.lock);
+            qemu_mutex_lock(&d->pfifo.lock);
+        } else {
+            assert(false); /* methods 0x004-0x0fc are not handled */
+        }
+
+    }
+}
+
+/* Puller thread: makes the PGRAPH GL context current, then drains CACHE1. */
+static void *pfifo_puller_thread(void *arg)
+{
+    NV2AState *d = arg;
+
+    glo_set_current(d->pgraph.gl_context);
+    qemu_mutex_lock(&d->pfifo.lock);
+    while (!d->exiting) {
+        pfifo_run_puller(d);
+        /* pfifo.lock is dropped while methods run, so an exit request may
+         * already have been signalled: test the flag before sleeping. */
+        if (d->exiting) {
+            break;
+        }
+        qemu_cond_wait(&d->pfifo.puller_cond, &d->pfifo.lock);
+    }
+    qemu_mutex_unlock(&d->pfifo.lock);
+    return NULL;
+}
+/* DMA pusher: queue pushbuffer methods from DMA_GET..DMA_PUT into CACHE1. */
+static void pfifo_run_pusher(NV2AState *d)
+{
+    uint32_t *push0 = &d->pfifo.regs[NV_PFIFO_CACHE1_PUSH0];
+    uint32_t *push1 = &d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1];
+    uint32_t *dma_subroutine = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_SUBROUTINE];
+    uint32_t *dma_state = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_STATE];
+    uint32_t *dma_push = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUSH];
+    uint32_t *dma_get = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET];
+    uint32_t *dma_put = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT];
+    uint32_t *dma_dcount = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_DCOUNT];
+
+    uint32_t *status = &d->pfifo.regs[NV_PFIFO_CACHE1_STATUS];
+    uint32_t *get_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_GET];
+    uint32_t *put_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_PUT];
+
+    if (!GET_MASK(*push0, NV_PFIFO_CACHE1_PUSH0_ACCESS)) return;
+    if (!GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS)) return;
+
+    /* suspended (STATUS is set at the end of this function after an error) */
+    if (GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_STATUS)) return;
+
+    // TODO: should we become busy here??
+    // NV_PFIFO_CACHE1_DMA_PUSH_STATE _BUSY
+
+    unsigned int channel_id = GET_MASK(*push1,
+                                       NV_PFIFO_CACHE1_PUSH1_CHID);
+
+
+    /* Channel running DMA mode */
+    uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE];
+    assert(channel_modes & (1 << channel_id));
+
+    assert(GET_MASK(*push1, NV_PFIFO_CACHE1_PUSH1_MODE)
+            == NV_PFIFO_CACHE1_PUSH1_MODE_DMA);
+
+    /* We're running so there should be no pending errors... */
+    assert(GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR)
+            == NV_PFIFO_CACHE1_DMA_STATE_ERROR_NONE);
+    /* Map the pushbuffer described by CACHE1_DMA_INSTANCE */
+    hwaddr dma_instance =
+        GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_DMA_INSTANCE],
+                 NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS) << 4;
+
+    hwaddr dma_len;
+    uint8_t *dma = nv_dma_map(d, dma_instance, &dma_len);
+
+    while (true) {
+        uint32_t dma_get_v = *dma_get;
+        uint32_t dma_put_v = *dma_put;
+        if (dma_get_v == dma_put_v) break;
+        if (dma_get_v >= dma_len) {
+            assert(false);
+            SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR,
+                     NV_PFIFO_CACHE1_DMA_STATE_ERROR_PROTECTION);
+            break;
+        }
+        /* Fetch the next pushbuffer word and step past it */
+        uint32_t word = ldl_le_p((uint32_t*)(dma + dma_get_v));
+        dma_get_v += 4;
+        /* Snapshot the state of the command currently being decoded */
+        uint32_t method_type =
+            GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE);
+        uint32_t method_subchannel =
+            GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL);
+        uint32_t method =
+            GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD) << 2;
+        uint32_t method_count =
+            GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT);
+
+        uint32_t subroutine_state =
+            GET_MASK(*dma_subroutine, NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE);
+
+        if (method_count) {
+            /* full: bail out; *dma_get is untouched so the word is re-read */
+            if (*status & NV_PFIFO_CACHE1_STATUS_HIGH_MARK) return;
+
+
+            /* data word of methods command */
+            d->pfifo.regs[NV_PFIFO_CACHE1_DMA_DATA_SHADOW] = word;
+
+            uint32_t put = *put_reg;
+            uint32_t get = *get_reg;
+
+            assert((method & 3) == 0);
+            uint32_t method_entry = 0;
+            SET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_ADDRESS, method >> 2);
+            SET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_TYPE, method_type);
+            SET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_SUBCHANNEL, method_subchannel);
+
+            // NV2A_DPRINTF("push %d 0x%x 0x%x - subch %d\n", put/4, method_entry, word, method_subchannel);
+
+            assert(put < 128*4 && (put%4) == 0);
+            d->pfifo.regs[NV_PFIFO_CACHE1_METHOD + put*2] = method_entry;
+            d->pfifo.regs[NV_PFIFO_CACHE1_DATA + put*2] = word;
+            /* Advance PUT, wrapping within 0x1fc */
+            uint32_t new_put = (put+4) & 0x1fc;
+            *put_reg = new_put;
+            if (new_put == get) {
+                // set high mark
+                *status |= NV_PFIFO_CACHE1_STATUS_HIGH_MARK;
+            }
+            if (*status & NV_PFIFO_CACHE1_STATUS_LOW_MARK) {
+                // unset low mark
+                *status &= ~NV_PFIFO_CACHE1_STATUS_LOW_MARK;
+                // signal puller
+                qemu_cond_signal(&d->pfifo.puller_cond);
+            }
+            /* Incrementing commands step the method per data word */
+            if (method_type == NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_INC) {
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD,
+                         (method + 4) >> 2);
+            }
+            SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT,
+                     method_count - 1);
+            (*dma_dcount)++;
+        } else {
+            /* no command active - this is the first word of a new one */
+            d->pfifo.regs[NV_PFIFO_CACHE1_DMA_RSVD_SHADOW] = word;
+
+            /* match all forms */
+            if ((word & 0xe0000003) == 0x20000000) {
+                /* old jump; the pre-jump GET is kept in GET_JMP_SHADOW */
+                d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW] =
+                    dma_get_v;
+                dma_get_v = word & 0x1fffffff;
+                NV2A_DPRINTF("pb OLD_JMP 0x%x\n", dma_get_v);
+            } else if ((word & 3) == 1) {
+                /* jump */
+                d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW] =
+                    dma_get_v;
+                dma_get_v = word & 0xfffffffc;
+                NV2A_DPRINTF("pb JMP 0x%x\n", dma_get_v);
+            } else if ((word & 3) == 2) {
+                /* call: a call while already in a subroutine is an error */
+                if (subroutine_state) {
+                    SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR,
+                             NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL);
+                    break;
+                } else {
+                    *dma_subroutine = dma_get_v;
+                    SET_MASK(*dma_subroutine,
+                             NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE, 1);
+                    dma_get_v = word & 0xfffffffc;
+                    NV2A_DPRINTF("pb CALL 0x%x\n", dma_get_v);
+                }
+            } else if (word == 0x00020000) {
+                /* return: resume at the address saved by the call */
+                if (!subroutine_state) {
+                    SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR,
+                             NV_PFIFO_CACHE1_DMA_STATE_ERROR_RETURN);
+                    // break;
+                } else {
+                    dma_get_v = *dma_subroutine & 0xfffffffc;
+                    SET_MASK(*dma_subroutine,
+                             NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE, 0);
+                    NV2A_DPRINTF("pb RET 0x%x\n", dma_get_v);
+                }
+            } else if ((word & 0xe0030003) == 0) {
+                /* increasing methods: latch method, subchannel and count */
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD,
+                         (word & 0x1fff) >> 2 );
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL,
+                         (word >> 13) & 7);
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT,
+                         (word >> 18) & 0x7ff);
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE,
+                         NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_INC);
+                *dma_dcount = 0;
+            } else if ((word & 0xe0030003) == 0x40000000) {
+                /* non-increasing methods: same fields, method stays fixed */
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD,
+                         (word & 0x1fff) >> 2 );
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL,
+                         (word >> 13) & 7);
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT,
+                         (word >> 18) & 0x7ff);
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE,
+                         NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_NON_INC);
+                *dma_dcount = 0;
+            } else {
+                NV2A_DPRINTF("pb reserved cmd 0x%x - 0x%x\n",
+                             dma_get_v, word);
+                SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR,
+                         NV_PFIFO_CACHE1_DMA_STATE_ERROR_RESERVED_CMD);
+                // break;
+                assert(false);
+            }
+        }
+        /* Commit the new GET before looking at the error state */
+        *dma_get = dma_get_v;
+
+        if (GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR)) {
+            break;
+        }
+    }
+
+    // NV2A_DPRINTF("DMA pusher done: max 0x%" HWADDR_PRIx ", 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx "\n",
+    //      dma_len, control->dma_get, control->dma_put);
+
+    uint32_t error = GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR);
+    if (error) {
+        NV2A_DPRINTF("pb error: %d\n", error);
+        assert(false);
+
+        SET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_STATUS, 1); /* suspended */
+
+        // d->pfifo.pending_interrupts |= NV_PFIFO_INTR_0_DMA_PUSHER;
+        // update_irq(d);
+    }
+}
+
+/* Pusher thread: runs the DMA pusher whenever pusher_cond is signalled. */
+static void *pfifo_pusher_thread(void *arg)
+{
+    NV2AState *d = arg;
+
+    qemu_mutex_lock(&d->pfifo.lock);
+    while (!d->exiting) {
+        pfifo_run_pusher(d);
+        /* Never sleep through an exit request that is already pending. */
+        if (d->exiting) {
+            break;
+        }
+        qemu_cond_wait(&d->pfifo.pusher_cond, &d->pfifo.lock);
+    }
+    qemu_mutex_unlock(&d->pfifo.lock);
+    return NULL;
+}
+
+
+
+
+
+/* PMC - card master control */
+/*
+ * PMC register read handler. BOOT_0 returns the fixed chip id, INTR_0 and
+ * INTR_EN_0 return the pending and enabled interrupt masks, and any other
+ * offset reads as zero. Every access is reported via reg_log_read().
+ * The access size is ignored.
+ */
+static uint64_t pmc_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t value;
+
+    if (addr == NV_PMC_BOOT_0) {
+        /* Chipset and stepping: NV2A, A02, Rev 0 */
+        value = 0x02A000A2;
+    } else if (addr == NV_PMC_INTR_0) {
+        /* Shows which functional units have a pending IRQ */
+        value = d->pmc.pending_interrupts;
+    } else if (addr == NV_PMC_INTR_EN_0) {
+        /* Selects which functional units can cause IRQs */
+        value = d->pmc.enabled_interrupts;
+    } else {
+        /* Not handled here */
+        value = 0;
+    }
+
+    reg_log_read(NV_PMC, addr, value);
+    return value;
+}
+/*
+ * PMC register write handler. INTR_0 clears the pending bits that are set
+ * in val; INTR_EN_0 replaces the enable mask. Other offsets are only logged.
+ */
+static void pmc_write(void *opaque, hwaddr addr,
+                      uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_PMC, addr, val);
+
+    if (addr == NV_PMC_INTR_0) {
+        /* The bits of the interrupts to clear are written */
+        d->pmc.pending_interrupts &= ~val;
+        update_irq(d);
+    } else if (addr == NV_PMC_INTR_EN_0) {
+        /* Replace the enable mask */
+        d->pmc.enabled_interrupts = val;
+        update_irq(d);
+    }
+}
+
+
+/* PBUS - bus control */
+/*
+ * PBUS register read handler.
+ *
+ * PCI_NV_0, PCI_NV_1 and PCI_NV_2 return the PCI config values that
+ * pci_get_long() reads at PCI_VENDOR_ID, PCI_COMMAND and PCI_CLASS_REVISION;
+ * any other offset reads as zero. Accesses are reported via reg_log_read().
+ */
+static uint64_t pbus_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t value = 0;
+
+    if (addr == NV_PBUS_PCI_NV_0) {
+        value = pci_get_long(d->dev.config + PCI_VENDOR_ID);
+    } else if (addr == NV_PBUS_PCI_NV_1) {
+        value = pci_get_long(d->dev.config + PCI_COMMAND);
+    } else if (addr == NV_PBUS_PCI_NV_2) {
+        value = pci_get_long(d->dev.config + PCI_CLASS_REVISION);
+    }
+
+    reg_log_read(NV_PBUS, addr, value);
+    return value;
+}
+/*
+ * PBUS register write handler. Only PCI_NV_1 has an effect: val is stored
+ * at PCI_COMMAND in the device's PCI config space with pci_set_long().
+ */
+static void pbus_write(void *opaque, hwaddr addr,
+                       uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_PBUS, addr, val);
+
+    if (addr == NV_PBUS_PCI_NV_1) {
+        pci_set_long(d->dev.config + PCI_COMMAND, val);
+    }
+}
+
+
+/* PFIFO - MMIO and DMA FIFO submission to PGRAPH and VPE */
+/* PFIFO register read handler; the register state is sampled under
+ * d->pfifo.lock. RUNOUT_STATUS always reports LOW_MARK (runout empty). */
+static uint64_t pfifo_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    qemu_mutex_lock(&d->pfifo.lock);
+
+    uint64_t r = 0;
+    switch (addr) {
+    case NV_PFIFO_INTR_0:
+        r = d->pfifo.pending_interrupts;
+        break;
+    case NV_PFIFO_INTR_EN_0:
+        r = d->pfifo.enabled_interrupts;
+        break;
+    case NV_PFIFO_RUNOUT_STATUS:
+        r = NV_PFIFO_RUNOUT_STATUS_LOW_MARK; /* low mark empty */
+        break;
+    default:
+        r = d->pfifo.regs[addr];
+        break;
+    }
+
+    qemu_mutex_unlock(&d->pfifo.lock);
+
+    reg_log_read(NV_PFIFO, addr, r);
+    return r;
+}
+/* PFIFO register write handler; wakes the pusher and puller afterwards. */
+static void pfifo_write(void *opaque, hwaddr addr,
+                        uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_PFIFO, addr, val);
+
+    qemu_mutex_lock(&d->pfifo.lock);
+
+    switch (addr) {
+    case NV_PFIFO_INTR_0:
+        d->pfifo.pending_interrupts &= ~val;
+        update_irq(d);
+        break;
+    case NV_PFIFO_INTR_EN_0:
+        d->pfifo.enabled_interrupts = val;
+        update_irq(d);
+        break;
+    default:
+        d->pfifo.regs[addr] = val;
+        break;
+    }
+    /* Both worker threads are woken after every write, whatever the offset. */
+    qemu_cond_broadcast(&d->pfifo.pusher_cond);
+    qemu_cond_broadcast(&d->pfifo.puller_cond);
+
+    qemu_mutex_unlock(&d->pfifo.lock);
+}
+
+
+/* PRMA read stub: the access is logged and always reads back as zero. */
+static uint64_t prma_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PRMA, addr, 0);
+    return 0;
+}
+static void prma_write(void *opaque, hwaddr addr,
+                       uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PRMA, addr, val); /* stub: logged, value discarded */
+}
+
+/* Invalidate VGA scanlines POINT_OUT.Y .. POINT_OUT.Y + SIZE_OUT.HEIGHT. */
+static void pvideo_vga_invalidate(NV2AState *d)
+{
+    int first_line = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT],
+                              NV_PVIDEO_POINT_OUT_Y);
+    int last_line = first_line + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT],
+                                          NV_PVIDEO_SIZE_OUT_HEIGHT);
+    NV2A_DPRINTF("pvideo_vga_invalidate %d %d\n", first_line, last_line);
+    vga_invalidate_scanlines(&d->vga, first_line, last_line);
+}
+
+/*
+ * PVIDEO register read handler. NV_PVIDEO_STOP always reads as zero; every
+ * other offset returns the value held in d->pvideo.regs[].
+ */
+static uint64_t pvideo_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t value;
+
+    if (addr == NV_PVIDEO_STOP) {
+        value = 0;
+    } else {
+        value = d->pvideo.regs[addr];
+    }
+
+    reg_log_read(NV_PVIDEO, addr, value);
+    return value;
+}
+/*
+ * PVIDEO register write handler. BUFFER stores val and enables the VGA
+ * overlay; STOP zeroes BUFFER and disables it. Other offsets are stored.
+ */
+static void pvideo_write(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_PVIDEO, addr, val);
+
+    if (addr == NV_PVIDEO_BUFFER) {
+        d->pvideo.regs[NV_PVIDEO_BUFFER] = val;
+        d->vga.enable_overlay = true;
+        pvideo_vga_invalidate(d);
+    } else if (addr == NV_PVIDEO_STOP) {
+        d->pvideo.regs[NV_PVIDEO_BUFFER] = 0;
+        d->vga.enable_overlay = false;
+        pvideo_vga_invalidate(d);
+    } else {
+        d->pvideo.regs[addr] = val;
+    }
+}
+
+/* PTIMER - time measurement and time-based alarms */
+
+/* Virtual clock (ns) fed to muldiv64() with the core clock and PTIMER ratio. */
+/* NOTE(review): confirm a zero denominator cannot reach muldiv64(). */
+static uint64_t ptimer_get_clock(NV2AState *d)
+{
+    return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                    d->pramdac.core_clock_freq * d->ptimer.numerator,
+                    get_ticks_per_sec() * d->ptimer.denominator);
+}
+/*
+ * PTIMER register read handler. Returns the interrupt masks, the clock
+ * ratio, or a slice of the tick counter from ptimer_get_clock(): TIME_0
+ * carries its low 27 bits in bits 5..31, TIME_1 carries bits 27..55.
+ * Any other offset reads as zero. Accesses are logged via reg_log_read().
+ *
+ * The access size is ignored.
+ */
+static uint64_t ptimer_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t value = 0;
+
+    if (addr == NV_PTIMER_INTR_0) {
+        value = d->ptimer.pending_interrupts;
+    } else if (addr == NV_PTIMER_INTR_EN_0) {
+        value = d->ptimer.enabled_interrupts;
+    } else if (addr == NV_PTIMER_NUMERATOR) {
+        value = d->ptimer.numerator;
+    } else if (addr == NV_PTIMER_DENOMINATOR) {
+        value = d->ptimer.denominator;
+    } else if (addr == NV_PTIMER_TIME_0) {
+        /* Low 27 bits of the counter, stored from bit 5 upwards */
+        value = (ptimer_get_clock(d) & 0x7ffffff) << 5;
+    } else if (addr == NV_PTIMER_TIME_1) {
+        /* The 29 counter bits above those exposed through TIME_0 */
+        value = (ptimer_get_clock(d) >> 27) & 0x1fffffff;
+    }
+
+    reg_log_read(NV_PTIMER, addr, value);
+    return value;
+}
+/*
+ * PTIMER MMIO write handler.
+ *
+ * Writing INTR_0 clears the pending-interrupt bits that are set in val;
+ * writing INTR_EN_0 replaces the enable mask.  Both re-evaluate the IRQ
+ * line with update_irq().  NUMERATOR, DENOMINATOR and ALARM_0 are latched
+ * into d->ptimer; writes to other offsets are logged and dropped.
+ */
+static void ptimer_write(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned int size)
+{
+    NV2AState *state = opaque;
+    reg_log_write(NV_PTIMER, addr, val);
+
+    if (addr == NV_PTIMER_INTR_0 || addr == NV_PTIMER_INTR_EN_0) {
+        if (addr == NV_PTIMER_INTR_0) {
+            state->ptimer.pending_interrupts &= ~val;
+        } else {
+            state->ptimer.enabled_interrupts = val;
+        }
+        update_irq(state);
+    } else if (addr == NV_PTIMER_DENOMINATOR) {
+        state->ptimer.denominator = val;
+    } else if (addr == NV_PTIMER_NUMERATOR) {
+        state->ptimer.numerator = val;
+    } else if (addr == NV_PTIMER_ALARM_0) {
+        state->ptimer.alarm_time = val;
+    }
+}
+
+
+/* PCOUNTER is a stub: reads are logged and return 0, writes are only logged. */
+static uint64_t pcounter_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PCOUNTER, addr, 0);
+    return 0;
+}
+static void pcounter_write(void *opaque, hwaddr addr,
+                           uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PCOUNTER, addr, val);
+}
+
+
+/* PVPE is a stub: reads are logged and return 0, writes are only logged. */
+static uint64_t pvpe_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PVPE, addr, 0);
+    return 0;
+}
+static void pvpe_write(void *opaque, hwaddr addr,
+                       uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PVPE, addr, val);
+}
+
+
+/* PTV is a stub: reads are logged and return 0, writes are only logged. */
+static uint64_t ptv_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PTV, addr, 0);
+    return 0;
+}
+static void ptv_write(void *opaque, hwaddr addr,
+                      uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PTV, addr, val);
+}
+
+
+/* PRMFB is a stub: reads are logged and return 0, writes are only logged. */
+static uint64_t prmfb_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PRMFB, addr, 0);
+    return 0;
+}
+static void prmfb_write(void *opaque, hwaddr addr,
+                        uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PRMFB, addr, val);
+}
+
+
+/* PRMVIO - aliases VGA sequencer and graphics controller registers */
+/* Forward the read to the VGA I/O port at the same offset, log and return. */
+static uint64_t prmvio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *state = opaque;
+    VGACommonState *vga = &state->vga;
+    uint64_t value = vga_ioport_read(vga, addr);
+    reg_log_read(NV_PRMVIO, addr, value);
+    return value;
+}
+/* Log the write, then forward it to the VGA I/O port at the same offset. */
+static void prmvio_write(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned int size)
+{
+    NV2AState *state = opaque;
+    VGACommonState *vga = &state->vga;
+    reg_log_write(NV_PRMVIO, addr, val);
+    vga_ioport_write(vga, addr, val);
+}
+
+
+/*
+ * PFB MMIO read handler.
+ *
+ * Three offsets are synthesised instead of being read from pfb.regs[]:
+ *   NV_PFB_CFG0    -> 3 (3-4 memory partitions; the debug bios checks this)
+ *   NV_PFB_CSTATUS -> size of the VRAM memory region
+ *   NV_PFB_WBC     -> 0 (flush not pending)
+ * Everything else returns the value last stored in pfb.regs[addr].
+ */
+static uint64_t pfb_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *state = opaque;
+    uint64_t value;
+    if (addr == NV_PFB_CFG0) {
+        value = 3;
+    } else if (addr == NV_PFB_CSTATUS) {
+        value = memory_region_size(state->vram);
+    } else if (addr == NV_PFB_WBC) {
+        value = 0;
+    } else {
+        value = state->pfb.regs[addr];
+    }
+    reg_log_read(NV_PFB, addr, value);
+    return value;
+}
+/*
+ * PFB MMIO write handler.
+ *
+ * No register has special write behaviour: the access is logged and the
+ * value is stored in pfb.regs[addr].
+ */
+static void pfb_write(void *opaque, hwaddr addr,
+                      uint64_t val, unsigned int size)
+{
+    NV2AState *state = opaque;
+    reg_log_write(NV_PFB, addr, val);
+    state->pfb.regs[addr] = val;
+}
+
+
+/* PSTRAPS is a stub: reads are logged and return 0, writes are only logged. */
+static uint64_t pstraps_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PSTRAPS, addr, 0);
+    return 0;
+}
+static void pstraps_write(void *opaque, hwaddr addr,
+                          uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PSTRAPS, addr, val);
+}
+
+/* PGRAPH - accelerated 2d/3d drawing engine */
+/*
+ * PGRAPH MMIO read handler.
+ *
+ * INTR and INTR_EN are served from the dedicated pending/enabled interrupt
+ * fields; all other offsets come from pgraph.regs[addr].  The value is
+ * sampled with pgraph.lock held and logged after the lock is released.
+ */
+static uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *state = opaque;
+    uint64_t value;
+
+    qemu_mutex_lock(&state->pgraph.lock);
+    if (addr == NV_PGRAPH_INTR) {
+        value = state->pgraph.pending_interrupts;
+    } else if (addr == NV_PGRAPH_INTR_EN) {
+        value = state->pgraph.enabled_interrupts;
+    } else {
+        value = state->pgraph.regs[addr];
+    }
+    qemu_mutex_unlock(&state->pgraph.lock);
+
+    reg_log_read(NV_PGRAPH, addr, value);
+    return value;
+}
+/*
+ * PGRAPH MMIO write handler: logs the access, then acts under pgraph.lock.
+ * INTR clears the pending bits set in val, INTR_EN replaces the enable mask,
+ * INCREMENT (with READ_3D) advances SURFACE.READ_3D modulo MODULO_3D,
+ * CHANNEL_CTX_TRIGGER reloads CTX_USER from RAMIN and any other offset is
+ * latched into pgraph.regs[].  INTR, INCREMENT and FIFO writes broadcast
+ * interrupt_cond, flip_3d and fifo_access_cond respectively.
+ */
+static void pgraph_write(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_PGRAPH, addr, val);
+    qemu_mutex_lock(&d->pgraph.lock);
+    switch (addr) {
+    case NV_PGRAPH_INTR:
+        d->pgraph.pending_interrupts &= ~val;
+        qemu_cond_broadcast(&d->pgraph.interrupt_cond);
+        break;
+    case NV_PGRAPH_INTR_EN:
+        d->pgraph.enabled_interrupts = val;
+        break;
+    case NV_PGRAPH_INCREMENT:
+        if (val & NV_PGRAPH_INCREMENT_READ_3D) {
+            /* MODULO_3D is guest-programmed: while it is still 0, skip the
+             * wrap-around update instead of dividing by zero on the host. */
+            if (GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE],
+                         NV_PGRAPH_SURFACE_MODULO_3D) != 0) {
+                SET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE],
+                         NV_PGRAPH_SURFACE_READ_3D,
+                         (GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE],
+                                   NV_PGRAPH_SURFACE_READ_3D)+1)
+                            % GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE],
+                                       NV_PGRAPH_SURFACE_MODULO_3D) );
+            }
+            qemu_cond_broadcast(&d->pgraph.flip_3d);
+        }
+        break;
+    case NV_PGRAPH_CHANNEL_CTX_TRIGGER: {
+        hwaddr context_address =
+            GET_MASK(d->pgraph.regs[NV_PGRAPH_CHANNEL_CTX_POINTER], NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4;
+
+        if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) {
+            unsigned pgraph_channel_id =
+                GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID);
+            NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n",
+                         pgraph_channel_id, context_address);
+            assert(context_address < memory_region_size(&d->ramin));
+            uint8_t *context_ptr = d->ramin_ptr + context_address;
+            uint32_t context_user = ldl_le_p((uint32_t*)context_ptr);
+            NV2A_DPRINTF("    - CTX_USER = 0x%x\n", context_user);
+            d->pgraph.regs[NV_PGRAPH_CTX_USER] = context_user;
+        }
+        if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) {
+            /* do stuff ... */
+        }
+        break;
+    }
+    default:
+        d->pgraph.regs[addr] = val;
+        break;
+    }
+    /* events */
+    if (addr == NV_PGRAPH_FIFO) {
+        qemu_cond_broadcast(&d->pgraph.fifo_access_cond);
+    }
+    qemu_mutex_unlock(&d->pgraph.lock);
+}
+
+
+/*
+ * PCRTC MMIO read handler.
+ *
+ * Exposes the pending-interrupt mask (INTR_0), the interrupt-enable mask
+ * (INTR_EN_0) and the stored START value kept in d->pcrtc.  Any other
+ * offset reads as 0.  The result is logged through reg_log_read().
+ */
+static uint64_t pcrtc_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *state = opaque;
+    uint64_t value = 0;
+
+    if (addr == NV_PCRTC_INTR_0) {
+        value = state->pcrtc.pending_interrupts;
+    } else if (addr == NV_PCRTC_INTR_EN_0) {
+        value = state->pcrtc.enabled_interrupts;
+    } else if (addr == NV_PCRTC_START) {
+        value = state->pcrtc.start;
+    }
+
+    reg_log_read(NV_PCRTC, addr, value);
+    return value;
+}
+/*
+ * PCRTC MMIO write handler.
+ *
+ * INTR_0 acknowledges (clears) the pending bits set in val and INTR_EN_0
+ * replaces the enable mask; both re-evaluate the IRQ line.  START keeps
+ * the low 27 bits of val, asserts that they address a byte inside VRAM
+ * and stores them in pcrtc.start.  Other offsets are logged and ignored.
+ */
+static void pcrtc_write(void *opaque, hwaddr addr,
+                        uint64_t val, unsigned int size)
+{
+    NV2AState *state = opaque;
+
+    reg_log_write(NV_PCRTC, addr, val);
+
+    if (addr == NV_PCRTC_INTR_0) {
+        state->pcrtc.pending_interrupts &= ~val;
+        update_irq(state);
+    } else if (addr == NV_PCRTC_INTR_EN_0) {
+        state->pcrtc.enabled_interrupts = val;
+        update_irq(state);
+    } else if (addr == NV_PCRTC_START) {
+        val &= 0x07FFFFFF;
+        assert(val < memory_region_size(state->vram));
+        state->pcrtc.start = val;
+        NV2A_DPRINTF("PCRTC_START - %x %x %x %x\n",
+                state->vram_ptr[val+64], state->vram_ptr[val+64+1],
+                state->vram_ptr[val+64+2], state->vram_ptr[val+64+3]);
+    }
+}
+
+
+/* PRMCIO - aliases VGA CRTC and attribute controller registers */
+/* Forward the read to the VGA I/O port at the same offset, log and return. */
+static uint64_t prmcio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *state = opaque;
+    VGACommonState *vga = &state->vga;
+    uint64_t value = vga_ioport_read(vga, addr);
+    reg_log_read(NV_PRMCIO, addr, value);
+    return value;
+}
+/*
+ * PRMCIO MMIO write handler: forwards the write to the VGA I/O port at the
+ * same offset, after logging the value the guest supplied.
+ *
+ * Cromwell sets attributes without enabling VGA_AR_ENABLE_DISPLAY (which
+ * should result in a blank screen).  Either nvidia's hardware is lenient
+ * or the bit is set through something else; the former seems more likely.
+ * So for a write to VGA_ATT_W while the attribute flip-flop is 0, the bit
+ * is forced on before the value is handed to the VGA core.
+ */
+static void prmcio_write(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned int size)
+{
+    NV2AState *state = opaque;
+    VGACommonState *vga = &state->vga;
+
+    reg_log_write(NV_PRMCIO, addr, val);
+
+    if (addr == VGA_ATT_W && vga->ar_flip_flop == 0) {
+        val |= VGA_AR_ENABLE_DISPLAY;
+    }
+
+    vga_ioport_write(vga, addr, val);
+}
+
+
+/*
+ * PRAMDAC MMIO read handler.
+ *
+ * The register is selected by the dword-aligned offset (addr & ~3): the
+ * three PLL coefficient registers return the values latched by
+ * pramdac_write(), PLL_TEST_COUNTER reports all four PLL lock bits as set
+ * and anything else reads as 0.  The value is then shifted right by
+ * 32 - 8*size - 8*(addr & 3) bits to serve sub-dword accesses.
+ */
+static uint64_t pramdac_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *state = opaque;
+    const hwaddr reg = addr & ~3;
+    uint64_t value = 0;
+    if (reg == NV_PRAMDAC_NVPLL_COEFF) {
+        value = state->pramdac.core_clock_coeff;
+    } else if (reg == NV_PRAMDAC_MPLL_COEFF) {
+        value = state->pramdac.memory_clock_coeff;
+    } else if (reg == NV_PRAMDAC_VPLL_COEFF) {
+        value = state->pramdac.video_clock_coeff;
+    } else if (reg == NV_PRAMDAC_PLL_TEST_COUNTER) {
+        /* emulated PLLs locked instantly? */
+        value = NV_PRAMDAC_PLL_TEST_COUNTER_VPLL2_LOCK
+                | NV_PRAMDAC_PLL_TEST_COUNTER_NVPLL_LOCK
+                | NV_PRAMDAC_PLL_TEST_COUNTER_MPLL_LOCK
+                | NV_PRAMDAC_PLL_TEST_COUNTER_VPLL_LOCK;
+    }
+
+    /* Surprisingly, QEMU doesn't handle unaligned access for you properly */
+    value >>= 32 - 8 * size - 8 * (addr & 3);
+    NV2A_DPRINTF("PRAMDAC: read %d [0x%" HWADDR_PRIx "] -> %" HWADDR_PRIx "\n", size, addr, value);
+    return value;
+}
+/*
+ * PRAMDAC MMIO write handler.
+ *
+ * Latches the NVPLL/MPLL/VPLL coefficient registers.  For NVPLL the core
+ * clock frequency is also recomputed from the three divider fields of the
+ * written value: M (masked by _MDIV), N (_NDIV, >> 8) and P (_PDIV, >> 16),
+ * as NV2A_CRYSTAL_FREQ * N / 2^P / M, or set to 0 when M is 0.
+ * Writes to other offsets are logged and ignored.
+ */
+static void pramdac_write(void *opaque, hwaddr addr,
+                          uint64_t val, unsigned int size)
+{
+    NV2AState *state = opaque;
+
+    reg_log_write(NV_PRAMDAC, addr, val);
+
+    if (addr == NV_PRAMDAC_MPLL_COEFF) {
+        state->pramdac.memory_clock_coeff = val;
+    } else if (addr == NV_PRAMDAC_VPLL_COEFF) {
+        state->pramdac.video_clock_coeff = val;
+    } else if (addr == NV_PRAMDAC_NVPLL_COEFF) {
+        uint32_t mdiv = val & NV_PRAMDAC_NVPLL_COEFF_MDIV;
+        uint32_t ndiv = (val & NV_PRAMDAC_NVPLL_COEFF_NDIV) >> 8;
+        uint32_t pdiv = (val & NV_PRAMDAC_NVPLL_COEFF_PDIV) >> 16;
+
+        state->pramdac.core_clock_coeff = val;
+        if (mdiv != 0) {
+            state->pramdac.core_clock_freq = (NV2A_CRYSTAL_FREQ * ndiv)
+                                              / (1 << pdiv) / mdiv;
+        } else {
+            state->pramdac.core_clock_freq = 0;
+        }
+    }
+}
+
+
+/* PRMDIO is a stub: reads are logged and return 0, writes are only logged. */
+static uint64_t prmdio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PRMDIO, addr, 0);
+    return 0;
+}
+static void prmdio_write(void *opaque, hwaddr addr,
+                         uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PRMDIO, addr, val);
+}
+
+
+/* PRAMIN - RAMIN access */
+/*
+static uint64_t pramin_read(void *opaque,
+                                 hwaddr addr, unsigned int size)
+{
+    NV2A_DPRINTF("nv2a PRAMIN: read [0x%" HWADDR_PRIx "] -> 0x%" HWADDR_PRIx "\n", addr, r);
+    return 0;
+}
+static void pramin_write(void *opaque, hwaddr addr,
+                              uint64_t val, unsigned int size)
+{
+    NV2A_DPRINTF("nv2a PRAMIN: [0x%" HWADDR_PRIx "] = 0x%02llx\n", addr, val);
+}*/
+
+
+/* USER - PFIFO MMIO and DMA submission area */
+/* USER MMIO read handler; channel = addr >> 16, register = addr & 0xFFFF.
+ * Only the DMA-mode channel currently selected in CACHE1_PUSH1 is handled
+ * (DMA_PUT/DMA_GET/REF mirror the CACHE1 registers, others read 0); PIO
+ * mode and non-current (RAMFC) channels hit assert(false). */
+static uint64_t user_read(void *opaque,
+                          hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    unsigned int channel_id = addr >> 16;
+    assert(channel_id < NV2A_NUM_CHANNELS);
+
+    qemu_mutex_lock(&d->pfifo.lock);
+
+    uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE];
+    uint64_t r = 0;
+    /* Unsigned literal: `1 << 31` would overflow a signed int (UB). */
+    if (channel_modes & (1U << channel_id)) {
+        /* DMA Mode */
+        unsigned int cur_channel_id =
+            GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1],
+                     NV_PFIFO_CACHE1_PUSH1_CHID);
+        if (channel_id == cur_channel_id) {
+            switch (addr & 0xFFFF) {
+            case NV_USER_DMA_PUT:
+                r = d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT];
+                break;
+            case NV_USER_DMA_GET:
+                r = d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET];
+                break;
+            case NV_USER_REF:
+                r = d->pfifo.regs[NV_PFIFO_CACHE1_REF];
+                break;
+            default:
+                break;
+            }
+        } else {
+            /* ramfc */
+            assert(false);
+        }
+    } else {
+        /* PIO Mode */
+        assert(false);
+    }
+
+    qemu_mutex_unlock(&d->pfifo.lock);
+    reg_log_read(NV_USER, addr, r);
+    return r;
+}
+/* USER MMIO write handler; channel = addr >> 16, register = addr & 0xFFFF.
+ * For the DMA-mode channel currently selected in CACHE1_PUSH1, DMA_PUT,
+ * DMA_GET and REF are stored in the CACHE1 registers and both PFIFO
+ * condition variables are broadcast.  Everything else hits assert(false). */
+static void user_write(void *opaque, hwaddr addr,
+                       uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_USER, addr, val);
+
+    unsigned int channel_id = addr >> 16;
+    assert(channel_id < NV2A_NUM_CHANNELS);
+
+    qemu_mutex_lock(&d->pfifo.lock);
+    uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE];
+    /* Unsigned literal: `1 << 31` would overflow a signed int (UB). */
+    if (channel_modes & (1U << channel_id)) {
+        /* DMA Mode */
+        unsigned int cur_channel_id =
+            GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1],
+                     NV_PFIFO_CACHE1_PUSH1_CHID);
+        if (channel_id == cur_channel_id) {
+            switch (addr & 0xFFFF) {
+            case NV_USER_DMA_PUT:
+                d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT] = val;
+                break;
+            case NV_USER_DMA_GET:
+                d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET] = val;
+                break;
+            case NV_USER_REF:
+                d->pfifo.regs[NV_PFIFO_CACHE1_REF] = val;
+                break;
+            default:
+                assert(false);
+                break;
+            }
+            /* kick pfifo: wake both worker condition variables */
+            qemu_cond_broadcast(&d->pfifo.pusher_cond);
+            qemu_cond_broadcast(&d->pfifo.puller_cond);
+        } else {
+            /* ramfc */
+            assert(false);
+        }
+    } else {
+        /* PIO Mode */
+        assert(false);
+    }
+
+    qemu_mutex_unlock(&d->pfifo.lock);
+}
+
+
+
+
+typedef struct NV2ABlockInfo { /* one MMIO register block of the NV2A */
+    const char* name;    /* region name; NULL marks an unused table slot */
+    hwaddr offset;       /* position inside the "nv2a-mmio" container */
+    uint64_t size;       /* length of the block's MMIO region */
+    MemoryRegionOps ops; /* read/write callbacks for the block */
+} NV2ABlockInfo;
+
+static const struct NV2ABlockInfo blocktable[] = { /* indexed by NV_* block id */
+    [ NV_PMC ]  = {
+        .name = "PMC",
+        .offset = 0x000000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pmc_read,
+            .write = pmc_write,
+        },
+    },
+    [ NV_PBUS ]  = {
+        .name = "PBUS",
+        .offset = 0x001000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pbus_read,
+            .write = pbus_write,
+        },
+    },
+    [ NV_PFIFO ]  = {
+        .name = "PFIFO",
+        .offset = 0x002000,
+        .size   = 0x002000,
+        .ops = {
+            .read = pfifo_read,
+            .write = pfifo_write,
+        },
+    },
+    [ NV_PRMA ]  = {
+        .name = "PRMA",
+        .offset = 0x007000,
+        .size   = 0x001000,
+        .ops = {
+            .read = prma_read,
+            .write = prma_write,
+        },
+    },
+    [ NV_PVIDEO ]  = {
+        .name = "PVIDEO",
+        .offset = 0x008000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pvideo_read,
+            .write = pvideo_write,
+        },
+    },
+    [ NV_PTIMER ]  = {
+        .name = "PTIMER",
+        .offset = 0x009000,
+        .size   = 0x001000,
+        .ops = {
+            .read = ptimer_read,
+            .write = ptimer_write,
+        },
+    },
+    [ NV_PCOUNTER ]  = {
+        .name = "PCOUNTER",
+        .offset = 0x00a000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pcounter_read,
+            .write = pcounter_write,
+        },
+    },
+    [ NV_PVPE ]  = {
+        .name = "PVPE",
+        .offset = 0x00b000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pvpe_read,
+            .write = pvpe_write,
+        },
+    },
+    [ NV_PTV ]  = {
+        .name = "PTV",
+        .offset = 0x00d000,
+        .size   = 0x001000,
+        .ops = {
+            .read = ptv_read,
+            .write = ptv_write,
+        },
+    },
+    [ NV_PRMFB ]  = {
+        .name = "PRMFB",
+        .offset = 0x0a0000,
+        .size   = 0x020000,
+        .ops = {
+            .read = prmfb_read,
+            .write = prmfb_write,
+        },
+    },
+    [ NV_PRMVIO ]  = {
+        .name = "PRMVIO",
+        .offset = 0x0c0000,
+        .size   = 0x001000,
+        .ops = {
+            .read = prmvio_read,
+            .write = prmvio_write,
+        },
+    },
+    [ NV_PFB ]  = {
+        .name = "PFB",
+        .offset = 0x100000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pfb_read,
+            .write = pfb_write,
+        },
+    },
+    [ NV_PSTRAPS ]  = {
+        .name = "PSTRAPS",
+        .offset = 0x101000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pstraps_read,
+            .write = pstraps_write,
+        },
+    },
+    [ NV_PGRAPH ]  = {
+        .name = "PGRAPH",
+        .offset = 0x400000,
+        .size   = 0x002000,
+        .ops = {
+            .read = pgraph_read,
+            .write = pgraph_write,
+        },
+    },
+    [ NV_PCRTC ]  = {
+        .name = "PCRTC",
+        .offset = 0x600000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pcrtc_read,
+            .write = pcrtc_write,
+        },
+    },
+    [ NV_PRMCIO ]  = {
+        .name = "PRMCIO",
+        .offset = 0x601000,
+        .size   = 0x001000,
+        .ops = {
+            .read = prmcio_read,
+            .write = prmcio_write,
+        },
+    },
+    [ NV_PRAMDAC ]  = {
+        .name = "PRAMDAC",
+        .offset = 0x680000,
+        .size   = 0x001000,
+        .ops = {
+            .read = pramdac_read,
+            .write = pramdac_write,
+        },
+    },
+    [ NV_PRMDIO ]  = {
+        .name = "PRMDIO",
+        .offset = 0x681000,
+        .size   = 0x001000,
+        .ops = {
+            .read = prmdio_read,
+            .write = prmdio_write,
+        },
+    },
+    /*[ NV_PRAMIN ]  = {
+        .name = "PRAMIN",
+        .offset = 0x700000,
+        .size   = 0x100000,
+        .ops = {
+            .read = pramin_read,
+            .write = pramin_write,
+        },
+    },*/ /* disabled: nv2a_init_memory() maps a RAM region at 0x700000 instead */
+    [ NV_USER ]  = {
+        .name = "USER",
+        .offset = 0x800000,
+        .size   = 0x800000,
+        .ops = {
+            .read = user_read,
+            .write = user_write,
+        },
+    },
+};
+
+static const char* nv2a_reg_names[] = {}; /* empty: reg_log_*() always print raw offsets */
+static const char* nv2a_method_names[] = {}; /* empty: pgraph_method_log() always prints numbers */
+
+/* Debug-log a register read, by name when nv2a_reg_names[] has an entry. */
+static void reg_log_read(int block, hwaddr addr, uint64_t val) {
+    const char *block_name = blocktable[block].name;
+    hwaddr reg = blocktable[block].offset + addr;
+    if (!block_name) {
+        NV2A_DPRINTF("(%d?): read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n",
+                block, addr, val);
+    } else if (reg < ARRAY_SIZE(nv2a_reg_names) && nv2a_reg_names[reg]) {
+        NV2A_DPRINTF("%s: read [%s] -> 0x%" PRIx64 "\n",
+                block_name, nv2a_reg_names[reg], val);
+    } else {
+        NV2A_DPRINTF("%s: read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n",
+                block_name, addr, val);
+    }
+}
+
+/* Debug-log a register write, by name when nv2a_reg_names[] has an entry. */
+static void reg_log_write(int block, hwaddr addr, uint64_t val) {
+    const char *block_name = blocktable[block].name;
+    hwaddr reg = blocktable[block].offset + addr;
+    if (!block_name) {
+        NV2A_DPRINTF("(%d?): [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n",
+                block, addr, val);
+    } else if (reg < ARRAY_SIZE(nv2a_reg_names) && nv2a_reg_names[reg]) {
+        NV2A_DPRINTF("%s: [%s] = 0x%" PRIx64 "\n",
+                block_name, nv2a_reg_names[reg], val);
+    } else {
+        NV2A_DPRINTF("%s: [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n",
+                block_name, addr, val);
+    }
+}
+/*
+ * Debug-log one PGRAPH method call.  Method 0x1800 is not printed per call;
+ * a single "* count" summary is printed when a different method follows it.
+ * Other methods are printed by name when nv2a_method_names[] has an entry
+ * for (class tag << 16 | method), numerically otherwise.
+ */
+static void pgraph_method_log(unsigned int subchannel,
+                              unsigned int graphics_class,
+                              unsigned int method, uint32_t parameter) {
+    static unsigned int last = 0;
+    static unsigned int count = 0;
+    if (last == 0x1800 && method != last) {
+        NV2A_GL_DPRINTF(true, "pgraph method (%d) 0x%x * %d",
+                     subchannel, last, count);
+    }
+    if (method != 0x1800) {
+        unsigned int key = 0;
+        const char *label = NULL;
+
+        if (graphics_class == NV_KELVIN_PRIMITIVE) {
+            key = method | (0x5c << 16);
+        } else if (graphics_class == NV_CONTEXT_SURFACES_2D) {
+            key = method | (0x6d << 16);
+        }
+        if (key != 0 && key < ARRAY_SIZE(nv2a_method_names)) {
+            label = nv2a_method_names[key];
+        }
+        if (label) {
+            NV2A_DPRINTF("pgraph method (%d): %s (0x%x)\n",
+                         subchannel, label, parameter);
+        } else {
+            NV2A_DPRINTF("pgraph method (%d): 0x%x -> 0x%04x (0x%x)\n",
+                         subchannel, graphics_class, method, parameter);
+        }
+    }
+    count = (method == last) ? count + 1 : 0;
+    last = method;
+}
+
+/*
+ * VGA overlay hook: paint the PVIDEO overlay pixels of scanline y into line.
+ *
+ * Draws nothing unless BUFFER has NV_PVIDEO_BUFFER_0_USE set and y maps to
+ * a row inside both the output rectangle and the input image.  The source
+ * row at vram_ptr + BASE + OFFSET + pitch * row is converted with
+ * convert_yuy2_to_rgb() and copied 1:1, clipped to the surface width and
+ * the input width.  Only LE_CR8YB8CB8YA8 input is accepted (asserted).
+ */
+static void nv2a_overlay_draw_line(VGACommonState *vga, uint8_t *line, int y)
+{
+    NV2A_DPRINTF("nv2a_overlay_draw_line\n");
+
+    NV2AState *d = container_of(vga, NV2AState, vga);
+    DisplaySurface *surface = qemu_console_surface(d->vga.con);
+    int bytes_pp = surface_bytes_per_pixel(surface);
+    int dst_width = surface_width(surface);
+
+    if (!(d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)) {
+        return;
+    }
+
+    hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
+    hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
+    hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
+
+    int in_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN],
+                            NV_PVIDEO_SIZE_IN_WIDTH);
+    int in_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN],
+                             NV_PVIDEO_SIZE_IN_HEIGHT);
+    int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_S);
+    int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_T);
+    int in_pitch = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT],
+                            NV_PVIDEO_FORMAT_PITCH);
+    int in_color = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT],
+                            NV_PVIDEO_FORMAT_COLOR);
+    // TODO: support other color formats
+    assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
+
+    int out_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT],
+                             NV_PVIDEO_SIZE_OUT_WIDTH);
+    int out_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT],
+                             NV_PVIDEO_SIZE_OUT_HEIGHT);
+    int out_x = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT],
+                         NV_PVIDEO_POINT_OUT_X);
+    int out_y = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT],
+                         NV_PVIDEO_POINT_OUT_Y);
+
+    // TODO: scaling, color keys
+    int row = y - out_y;
+    if (row < 0 || row >= out_height || row >= in_height) {
+        return;
+    }
+    assert(offset + in_pitch * (row + 1) <= limit);
+    uint8_t *src_row = d->vram_ptr + base + offset + in_pitch * row;
+
+    int x;
+    for (x = 0; x < out_width; x++) {
+        int dst_x = out_x + x;
+        int src_x = in_s + x;
+        if (dst_x >= dst_width || src_x >= in_width) {
+            break;
+        }
+        uint8_t r, g, b;
+        convert_yuy2_to_rgb(src_row, src_x, &r, &g, &b);
+        unsigned int pixel = vga->rgb_to_pixel(r, g, b);
+        if (bytes_pp == 1) {
+            ((uint8_t*)line)[dst_x] = pixel;
+        } else if (bytes_pp == 2) {
+            ((uint16_t*)line)[dst_x] = pixel;
+        } else if (bytes_pp == 4) {
+            ((uint32_t*)line)[dst_x] = pixel;
+        } else {
+            assert(false);
+        }
+    }
+}
+
+/* Pixel depth from CRTC register 0x28, bits 0-1: 0/1/2 -> 0/8/16, 3 -> 32. */
+static int nv2a_get_bpp(VGACommonState *s)
+{
+    int depth_sel = s->cr[0x28] & 3;
+
+    return depth_sel == 3 ? 32 : depth_sel * 8;
+}
+
+/*
+ * VGA get_offsets hook.
+ *   *pline_offset  <- (CR13 | CR19[7:5] << 3 | CR25[5] << 6) * 8
+ *   *pstart_addr   <- pcrtc.start / 4
+ *   *pline_compare <- LINE_COMPARE | OVERFLOW[4] << 4 | MAX_SCAN[6] << 3
+ */
+static void nv2a_get_offsets(VGACommonState *s,
+                             uint32_t *pline_offset,
+                             uint32_t *pstart_addr,
+                             uint32_t *pline_compare)
+{
+    NV2AState *d = container_of(s, NV2AState, vga);
+    uint32_t pitch = s->cr[0x13]
+                     | ((s->cr[0x19] & 0xe0) << 3)
+                     | ((s->cr[0x25] & 0x20) << 6);
+
+    *pline_offset = pitch << 3;
+    *pstart_addr = d->pcrtc.start / 4;
+    *pline_compare = s->cr[VGA_CRTC_LINE_COMPARE]
+                     | ((s->cr[VGA_CRTC_OVERFLOW] & 0x10) << 4)
+                     | ((s->cr[VGA_CRTC_MAX_SCAN] & 0x40) << 3);
+}
+
+
+/* Console gfx_update hook: VGA core update, then flag PCRTC VBLANK pending. */
+static void nv2a_vga_gfx_update(void *opaque)
+{
+    VGACommonState *vga = opaque;
+    NV2AState *state = container_of(vga, NV2AState, vga);
+    vga->hw_ops->gfx_update(vga);
+    state->pcrtc.pending_interrupts |= NV_PCRTC_INTR_0_VBLANK;
+    update_irq(state);
+}
+/* Wire up VRAM and RAMIN, point the VGA core at guest RAM, start PFIFO threads. */
+static void nv2a_init_memory(NV2AState *d, MemoryRegion *ram)
+{
+    /* xbox is UMA - vram *is* ram */
+    d->vram = ram;
+
+     /* PCI exposed vram */
+    memory_region_init_alias(&d->vram_pci, OBJECT(d), "nv2a-vram-pci", d->vram,
+                             0, memory_region_size(d->vram));
+    pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, &d->vram_pci);
+
+
+    /* RAMIN - should be in vram somewhere, but not quite sure where atm */
+    memory_region_init_ram(&d->ramin, OBJECT(d), "nv2a-ramin", 0x100000);
+    /* memory_region_init_alias(&d->ramin, "nv2a-ramin", &d->vram,
+                         memory_region_size(&d->vram) - 0x100000,
+                         0x100000); */
+    /* expose RAMIN at offset 0x700000 of the MMIO container */
+    memory_region_add_subregion(&d->mmio, 0x700000, &d->ramin);
+
+    /* cache host pointers to the VRAM and RAMIN backing memory */
+    d->vram_ptr = memory_region_get_ram_ptr(d->vram);
+    d->ramin_ptr = memory_region_get_ram_ptr(&d->ramin);
+    /* enable dirty logging for the NV2A client and mark all of VRAM dirty */
+    memory_region_set_log(d->vram, true, DIRTY_MEMORY_NV2A);
+    memory_region_set_dirty(d->vram, 0, memory_region_size(d->vram));
+
+    /* hacky. swap out vga's vram */
+    memory_region_destroy(&d->vga.vram);
+    memory_region_init_alias(&d->vga.vram, OBJECT(d), "vga.vram",
+                             d->vram, 0, memory_region_size(d->vram));
+    d->vga.vram_ptr = memory_region_get_ram_ptr(&d->vga.vram);
+    vga_dirty_log_start(&d->vga);
+
+    /* set up PGRAPH, then start the two PFIFO worker threads */
+    pgraph_init(d);
+
+    /* fire up puller */
+    qemu_thread_create(&d->pfifo.puller_thread,
+                       pfifo_puller_thread,
+                       d, QEMU_THREAD_JOINABLE);
+
+    /* fire up pusher */
+    qemu_thread_create(&d->pfifo.pusher_thread,
+                       pfifo_pusher_thread,
+                       d, QEMU_THREAD_JOINABLE);
+}
+/* PCI init hook: set defaults, VGA hooks, MMIO blocks and PFIFO locks; returns 0. */
+static int nv2a_initfn(PCIDevice *dev)
+{
+    int i;
+    NV2AState *d;
+
+    d = NV2A_DEVICE(dev);
+    /* set the interrupt-pin byte of PCI config space to 0x01 */
+    dev->config[PCI_INTERRUPT_PIN] = 0x01;
+
+    d->pcrtc.start = 0;
+    /* initial PLL values; core_clock_freq feeds ptimer_get_clock() */
+    d->pramdac.core_clock_coeff = 0x00011c01; /* 189MHz...? */
+    d->pramdac.core_clock_freq = 189000000;
+    d->pramdac.memory_clock_coeff = 0;
+    d->pramdac.video_clock_coeff = 0x0003C20D; /* 25182Khz...? */
+
+
+
+    /* legacy VGA setup */
+    VGACommonState *vga = &d->vga;
+    vga->vram_size_mb = 4;
+    /* seems to start in color mode */
+    vga->msr = VGA_MIS_COLOR;
+    /* install the NV2A hooks after the common VGA initialisation */
+    vga_common_init(vga, OBJECT(dev));
+    vga->get_bpp = nv2a_get_bpp;
+    vga->get_offsets = nv2a_get_offsets;
+    vga->overlay_draw_line = nv2a_overlay_draw_line;
+    /* private copy of the VGA hw_ops with gfx_update overridden */
+    d->hw_ops = *vga->hw_ops;
+    d->hw_ops.gfx_update = nv2a_vga_gfx_update;
+    vga->con = graphic_console_init(DEVICE(dev), &d->hw_ops, vga);
+
+
+    /* mmio */
+    memory_region_init(&d->mmio, OBJECT(dev), "nv2a-mmio", 0x1000000);
+    pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
+    /* one I/O subregion per named blocktable[] entry; unnamed slots are skipped */
+    for (i=0; i<ARRAY_SIZE(blocktable); i++) {
+        if (!blocktable[i].name) continue;
+        memory_region_init_io(&d->block_mmio[i], OBJECT(dev),
+                              &blocktable[i].ops, d,
+                              blocktable[i].name, blocktable[i].size);
+        memory_region_add_subregion(&d->mmio, blocktable[i].offset,
+                                    &d->block_mmio[i]);
+    }
+    /* PFIFO lock and the two condition variables signalled by user_write() */
+    qemu_mutex_init(&d->pfifo.lock);
+    qemu_cond_init(&d->pfifo.puller_cond);
+    qemu_cond_init(&d->pfifo.pusher_cond);
+
+    d->pfifo.regs[NV_PFIFO_CACHE1_STATUS] |= NV_PFIFO_CACHE1_STATUS_LOW_MARK;
+
+    return 0;
+}
+
+/* PCI exit hook: stop and join the PFIFO threads, then destroy PGRAPH. */
+static void nv2a_exitfn(PCIDevice *dev)
+{
+    NV2AState *d = NV2A_DEVICE(dev);
+    /* Flag + signal under pfifo.lock so no worker can miss the wake-up. */
+    qemu_mutex_lock(&d->pfifo.lock);
+    d->exiting = true;
+    qemu_cond_broadcast(&d->pfifo.puller_cond);
+    qemu_cond_broadcast(&d->pfifo.pusher_cond);
+    qemu_mutex_unlock(&d->pfifo.lock);
+    qemu_thread_join(&d->pfifo.puller_thread);
+    qemu_thread_join(&d->pfifo.pusher_thread);
+    pgraph_destroy(&d->pgraph);
+}
+
+/* QOM class initialiser: fill in the PCI identity and the init/exit hooks. */
+static void nv2a_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev = DEVICE_CLASS(klass);
+
+    dev->desc = "GeForce NV2A Integrated Graphics";
+    pci->init = nv2a_initfn;
+    pci->exit = nv2a_exitfn;
+    pci->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    pci->device_id = PCI_DEVICE_ID_NVIDIA_GEFORCE_NV2A;
+    pci->class_id = PCI_CLASS_DISPLAY_3D;
+    pci->revision = 161;
+}
+
+static const TypeInfo nv2a_info = { /* type record registered by nv2a_register() */
+    .name          = "nv2a", /* same name nv2a_init() passes to pci_create_simple() */
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(NV2AState),
+    .class_init    = nv2a_class_init,
+};
+
+static void nv2a_register(void) /* registers the "nv2a" TypeInfo */
+{
+    type_register_static(&nv2a_info);
+}
+type_init(nv2a_register); /* hands nv2a_register to QEMU's type_init() */
+
+/* Create the "nv2a" PCI device at bus/devfn and back its VRAM with ram. */
+void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram)
+{
+    PCIDevice *pci_dev = pci_create_simple(bus, devfn, "nv2a");
+    nv2a_init_memory(NV2A_DEVICE(pci_dev), ram);
+}
diff --git a/hw/xbox/nv2a.h b/hw/xbox/nv2a.h
new file mode 100644
index 0000000000..24b665aad7
--- /dev/null
+++ b/hw/xbox/nv2a.h
@@ -0,0 +1,25 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NV2A_H
+#define HW_NV2A_H
+/* Create the "nv2a" PCI device on bus at devfn; ram becomes its VRAM. */
+void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram);
+
+#endif
diff --git a/hw/xbox/nv2a_debug.c b/hw/xbox/nv2a_debug.c
new file mode 100644
index 0000000000..7e1b38907e
--- /dev/null
+++ b/hw/xbox/nv2a_debug.c
@@ -0,0 +1,94 @@
+/*
+ * QEMU GeForce NV2A debug helpers
+ *
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2012 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/xbox/nv2a_debug.h"
+
+#ifdef DEBUG_NV2A_GL
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include "gl/glextensions.h"
+
+void gl_debug_message(bool cc, const char *fmt, ...)
+{
+    int n; /* message length in bytes, clamped to what buffer really holds */
+    char buffer[1024];
+    va_list ap;
+    va_start(ap, fmt);
+    n = vsnprintf(buffer, sizeof(buffer), fmt, ap);
+    n = n < 0 ? 0 : (n < (int)sizeof(buffer) ? n : (int)sizeof(buffer) - 1);
+    va_end(ap);
+
+    if (glDebugMessageInsert) { /* entry point may be NULL; skip if so */
+        glDebugMessageInsert(GL_DEBUG_SOURCE_APPLICATION, GL_DEBUG_TYPE_MARKER,
+                             0, GL_DEBUG_SEVERITY_NOTIFICATION, n, buffer);
+    }
+    if (cc) { /* also echo the message to stdout, followed by a newline */
+        fwrite(buffer, sizeof(char), n, stdout);
+        fputc('\n', stdout);
+    }
+}
+
+void gl_debug_group_begin(const char *fmt, ...)
+{
+    int n; /* group name length in bytes, clamped to what buffer really holds */
+    char buffer[1024];
+    va_list ap;
+    va_start(ap, fmt);
+    n = vsnprintf(buffer, sizeof(buffer), fmt, ap);
+    n = n < 0 ? 0 : (n < (int)sizeof(buffer) ? n : (int)sizeof(buffer) - 1);
+    va_end(ap);
+
+    /* Check for errors before entering group */
+    assert(glGetError() == GL_NO_ERROR);
+
+    if (glPushDebugGroup) { /* entry point may be NULL; skip if so */
+        glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, n, buffer);
+    }
+}
+
+void gl_debug_group_end(void)
+{
+    /* Check for errors when leaving group (no check if NDEBUG is defined) */
+    assert(glGetError() == GL_NO_ERROR);
+
+    if (glPopDebugGroup) { /* skip the pop when the entry point is NULL */
+        glPopDebugGroup();
+    }
+}
+
+void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...)
+{
+    int n; /* label length in bytes, clamped to what buffer really holds */
+    char buffer[1024];
+    va_list ap;
+    va_start(ap, fmt);
+    n = vsnprintf(buffer, sizeof(buffer), fmt, ap);
+    n = n < 0 ? 0 : (n < (int)sizeof(buffer) ? n : (int)sizeof(buffer) - 1);
+    va_end(ap);
+
+    if (glObjectLabel) { /* entry point may be NULL; skip if so */
+        glObjectLabel(target, name, n, buffer);
+    }
+}
+
+#endif
diff --git a/hw/xbox/nv2a_debug.h b/hw/xbox/nv2a_debug.h
new file mode 100644
index 0000000000..dab2be6427
--- /dev/null
+++ b/hw/xbox/nv2a_debug.h
@@ -0,0 +1,60 @@
+/*
+ * QEMU GeForce NV2A debug helpers
+ *
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2012 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NV2A_DEBUG_H
+#define HW_NV2A_DEBUG_H
+
+// #define DEBUG_NV2A    /* uncomment: NV2A_DPRINTF prints via printf() */
+#ifdef DEBUG_NV2A
+# define NV2A_DPRINTF(format, ...)       printf("nv2a: " format, ## __VA_ARGS__)
+#else /* !DEBUG_NV2A: NV2A_DPRINTF expands to an empty statement */
+# define NV2A_DPRINTF(format, ...)       do { } while (0)
+#endif
+
+// #define DEBUG_NV2A_GL /* uncomment: route NV2A_GL_* to the gl_debug_* helpers */
+#ifdef DEBUG_NV2A_GL
+
+#include <stdbool.h>
+#include "gl/gloffscreen.h"
+
+void gl_debug_message(bool cc, const char *fmt, ...);
+void gl_debug_group_begin(const char *fmt, ...);
+void gl_debug_group_end(void);
+void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...);
+
+# define NV2A_GL_DPRINTF(cc, format, ...) \
+    gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__)
+# define NV2A_GL_DGROUP_BEGIN(format, ...) \
+    gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__)
+# define NV2A_GL_DGROUP_END() \
+    gl_debug_group_end()
+# define NV2A_GL_DLABEL(target, name, format, ...)  \
+    gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__)
+
+#else /* !DEBUG_NV2A_GL: only NV2A_GL_DPRINTF does anything, via NV2A_DPRINTF */
+# define NV2A_GL_DPRINTF(cc, format, ...)          do { \
+        if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \
+    } while (0)
+# define NV2A_GL_DGROUP_BEGIN(format, ...)         do { } while (0)
+# define NV2A_GL_DGROUP_END()                      do { } while (0)
+# define NV2A_GL_DLABEL(target, name, format, ...) do { } while (0)
+#endif /* DEBUG_NV2A_GL */
+
+#endif /* HW_NV2A_DEBUG_H */
diff --git a/hw/xbox/nv2a_int.h b/hw/xbox/nv2a_int.h
new file mode 100644
index 0000000000..570f0f18e7
--- /dev/null
+++ b/hw/xbox/nv2a_int.h
@@ -0,0 +1,1284 @@
+/*
+ * QEMU GeForce NV2A internal definitions
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define NV_NUM_BLOCKS 21
+#define NV_PMC          0   /* card master control */
+#define NV_PBUS         1   /* bus control */
+#define NV_PFIFO        2   /* MMIO and DMA FIFO submission to PGRAPH and VPE */
+#define NV_PFIFO_CACHE  3
+#define NV_PRMA         4   /* access to BAR0/BAR1 from real mode */
+#define NV_PVIDEO       5   /* video overlay */
+#define NV_PTIMER       6   /* time measurement and time-based alarms */
+#define NV_PCOUNTER     7   /* performance monitoring counters */
+#define NV_PVPE         8   /* MPEG2 decoding engine */
+#define NV_PTV          9   /* TV encoder */
+#define NV_PRMFB        10  /* aliases VGA memory window */
+#define NV_PRMVIO       11  /* aliases VGA sequencer and graphics controller registers */
+#define NV_PFB          12  /* memory interface */
+#define NV_PSTRAPS      13  /* straps readout / override */
+#define NV_PGRAPH       14  /* accelerated 2d/3d drawing engine */
+#define NV_PCRTC        15  /* more CRTC controls */
+#define NV_PRMCIO       16  /* aliases VGA CRTC and attribute controller registers */
+#define NV_PRAMDAC      17  /* RAMDAC, cursor, and PLL control */
+#define NV_PRMDIO       18  /* aliases VGA palette registers */
+#define NV_PRAMIN       19  /* RAMIN access */
+#define NV_USER         20  /* PFIFO MMIO and DMA submission area */
+
+#define NV_PMC_BOOT_0                                    0x00000000
+#define NV_PMC_INTR_0                                    0x00000100
+#   define NV_PMC_INTR_0_PFIFO                                 (1 << 8)
+#   define NV_PMC_INTR_0_PGRAPH                               (1 << 12)
+#   define NV_PMC_INTR_0_PCRTC                                (1 << 24)
+#   define NV_PMC_INTR_0_PBUS                                 (1 << 28)
+#   define NV_PMC_INTR_0_SOFTWARE                            (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define NV_PMC_INTR_EN_0                                 0x00000140
+#   define NV_PMC_INTR_EN_0_HARDWARE                            1
+#   define NV_PMC_INTR_EN_0_SOFTWARE                            2
+#define NV_PMC_ENABLE                                    0x00000200
+#   define NV_PMC_ENABLE_PFIFO                                 (1 << 8)
+#   define NV_PMC_ENABLE_PGRAPH                               (1 << 12)
+
+
+/* These map approximately to the pci registers */
+#define NV_PBUS_PCI_NV_0                                 0x00000800
+#   define NV_PBUS_PCI_NV_0_VENDOR_ID                         0x0000FFFF
+#   define NV_CONFIG_PCI_NV_0_DEVICE_ID                       0xFFFF0000
+#define NV_PBUS_PCI_NV_1                                 0x00000804
+#define NV_PBUS_PCI_NV_2                                 0x00000808
+#   define NV_PBUS_PCI_NV_2_REVISION_ID                       0x000000FF
+#   define NV_PBUS_PCI_NV_2_CLASS_CODE                        0xFFFFFF00
+
+
+#define NV_PFIFO_INTR_0                                  0x00000100
+#   define NV_PFIFO_INTR_0_CACHE_ERROR                          (1 << 0)
+#   define NV_PFIFO_INTR_0_RUNOUT                               (1 << 4)
+#   define NV_PFIFO_INTR_0_RUNOUT_OVERFLOW                      (1 << 8)
+#   define NV_PFIFO_INTR_0_DMA_PUSHER                          (1 << 12)
+#   define NV_PFIFO_INTR_0_DMA_PT                              (1 << 16)
+#   define NV_PFIFO_INTR_0_SEMAPHORE                           (1 << 20)
+#   define NV_PFIFO_INTR_0_ACQUIRE_TIMEOUT                     (1 << 24)
+#define NV_PFIFO_INTR_EN_0                               0x00000140
+#   define NV_PFIFO_INTR_EN_0_CACHE_ERROR                       (1 << 0)
+#   define NV_PFIFO_INTR_EN_0_RUNOUT                            (1 << 4)
+#   define NV_PFIFO_INTR_EN_0_RUNOUT_OVERFLOW                   (1 << 8)
+#   define NV_PFIFO_INTR_EN_0_DMA_PUSHER                       (1 << 12)
+#   define NV_PFIFO_INTR_EN_0_DMA_PT                           (1 << 16)
+#   define NV_PFIFO_INTR_EN_0_SEMAPHORE                        (1 << 20)
+#   define NV_PFIFO_INTR_EN_0_ACQUIRE_TIMEOUT                  (1 << 24)
+#define NV_PFIFO_RAMHT                                   0x00000210
+#   define NV_PFIFO_RAMHT_BASE_ADDRESS                        0x000001F0
+#   define NV_PFIFO_RAMHT_SIZE                                0x00030000
+#       define NV_PFIFO_RAMHT_SIZE_4K                             0
+#       define NV_PFIFO_RAMHT_SIZE_8K                             1
+#       define NV_PFIFO_RAMHT_SIZE_16K                            2
+#       define NV_PFIFO_RAMHT_SIZE_32K                            3
+#   define NV_PFIFO_RAMHT_SEARCH                              0x03000000
+#       define NV_PFIFO_RAMHT_SEARCH_16                           0
+#       define NV_PFIFO_RAMHT_SEARCH_32                           1
+#       define NV_PFIFO_RAMHT_SEARCH_64                           2
+#       define NV_PFIFO_RAMHT_SEARCH_128                          3
+#define NV_PFIFO_RAMFC                                   0x00000214
+#   define NV_PFIFO_RAMFC_BASE_ADDRESS1                       0x000001FC
+#   define NV_PFIFO_RAMFC_SIZE                                0x00010000
+#   define NV_PFIFO_RAMFC_BASE_ADDRESS2                       0x00FE0000
+#define NV_PFIFO_RAMRO                                   0x00000218
+#   define NV_PFIFO_RAMRO_BASE_ADDRESS                        0x000001FE
+#   define NV_PFIFO_RAMRO_SIZE                                0x00010000
+#define NV_PFIFO_RUNOUT_STATUS                           0x00000400
+#   define NV_PFIFO_RUNOUT_STATUS_RANOUT                       (1 << 0)
+#   define NV_PFIFO_RUNOUT_STATUS_LOW_MARK                     (1 << 4)
+#   define NV_PFIFO_RUNOUT_STATUS_HIGH_MARK                    (1 << 8)
+#define NV_PFIFO_MODE                                    0x00000504
+#define NV_PFIFO_DMA                                     0x00000508
+#define NV_PFIFO_CACHE1_PUSH0                            0x00001200
+#   define NV_PFIFO_CACHE1_PUSH0_ACCESS                         (1 << 0)
+#define NV_PFIFO_CACHE1_PUSH1                            0x00001204
+#   define NV_PFIFO_CACHE1_PUSH1_CHID                         0x0000001F
+#   define NV_PFIFO_CACHE1_PUSH1_MODE                         0x00000100
+#       define NV_PFIFO_CACHE1_PUSH1_MODE_PIO                     0
+#       define NV_PFIFO_CACHE1_PUSH1_MODE_DMA                     1
+#define NV_PFIFO_CACHE1_PUT                              0x00001210
+#define NV_PFIFO_CACHE1_STATUS                           0x00001214
+#   define NV_PFIFO_CACHE1_STATUS_LOW_MARK                      (1 << 4)
+#   define NV_PFIFO_CACHE1_STATUS_HIGH_MARK                     (1 << 8)
+#define NV_PFIFO_CACHE1_DMA_PUSH                         0x00001220
+#   define NV_PFIFO_CACHE1_DMA_PUSH_ACCESS                      (1 << 0)
+#   define NV_PFIFO_CACHE1_DMA_PUSH_STATE                       (1 << 4)
+#   define NV_PFIFO_CACHE1_DMA_PUSH_BUFFER                      (1 << 8)
+#   define NV_PFIFO_CACHE1_DMA_PUSH_STATUS                     (1 << 12)
+#   define NV_PFIFO_CACHE1_DMA_PUSH_ACQUIRE                    (1 << 16)
+#define NV_PFIFO_CACHE1_DMA_FETCH                        0x00001224
+#   define NV_PFIFO_CACHE1_DMA_FETCH_TRIG                     0x000000F8
+#   define NV_PFIFO_CACHE1_DMA_FETCH_SIZE                     0x0000E000
+#   define NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS                 0x001F0000
+#define NV_PFIFO_CACHE1_DMA_STATE                        0x00001228
+#   define NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE                (1 << 0)
+#       define NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_INC          0
+#       define NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_NON_INC      1
+#   define NV_PFIFO_CACHE1_DMA_STATE_METHOD                   0x00001FFC
+#   define NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL               0x0000E000
+#   define NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT             0x1FFC0000
+#   define NV_PFIFO_CACHE1_DMA_STATE_ERROR                    0xE0000000
+#       define NV_PFIFO_CACHE1_DMA_STATE_ERROR_NONE               0
+#       define NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL               1
+#       define NV_PFIFO_CACHE1_DMA_STATE_ERROR_NON_CACHE          2
+#       define NV_PFIFO_CACHE1_DMA_STATE_ERROR_RETURN             3
+#       define NV_PFIFO_CACHE1_DMA_STATE_ERROR_RESERVED_CMD       4
+#       define NV_PFIFO_CACHE1_DMA_STATE_ERROR_PROTECTION         6
+#define NV_PFIFO_CACHE1_DMA_INSTANCE                     0x0000122C
+#   define NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS               0x0000FFFF
+#define NV_PFIFO_CACHE1_DMA_PUT                          0x00001240
+#define NV_PFIFO_CACHE1_DMA_GET                          0x00001244
+#define NV_PFIFO_CACHE1_REF                              0x00001248
+#define NV_PFIFO_CACHE1_DMA_SUBROUTINE                   0x0000124C
+#   define NV_PFIFO_CACHE1_DMA_SUBROUTINE_RETURN_OFFSET       0x1FFFFFFC
+#   define NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE                (1 << 0)
+#define NV_PFIFO_CACHE1_PULL0                            0x00001250
+#   define NV_PFIFO_CACHE1_PULL0_ACCESS                        (1 << 0)
+#define NV_PFIFO_CACHE1_PULL1                            0x00001254
+#   define NV_PFIFO_CACHE1_PULL1_ENGINE                       0x00000003
+#define NV_PFIFO_CACHE1_GET                              0x00001270
+#define NV_PFIFO_CACHE1_ENGINE                           0x00001280
+#define NV_PFIFO_CACHE1_DMA_DCOUNT                       0x000012A0
+#   define NV_PFIFO_CACHE1_DMA_DCOUNT_VALUE                   0x00001FFC
+#define NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW               0x000012A4
+#   define NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW_OFFSET          0x1FFFFFFC
+#define NV_PFIFO_CACHE1_DMA_RSVD_SHADOW                  0x000012A8
+#define NV_PFIFO_CACHE1_DMA_DATA_SHADOW                  0x000012AC
+#define NV_PFIFO_CACHE1_METHOD                           0x00001800
+#   define NV_PFIFO_CACHE1_METHOD_TYPE                         (1 << 0)
+#   define NV_PFIFO_CACHE1_METHOD_ADDRESS                     0x00001FFC
+#   define NV_PFIFO_CACHE1_METHOD_SUBCHANNEL                  0x0000E000
+#define NV_PFIFO_CACHE1_DATA                             0x00001804
+
+#define NV_PGRAPH_DEBUG_3                                0x0000008C
+#   define NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH                (1 << 2)
+#define NV_PGRAPH_INTR                                   0x00000100
+#   define NV_PGRAPH_INTR_NOTIFY                              (1 << 0)
+#   define NV_PGRAPH_INTR_MISSING_HW                          (1 << 4)
+#   define NV_PGRAPH_INTR_TLB_PRESENT_DMA_R                   (1 << 6)
+#   define NV_PGRAPH_INTR_TLB_PRESENT_DMA_W                   (1 << 7)
+#   define NV_PGRAPH_INTR_TLB_PRESENT_TEX_A                   (1 << 8)
+#   define NV_PGRAPH_INTR_TLB_PRESENT_TEX_B                   (1 << 9)
+#   define NV_PGRAPH_INTR_TLB_PRESENT_VTX                    (1 << 10)
+#   define NV_PGRAPH_INTR_CONTEXT_SWITCH                     (1 << 12)
+#   define NV_PGRAPH_INTR_STATE3D                            (1 << 13)
+#   define NV_PGRAPH_INTR_BUFFER_NOTIFY                      (1 << 16)
+#   define NV_PGRAPH_INTR_ERROR                              (1 << 20)
+#   define NV_PGRAPH_INTR_SINGLE_STEP                        (1 << 24)
+#define NV_PGRAPH_NSOURCE                                0x00000108
+#   define NV_PGRAPH_NSOURCE_NOTIFICATION                     (1 << 0)
+#define NV_PGRAPH_INTR_EN                                0x00000140
+#   define NV_PGRAPH_INTR_EN_NOTIFY                           (1 << 0)
+#   define NV_PGRAPH_INTR_EN_MISSING_HW                       (1 << 4)
+#   define NV_PGRAPH_INTR_EN_TLB_PRESENT_DMA_R                (1 << 6)
+#   define NV_PGRAPH_INTR_EN_TLB_PRESENT_DMA_W                (1 << 7)
+#   define NV_PGRAPH_INTR_EN_TLB_PRESENT_TEX_A                (1 << 8)
+#   define NV_PGRAPH_INTR_EN_TLB_PRESENT_TEX_B                (1 << 9)
+#   define NV_PGRAPH_INTR_EN_TLB_PRESENT_VTX                 (1 << 10)
+#   define NV_PGRAPH_INTR_EN_CONTEXT_SWITCH                  (1 << 12)
+#   define NV_PGRAPH_INTR_EN_STATE3D                         (1 << 13)
+#   define NV_PGRAPH_INTR_EN_BUFFER_NOTIFY                   (1 << 16)
+#   define NV_PGRAPH_INTR_EN_ERROR                           (1 << 20)
+#   define NV_PGRAPH_INTR_EN_SINGLE_STEP                     (1 << 24)
+#define NV_PGRAPH_CTX_CONTROL                            0x00000144
+#   define NV_PGRAPH_CTX_CONTROL_MINIMUM_TIME                 0x00000003
+#   define NV_PGRAPH_CTX_CONTROL_TIME                           (1 << 8)
+#   define NV_PGRAPH_CTX_CONTROL_CHID                          (1 << 16)
+#   define NV_PGRAPH_CTX_CONTROL_CHANGE                        (1 << 20)
+#   define NV_PGRAPH_CTX_CONTROL_SWITCHING                     (1 << 24)
+#   define NV_PGRAPH_CTX_CONTROL_DEVICE                        (1 << 28)
+#define NV_PGRAPH_CTX_USER                               0x00000148
+#   define NV_PGRAPH_CTX_USER_CHANNEL_3D                        (1 << 0)
+#   define NV_PGRAPH_CTX_USER_CHANNEL_3D_VALID                  (1 << 4)
+#   define NV_PGRAPH_CTX_USER_SUBCH                           0x0000E000
+#   define NV_PGRAPH_CTX_USER_CHID                            0x1F000000
+#   define NV_PGRAPH_CTX_USER_SINGLE_STEP                     (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define NV_PGRAPH_CTX_SWITCH1                            0x0000014C
+#   define NV_PGRAPH_CTX_SWITCH1_GRCLASS                      0x000000FF
+#   define NV_PGRAPH_CTX_SWITCH1_CHROMA_KEY                    (1 << 12)
+#   define NV_PGRAPH_CTX_SWITCH1_SWIZZLE                       (1 << 14)
+#   define NV_PGRAPH_CTX_SWITCH1_PATCH_CONFIG                 0x00038000
+#   define NV_PGRAPH_CTX_SWITCH1_SYNCHRONIZE                   (1 << 18)
+#   define NV_PGRAPH_CTX_SWITCH1_ENDIAN_MODE                   (1 << 19)
+#   define NV_PGRAPH_CTX_SWITCH1_CLASS_TYPE                    (1 << 22)
+#   define NV_PGRAPH_CTX_SWITCH1_SINGLE_STEP                   (1 << 23)
+#   define NV_PGRAPH_CTX_SWITCH1_PATCH_STATUS                  (1 << 24)
+#   define NV_PGRAPH_CTX_SWITCH1_CONTEXT_SURFACE0              (1 << 25)
+#   define NV_PGRAPH_CTX_SWITCH1_CONTEXT_SURFACE1              (1 << 26)
+#   define NV_PGRAPH_CTX_SWITCH1_CONTEXT_PATTERN               (1 << 27)
+#   define NV_PGRAPH_CTX_SWITCH1_CONTEXT_ROP                   (1 << 28)
+#   define NV_PGRAPH_CTX_SWITCH1_CONTEXT_BETA1                 (1 << 29)
+#   define NV_PGRAPH_CTX_SWITCH1_CONTEXT_BETA4                 (1 << 30)
+#   define NV_PGRAPH_CTX_SWITCH1_VOLATILE_RESET               (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define NV_PGRAPH_CTX_SWITCH2                            0x00000150
+#define NV_PGRAPH_CTX_SWITCH3                            0x00000154
+#define NV_PGRAPH_CTX_SWITCH4                            0x00000158
+#   define NV_PGRAPH_CTX_SWITCH4_USER_INSTANCE                0x0000FFFF
+#define NV_PGRAPH_CTX_SWITCH5                            0x0000015C
+#define NV_PGRAPH_CTX_CACHE1                             0x00000160
+#define NV_PGRAPH_CTX_CACHE2                             0x00000180
+#define NV_PGRAPH_CTX_CACHE3                             0x000001A0
+#define NV_PGRAPH_CTX_CACHE4                             0x000001C0
+#define NV_PGRAPH_CTX_CACHE5                             0x000001E0
+#define NV_PGRAPH_TRAPPED_ADDR                           0x00000704
+#   define NV_PGRAPH_TRAPPED_ADDR_MTHD                        0x00001FFF
+#   define NV_PGRAPH_TRAPPED_ADDR_SUBCH                       0x00070000
+#   define NV_PGRAPH_TRAPPED_ADDR_CHID                        0x01F00000
+#   define NV_PGRAPH_TRAPPED_ADDR_DHV                         0x10000000
+#define NV_PGRAPH_TRAPPED_DATA_LOW                       0x00000708
+#define NV_PGRAPH_SURFACE                                0x00000710
+#   define NV_PGRAPH_SURFACE_WRITE_3D                         0x00700000
+#   define NV_PGRAPH_SURFACE_READ_3D                          0x07000000
+#   define NV_PGRAPH_SURFACE_MODULO_3D                        0x70000000
+#define NV_PGRAPH_INCREMENT                              0x0000071C
+#   define NV_PGRAPH_INCREMENT_READ_BLIT                        (1 << 0)
+#   define NV_PGRAPH_INCREMENT_READ_3D                          (1 << 1)
+#define NV_PGRAPH_FIFO                                   0x00000720
+#   define NV_PGRAPH_FIFO_ACCESS                                (1 << 0)
+#define NV_PGRAPH_CHANNEL_CTX_TABLE                      0x00000780
+#   define NV_PGRAPH_CHANNEL_CTX_TABLE_INST                   0x0000FFFF
+#define NV_PGRAPH_CHANNEL_CTX_POINTER                    0x00000784
+#   define NV_PGRAPH_CHANNEL_CTX_POINTER_INST                 0x0000FFFF
+#define NV_PGRAPH_CHANNEL_CTX_TRIGGER                    0x00000788
+#   define NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN                (1 << 0)
+#   define NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT              (1 << 1)
+#define NV_PGRAPH_CSV0_D                                 0x00000FB4
+#   define NV_PGRAPH_CSV0_D_LIGHTS                              0x0000FFFF
+#   define NV_PGRAPH_CSV0_D_LIGHT0                              0x00000003
+#       define NV_PGRAPH_CSV0_D_LIGHT0_OFF                          0
+#       define NV_PGRAPH_CSV0_D_LIGHT0_INFINITE                     1
+#       define NV_PGRAPH_CSV0_D_LIGHT0_LOCAL                        2
+#       define NV_PGRAPH_CSV0_D_LIGHT0_SPOT                         3
+#   define NV_PGRAPH_CSV0_D_RANGE_MODE                          (1 << 18)
+#   define NV_PGRAPH_CSV0_D_FOGENABLE                           (1 << 19)
+#   define NV_PGRAPH_CSV0_D_TEXGEN_REF                          (1 << 20)
+#       define NV_PGRAPH_CSV0_D_TEXGEN_REF_LOCAL_VIEWER             0
+#       define NV_PGRAPH_CSV0_D_TEXGEN_REF_INFINITE_VIEWER          1
+#   define NV_PGRAPH_CSV0_D_FOG_MODE                            (1 << 21)
+#       define NV_PGRAPH_CSV0_D_FOG_MODE_LINEAR                     0
+#       define NV_PGRAPH_CSV0_D_FOG_MODE_EXP                        1
+#   define NV_PGRAPH_CSV0_D_FOGGENMODE                          0x01C00000
+#       define NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA               0
+#       define NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL                   1
+#       define NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR                   2
+#       define NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR               3
+#       define NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X                    4
+#   define NV_PGRAPH_CSV0_D_MODE                                0xC0000000
+#   define NV_PGRAPH_CSV0_D_SKIN                                0x1C000000
+#       define NV_PGRAPH_CSV0_D_SKIN_OFF                            0
+#       define NV_PGRAPH_CSV0_D_SKIN_2G                             1
+#       define NV_PGRAPH_CSV0_D_SKIN_2                              2
+#       define NV_PGRAPH_CSV0_D_SKIN_3G                             3
+#       define NV_PGRAPH_CSV0_D_SKIN_3                              4
+#       define NV_PGRAPH_CSV0_D_SKIN_4G                             5
+#       define NV_PGRAPH_CSV0_D_SKIN_4                              6
+#define NV_PGRAPH_CSV0_C                                 0x00000FB8
+#   define NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START                0x0000FF00
+#   define NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE                (1 << 27)
+#   define NV_PGRAPH_CSV0_C_LIGHTING                           (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define NV_PGRAPH_CSV1_B                                 0x00000FBC
+#define NV_PGRAPH_CSV1_A                                 0x00000FC0
+#   define NV_PGRAPH_CSV1_A_T0_ENABLE                           (1 << 0)
+#   define NV_PGRAPH_CSV1_A_T0_MODE                             (1 << 1)
+#   define NV_PGRAPH_CSV1_A_T0_TEXTURE                          (1 << 2)
+#       define NV_PGRAPH_CSV1_A_T0_TEXTURE_2D                       0
+#       define NV_PGRAPH_CSV1_A_T0_TEXTURE_3D                       1
+#   define NV_PGRAPH_CSV1_A_T0_S                                0x00000070
+#       define NV_PGRAPH_CSV1_A_T0_S_DISABLE                        0
+#       define NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP                     4
+#       define NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP                 5
+#       define NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR                     1
+#       define NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR                  2
+#       define NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP                     3
+#   define NV_PGRAPH_CSV1_A_T0_T                                0x00000380
+#   define NV_PGRAPH_CSV1_A_T0_R                                0x00001C00
+#   define NV_PGRAPH_CSV1_A_T0_Q                                0x0000E000
+#   define NV_PGRAPH_CSV1_A_T1_ENABLE                           (1 << 16)
+#   define NV_PGRAPH_CSV1_A_T1_MODE                             (1 << 17)
+#   define NV_PGRAPH_CSV1_A_T1_TEXTURE                          (1 << 18)
+#   define NV_PGRAPH_CSV1_A_T1_S                                0x00700000
+#   define NV_PGRAPH_CSV1_A_T1_T                                0x03800000
+#   define NV_PGRAPH_CSV1_A_T1_R                                0x1C000000
+#   define NV_PGRAPH_CSV1_A_T1_Q                                0xE0000000
+#define NV_PGRAPH_CHEOPS_OFFSET                          0x00000FC4
+#   define NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR                  0x000000FF
+#   define NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR                 0x0000FF00
+#define NV_PGRAPH_DMA_STATE                              0x00001034
+#define NV_PGRAPH_BLEND                                  0x00001804
+#   define NV_PGRAPH_BLEND_EQN                                  0x00000007
+#   define NV_PGRAPH_BLEND_EN                                   (1 << 3)
+#   define NV_PGRAPH_BLEND_SFACTOR                              0x000000F0
+#       define NV_PGRAPH_BLEND_SFACTOR_ZERO                         0
+#       define NV_PGRAPH_BLEND_SFACTOR_ONE                          1
+#       define NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR                    2
+#       define NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR          3
+#       define NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA                    4
+#       define NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA          5
+#       define NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA                    6
+#       define NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA          7
+#       define NV_PGRAPH_BLEND_SFACTOR_DST_COLOR                    8
+#       define NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR          9
+#       define NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE           10
+#       define NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR               12
+#       define NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR     13
+#       define NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA               14
+#       define NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA     15
+#   define NV_PGRAPH_BLEND_DFACTOR                              0x00000F00
+#       define NV_PGRAPH_BLEND_DFACTOR_ZERO                         0
+#       define NV_PGRAPH_BLEND_DFACTOR_ONE                          1
+#       define NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR                    2
+#       define NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR          3
+#       define NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA                    4
+#       define NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA          5
+#       define NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA                    6
+#       define NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA          7
+#       define NV_PGRAPH_BLEND_DFACTOR_DST_COLOR                    8
+#       define NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR          9
+#       define NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE           10
+#       define NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR               12
+#       define NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR     13
+#       define NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA               14
+#       define NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA     15
+#   define NV_PGRAPH_BLEND_LOGICOP_ENABLE                       (1 << 16)
+#   define NV_PGRAPH_BLEND_LOGICOP                              0x0000F000
+#define NV_PGRAPH_BLENDCOLOR                             0x00001808
+#define NV_PGRAPH_BORDERCOLOR0                           0x0000180C
+#define NV_PGRAPH_BORDERCOLOR1                           0x00001810
+#define NV_PGRAPH_BORDERCOLOR2                           0x00001814
+#define NV_PGRAPH_BORDERCOLOR3                           0x00001818
+#define NV_PGRAPH_BUMPOFFSET1                            0x0000184C
+#define NV_PGRAPH_BUMPSCALE1                             0x00001858
+#define NV_PGRAPH_CLEARRECTX                             0x00001864
+#       define NV_PGRAPH_CLEARRECTX_XMIN                          0x00000FFF
+#       define NV_PGRAPH_CLEARRECTX_XMAX                          0x0FFF0000
+#define NV_PGRAPH_CLEARRECTY                             0x00001868
+#       define NV_PGRAPH_CLEARRECTY_YMIN                          0x00000FFF
+#       define NV_PGRAPH_CLEARRECTY_YMAX                          0x0FFF0000
+#define NV_PGRAPH_COLORCLEARVALUE                        0x0000186C
+#define NV_PGRAPH_COMBINEFACTOR0                         0x00001880
+#define NV_PGRAPH_COMBINEFACTOR1                         0x000018A0
+#define NV_PGRAPH_COMBINEALPHAI0                         0x000018C0
+#define NV_PGRAPH_COMBINEALPHAO0                         0x000018E0
+#define NV_PGRAPH_COMBINECOLORI0                         0x00001900
+#define NV_PGRAPH_COMBINECOLORO0                         0x00001920
+#define NV_PGRAPH_COMBINECTL                             0x00001940
+#define NV_PGRAPH_COMBINESPECFOG0                        0x00001944
+#define NV_PGRAPH_COMBINESPECFOG1                        0x00001948
+#define NV_PGRAPH_CONTROL_0                              0x0000194C /* register offset; indented defines are its field masks, deeper ones are field values */
+#   define NV_PGRAPH_CONTROL_0_ALPHAREF                         0x000000FF /* bits 7:0 */
+#   define NV_PGRAPH_CONTROL_0_ALPHAFUNC                        0x00000F00 /* bits 11:8 */
+#   define NV_PGRAPH_CONTROL_0_ALPHATESTENABLE                  (1 << 12)
+#   define NV_PGRAPH_CONTROL_0_ZENABLE                          (1 << 14)
+#   define NV_PGRAPH_CONTROL_0_ZFUNC                            0x000F0000 /* bits 19:16 */
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_NEVER                      0 /* ZFUNC_* are unshifted field values */
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_LESS                       1
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_EQUAL                      2
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_LEQUAL                     3
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_GREATER                    4
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_NOTEQUAL                   5
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_GEQUAL                     6
+#       define NV_PGRAPH_CONTROL_0_ZFUNC_ALWAYS                     7
+#   define NV_PGRAPH_CONTROL_0_DITHERENABLE                     (1 << 22)
+#   define NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE             (1 << 23)
+#   define NV_PGRAPH_CONTROL_0_ZWRITEENABLE                     (1 << 24)
+#   define NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE             (1 << 25)
+#   define NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE               (1 << 26)
+#   define NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE                 (1 << 27)
+#   define NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE               (1 << 28)
+#   define NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE                (1 << 29)
+#define NV_PGRAPH_CONTROL_1                              0x00001950
+#   define NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE              (1 << 0)
+#   define NV_PGRAPH_CONTROL_1_STENCIL_FUNC                     0x000000F0 /* bits 7:4 */
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_NEVER               0 /* STENCIL_FUNC_* are unshifted field values */
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_LESS                1
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_EQUAL               2
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_LEQUAL              3
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_GREATER             4
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_NOTEQUAL            5
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_GEQUAL              6
+#       define NV_PGRAPH_CONTROL_1_STENCIL_FUNC_ALWAYS              7
+#   define NV_PGRAPH_CONTROL_1_STENCIL_REF                      0x0000FF00 /* bits 15:8 */
+#   define NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ                0x00FF0000 /* bits 23:16 */
+#   define NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE               0xFF000000 /* bits 31:24 */
+#define NV_PGRAPH_CONTROL_2                              0x00001954
+#   define NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL                  0x0000000F /* bits 3:0 */
+#   define NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL                 0x000000F0 /* bits 7:4 */
+#   define NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS                 0x00000F00 /* bits 11:8 */
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP                1 /* OP_V_* start at 1; presumably shared by the FAIL/ZFAIL/ZPASS fields - confirm */
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO                2
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE             3
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT             4
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT             5
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT              6
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR                7
+#       define NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR                8
+#define NV_PGRAPH_CONTROL_3                              0x00001958
+#   define NV_PGRAPH_CONTROL_3_FOGENABLE                        (1 << 8)
+#   define NV_PGRAPH_CONTROL_3_FOG_MODE                         0x00070000 /* bits 18:16 */
+#       define NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR                  0 /* FOG_MODE_* values are non-contiguous: 2 and 6 are not defined here */
+#       define NV_PGRAPH_CONTROL_3_FOG_MODE_EXP                     1
+#       define NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2                    3
+#       define NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS                 5
+#       define NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS                7
+#       define NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS              4
+#define NV_PGRAPH_FOGCOLOR                               0x00001980 /* register offset; indented defines are its field masks */
+#   define NV_PGRAPH_FOGCOLOR_RED                               0x00FF0000 /* bits 23:16; NV097_SET_FOG_COLOR_RED is bits 7:0 (red/blue positions differ) */
+#   define NV_PGRAPH_FOGCOLOR_GREEN                             0x0000FF00 /* bits 15:8 */
+#   define NV_PGRAPH_FOGCOLOR_BLUE                              0x000000FF /* bits 7:0 */
+#   define NV_PGRAPH_FOGCOLOR_ALPHA                             0xFF000000 /* bits 31:24 */
+#define NV_PGRAPH_FOGPARAM0                              0x00001984
+#define NV_PGRAPH_FOGPARAM1                              0x00001988
+#define NV_PGRAPH_SETUPRASTER                            0x00001990
+#   define NV_PGRAPH_SETUPRASTER_FRONTFACEMODE                  0x00000003 /* bits 1:0 */
+#       define NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL             0 /* unshifted field values */
+#       define NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT            1
+#       define NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE             2
+#   define NV_PGRAPH_SETUPRASTER_BACKFACEMODE                   0x0000000C /* bits 3:2; no values listed, presumably FRONTFACEMODE_* apply - confirm */
+#   define NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE             (1 << 6)
+#   define NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE              (1 << 7)
+#   define NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE              (1 << 8)
+#   define NV_PGRAPH_SETUPRASTER_CULLCTRL                       0x00600000 /* bits 22:21 */
+#       define NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT                 1 /* unshifted field values */
+#       define NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK                  2
+#       define NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK        3
+#   define NV_PGRAPH_SETUPRASTER_FRONTFACE                      (1 << 23)
+#   define NV_PGRAPH_SETUPRASTER_CULLENABLE                     (1 << 28)
+#   define NV_PGRAPH_SETUPRASTER_Z_FORMAT                       (1 << 29)
+#define NV_PGRAPH_SHADERCLIPMODE                         0x00001994
+#define NV_PGRAPH_SHADERCTL                              0x00001998
+#define NV_PGRAPH_SHADERPROG                             0x0000199C
+#define NV_PGRAPH_SEMAPHOREOFFSET                        0x000019A0
+#define NV_PGRAPH_SHADOWZSLOPETHRESHOLD                  0x000019A8
+#define NV_PGRAPH_SPECFOGFACTOR0                         0x000019AC
+#define NV_PGRAPH_SPECFOGFACTOR1                         0x000019B0
+#define NV_PGRAPH_TEXADDRESS0                            0x000019BC /* register offset; fields are listed only for the *0 register, TEXADDRESS1..3 presumably share the layout - confirm */
+#   define NV_PGRAPH_TEXADDRESS0_ADDRU                          0x00000007 /* bits 2:0 */
+#       define NV_PGRAPH_TEXADDRESS0_ADDRU_WRAP                      1 /* unshifted field values */
+#       define NV_PGRAPH_TEXADDRESS0_ADDRU_MIRROR                    2
+#       define NV_PGRAPH_TEXADDRESS0_ADDRU_CLAMP_TO_EDGE             3
+#       define NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER                    4
+#       define NV_PGRAPH_TEXADDRESS0_ADDRU_CLAMP_OGL                 5
+#   define NV_PGRAPH_TEXADDRESS0_WRAP_U                         (1 << 4)
+#   define NV_PGRAPH_TEXADDRESS0_ADDRV                          0x00000700 /* bits 10:8; no values listed, presumably ADDRU_* apply - confirm */
+#   define NV_PGRAPH_TEXADDRESS0_WRAP_V                         (1 << 12)
+#   define NV_PGRAPH_TEXADDRESS0_ADDRP                          0x00070000 /* bits 18:16 */
+#   define NV_PGRAPH_TEXADDRESS0_WRAP_P                         (1 << 20)
+#   define NV_PGRAPH_TEXADDRESS0_WRAP_Q                         (1 << 24)
+#define NV_PGRAPH_TEXADDRESS1                            0x000019C0
+#define NV_PGRAPH_TEXADDRESS2                            0x000019C4
+#define NV_PGRAPH_TEXADDRESS3                            0x000019C8
+#define NV_PGRAPH_TEXCTL0_0                              0x000019CC
+#   define NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN                      (1 << 2)
+#   define NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP                    0x0003FFC0 /* bits 17:6 */
+#   define NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP                    0x3FFC0000 /* bits 29:18 */
+#   define NV_PGRAPH_TEXCTL0_0_ENABLE                           (1 << 30)
+#define NV_PGRAPH_TEXCTL0_1                              0x000019D0
+#define NV_PGRAPH_TEXCTL0_2                              0x000019D4
+#define NV_PGRAPH_TEXCTL0_3                              0x000019D8
+#define NV_PGRAPH_TEXCTL1_0                              0x000019DC
+#   define NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH                      0xFFFF0000 /* bits 31:16 */
+#define NV_PGRAPH_TEXCTL1_1                              0x000019E0
+#define NV_PGRAPH_TEXCTL1_2                              0x000019E4
+#define NV_PGRAPH_TEXCTL1_3                              0x000019E8
+#define NV_PGRAPH_TEXCTL2_0                              0x000019EC /* only _0 and _1 are defined for TEXCTL2 */
+#define NV_PGRAPH_TEXCTL2_1                              0x000019F0
+#define NV_PGRAPH_TEXFILTER0                             0x000019F4
+#   define NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS                 0x00001FFF
+#   define NV_PGRAPH_TEXFILTER0_MIN                             0x003F0000
+#       define NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0                    1
+#       define NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0                   2
+#       define NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD              3
+#       define NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD             4
+#       define NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD                5
+#       define NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD               6
+#       define NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0         7
+#   define NV_PGRAPH_TEXFILTER0_MAG                             0x0F000000
+#   define NV_PGRAPH_TEXFILTER0_ASIGNED                         (1 << 28)
+#   define NV_PGRAPH_TEXFILTER0_RSIGNED                         (1 << 29)
+#   define NV_PGRAPH_TEXFILTER0_GSIGNED                         (1 << 30)
+#   define NV_PGRAPH_TEXFILTER0_BSIGNED                         (1 << 31)
+#define NV_PGRAPH_TEXFILTER1                             0x000019F8 /* TEXFILTER1..3 follow TEXFILTER0 at 4-byte steps; presumably the same field layout - confirm */
+#define NV_PGRAPH_TEXFILTER2                             0x000019FC
+#define NV_PGRAPH_TEXFILTER3                             0x00001A00
+#define NV_PGRAPH_TEXFMT0                                0x00001A04 /* register offset; indented defines are its field masks */
+#   define NV_PGRAPH_TEXFMT0_CONTEXT_DMA                        (1 << 1)
+#   define NV_PGRAPH_TEXFMT0_CUBEMAPENABLE                      (1 << 2)
+#   define NV_PGRAPH_TEXFMT0_BORDER_SOURCE                      (1 << 3)
+#       define NV_PGRAPH_TEXFMT0_BORDER_SOURCE_TEXTURE              0 /* unshifted values of the single-bit field above */
+#       define NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR                1
+#   define NV_PGRAPH_TEXFMT0_DIMENSIONALITY                     0x000000C0 /* bits 7:6 */
+#   define NV_PGRAPH_TEXFMT0_COLOR                              0x00007F00 /* bits 14:8 */
+#   define NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS                      0x000F0000 /* bits 19:16 */
+#   define NV_PGRAPH_TEXFMT0_BASE_SIZE_U                        0x00F00000 /* bits 23:20 */
+#   define NV_PGRAPH_TEXFMT0_BASE_SIZE_V                        0x0F000000 /* bits 27:24 */
+#   define NV_PGRAPH_TEXFMT0_BASE_SIZE_P                        0xF0000000 /* bits 31:28 */
+#define NV_PGRAPH_TEXFMT1                                0x00001A08
+#define NV_PGRAPH_TEXFMT2                                0x00001A0C
+#define NV_PGRAPH_TEXFMT3                                0x00001A10
+#define NV_PGRAPH_TEXIMAGERECT0                          0x00001A14
+#   define NV_PGRAPH_TEXIMAGERECT0_WIDTH                        0x1FFF0000 /* bits 28:16 */
+#   define NV_PGRAPH_TEXIMAGERECT0_HEIGHT                       0x00001FFF /* bits 12:0 */
+#define NV_PGRAPH_TEXIMAGERECT1                          0x00001A18
+#define NV_PGRAPH_TEXIMAGERECT2                          0x00001A1C
+#define NV_PGRAPH_TEXIMAGERECT3                          0x00001A20
+#define NV_PGRAPH_TEXOFFSET0                             0x00001A24
+#define NV_PGRAPH_TEXOFFSET1                             0x00001A28
+#define NV_PGRAPH_TEXOFFSET2                             0x00001A2C
+#define NV_PGRAPH_TEXOFFSET3                             0x00001A30
+#define NV_PGRAPH_TEXPALETTE0                            0x00001A34
+#   define NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA                    (1 << 0)
+#   define NV_PGRAPH_TEXPALETTE0_LENGTH                         0x0000000C /* bits 3:2 */
+#       define NV_PGRAPH_TEXPALETTE0_LENGTH_256                     0 /* unshifted field values; larger value = shorter palette */
+#       define NV_PGRAPH_TEXPALETTE0_LENGTH_128                     1
+#       define NV_PGRAPH_TEXPALETTE0_LENGTH_64                      2
+#       define NV_PGRAPH_TEXPALETTE0_LENGTH_32                      3
+#   define NV_PGRAPH_TEXPALETTE0_OFFSET                         0xFFFFFFC0 /* bits 31:6 */
+#define NV_PGRAPH_TEXPALETTE1                            0x00001A38
+#define NV_PGRAPH_TEXPALETTE2                            0x00001A3C
+#define NV_PGRAPH_TEXPALETTE3                            0x00001A40
+#define NV_PGRAPH_ZSTENCILCLEARVALUE                     0x00001A88
+#define NV_PGRAPH_ZCLIPMIN                               0x00001A90
+#define NV_PGRAPH_ZOFFSETBIAS                            0x00001AA4
+#define NV_PGRAPH_ZOFFSETFACTOR                          0x00001AA8
+#define NV_PGRAPH_EYEVEC0                                0x00001AAC
+#define NV_PGRAPH_EYEVEC1                                0x00001AB0
+#define NV_PGRAPH_EYEVEC2                                0x00001AB4
+#define NV_PGRAPH_ZCLIPMAX                               0x00001ABC
+
+
+#define NV_PCRTC_INTR_0                                  0x00000100 /* 0x100/0x140 recur for PVIDEO and PTIMER, so offsets are presumably relative to each engine's own block - confirm */
+#   define NV_PCRTC_INTR_0_VBLANK                               (1 << 0)
+#define NV_PCRTC_INTR_EN_0                               0x00000140 /* same bit layout as INTR_0 as far as defined here */
+#   define NV_PCRTC_INTR_EN_0_VBLANK                            (1 << 0)
+#define NV_PCRTC_START                                   0x00000800
+#define NV_PCRTC_CONFIG                                  0x00000804
+
+
+#define NV_PVIDEO_INTR                                   0x00000100 /* register offset; indented defines are its field masks */
+#   define NV_PVIDEO_INTR_BUFFER_0                              (1 << 0)
+#   define NV_PVIDEO_INTR_BUFFER_1                              (1 << 4)
+#define NV_PVIDEO_INTR_EN                                0x00000140 /* same bit positions as NV_PVIDEO_INTR */
+#   define NV_PVIDEO_INTR_EN_BUFFER_0                           (1 << 0)
+#   define NV_PVIDEO_INTR_EN_BUFFER_1                           (1 << 4)
+#define NV_PVIDEO_BUFFER                                 0x00000700
+#   define NV_PVIDEO_BUFFER_0_USE                               (1 << 0)
+#   define NV_PVIDEO_BUFFER_1_USE                               (1 << 4)
+#define NV_PVIDEO_STOP                                   0x00000704
+#define NV_PVIDEO_BASE                                   0x00000900 /* from here on offsets are 8 apart; presumably one 32-bit register per buffer at +0/+4 - confirm */
+#define NV_PVIDEO_LIMIT                                  0x00000908
+#define NV_PVIDEO_LUMINANCE                              0x00000910
+#define NV_PVIDEO_CHROMINANCE                            0x00000918
+#define NV_PVIDEO_OFFSET                                 0x00000920
+#define NV_PVIDEO_SIZE_IN                                0x00000928
+#   define NV_PVIDEO_SIZE_IN_WIDTH                            0x000007FF /* bits 10:0 */
+#   define NV_PVIDEO_SIZE_IN_HEIGHT                           0x07FF0000 /* bits 26:16 */
+#define NV_PVIDEO_POINT_IN                               0x00000930
+#   define NV_PVIDEO_POINT_IN_S                               0x00007FFF /* bits 14:0 */
+#   define NV_PVIDEO_POINT_IN_T                               0xFFFE0000 /* bits 31:17 */
+#define NV_PVIDEO_DS_DX                                  0x00000938
+#define NV_PVIDEO_DT_DY                                  0x00000940
+#define NV_PVIDEO_POINT_OUT                              0x00000948
+#   define NV_PVIDEO_POINT_OUT_X                              0x00000FFF /* bits 11:0 */
+#   define NV_PVIDEO_POINT_OUT_Y                              0x0FFF0000 /* bits 27:16 */
+#define NV_PVIDEO_SIZE_OUT                               0x00000950
+#   define NV_PVIDEO_SIZE_OUT_WIDTH                           0x00000FFF /* bits 11:0 */
+#   define NV_PVIDEO_SIZE_OUT_HEIGHT                          0x0FFF0000 /* bits 27:16 */
+#define NV_PVIDEO_FORMAT                                 0x00000958
+#   define NV_PVIDEO_FORMAT_PITCH                             0x00001FFF /* bits 12:0 */
+#   define NV_PVIDEO_FORMAT_COLOR                             0x00030000 /* bits 17:16 */
+#       define NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8             1 /* unshifted field value; the only colour format defined here */
+#   define NV_PVIDEO_FORMAT_DISPLAY                            (1 << 20)
+
+
+#define NV_PTIMER_INTR_0                                 0x00000100 /* register offset; indented defines are its field masks */
+#   define NV_PTIMER_INTR_0_ALARM                               (1 << 0)
+#define NV_PTIMER_INTR_EN_0                              0x00000140 /* same bit position as INTR_0_ALARM */
+#   define NV_PTIMER_INTR_EN_0_ALARM                            (1 << 0)
+#define NV_PTIMER_NUMERATOR                              0x00000200
+#define NV_PTIMER_DENOMINATOR                            0x00000210
+#define NV_PTIMER_TIME_0                                 0x00000400 /* TIME_0/TIME_1: presumably low/high halves of one counter - confirm */
+#define NV_PTIMER_TIME_1                                 0x00000410
+#define NV_PTIMER_ALARM_0                                0x00000420
+
+
+#define NV_PFB_CFG0                                      0x00000200 /* register offset; indented defines are its field masks */
+#   define NV_PFB_CFG0_PART                                   0x00000003 /* bits 1:0 */
+#define NV_PFB_CSTATUS                                   0x0000020C
+#define NV_PFB_WBC                                       0x00000410
+#   define NV_PFB_WBC_FLUSH                                     (1 << 16)
+
+
+#define NV_PRAMDAC_NVPLL_COEFF                           0x00000500 /* the three *PLL_COEFF registers share one M/N/P field layout */
+#   define NV_PRAMDAC_NVPLL_COEFF_MDIV                        0x000000FF /* bits 7:0 */
+#   define NV_PRAMDAC_NVPLL_COEFF_NDIV                        0x0000FF00 /* bits 15:8 */
+#   define NV_PRAMDAC_NVPLL_COEFF_PDIV                        0x00070000 /* bits 18:16 */
+#define NV_PRAMDAC_MPLL_COEFF                            0x00000504
+#   define NV_PRAMDAC_MPLL_COEFF_MDIV                         0x000000FF
+#   define NV_PRAMDAC_MPLL_COEFF_NDIV                         0x0000FF00
+#   define NV_PRAMDAC_MPLL_COEFF_PDIV                         0x00070000
+#define NV_PRAMDAC_VPLL_COEFF                            0x00000508
+#   define NV_PRAMDAC_VPLL_COEFF_MDIV                         0x000000FF
+#   define NV_PRAMDAC_VPLL_COEFF_NDIV                         0x0000FF00
+#   define NV_PRAMDAC_VPLL_COEFF_PDIV                         0x00070000
+#define NV_PRAMDAC_PLL_TEST_COUNTER                      0x00000514
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_NOOFIPCLKS             0x000003FF
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_VALUE                  0x0000FFFF
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_ENABLE                  (1 << 16)
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_RESET                   (1 << 20)
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_SOURCE                 0x03000000
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_VPLL2_LOCK              (1 << 27)
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_PDIV_RST                (1 << 28)
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_NVPLL_LOCK              (1 << 29)
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_MPLL_LOCK               (1 << 30)
+#   define NV_PRAMDAC_PLL_TEST_COUNTER_VPLL_LOCK               (1 << 31)
+
+
+#define NV_USER_DMA_PUT                                  0x40 /* byte offsets, consecutive 32-bit slots; presumably within a per-channel USER area - confirm */
+#define NV_USER_DMA_GET                                  0x44
+#define NV_USER_REF                                      0x48
+
+
+
+/* DMA objects: class ids first, then field masks of the object's words */
+#define NV_DMA_FROM_MEMORY_CLASS                         0x02
+#define NV_DMA_TO_MEMORY_CLASS                           0x03
+#define NV_DMA_IN_MEMORY_CLASS                           0x3d
+
+#define NV_DMA_CLASS                                          0x00000FFF /* bits 11:0 */
+#define NV_DMA_PAGE_TABLE                                      (1 << 12)
+#define NV_DMA_PAGE_ENTRY                                      (1 << 13)
+#define NV_DMA_FLAGS_ACCESS                                    (1 << 14)
+#define NV_DMA_FLAGS_MAPPING_COHERENCY                         (1 << 15)
+#define NV_DMA_TARGET                                         0x00030000 /* bits 17:16 */
+#   define NV_DMA_TARGET_NVM                                      0x00000000 /* TARGET_* are already shifted into the mask position */
+#   define NV_DMA_TARGET_NVM_TILED                                0x00010000
+#   define NV_DMA_TARGET_PCI                                      0x00020000
+#   define NV_DMA_TARGET_AGP                                      0x00030000
+#define NV_DMA_ADJUST                                         0xFFF00000 /* bits 31:20 */
+
+#define NV_DMA_ADDRESS                                        0xFFFFF000 /* bits 31:12; overlaps the masks above, so presumably applies to a different word - confirm */
+
+
+#define NV_RAMHT_HANDLE                                       0xFFFFFFFF /* full 32-bit word; the fields below presumably live in a second word - confirm */
+#define NV_RAMHT_INSTANCE                                     0x0000FFFF /* bits 15:0 */
+#define NV_RAMHT_ENGINE                                       0x00030000 /* bits 17:16 */
+#   define NV_RAMHT_ENGINE_SW                                     0x00000000 /* ENGINE_* are already shifted into the mask position */
+#   define NV_RAMHT_ENGINE_GRAPHICS                               0x00010000
+#   define NV_RAMHT_ENGINE_DVD                                    0x00020000
+#define NV_RAMHT_CHID                                         0x1F000000 /* bits 28:24 */
+#define NV_RAMHT_STATUS                                       0x80000000 /* bit 31 */
+
+
+
+/* graphics classes and methods: class ids (NV_*) with per-class method offsets (NVxxx_*) */
+#define NV_SET_OBJECT                                        0x00000000 /* method 0; each class below repeats it as NVxxx_SET_OBJECT */
+
+
+#define NV_CONTEXT_SURFACES_2D                           0x0062 /* class id; NV062_* are its method offsets, deeper defines are argument values */
+#   define NV062_SET_OBJECT                                   0x00000000
+#   define NV062_SET_CONTEXT_DMA_IMAGE_SOURCE                 0x00000184
+#   define NV062_SET_CONTEXT_DMA_IMAGE_DESTIN                 0x00000188
+#   define NV062_SET_COLOR_FORMAT                             0x00000300
+#       define NV062_SET_COLOR_FORMAT_LE_Y8                    0x01 /* only these three formats are defined here */
+#       define NV062_SET_COLOR_FORMAT_LE_R5G6B5                0x04
+#       define NV062_SET_COLOR_FORMAT_LE_A8R8G8B8              0x0A
+#   define NV062_SET_PITCH                                    0x00000304
+#   define NV062_SET_OFFSET_SOURCE                            0x00000308
+#   define NV062_SET_OFFSET_DESTIN                            0x0000030C
+
+#define NV_IMAGE_BLIT                                    0x009F /* class id; NV09F_* are its method offsets */
+#   define NV09F_SET_OBJECT                                   0x00000000
+#   define NV09F_SET_CONTEXT_SURFACES                         0x0000019C
+#   define NV09F_SET_OPERATION                                0x000002FC
+#       define NV09F_SET_OPERATION_SRCCOPY                        3 /* the only operation value defined here */
+#   define NV09F_CONTROL_POINT_IN                             0x00000300
+#   define NV09F_CONTROL_POINT_OUT                            0x00000304
+#   define NV09F_SIZE                                         0x00000308
+
+
+#define NV_KELVIN_PRIMITIVE                              0x0097 /* class id; NV097_* are its method offsets, deeper defines are argument fields/values */
+#   define NV097_SET_OBJECT                                   0x00000000
+#   define NV097_NO_OPERATION                                 0x00000100
+#   define NV097_WAIT_FOR_IDLE                                0x00000110
+#   define NV097_SET_FLIP_READ                                0x00000120
+#   define NV097_SET_FLIP_WRITE                               0x00000124
+#   define NV097_SET_FLIP_MODULO                              0x00000128
+#   define NV097_FLIP_INCREMENT_WRITE                         0x0000012C
+#   define NV097_FLIP_STALL                                   0x00000130
+#   define NV097_SET_CONTEXT_DMA_NOTIFIES                     0x00000180
+#   define NV097_SET_CONTEXT_DMA_A                            0x00000184
+#   define NV097_SET_CONTEXT_DMA_B                            0x00000188
+#   define NV097_SET_CONTEXT_DMA_STATE                        0x00000190
+#   define NV097_SET_CONTEXT_DMA_COLOR                        0x00000194
+#   define NV097_SET_CONTEXT_DMA_ZETA                         0x00000198
+#   define NV097_SET_CONTEXT_DMA_VERTEX_A                     0x0000019C
+#   define NV097_SET_CONTEXT_DMA_VERTEX_B                     0x000001A0
+#   define NV097_SET_CONTEXT_DMA_SEMAPHORE                    0x000001A4
+#   define NV097_SET_CONTEXT_DMA_REPORT                       0x000001A8
+#   define NV097_SET_SURFACE_CLIP_HORIZONTAL                  0x00000200
+#       define NV097_SET_SURFACE_CLIP_HORIZONTAL_X                0x0000FFFF /* bits 15:0 */
+#       define NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH            0xFFFF0000 /* bits 31:16 */
+#   define NV097_SET_SURFACE_CLIP_VERTICAL                    0x00000204
+#       define NV097_SET_SURFACE_CLIP_VERTICAL_Y                  0x0000FFFF /* bits 15:0 */
+#       define NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT             0xFFFF0000 /* bits 31:16 */
+#   define NV097_SET_SURFACE_FORMAT                           0x00000208
+#       define NV097_SET_SURFACE_FORMAT_COLOR                     0x0000000F /* bits 3:0 */
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5     0x01
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5     0x02
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5                0x03
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8     0x04
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8     0x05
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8 0x06
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8 0x07
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8              0x08
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_B8                    0x09
+#           define NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8                  0x0A
+#       define NV097_SET_SURFACE_FORMAT_ZETA                      0x000000F0 /* bits 7:4 */
+#           define NV097_SET_SURFACE_FORMAT_ZETA_Z16                       1
+#           define NV097_SET_SURFACE_FORMAT_ZETA_Z24S8                     2
+#       define NV097_SET_SURFACE_FORMAT_TYPE                      0x00000F00 /* bits 11:8 */
+#           define NV097_SET_SURFACE_FORMAT_TYPE_PITCH                     0x1
+#           define NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE                   0x2
+#       define NV097_SET_SURFACE_FORMAT_ANTI_ALIASING             0x0000F000 /* bits 15:12 */
+#           define NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1         0
+#           define NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2  1
+#           define NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4  2
+#       define NV097_SET_SURFACE_FORMAT_WIDTH                     0x00FF0000 /* bits 23:16 */
+#       define NV097_SET_SURFACE_FORMAT_HEIGHT                    0xFF000000 /* bits 31:24 */
+#   define NV097_SET_SURFACE_PITCH                            0x0000020C
+#       define NV097_SET_SURFACE_PITCH_COLOR                      0x0000FFFF /* bits 15:0 */
+#       define NV097_SET_SURFACE_PITCH_ZETA                       0xFFFF0000 /* bits 31:16 */
+#   define NV097_SET_SURFACE_COLOR_OFFSET                     0x00000210
+#   define NV097_SET_SURFACE_ZETA_OFFSET                      0x00000214
+#   define NV097_SET_COMBINER_ALPHA_ICW                       0x00000260 /* NV097 method offset; deeper-indented defines below are argument fields/values */
+#   define NV097_SET_COMBINER_SPECULAR_FOG_CW0                0x00000288
+#   define NV097_SET_COMBINER_SPECULAR_FOG_CW1                0x0000028C
+#   define NV097_SET_CONTROL0                                 0x00000290
+#       define NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE            (1 << 0)
+#       define NV097_SET_CONTROL0_Z_FORMAT                        (1 << 12)
+#       define NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE            (1 << 16)
+#   define NV097_SET_FOG_MODE                                 0x0000029C
+#       define NV097_SET_FOG_MODE_V_LINEAR                        0x2601 /* method values differ from NV_PGRAPH_CONTROL_3_FOG_MODE_*; 0x2601/0x800/0x801 coincide with OpenGL GL_LINEAR/GL_EXP/GL_EXP2 */
+#       define NV097_SET_FOG_MODE_V_EXP                           0x800
+#       define NV097_SET_FOG_MODE_V_EXP2                          0x801
+#       define NV097_SET_FOG_MODE_V_EXP_ABS                       0x802
+#       define NV097_SET_FOG_MODE_V_EXP2_ABS                      0x803
+#       define NV097_SET_FOG_MODE_V_LINEAR_ABS                    0x804
+#   define NV097_SET_FOG_GEN_MODE                             0x000002A0
+#       define NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA                0
+#       define NV097_SET_FOG_GEN_MODE_V_RADIAL                    1
+#       define NV097_SET_FOG_GEN_MODE_V_PLANAR                    2
+#       define NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR                3
+#       define NV097_SET_FOG_GEN_MODE_V_FOG_X                     6 /* 4 and 5 are not defined here */
+#   define NV097_SET_FOG_ENABLE                               0x000002A4
+#   define NV097_SET_FOG_COLOR                                0x000002A8
+#       define NV097_SET_FOG_COLOR_RED                            0x000000FF /* bits 7:0; NV_PGRAPH_FOGCOLOR_RED is bits 23:16 (red/blue positions differ) */
+#       define NV097_SET_FOG_COLOR_GREEN                          0x0000FF00 /* bits 15:8 */
+#       define NV097_SET_FOG_COLOR_BLUE                           0x00FF0000 /* bits 23:16 */
+#       define NV097_SET_FOG_COLOR_ALPHA                          0xFF000000 /* bits 31:24 */
+#   define NV097_SET_ALPHA_TEST_ENABLE                        0x00000300 /* NV097 method offset; deeper-indented defines below are argument values */
+#   define NV097_SET_BLEND_ENABLE                             0x00000304
+#   define NV097_SET_CULL_FACE_ENABLE                         0x00000308
+#   define NV097_SET_DEPTH_TEST_ENABLE                        0x0000030C
+#   define NV097_SET_DITHER_ENABLE                            0x00000310
+#   define NV097_SET_LIGHTING_ENABLE                          0x00000314
+#   define NV097_SET_SKIN_MODE                                0x00000328
+#       define NV097_SET_SKIN_MODE_OFF                            0
+#       define NV097_SET_SKIN_MODE_2G                             1
+#       define NV097_SET_SKIN_MODE_2                              2
+#       define NV097_SET_SKIN_MODE_3G                             3
+#       define NV097_SET_SKIN_MODE_3                              4
+#       define NV097_SET_SKIN_MODE_4G                             5
+#       define NV097_SET_SKIN_MODE_4                              6
+#   define NV097_SET_STENCIL_TEST_ENABLE                      0x0000032C
+#   define NV097_SET_POLY_OFFSET_POINT_ENABLE                 0x00000330
+#   define NV097_SET_POLY_OFFSET_LINE_ENABLE                  0x00000334
+#   define NV097_SET_POLY_OFFSET_FILL_ENABLE                  0x00000338
+#   define NV097_SET_ALPHA_FUNC                               0x0000033C
+#   define NV097_SET_ALPHA_REF                                0x00000340
+#   define NV097_SET_BLEND_FUNC_SFACTOR                       0x00000344
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO                0x0000 /* SFACTOR_V_* coincide with the OpenGL blend-factor enum values */
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ONE                 0x0001
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR           0x0300
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR 0x0301
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA           0x0302
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA 0x0303
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA           0x0304
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA 0x0305
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR           0x0306
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR 0x0307
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE  0x0308
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR      0x8001
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR 0x8002
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA      0x8003
+#       define NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA 0x8004
+#   define NV097_SET_BLEND_FUNC_DFACTOR                       0x00000348
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO                0x0000 /* DFACTOR_V_* repeat the SFACTOR_V_* values one for one */
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ONE                 0x0001
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR           0x0300
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR 0x0301
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA           0x0302
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA 0x0303
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA           0x0304
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA 0x0305
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR           0x0306
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR 0x0307
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE  0x0308
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR      0x8001
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR 0x8002
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA      0x8003
+#       define NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA 0x8004
+#   define NV097_SET_BLEND_COLOR                              0x0000034C
+#   define NV097_SET_BLEND_EQUATION                           0x00000350
+#       define NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT           0x800A /* 0x8006..0x800B coincide with the OpenGL blend-equation enum values */
+#       define NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT   0x800B
+#       define NV097_SET_BLEND_EQUATION_V_FUNC_ADD                0x8006
+#       define NV097_SET_BLEND_EQUATION_V_MIN                     0x8007
+#       define NV097_SET_BLEND_EQUATION_V_MAX                     0x8008
+#       define NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED 0xF005 /* the two *_SIGNED values sit in a separate 0xF00x range */
+#       define NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED         0xF006
+#   define NV097_SET_DEPTH_FUNC                               0x00000354 /* NV097 method offset; deeper-indented defines below are argument fields/values */
+#   define NV097_SET_COLOR_MASK                               0x00000358
+#       define NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE             (1 << 0) /* one flag per byte; NV_PGRAPH_CONTROL_0 packs the same enables into bits 26..29 */
+#       define NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE            (1 << 8)
+#       define NV097_SET_COLOR_MASK_RED_WRITE_ENABLE              (1 << 16)
+#       define NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE            (1 << 24)
+#   define NV097_SET_DEPTH_MASK                               0x0000035C
+#   define NV097_SET_STENCIL_MASK                             0x00000360
+#   define NV097_SET_STENCIL_FUNC                             0x00000364
+#   define NV097_SET_STENCIL_FUNC_REF                         0x00000368
+#   define NV097_SET_STENCIL_FUNC_MASK                        0x0000036C
+#   define NV097_SET_STENCIL_OP_FAIL                          0x00000370
+#   define NV097_SET_STENCIL_OP_ZFAIL                         0x00000374
+#   define NV097_SET_STENCIL_OP_ZPASS                         0x00000378
+#       define NV097_SET_STENCIL_OP_V_KEEP                        0x1E00 /* presumably shared by OP_FAIL/ZFAIL/ZPASS; differ from NV_PGRAPH_CONTROL_2_STENCIL_OP_V_* (1..8) */
+#       define NV097_SET_STENCIL_OP_V_ZERO                        0x0000
+#       define NV097_SET_STENCIL_OP_V_REPLACE                     0x1E01
+#       define NV097_SET_STENCIL_OP_V_INCRSAT                     0x1E02
+#       define NV097_SET_STENCIL_OP_V_DECRSAT                     0x1E03
+#       define NV097_SET_STENCIL_OP_V_INVERT                      0x150A
+#       define NV097_SET_STENCIL_OP_V_INCR                        0x8507
+#       define NV097_SET_STENCIL_OP_V_DECR                        0x8508
+#   define NV097_SET_POLYGON_OFFSET_SCALE_FACTOR              0x00000384
+#   define NV097_SET_POLYGON_OFFSET_BIAS                      0x00000388
+#   define NV097_SET_FRONT_POLYGON_MODE                       0x0000038C
+#       define NV097_SET_FRONT_POLYGON_MODE_V_POINT               0x1B00 /* differ from NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_* (0..2) */
+#       define NV097_SET_FRONT_POLYGON_MODE_V_LINE                0x1B01
+#       define NV097_SET_FRONT_POLYGON_MODE_V_FILL                0x1B02
+#   define NV097_SET_BACK_POLYGON_MODE                        0x00000390 /* no values listed; presumably FRONT_POLYGON_MODE_V_* apply - confirm */
+#   define NV097_SET_CLIP_MIN                                 0x00000394
+#   define NV097_SET_CLIP_MAX                                 0x00000398
+#   define NV097_SET_CULL_FACE                                0x0000039C
+#       define NV097_SET_CULL_FACE_V_FRONT                         0x404 /* differ from NV_PGRAPH_SETUPRASTER_CULLCTRL_* (1..3) */
+#       define NV097_SET_CULL_FACE_V_BACK                          0x405
+#       define NV097_SET_CULL_FACE_V_FRONT_AND_BACK                0x408
+#   define NV097_SET_FRONT_FACE                               0x000003A0
+#       define NV097_SET_FRONT_FACE_V_CW                           0x900
+#       define NV097_SET_FRONT_FACE_V_CCW                          0x901
+#   define NV097_SET_NORMALIZATION_ENABLE                     0x000003A4
+#   define NV097_SET_LIGHT_ENABLE_MASK                        0x000003BC
+#           define NV097_SET_LIGHT_ENABLE_MASK_LIGHT0_OFF           0
+#           define NV097_SET_LIGHT_ENABLE_MASK_LIGHT0_INFINITE      1
+#           define NV097_SET_LIGHT_ENABLE_MASK_LIGHT0_LOCAL         2
+#           define NV097_SET_LIGHT_ENABLE_MASK_LIGHT0_SPOT          3
+#   define NV097_SET_TEXGEN_S                                 0x000003C0
+#       define NV097_SET_TEXGEN_S_DISABLE                         0x0000
+#       define NV097_SET_TEXGEN_S_EYE_LINEAR                      0x2400
+#       define NV097_SET_TEXGEN_S_OBJECT_LINEAR                   0x2401
+#       define NV097_SET_TEXGEN_S_SPHERE_MAP                      0x2402
+#       define NV097_SET_TEXGEN_S_REFLECTION_MAP                  0x8512
+#       define NV097_SET_TEXGEN_S_NORMAL_MAP                      0x8511
+#   define NV097_SET_TEXGEN_T                                 0x000003C4
+#   define NV097_SET_TEXGEN_R                                 0x000003C8
+#   define NV097_SET_TEXGEN_Q                                 0x000003CC
+#   define NV097_SET_TEXTURE_MATRIX_ENABLE                    0x00000420
+#   define NV097_SET_PROJECTION_MATRIX                        0x00000440
+#   define NV097_SET_MODEL_VIEW_MATRIX                        0x00000480
+#   define NV097_SET_INVERSE_MODEL_VIEW_MATRIX                0x00000580
+#   define NV097_SET_COMPOSITE_MATRIX                         0x00000680
+#   define NV097_SET_TEXTURE_MATRIX                           0x000006C0
+#   define NV097_SET_FOG_PARAMS                               0x000009C0
+#   define NV097_SET_TEXGEN_PLANE_S                           0x00000840
+#   define NV097_SET_TEXGEN_PLANE_T                           0x00000850
+#   define NV097_SET_TEXGEN_PLANE_R                           0x00000860
+#   define NV097_SET_TEXGEN_PLANE_Q                           0x00000870
+#   define NV097_SET_TEXGEN_VIEW_MODEL                        0x000009CC
+#       define NV097_SET_TEXGEN_VIEW_MODEL_LOCAL_VIEWER           0
+#       define NV097_SET_TEXGEN_VIEW_MODEL_INFINITE_VIEWER        1
+#   define NV097_SET_FOG_PLANE                                0x000009D0
+#   define NV097_SET_SCENE_AMBIENT_COLOR                      0x00000A10
+#   define NV097_SET_VIEWPORT_OFFSET                          0x00000A20
+#   define NV097_SET_EYE_POSITION                             0x00000A50
+#   define NV097_SET_COMBINER_FACTOR0                         0x00000A60
+#   define NV097_SET_COMBINER_FACTOR1                         0x00000A80
+#   define NV097_SET_COMBINER_ALPHA_OCW                       0x00000AA0
+#   define NV097_SET_COMBINER_COLOR_ICW                       0x00000AC0
+#   define NV097_SET_VIEWPORT_SCALE                           0x00000AF0
+#   define NV097_SET_TRANSFORM_PROGRAM                        0x00000B00
+#   define NV097_SET_TRANSFORM_CONSTANT                       0x00000B80
+#   define NV097_SET_VERTEX3F                                 0x00001500
+#   define NV097_SET_BACK_LIGHT_AMBIENT_COLOR                 0x00000C00
+#   define NV097_SET_BACK_LIGHT_DIFFUSE_COLOR                 0x00000C0C
+#   define NV097_SET_BACK_LIGHT_SPECULAR_COLOR                0x00000C18
+#   define NV097_SET_LIGHT_AMBIENT_COLOR                      0x00001000
+#   define NV097_SET_LIGHT_DIFFUSE_COLOR                      0x0000100C
+#   define NV097_SET_LIGHT_SPECULAR_COLOR                     0x00001018
+#   define NV097_SET_LIGHT_LOCAL_RANGE                        0x00001024
+#   define NV097_SET_LIGHT_INFINITE_HALF_VECTOR               0x00001028
+#   define NV097_SET_LIGHT_INFINITE_DIRECTION                 0x00001034
+#   define NV097_SET_LIGHT_SPOT_FALLOFF                       0x00001040
+#   define NV097_SET_LIGHT_SPOT_DIRECTION                     0x0000104C
+#   define NV097_SET_LIGHT_LOCAL_POSITION                     0x0000105C
+#   define NV097_SET_LIGHT_LOCAL_ATTENUATION                  0x00001068
+#   define NV097_SET_VERTEX4F                                 0x00001518
+#   define NV097_SET_VERTEX_DATA_ARRAY_OFFSET                 0x00001720
+#   define NV097_SET_VERTEX_DATA_ARRAY_FORMAT                 0x00001760
+#       define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE            0x0000000F
+#           define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D     0
+#           define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1         1
+#           define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F          2
+#           define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL     4
+#           define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K       5
+#           define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP        6
+#       define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE            0x000000F0
+#       define NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE          0xFFFFFF00
+#   define NV097_SET_LOGIC_OP_ENABLE                          0x000017BC
+#   define NV097_SET_LOGIC_OP                                 0x000017C0
+#   define NV097_CLEAR_REPORT_VALUE                           0x000017C8
+#       define NV097_CLEAR_REPORT_VALUE_TYPE                      0xFFFFFFFF
+#           define NV097_CLEAR_REPORT_VALUE_TYPE_ZPASS_PIXEL_CNT      1
+#   define NV097_SET_ZPASS_PIXEL_COUNT_ENABLE                 0x000017CC
+#   define NV097_GET_REPORT                                   0x000017D0
+#       define NV097_GET_REPORT_OFFSET                            0x00FFFFFF
+#       define NV097_GET_REPORT_TYPE                              0xFF000000
+#           define NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT              1
+#   define NV097_SET_EYE_DIRECTION                            0x000017E0
+#   define NV097_SET_SHADER_CLIP_PLANE_MODE                   0x000017F8
+#   define NV097_SET_BEGIN_END                                0x000017FC
+#       define NV097_SET_BEGIN_END_OP_END                         0x00
+#       define NV097_SET_BEGIN_END_OP_POINTS                      0x01
+#       define NV097_SET_BEGIN_END_OP_LINES                       0x02
+#       define NV097_SET_BEGIN_END_OP_LINE_LOOP                   0x03
+#       define NV097_SET_BEGIN_END_OP_LINE_STRIP                  0x04
+#       define NV097_SET_BEGIN_END_OP_TRIANGLES                   0x05
+#       define NV097_SET_BEGIN_END_OP_TRIANGLE_STRIP              0x06
+#       define NV097_SET_BEGIN_END_OP_TRIANGLE_FAN                0x07
+#       define NV097_SET_BEGIN_END_OP_QUADS                       0x08
+#       define NV097_SET_BEGIN_END_OP_QUAD_STRIP                  0x09
+#       define NV097_SET_BEGIN_END_OP_POLYGON                     0x0A
+#   define NV097_ARRAY_ELEMENT16                              0x00001800
+#   define NV097_ARRAY_ELEMENT32                              0x00001808
+#   define NV097_DRAW_ARRAYS                                  0x00001810
+#       define NV097_DRAW_ARRAYS_COUNT                            0xFF000000
+#       define NV097_DRAW_ARRAYS_START_INDEX                      0x00FFFFFF
+#   define NV097_INLINE_ARRAY                                 0x00001818
+#   define NV097_SET_EYE_VECTOR                               0x0000181C
+#   define NV097_SET_VERTEX_DATA2F_M                          0x00001880
+#   define NV097_SET_VERTEX_DATA4F_M                          0x00001A00
+#   define NV097_SET_VERTEX_DATA2S                            0x00001900
+#   define NV097_SET_VERTEX_DATA4UB                           0x00001940
+#   define NV097_SET_VERTEX_DATA4S_M                          0x00001980
+#   define NV097_SET_TEXTURE_OFFSET                           0x00001B00
+#   define NV097_SET_TEXTURE_FORMAT                           0x00001B04
+#       define NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA               0x00000003
+#       define NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE            (1 << 2)
+#       define NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE             (1 << 3)
+#           define NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE_TEXTURE   0
+#           define NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE_COLOR     1
+#       define NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY            0x000000F0
+#       define NV097_SET_TEXTURE_FORMAT_COLOR                     0x0000FF00
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8             0x00
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8            0x01
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5       0x02
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5       0x03
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4       0x04
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5         0x05
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8       0x06
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8       0x07
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8    0x0B
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5   0x0C
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8  0x0E
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8  0x0F
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5 0x10
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5   0x11
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8 0x12
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8       0x13
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8             0x19
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8           0x1A
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8      0x1B
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5 0x1C
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4 0x1D
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8 0x1E
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8       0x1F
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8     0x20
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 0x24
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5         0x27
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8           0x28
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8           0x29
+# define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED 0x2E
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED 0x30
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16      0x35
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8       0x3A
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8       0x3C
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8 0x3F
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8 0x40
+#           define NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8 0x41
+#       define NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS             0x000F0000
+#       define NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U               0x00F00000
+#       define NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V               0x0F000000
+#       define NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P               0xF0000000
+#   define NV097_SET_TEXTURE_ADDRESS                          0x00001B08
+#   define NV097_SET_TEXTURE_CONTROL0                         0x00001B0C
+#       define NV097_SET_TEXTURE_CONTROL0_ENABLE                 (1 << 30)
+#       define NV097_SET_TEXTURE_CONTROL0_MIN_LOD_CLAMP           0x3FFC0000
+#       define NV097_SET_TEXTURE_CONTROL0_MAX_LOD_CLAMP           0x0003FFC0
+#   define NV097_SET_TEXTURE_CONTROL1                         0x00001B10
+#       define NV097_SET_TEXTURE_CONTROL1_IMAGE_PITCH             0xFFFF0000
+#   define NV097_SET_TEXTURE_FILTER                           0x00001B14
+#       define NV097_SET_TEXTURE_FILTER_MIPMAP_LOD_BIAS           0x00001FFF
+#       define NV097_SET_TEXTURE_FILTER_MIN                       0x00FF0000
+#       define NV097_SET_TEXTURE_FILTER_MAG                       0x0F000000
+#       define NV097_SET_TEXTURE_FILTER_ASIGNED                   (1 << 28)
+#       define NV097_SET_TEXTURE_FILTER_RSIGNED                   (1 << 29)
+#       define NV097_SET_TEXTURE_FILTER_GSIGNED                   (1 << 30)
+#       define NV097_SET_TEXTURE_FILTER_BSIGNED                   (1U << 31) /* 1U: (1 << 31) overflows int */
+#   define NV097_SET_TEXTURE_IMAGE_RECT                       0x00001B1C
+#       define NV097_SET_TEXTURE_IMAGE_RECT_WIDTH                 0xFFFF0000
+#       define NV097_SET_TEXTURE_IMAGE_RECT_HEIGHT                0x0000FFFF
+#   define NV097_SET_TEXTURE_PALETTE                          0x00001B20
+#       define NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA              (1 << 0)
+#       define NV097_SET_TEXTURE_PALETTE_LENGTH                   0x0000000C
+#         define NV097_SET_TEXTURE_PALETTE_LENGTH_256               0
+#         define NV097_SET_TEXTURE_PALETTE_LENGTH_128               1
+#         define NV097_SET_TEXTURE_PALETTE_LENGTH_64                2
+#         define NV097_SET_TEXTURE_PALETTE_LENGTH_32                3
+#       define NV097_SET_TEXTURE_PALETTE_OFFSET                   0xFFFFFFC0
+#   define NV097_SET_TEXTURE_BORDER_COLOR                     0x00001B24
+#   define NV097_SET_TEXTURE_SET_BUMP_ENV_MAT                 0x00001B28
+#   define NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE               0x00001B38
+#   define NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET              0x00001B3C
+#   define NV097_SET_SEMAPHORE_OFFSET                         0x00001D6C
+#   define NV097_BACK_END_WRITE_SEMAPHORE_RELEASE             0x00001D70
+#   define NV097_SET_ZSTENCIL_CLEAR_VALUE                     0x00001D8C
+#   define NV097_SET_COLOR_CLEAR_VALUE                        0x00001D90
+#   define NV097_CLEAR_SURFACE                                0x00001D94
+#       define NV097_CLEAR_SURFACE_Z                              (1 << 0)
+#       define NV097_CLEAR_SURFACE_STENCIL                        (1 << 1)
+#       define NV097_CLEAR_SURFACE_COLOR                          0x000000F0
+#       define NV097_CLEAR_SURFACE_R                                (1 << 4)
+#       define NV097_CLEAR_SURFACE_G                                (1 << 5)
+#       define NV097_CLEAR_SURFACE_B                                (1 << 6)
+#       define NV097_CLEAR_SURFACE_A                                (1 << 7)
+#   define NV097_SET_CLEAR_RECT_HORIZONTAL                    0x00001D98
+#   define NV097_SET_CLEAR_RECT_VERTICAL                      0x00001D9C
+#   define NV097_SET_SPECULAR_FOG_FACTOR                      0x00001E20
+#   define NV097_SET_COMBINER_COLOR_OCW                       0x00001E40
+#   define NV097_SET_COMBINER_CONTROL                         0x00001E60
+#   define NV097_SET_SHADOW_ZSLOPE_THRESHOLD                  0x00001E68
+#   define NV097_SET_SHADER_STAGE_PROGRAM                     0x00001E70
+#   define NV097_SET_SHADER_OTHER_STAGE_INPUT                 0x00001E78
+#   define NV097_SET_TRANSFORM_EXECUTION_MODE                 0x00001E94
+#       define NV097_SET_TRANSFORM_EXECUTION_MODE_MODE            0x00000003
+#       define NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE      0xFFFFFFFC
+#   define NV097_SET_TRANSFORM_PROGRAM_CXT_WRITE_EN           0x00001E98
+#   define NV097_SET_TRANSFORM_PROGRAM_LOAD                   0x00001E9C
+#   define NV097_SET_TRANSFORM_PROGRAM_START                  0x00001EA0
+#   define NV097_SET_TRANSFORM_CONSTANT_LOAD                  0x00001EA4
+
+/* vertex processing (cheops) context layout; MAT entries are 4 apart */
+#define NV_IGRAPH_XF_XFCTX_CMAT0                     0x00
+#define NV_IGRAPH_XF_XFCTX_PMAT0                     0x04
+#define NV_IGRAPH_XF_XFCTX_MMAT0                     0x08
+#define NV_IGRAPH_XF_XFCTX_IMMAT0                    0x0c
+#define NV_IGRAPH_XF_XFCTX_MMAT1                     0x10
+#define NV_IGRAPH_XF_XFCTX_IMMAT1                    0x14
+#define NV_IGRAPH_XF_XFCTX_MMAT2                     0x18
+#define NV_IGRAPH_XF_XFCTX_IMMAT2                    0x1c
+#define NV_IGRAPH_XF_XFCTX_MMAT3                     0x20
+#define NV_IGRAPH_XF_XFCTX_IMMAT3                    0x24
+#define NV_IGRAPH_XF_XFCTX_LIT0                      0x28
+#define NV_IGRAPH_XF_XFCTX_LIT1                      0x29
+#define NV_IGRAPH_XF_XFCTX_LIT2                      0x2a
+#define NV_IGRAPH_XF_XFCTX_LIT3                      0x2b
+#define NV_IGRAPH_XF_XFCTX_LIT4                      0x2c
+#define NV_IGRAPH_XF_XFCTX_LIT5                      0x2d
+#define NV_IGRAPH_XF_XFCTX_LIT6                      0x2e
+#define NV_IGRAPH_XF_XFCTX_LIT7                      0x2f
+#define NV_IGRAPH_XF_XFCTX_SPOT0                     0x30
+#define NV_IGRAPH_XF_XFCTX_SPOT1                     0x31
+#define NV_IGRAPH_XF_XFCTX_SPOT2                     0x32
+#define NV_IGRAPH_XF_XFCTX_SPOT3                     0x33
+#define NV_IGRAPH_XF_XFCTX_SPOT4                     0x34
+#define NV_IGRAPH_XF_XFCTX_SPOT5                     0x35
+#define NV_IGRAPH_XF_XFCTX_SPOT6                     0x36
+#define NV_IGRAPH_XF_XFCTX_SPOT7                     0x37
+#define NV_IGRAPH_XF_XFCTX_EYEP                      0x38
+#define NV_IGRAPH_XF_XFCTX_FOG                       0x39
+#define NV_IGRAPH_XF_XFCTX_VPSCL                     0x3a
+#define NV_IGRAPH_XF_XFCTX_VPOFF                     0x3b
+#define NV_IGRAPH_XF_XFCTX_CONS0                     0x3c
+#define NV_IGRAPH_XF_XFCTX_CONS1                     0x3d
+#define NV_IGRAPH_XF_XFCTX_CONS2                     0x3e
+#define NV_IGRAPH_XF_XFCTX_CONS3                     0x3f
+#define NV_IGRAPH_XF_XFCTX_TG0MAT                    0x40
+#define NV_IGRAPH_XF_XFCTX_T0MAT                     0x44
+#define NV_IGRAPH_XF_XFCTX_TG1MAT                    0x48
+#define NV_IGRAPH_XF_XFCTX_T1MAT                     0x4c
+#define NV_IGRAPH_XF_XFCTX_TG2MAT                    0x50
+#define NV_IGRAPH_XF_XFCTX_T2MAT                     0x54
+#define NV_IGRAPH_XF_XFCTX_TG3MAT                    0x58
+#define NV_IGRAPH_XF_XFCTX_T3MAT                     0x5c
+#define NV_IGRAPH_XF_XFCTX_PRSPACE                   0x60
+
+/* lighting (zoser) context layout */
+#define NV_IGRAPH_XF_LTCTXA_L0_K                     0x00 /* light n: K at 2n */
+#define NV_IGRAPH_XF_LTCTXA_L0_SPT                   0x01 /* SPT at 2n + 1 */
+#define NV_IGRAPH_XF_LTCTXA_L1_K                     0x02
+#define NV_IGRAPH_XF_LTCTXA_L1_SPT                   0x03
+#define NV_IGRAPH_XF_LTCTXA_L2_K                     0x04
+#define NV_IGRAPH_XF_LTCTXA_L2_SPT                   0x05
+#define NV_IGRAPH_XF_LTCTXA_L3_K                     0x06
+#define NV_IGRAPH_XF_LTCTXA_L3_SPT                   0x07
+#define NV_IGRAPH_XF_LTCTXA_L4_K                     0x08
+#define NV_IGRAPH_XF_LTCTXA_L4_SPT                   0x09
+#define NV_IGRAPH_XF_LTCTXA_L5_K                     0x0a
+#define NV_IGRAPH_XF_LTCTXA_L5_SPT                   0x0b
+#define NV_IGRAPH_XF_LTCTXA_L6_K                     0x0c
+#define NV_IGRAPH_XF_LTCTXA_L6_SPT                   0x0d
+#define NV_IGRAPH_XF_LTCTXA_L7_K                     0x0e
+#define NV_IGRAPH_XF_LTCTXA_L7_SPT                   0x0f
+#define NV_IGRAPH_XF_LTCTXA_EYED                     0x10 /* not per-light */
+#define NV_IGRAPH_XF_LTCTXA_FR_AMB                   0x11
+#define NV_IGRAPH_XF_LTCTXA_BR_AMB                   0x12
+#define NV_IGRAPH_XF_LTCTXA_CM_COL                   0x13
+#define NV_IGRAPH_XF_LTCTXA_BCM_COL                  0x14
+#define NV_IGRAPH_XF_LTCTXA_FOG_K                    0x15
+#define NV_IGRAPH_XF_LTCTXA_ZERO                     0x16
+#define NV_IGRAPH_XF_LTCTXA_PT0                      0x17
+#define NV_IGRAPH_XF_LTCTXA_FOGLIN                   0x18
+
+#define NV_IGRAPH_XF_LTCTXB_L0_AMB                   0x00 /* light n base: 6n */
+#define NV_IGRAPH_XF_LTCTXB_L0_DIF                   0x01
+#define NV_IGRAPH_XF_LTCTXB_L0_SPC                   0x02
+#define NV_IGRAPH_XF_LTCTXB_L0_BAMB                  0x03
+#define NV_IGRAPH_XF_LTCTXB_L0_BDIF                  0x04
+#define NV_IGRAPH_XF_LTCTXB_L0_BSPC                  0x05
+#define NV_IGRAPH_XF_LTCTXB_L1_AMB                   0x06
+#define NV_IGRAPH_XF_LTCTXB_L1_DIF                   0x07
+#define NV_IGRAPH_XF_LTCTXB_L1_SPC                   0x08
+#define NV_IGRAPH_XF_LTCTXB_L1_BAMB                  0x09
+#define NV_IGRAPH_XF_LTCTXB_L1_BDIF                  0x0a
+#define NV_IGRAPH_XF_LTCTXB_L1_BSPC                  0x0b
+#define NV_IGRAPH_XF_LTCTXB_L2_AMB                   0x0c
+#define NV_IGRAPH_XF_LTCTXB_L2_DIF                   0x0d
+#define NV_IGRAPH_XF_LTCTXB_L2_SPC                   0x0e
+#define NV_IGRAPH_XF_LTCTXB_L2_BAMB                  0x0f
+#define NV_IGRAPH_XF_LTCTXB_L2_BDIF                  0x10
+#define NV_IGRAPH_XF_LTCTXB_L2_BSPC                  0x11
+#define NV_IGRAPH_XF_LTCTXB_L3_AMB                   0x12
+#define NV_IGRAPH_XF_LTCTXB_L3_DIF                   0x13
+#define NV_IGRAPH_XF_LTCTXB_L3_SPC                   0x14
+#define NV_IGRAPH_XF_LTCTXB_L3_BAMB                  0x15
+#define NV_IGRAPH_XF_LTCTXB_L3_BDIF                  0x16
+#define NV_IGRAPH_XF_LTCTXB_L3_BSPC                  0x17
+#define NV_IGRAPH_XF_LTCTXB_L4_AMB                   0x18
+#define NV_IGRAPH_XF_LTCTXB_L4_DIF                   0x19
+#define NV_IGRAPH_XF_LTCTXB_L4_SPC                   0x1a
+#define NV_IGRAPH_XF_LTCTXB_L4_BAMB                  0x1b
+#define NV_IGRAPH_XF_LTCTXB_L4_BDIF                  0x1c
+#define NV_IGRAPH_XF_LTCTXB_L4_BSPC                  0x1d
+#define NV_IGRAPH_XF_LTCTXB_L5_AMB                   0x1e
+#define NV_IGRAPH_XF_LTCTXB_L5_DIF                   0x1f
+#define NV_IGRAPH_XF_LTCTXB_L5_SPC                   0x20
+#define NV_IGRAPH_XF_LTCTXB_L5_BAMB                  0x21
+#define NV_IGRAPH_XF_LTCTXB_L5_BDIF                  0x22
+#define NV_IGRAPH_XF_LTCTXB_L5_BSPC                  0x23
+#define NV_IGRAPH_XF_LTCTXB_L6_AMB                   0x24
+#define NV_IGRAPH_XF_LTCTXB_L6_DIF                   0x25
+#define NV_IGRAPH_XF_LTCTXB_L6_SPC                   0x26
+#define NV_IGRAPH_XF_LTCTXB_L6_BAMB                  0x27
+#define NV_IGRAPH_XF_LTCTXB_L6_BDIF                  0x28
+#define NV_IGRAPH_XF_LTCTXB_L6_BSPC                  0x29
+#define NV_IGRAPH_XF_LTCTXB_L7_AMB                   0x2a
+#define NV_IGRAPH_XF_LTCTXB_L7_DIF                   0x2b
+#define NV_IGRAPH_XF_LTCTXB_L7_SPC                   0x2c
+#define NV_IGRAPH_XF_LTCTXB_L7_BAMB                  0x2d
+#define NV_IGRAPH_XF_LTCTXB_L7_BDIF                  0x2e
+#define NV_IGRAPH_XF_LTCTXB_L7_BSPC                  0x2f
+#define NV_IGRAPH_XF_LTCTXB_PT1                      0x30 /* not per-light */
+#define NV_IGRAPH_XF_LTCTXB_ONE                      0x31
+#define NV_IGRAPH_XF_LTCTXB_VPOFFSET                 0x32
+
+#define NV_IGRAPH_XF_LTC1_ZERO1                      0x00
+#define NV_IGRAPH_XF_LTC1_l0                         0x01 /* != ..._L0 (0x0c) */
+#define NV_IGRAPH_XF_LTC1_Bl0                        0x02
+#define NV_IGRAPH_XF_LTC1_PP                         0x03
+#define NV_IGRAPH_XF_LTC1_r0                         0x04
+#define NV_IGRAPH_XF_LTC1_r1                         0x05
+#define NV_IGRAPH_XF_LTC1_r2                         0x06
+#define NV_IGRAPH_XF_LTC1_r3                         0x07
+#define NV_IGRAPH_XF_LTC1_r4                         0x08
+#define NV_IGRAPH_XF_LTC1_r5                         0x09
+#define NV_IGRAPH_XF_LTC1_r6                         0x0a
+#define NV_IGRAPH_XF_LTC1_r7                         0x0b
+#define NV_IGRAPH_XF_LTC1_L0                         0x0c /* != ..._l0 (0x01) */
+#define NV_IGRAPH_XF_LTC1_L1                         0x0d
+#define NV_IGRAPH_XF_LTC1_L2                         0x0e
+#define NV_IGRAPH_XF_LTC1_L3                         0x0f
+#define NV_IGRAPH_XF_LTC1_L4                         0x10
+#define NV_IGRAPH_XF_LTC1_L5                         0x11
+#define NV_IGRAPH_XF_LTC1_L6                         0x12
+#define NV_IGRAPH_XF_LTC1_L7                         0x13
+
+
+#define NV2A_VERTEX_ATTR_POSITION       0  /* 16 consecutive ids, 0..15 */
+#define NV2A_VERTEX_ATTR_WEIGHT         1
+#define NV2A_VERTEX_ATTR_NORMAL         2
+#define NV2A_VERTEX_ATTR_DIFFUSE        3
+#define NV2A_VERTEX_ATTR_SPECULAR       4
+#define NV2A_VERTEX_ATTR_FOG            5
+#define NV2A_VERTEX_ATTR_POINT_SIZE     6
+#define NV2A_VERTEX_ATTR_BACK_DIFFUSE   7
+#define NV2A_VERTEX_ATTR_BACK_SPECULAR  8
+#define NV2A_VERTEX_ATTR_TEXTURE0       9  /* TEXTUREn is 9 + n */
+#define NV2A_VERTEX_ATTR_TEXTURE1       10
+#define NV2A_VERTEX_ATTR_TEXTURE2       11
+#define NV2A_VERTEX_ATTR_TEXTURE3       12
+#define NV2A_VERTEX_ATTR_RESERVED1      13
+#define NV2A_VERTEX_ATTR_RESERVED2      14
+#define NV2A_VERTEX_ATTR_RESERVED3      15
+
+#define NV2A_CRYSTAL_FREQ 13500000
+#define NV2A_NUM_CHANNELS 32
+#define NV2A_NUM_SUBCHANNELS 8
+#define NV2A_CACHE1_SIZE 128
+
+#define NV2A_MAX_BATCH_LENGTH 0x1FFFF
+#define NV2A_VERTEXSHADER_ATTRIBUTES 16 /* NV2A_VERTEX_ATTR_* span 0..15 */
+#define NV2A_MAX_TEXTURES 4
+
+#define NV2A_MAX_TRANSFORM_PROGRAM_LENGTH 136
+#define NV2A_VERTEXSHADER_CONSTANTS 192
+#define NV2A_MAX_LIGHTS 8
+
+#define NV2A_LTCTXA_COUNT  26 /* NOTE(review): named indices end at 0x18 */
+#define NV2A_LTCTXB_COUNT  52 /* NOTE(review): named indices end at 0x32 */
+#define NV2A_LTC1_COUNT    20 /* NV_IGRAPH_XF_LTC1_* span 0x00..0x13 */
diff --git a/hw/xbox/nv2a_psh.c b/hw/xbox/nv2a_psh.c
new file mode 100644
index 0000000000..40775b576e
--- /dev/null
+++ b/hw/xbox/nv2a_psh.c
@@ -0,0 +1,847 @@
+/*
+ * QEMU Geforce NV2A pixel shader translation
+ *
+ * Copyright (c) 2013 espes
+ * Copyright (c) 2015 Jannik Vogel
+ *
+ * Based on:
+ * Cxbx, PixelShader.cpp
+ * Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
+ *                    Kingofc <kingofc@freenet.de>
+ * Xeon, XBD3DPixelShader.cpp
+ * Copyright (c) 2003 _SF_
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "qapi/qmp/qstring.h"
+
+#include "hw/xbox/nv2a_shaders_common.h"
+#include "hw/xbox/nv2a_psh.h"
+
+/*
+ * This implements translation of register combiners into glsl
+ * fragment shaders, but all terminology is in terms of Xbox DirectX
+ * pixel shaders, since I wanted to be lazy while referencing existing
+ * work / stealing code.
+ *
+ * For some background, see the OpenGL extension:
+ * https://www.opengl.org/registry/specs/NV/register_combiners.txt
+ */
+
+
+/*
+ * Texture addressing modes; psh_translate() extracts one 5-bit mode per
+ * texture stage.  The stars in each trailing comment mark the stages
+ * (0 1 2 3) in which the mode is valid.
+ */
+enum PS_TEXTUREMODES {
+    PS_TEXTUREMODES_NONE                 = 0x00, /* * * * * */
+    PS_TEXTUREMODES_PROJECT2D            = 0x01, /* * * * * */
+    PS_TEXTUREMODES_PROJECT3D            = 0x02, /* * * * * */
+    PS_TEXTUREMODES_CUBEMAP              = 0x03, /* * * * * */
+    PS_TEXTUREMODES_PASSTHRU             = 0x04, /* * * * * */
+    PS_TEXTUREMODES_CLIPPLANE            = 0x05, /* * * * * */
+    PS_TEXTUREMODES_BUMPENVMAP           = 0x06, /* - * * * */
+    PS_TEXTUREMODES_BUMPENVMAP_LUM       = 0x07, /* - * * * */
+    PS_TEXTUREMODES_BRDF                 = 0x08, /* - - * * */
+    PS_TEXTUREMODES_DOT_ST               = 0x09, /* - - * * */
+    PS_TEXTUREMODES_DOT_ZW               = 0x0a, /* - - * * */
+    PS_TEXTUREMODES_DOT_RFLCT_DIFF       = 0x0b, /* - - * - */
+    PS_TEXTUREMODES_DOT_RFLCT_SPEC       = 0x0c, /* - - - * */
+    PS_TEXTUREMODES_DOT_STR_3D           = 0x0d, /* - - - * */
+    PS_TEXTUREMODES_DOT_STR_CUBE         = 0x0e, /* - - - * */
+    PS_TEXTUREMODES_DPNDNT_AR            = 0x0f, /* - * * * */
+    PS_TEXTUREMODES_DPNDNT_GB            = 0x10, /* - * * * */
+    PS_TEXTUREMODES_DOTPRODUCT           = 0x11, /* - * * - */
+    PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST = 0x12, /* - - - * */
+    /* 0x13-0x1f reserved */
+};
+
+/*
+ * Input mapping applied to a combiner input; occupies bits 7:5 of the packed
+ * input byte.  Only the two UNSIGNED_* mappings are valid for the final
+ * combiner.
+ */
+enum PS_INPUTMAPPING {
+    PS_INPUTMAPPING_UNSIGNED_IDENTITY = 0x00, /* max(0,x)        final combiner: OK */
+    PS_INPUTMAPPING_UNSIGNED_INVERT   = 0x20, /* 1 - max(0,x)    final combiner: OK */
+    PS_INPUTMAPPING_EXPAND_NORMAL     = 0x40, /* 2*max(0,x) - 1  final combiner: invalid */
+    PS_INPUTMAPPING_EXPAND_NEGATE     = 0x60, /* 1 - 2*max(0,x)  final combiner: invalid */
+    PS_INPUTMAPPING_HALFBIAS_NORMAL   = 0x80, /* max(0,x) - 1/2  final combiner: invalid */
+    PS_INPUTMAPPING_HALFBIAS_NEGATE   = 0xa0, /* 1/2 - max(0,x)  final combiner: invalid */
+    PS_INPUTMAPPING_SIGNED_IDENTITY   = 0xc0, /* x               final combiner: invalid */
+    PS_INPUTMAPPING_SIGNED_NEGATE     = 0xe0, /* -x              final combiner: invalid */
+};
+
+/*
+ * Combiner register indices (low 4 bits of a packed input/output field).
+ * "r" = readable, "w" = writable.  Index 0 reads as zero and discards writes.
+ */
+enum PS_REGISTER {
+    PS_REGISTER_ZERO     = 0x00, /* r   */
+    PS_REGISTER_DISCARD  = 0x00, /* w   */
+    PS_REGISTER_C0       = 0x01, /* r   */
+    PS_REGISTER_C1       = 0x02, /* r   */
+    PS_REGISTER_FOG      = 0x03, /* r   */
+    PS_REGISTER_V0       = 0x04, /* r/w */
+    PS_REGISTER_V1       = 0x05, /* r/w */
+    PS_REGISTER_T0       = 0x08, /* r/w */
+    PS_REGISTER_T1       = 0x09, /* r/w */
+    PS_REGISTER_T2       = 0x0a, /* r/w */
+    PS_REGISTER_T3       = 0x0b, /* r/w */
+    PS_REGISTER_R0       = 0x0c, /* r/w */
+    PS_REGISTER_R1       = 0x0d, /* r/w */
+    PS_REGISTER_V1R0_SUM = 0x0e, /* r   */
+    PS_REGISTER_EF_PROD  = 0x0f, /* r   */
+
+    /* Constants obtained by applying an input mapping to the zero register. */
+    PS_REGISTER_ONE =
+        PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, /* OK for final combiner */
+    PS_REGISTER_NEGATIVE_ONE =
+        PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL,   /* invalid for final combiner */
+    PS_REGISTER_ONE_HALF =
+        PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, /* invalid for final combiner */
+    PS_REGISTER_NEGATIVE_ONE_HALF =
+        PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, /* invalid for final combiner */
+};
+
+/*
+ * Flag bits accompanying the combiner stage count; psh_translate() stores
+ * them in PixelShader.flags (combiner_control >> 8).
+ */
+enum PS_COMBINERCOUNTFLAGS {
+    /* which bit of r0.a drives the mux */
+    PS_COMBINERCOUNT_MUX_LSB   = 0x0000, /* mux on r0.a lsb */
+    PS_COMBINERCOUNT_MUX_MSB   = 0x0001, /* mux on r0.a msb */
+
+    /* constant c0 */
+    PS_COMBINERCOUNT_SAME_C0   = 0x0000, /* c0 same in each stage */
+    PS_COMBINERCOUNT_UNIQUE_C0 = 0x0010, /* c0 unique in each stage */
+
+    /* constant c1 */
+    PS_COMBINERCOUNT_SAME_C1   = 0x0000, /* c1 same in each stage */
+    PS_COMBINERCOUNT_UNIQUE_C1 = 0x0100  /* c1 unique in each stage */
+};
+
+/*
+ * Flag bits of a combiner output word (the part above the three 4-bit
+ * destination registers).  Several independent bit groups share this enum.
+ */
+enum PS_COMBINEROUTPUT {
+    /* output mapping (mask 0x38) */
+    PS_COMBINEROUTPUT_IDENTITY         = 0x00, /* y = x */
+    PS_COMBINEROUTPUT_BIAS             = 0x08, /* y = x - 0.5 */
+    PS_COMBINEROUTPUT_SHIFTLEFT_1      = 0x10, /* y = x*2 */
+    PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS = 0x18, /* y = (x - 0.5)*2 */
+    PS_COMBINEROUTPUT_SHIFTLEFT_2      = 0x20, /* y = x*4 */
+    PS_COMBINEROUTPUT_SHIFTRIGHT_1     = 0x30, /* y = x/2 */
+
+    /* copy blue into alpha of the destination; RGB only */
+    PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA = 0x80,
+    PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA = 0x40,
+
+    /* operation producing AB */
+    PS_COMBINEROUTPUT_AB_MULTIPLY      = 0x00,
+    PS_COMBINEROUTPUT_AB_DOT_PRODUCT   = 0x02, /* RGB only */
+
+    /* operation producing CD */
+    PS_COMBINEROUTPUT_CD_MULTIPLY      = 0x00,
+    PS_COMBINEROUTPUT_CD_DOT_PRODUCT   = 0x01, /* RGB only */
+
+    /* third output */
+    PS_COMBINEROUTPUT_AB_CD_SUM        = 0x00, /* 3rd output is AB+CD */
+    PS_COMBINEROUTPUT_AB_CD_MUX        = 0x04, /* 3rd output is MUX(AB,CD) based on R0.a */
+};
+
+/*
+ * Channel-select bit (0x10) of a packed combiner input.  Value 0 means RGB
+ * for an RGB input and BLUE for an alpha input.
+ */
+enum PS_CHANNEL {
+    PS_CHANNEL_RGB   = 0x00, /* used as RGB source */
+    PS_CHANNEL_BLUE  = 0x00, /* used as ALPHA source */
+    PS_CHANNEL_ALPHA = 0x10, /* used as RGB or ALPHA source */
+};
+
+
+/*
+ * Final-combiner setting bits; psh_translate() reads them from the low byte
+ * of final_inputs_1.
+ */
+enum PS_FINALCOMBINERSETTING {
+    PS_FINALCOMBINERSETTING_CLAMP_SUM     = 0x80, /* V1+R0 sum clamped to [0,1] */
+    PS_FINALCOMBINERSETTING_COMPLEMENT_V1 = 0x40, /* unsigned invert mapping */
+    PS_FINALCOMBINERSETTING_COMPLEMENT_R0 = 0x20, /* unsigned invert mapping */
+};
+
+
+
+// Structures to describe the PS definition
+
+/* One decoded combiner input, as filled in by parse_input(). */
+struct InputInfo {
+    int reg;      /* PS_REGISTER_* index (packed value & 0x0F) */
+    int mod;      /* PS_INPUTMAPPING_* bits (packed value & 0xE0) */
+    int chan;     /* PS_CHANNEL_* bit (packed value & 0x10) */
+    bool invert;  /* not written by parse_input() */
+};
+
+/* The four inputs (A, B, C, D) of one general combiner portion. */
+struct InputVarInfo {
+    struct InputInfo a;
+    struct InputInfo b;
+    struct InputInfo c;
+    struct InputInfo d;
+};
+
+/* Inputs and settings of the final combiner. */
+struct FCInputInfo {
+    /* Inputs A-D come from final_inputs_0, E-G from final_inputs_1. */
+    struct InputInfo a;
+    struct InputInfo b;
+    struct InputInfo c;
+    struct InputInfo d;
+    struct InputInfo e;
+    struct InputInfo f;
+    struct InputInfo g;
+
+    /* Constant mapping slots; the constant values themselves (c0_value,
+     * c1_value) are not stored in this structure. */
+    int c0;
+    int c1;
+
+    /* Set by get_var() when the final combiner reads c0 / c1. */
+    bool c0_used;
+    bool c1_used;
+
+    bool v1r0_sum;
+    bool clamp_sum;  /* PS_FINALCOMBINERSETTING_CLAMP_SUM */
+    bool inv_v1;     /* PS_FINALCOMBINERSETTING_COMPLEMENT_V1 */
+    bool inv_r0;     /* PS_FINALCOMBINERSETTING_COMPLEMENT_R0 */
+    bool enabled;    /* either final input word is non-zero */
+};
+
+/* One decoded combiner output word, as filled in by parse_combiner_output(). */
+struct OutputInfo {
+    int ab;            /* destination register of the AB result */
+    int cd;            /* destination register of the CD result */
+    int muxsum;        /* destination register of the sum / mux result */
+    int flags;         /* all PS_COMBINEROUTPUT_* bits (value >> 12) */
+    int ab_op;         /* flags & 2: PS_COMBINEROUTPUT_AB_DOT_PRODUCT or 0 */
+    int cd_op;         /* flags & 1: PS_COMBINEROUTPUT_CD_DOT_PRODUCT or 0 */
+    int muxsum_op;     /* flags & 4: PS_COMBINEROUTPUT_AB_CD_MUX or 0 */
+    int mapping;       /* flags & 0x38: output mapping */
+    int ab_alphablue;  /* flags & 0x80 */
+    int cd_alphablue;  /* flags & 0x40 */
+};
+
+/* One general combiner stage: RGB and alpha portions plus constant usage. */
+struct PSStageInfo {
+    struct InputVarInfo rgb_input;
+    struct InputVarInfo alpha_input;
+    struct OutputInfo rgb_output;
+    struct OutputInfo alpha_output;
+
+    /* Constant mapping slots; the constant values themselves (c0_value,
+     * c1_value) are not stored in this structure. */
+    int c0;
+    int c1;
+
+    /* Set by get_var() when this stage's c0 / c1 uniform is referenced. */
+    bool c0_used;
+    bool c1_used;
+};
+
+/* Working state for translating one register-combiner setup into GLSL. */
+struct PixelShader {
+    PshState state;                  /* copy of the state being translated */
+
+    int num_stages;                  /* combiner_control & 0xFF */
+    int flags;                       /* combiner_control >> 8 (PS_COMBINERCOUNT_*) */
+    struct PSStageInfo stage[8];
+    struct FCInputInfo final_input;
+    int tex_modes[4];                /* PS_TEXTUREMODES_* per texture stage */
+    int input_tex[4];                /* index of the stage each texture stage reads */
+
+    /* dot_mapping and input_texture are not tracked here */
+
+    QString *varE;                   /* final combiner E/F expressions; only set */
+    QString *varF;                   /* while add_final_stage_code() runs */
+    QString *code;                   /* GLSL statements generated so far */
+    int cur_stage;                   /* stage being generated; 8 = final combiner */
+
+    int num_var_refs;                /* used entries of var_refs */
+    char var_refs[32][32];           /* registers that need a declaration */
+    int num_const_refs;              /* used entries of const_refs */
+    char const_refs[32][32];         /* constants that need a uniform */
+};
+
+/*
+ * Record that the generated code uses the register named `var` (e.g. "r0")
+ * so that psh_convert() emits a declaration for it.  A name that is already
+ * recorded is not added again.
+ *
+ * The table is a fixed 32 x 32 char array: a name that does not fit, or a
+ * 33rd distinct name, trips the assert and is dropped instead of being
+ * written past the end of the array.
+ */
+static void add_var_ref(struct PixelShader *ps, const char *var)
+{
+    const int capacity = sizeof(ps->var_refs) / sizeof(ps->var_refs[0]);
+    int i;
+
+    for (i = 0; i < ps->num_var_refs; i++) {
+        if (strcmp(ps->var_refs[i], var) == 0) {
+            return;
+        }
+    }
+
+    if (ps->num_var_refs >= capacity
+        || strlen(var) >= sizeof(ps->var_refs[0])) {
+        assert(false);
+        return;
+    }
+    strcpy(ps->var_refs[ps->num_var_refs++], var);
+}
+
+/*
+ * Record that the generated code uses the constant named `var`
+ * (e.g. "c_0_1") so that psh_convert() emits a uniform for it.  A name that
+ * is already recorded is not added again.
+ *
+ * The table is a fixed 32 x 32 char array: a name that does not fit, or a
+ * 33rd distinct name, trips the assert and is dropped instead of being
+ * written past the end of the array.
+ */
+static void add_const_ref(struct PixelShader *ps, const char *var)
+{
+    const int capacity = sizeof(ps->const_refs) / sizeof(ps->const_refs[0]);
+    int i;
+
+    for (i = 0; i < ps->num_const_refs; i++) {
+        if (strcmp(ps->const_refs[i], var) == 0) {
+            return;
+        }
+    }
+
+    if (ps->num_const_refs >= capacity
+        || strlen(var) >= sizeof(ps->const_refs[0])) {
+        assert(false);
+        return;
+    }
+    strcpy(ps->const_refs[ps->num_const_refs++], var);
+}
+
+/*
+ * Name the uniform backing constant register c0 (index 0) or c1 (index 1)
+ * for the stage currently being generated, record the reference and mark
+ * the constant as used.
+ *
+ * The uniform is per-stage ("c_<stage>_<index>") when the matching
+ * PS_COMBINERCOUNT_UNIQUE_Cx flag is set or when generating the final
+ * combiner (cur_stage == 8); otherwise every stage shares stage 0's uniform.
+ * Returns a new QString.
+ */
+static QString* get_const_var(struct PixelShader *ps, int index)
+{
+    const int unique_flag = index ? PS_COMBINERCOUNT_UNIQUE_C1
+                                  : PS_COMBINERCOUNT_UNIQUE_C0;
+    const bool is_final = (ps->cur_stage == 8);
+    /* TODO: should the final stage really always be unique? */
+    const int stage = ((ps->flags & unique_flag) || is_final)
+                          ? ps->cur_stage : 0;
+
+    QString *name = qstring_from_fmt("c_%d_%d", stage, index);
+    add_const_ref(ps, qstring_get_str(name));
+
+    if (is_final) {
+        if (index) {
+            ps->final_input.c1_used = true;
+        } else {
+            ps->final_input.c0_used = true;
+        }
+    } else {
+        if (index) {
+            ps->stage[stage].c1_used = true;
+        } else {
+            ps->stage[stage].c0_used = true;
+        }
+    }
+    return name;
+}
+
+/*
+ * Get the GLSL expression for a register used in the program.
+ *
+ * ps:      translation state; reference tables and *_used flags are updated
+ * reg:     PS_REGISTER_* index
+ * is_dest: true when the register is written; register 0 then means
+ *          "discard" and yields an empty string, while as a source it reads
+ *          as "0.0"
+ *
+ * Returns a new QString holding the expression.
+ */
+static QString* get_var(struct PixelShader *ps, int reg, bool is_dest)
+{
+    switch (reg) {
+    case PS_REGISTER_DISCARD: /* same encoding as PS_REGISTER_ZERO */
+        return qstring_from_str(is_dest ? "" : "0.0");
+    case PS_REGISTER_C0:
+        return get_const_var(ps, 0);
+    case PS_REGISTER_C1:
+        return get_const_var(ps, 1);
+    case PS_REGISTER_FOG:
+        return qstring_from_str("pFog");
+    case PS_REGISTER_V0:
+        return qstring_from_str("v0");
+    case PS_REGISTER_V1:
+        return qstring_from_str("v1");
+    case PS_REGISTER_T0:
+        return qstring_from_str("t0");
+    case PS_REGISTER_T1:
+        return qstring_from_str("t1");
+    case PS_REGISTER_T2:
+        return qstring_from_str("t2");
+    case PS_REGISTER_T3:
+        return qstring_from_str("t3");
+    case PS_REGISTER_R0:
+        add_var_ref(ps, "r0");
+        return qstring_from_str("r0");
+    case PS_REGISTER_R1:
+        add_var_ref(ps, "r1");
+        return qstring_from_str("r1");
+    case PS_REGISTER_V1R0_SUM:
+        add_var_ref(ps, "r0");
+        return qstring_from_str("(v1 + r0)");
+    case PS_REGISTER_EF_PROD:
+        /* E and F only exist while add_final_stage_code() is running. */
+        assert(ps->varE != NULL && ps->varF != NULL);
+        return qstring_from_fmt("(%s * %s)", qstring_get_str(ps->varE),
+                                qstring_get_str(ps->varF));
+    default:
+        break;
+    }
+
+    /* Register indices 0x6 and 0x7 have no definition.  If the assert is
+     * compiled out, treat them like register 0 rather than running off the
+     * end of a non-void function. */
+    assert(false);
+    return qstring_from_str(is_dest ? "" : "0.0");
+}
+
+/*
+ * Build the GLSL expression for one combiner input: the source register,
+ * its channel swizzle, and the input mapping applied on top.
+ *
+ * is_alpha selects the swizzle used for channel value 0: ".b" for an alpha
+ * input, ".rgb" for an RGB input.  Returns a QString the caller must release.
+ */
+static QString* get_input_var(struct PixelShader *ps, struct InputInfo in, bool is_alpha)
+{
+    QString *src = get_var(ps, in.reg, false);
+    const char *text = qstring_get_str(src);
+
+    /* The literal zero takes no swizzle, and neither does an E*F product
+     * whose text already contains an ".a" selection. */
+    bool is_zero = (strcmp(text, "0.0") == 0);
+    bool ef_has_alpha = (in.reg == PS_REGISTER_EF_PROD)
+                        && (strstr(text, ".a") != NULL);
+
+    if (!is_zero && !ef_has_alpha) {
+        if (in.chan == PS_CHANNEL_ALPHA) {
+            qstring_append(src, ".a");
+        } else if (in.chan == PS_CHANNEL_RGB) {
+            qstring_append(src, is_alpha ? ".b" : ".rgb");
+        } else {
+            assert(false);
+        }
+    }
+
+    /* Fetch the text again instead of reusing the pre-append pointer. */
+    text = qstring_get_str(src);
+
+    QString *mapped = NULL;
+    switch (in.mod) {
+    case PS_INPUTMAPPING_UNSIGNED_IDENTITY:
+    case PS_INPUTMAPPING_SIGNED_IDENTITY:
+        /* No wrapping needed: hand our reference straight to the caller. */
+        return src;
+    case PS_INPUTMAPPING_UNSIGNED_INVERT:
+        mapped = qstring_from_fmt("(1.0 - %s)", text);
+        break;
+    case PS_INPUTMAPPING_EXPAND_NORMAL: // TODO: Change to max(0, x)??
+        mapped = qstring_from_fmt("(2.0 * %s - 1.0)", text);
+        break;
+    case PS_INPUTMAPPING_EXPAND_NEGATE:
+        mapped = qstring_from_fmt("(1.0 - 2.0 * %s)", text);
+        break;
+    case PS_INPUTMAPPING_HALFBIAS_NORMAL:
+        mapped = qstring_from_fmt("(%s - 0.5)", text);
+        break;
+    case PS_INPUTMAPPING_HALFBIAS_NEGATE:
+        mapped = qstring_from_fmt("(0.5 - %s)", text);
+        break;
+    case PS_INPUTMAPPING_SIGNED_NEGATE:
+        mapped = qstring_from_fmt("-%s", text);
+        break;
+    default:
+        assert(false);
+        break;
+    }
+
+    QDECREF(src);
+    return mapped;
+}
+
+/*
+ * Get code for the output mapping of a stage.
+ *
+ * reg:     expression to be mapped; the caller keeps its own reference
+ * mapping: PS_COMBINEROUTPUT_* mapping bits (flags & 0x38)
+ *
+ * Returns a QString the caller must release.  For the identity mapping this
+ * is `reg` itself with an extra reference.
+ */
+static QString* get_output(QString *reg, int mapping)
+{
+    const char *expr = qstring_get_str(reg);
+
+    switch (mapping) {
+    case PS_COMBINEROUTPUT_IDENTITY:
+        break;
+    case PS_COMBINEROUTPUT_BIAS:
+        return qstring_from_fmt("(%s - 0.5)", expr);
+    case PS_COMBINEROUTPUT_SHIFTLEFT_1:
+        return qstring_from_fmt("(%s * 2.0)", expr);
+    case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS:
+        return qstring_from_fmt("((%s - 0.5) * 2.0)", expr);
+    case PS_COMBINEROUTPUT_SHIFTLEFT_2:
+        return qstring_from_fmt("(%s * 4.0)", expr);
+    case PS_COMBINEROUTPUT_SHIFTRIGHT_1:
+        return qstring_from_fmt("(%s / 2.0)", expr);
+    default:
+        /* 0x28 and 0x38 fit the 0x38 mask but are not defined mappings.
+         * If the assert is compiled out, fall back to the identity mapping
+         * instead of returning an uninitialized pointer. */
+        assert(false);
+        break;
+    }
+
+    QINCREF(reg);
+    return reg;
+}
+
+/*
+ * Add the GLSL code for one portion (RGB or alpha) of a general combiner
+ * stage to ps->code.
+ *
+ * input:      the four inputs A-D
+ * output:     destination registers, operations and output mapping
+ * write_mask: swizzle written on each destination ("rgb" or "a")
+ * is_alpha:   true for the alpha portion
+ *
+ * Emits, in this order: the AB result, the CD result, the optional
+ * blue-to-alpha copies, then the sum / mux result.  A result whose
+ * destination is the discard register emits nothing.
+ */
+static void add_stage_code(struct PixelShader *ps,
+                           struct InputVarInfo input, struct OutputInfo output,
+                           const char *write_mask, bool is_alpha)
+{
+    QString *a = get_input_var(ps, input.a, is_alpha);
+    QString *b = get_input_var(ps, input.b, is_alpha);
+    QString *c = get_input_var(ps, input.c, is_alpha);
+    QString *d = get_input_var(ps, input.d, is_alpha);
+
+    /* A dot product is a scalar, so widen it when three components are
+     * written. */
+    const char *caster = "";
+    if (strlen(write_mask) == 3) {
+        caster = "vec3";
+    }
+
+    QString *ab;
+    if (output.ab_op == PS_COMBINEROUTPUT_AB_DOT_PRODUCT) {
+        ab = qstring_from_fmt("dot(%s, %s)",
+                              qstring_get_str(a), qstring_get_str(b));
+    } else {
+        ab = qstring_from_fmt("(%s * %s)",
+                              qstring_get_str(a), qstring_get_str(b));
+    }
+
+    QString *cd;
+    if (output.cd_op == PS_COMBINEROUTPUT_CD_DOT_PRODUCT) {
+        cd = qstring_from_fmt("dot(%s, %s)",
+                              qstring_get_str(c), qstring_get_str(d));
+    } else {
+        cd = qstring_from_fmt("(%s * %s)",
+                              qstring_get_str(c), qstring_get_str(d));
+    }
+
+    QString *ab_mapping = get_output(ab, output.mapping);
+    QString *cd_mapping = get_output(cd, output.mapping);
+    QString *ab_dest = get_var(ps, output.ab, true);
+    QString *cd_dest = get_var(ps, output.cd, true);
+    QString *sum_dest = get_var(ps, output.muxsum, true);
+
+    /* get_var() returns an empty string for the discard register. */
+    const bool ab_written = qstring_get_length(ab_dest) != 0;
+    const bool cd_written = qstring_get_length(cd_dest) != 0;
+    const bool sum_written = qstring_get_length(sum_dest) != 0;
+
+    if (ab_written) {
+        qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n",
+                           qstring_get_str(ab_dest), write_mask, caster,
+                           qstring_get_str(ab_mapping));
+    }
+    if (cd_written) {
+        qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n",
+                           qstring_get_str(cd_dest), write_mask, caster,
+                           qstring_get_str(cd_mapping));
+    }
+
+    /* Blue-to-alpha copies within the destination register.  With a
+     * discarded destination there is no register to copy within; emitting
+     * the copy anyway would assign to an expression, which is not valid
+     * GLSL. */
+    if (!is_alpha && ab_written
+        && (output.flags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA)) {
+        qstring_append_fmt(ps->code, "%s.a = %s.b;\n",
+                           qstring_get_str(ab_dest), qstring_get_str(ab_dest));
+    }
+    if (!is_alpha && cd_written
+        && (output.flags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA)) {
+        qstring_append_fmt(ps->code, "%s.a = %s.b;\n",
+                           qstring_get_str(cd_dest), qstring_get_str(cd_dest));
+    }
+
+    if (sum_written) {
+        QString *sum;
+        if (output.muxsum_op == PS_COMBINEROUTPUT_AB_CD_SUM) {
+            sum = qstring_from_fmt("(%s + %s)",
+                                   qstring_get_str(ab), qstring_get_str(cd));
+        } else {
+            /* The mux reads r0.a, so r0 must get a declaration even if no
+             * combiner input or output names it. */
+            add_var_ref(ps, "r0");
+            sum = qstring_from_fmt("((r0.a >= 0.5) ? %s : %s)",
+                                   qstring_get_str(cd), qstring_get_str(ab));
+        }
+
+        /* The output mapping is applied to the combined value, not to the
+         * already-mapped AB / CD results. */
+        QString *sum_mapping = get_output(sum, output.mapping);
+        qstring_append_fmt(ps->code, "%s.%s = %s(%s);\n",
+                           qstring_get_str(sum_dest), write_mask, caster,
+                           qstring_get_str(sum_mapping));
+        QDECREF(sum);
+        QDECREF(sum_mapping);
+    }
+
+    QDECREF(a);
+    QDECREF(b);
+    QDECREF(c);
+    QDECREF(d);
+    QDECREF(ab);
+    QDECREF(cd);
+    QDECREF(ab_mapping);
+    QDECREF(cd_mapping);
+    QDECREF(ab_dest);
+    QDECREF(cd_dest);
+    QDECREF(sum_dest);
+}
+
+/*
+ * Add code for the final combiner stage to ps->code:
+ *
+ *     r0.rgb = D + mix(C, B, A)
+ *     r0.a   = G
+ *
+ * E and F are evaluated first and kept in ps->varE / ps->varF for the
+ * duration of the call, because get_var() needs them to expand the E*F
+ * product register.
+ *
+ * NOTE(review): final.clamp_sum, final.inv_v1 and final.inv_r0 are not
+ * consulted here.
+ */
+static void add_final_stage_code(struct PixelShader *ps, struct FCInputInfo final)
+{
+    ps->varE = get_input_var(ps, final.e, false);
+    ps->varF = get_input_var(ps, final.f, false);
+
+    QString *a = get_input_var(ps, final.a, false);
+    QString *b = get_input_var(ps, final.b, false);
+    QString *c = get_input_var(ps, final.c, false);
+    QString *d = get_input_var(ps, final.d, false);
+    /* G feeds the alpha output, so it is an alpha-type input: channel value
+     * 0 selects blue (PS_CHANNEL_BLUE), not the red component of an RGB
+     * triple. */
+    QString *g = get_input_var(ps, final.g, true);
+
+    add_var_ref(ps, "r0");
+    qstring_append_fmt(ps->code, "r0.rgb = %s + mix(vec3(%s), vec3(%s), vec3(%s));\n",
+                       qstring_get_str(d), qstring_get_str(c),
+                       qstring_get_str(b), qstring_get_str(a));
+    /* G is normally a scalar here; the vec3().x wrapper keeps the statement
+     * well-typed should the expression still be a vector (an E*F product
+     * receives no swizzle when its text already contains ".a"). */
+    qstring_append_fmt(ps->code, "r0.a = vec3(%s).x;\n", qstring_get_str(g));
+
+    QDECREF(a);
+    QDECREF(b);
+    QDECREF(c);
+    QDECREF(d);
+    QDECREF(g);
+
+    QDECREF(ps->varE);
+    QDECREF(ps->varF);
+    ps->varE = ps->varF = NULL;
+}
+
+
+
+/*
+ * Generate the complete GLSL fragment shader for the parsed combiner setup
+ * in `ps`.
+ *
+ * Three pieces of text are built and concatenated at the end:
+ *   preflight - global declarations (inputs, output, uniforms)
+ *   vars      - the start of main(): perspective-corrected inputs, texture
+ *               fetches and register declarations
+ *   ps->code  - combiner stage statements and the alpha test
+ *
+ * Returns a new QString with the shader source.  preflight, vars and
+ * ps->code are released before returning; ps->code is not reset to NULL.
+ */
+static QString* psh_convert(struct PixelShader *ps)
+{
+    int i;
+
+    QString *preflight = qstring_new();
+    qstring_append(preflight, STRUCT_VERTEX_DATA);
+    qstring_append(preflight, "noperspective in VertexData g_vtx;\n");
+    qstring_append(preflight, "#define vtx g_vtx\n");
+    qstring_append(preflight, "\n");
+    qstring_append(preflight, "out vec4 fragColor;\n");
+    qstring_append(preflight, "\n");
+    qstring_append(preflight, "uniform vec4 fogColor;\n");
+
+    /* calculate perspective-correct inputs */
+    QString *vars = qstring_new();
+    qstring_append(vars, "vec4 pD0 = vtx.D0 / vtx.inv_w;\n");
+    qstring_append(vars, "vec4 pD1 = vtx.D1 / vtx.inv_w;\n");
+    qstring_append(vars, "vec4 pB0 = vtx.B0 / vtx.inv_w;\n");
+    qstring_append(vars, "vec4 pB1 = vtx.B1 / vtx.inv_w;\n");
+    qstring_append(vars, "vec4 pFog = vec4(fogColor.rgb, clamp(vtx.Fog / vtx.inv_w, 0.0, 1.0));\n");
+    qstring_append(vars, "vec4 pT0 = vtx.T0 / vtx.inv_w;\n");
+    qstring_append(vars, "vec4 pT1 = vtx.T1 / vtx.inv_w;\n");
+    qstring_append(vars, "vec4 pT2 = vtx.T2 / vtx.inv_w;\n");
+    qstring_append(vars, "vec4 pT3 = vtx.T3 / vtx.inv_w;\n");
+    qstring_append(vars, "\n");
+    qstring_append(vars, "vec4 v0 = pD0;\n");
+    qstring_append(vars, "vec4 v1 = pD1;\n");
+
+    ps->code = qstring_new();
+
+    /* One texture register t<i> per texture stage.  A mode that really
+     * samples a texture sets sampler_type, which triggers the sampler
+     * uniform and the alphakill check after the switch. */
+    for (i = 0; i < 4; i++) {
+
+        const char *sampler_type = NULL;
+
+        switch (ps->tex_modes[i]) {
+        case PS_TEXTUREMODES_NONE:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_NONE */\n",
+                               i);
+            break;
+        case PS_TEXTUREMODES_PROJECT2D:
+            if (ps->state.rect_tex[i]) {
+                sampler_type = "sampler2DRect";
+            } else {
+                sampler_type = "sampler2D";
+            }
+            qstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyw);\n",
+                               i, i, i);
+            break;
+        case PS_TEXTUREMODES_PROJECT3D:
+            sampler_type = "sampler3D";
+            qstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n",
+                               i, i, i);
+            break;
+        case PS_TEXTUREMODES_CUBEMAP:
+            sampler_type = "samplerCube";
+            qstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, pT%d.xyz / pT%d.w);\n",
+                               i, i, i, i);
+            break;
+        case PS_TEXTUREMODES_PASSTHRU:
+            qstring_append_fmt(vars, "vec4 t%d = pT%d;\n", i, i);
+            break;
+        case PS_TEXTUREMODES_CLIPPLANE: {
+            /* Discard the fragment when any coordinate satisfies the
+             * emitted comparison: ">= 0.0" if compare_mode is set for that
+             * component, "< 0.0" otherwise. */
+            int j;
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_CLIPPLANE */\n",
+                               i);
+            for (j = 0; j < 4; j++) {
+                qstring_append_fmt(vars, "  if(pT%d.%c %s 0.0) { discard; };\n",
+                                   i, "xyzw"[j],
+                                   ps->state.compare_mode[i][j] ? ">=" : "<");
+            }
+            break;
+        }
+        case PS_TEXTUREMODES_BUMPENVMAP:
+            /* Offset the lookup by the red/green of the input stage's
+             * texture register, transformed by bumpMat<i>. */
+            assert(!ps->state.rect_tex[i]);
+            sampler_type = "sampler2D";
+            qstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
+            /* FIXME: Do bumpMat swizzle on CPU before upload */
+            qstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, pT%d.xy + t%d.rg * mat2(bumpMat%d[0].xy,bumpMat%d[1].yx));\n",
+                               i, i, i, ps->input_tex[i], i, i);
+            break;
+        case PS_TEXTUREMODES_BUMPENVMAP_LUM:
+            /* The luminance scaling goes into ps->code, so it executes
+             * after everything in vars, including the fetch below. */
+            qstring_append_fmt(preflight, "uniform float bumpScale%d;\n", i);
+            qstring_append_fmt(preflight, "uniform float bumpOffset%d;\n", i);
+            qstring_append_fmt(ps->code, "/* BUMPENVMAP_LUM for stage %d */\n", i);
+            qstring_append_fmt(ps->code, "t%d = t%d * (bumpScale%d * t%d.b + bumpOffset%d);\n",
+                               i, i, i, ps->input_tex[i], i);
+            /* Now the same as BUMPENVMAP */
+            assert(!ps->state.rect_tex[i]);
+            sampler_type = "sampler2D";
+            qstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i);
+            /* FIXME: Do bumpMat swizzle on CPU before upload */
+            qstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, pT%d.xy + t%d.rg * mat2(bumpMat%d[0].xy,bumpMat%d[1].yx));\n",
+                               i, i, i, ps->input_tex[i], i, i);
+            break;
+        case PS_TEXTUREMODES_BRDF:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_BRDF */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        case PS_TEXTUREMODES_DOT_ST:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_ST */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        case PS_TEXTUREMODES_DOT_ZW:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_ZW */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        case PS_TEXTUREMODES_DOT_RFLCT_DIFF:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_RFLCT_DIFF */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        case PS_TEXTUREMODES_DOT_RFLCT_SPEC:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_RFLCT_SPEC */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        case PS_TEXTUREMODES_DOT_STR_3D:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_STR_3D */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        case PS_TEXTUREMODES_DOT_STR_CUBE:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_STR_CUBE */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        case PS_TEXTUREMODES_DPNDNT_AR:
+            /* Dependent read: the input stage's alpha/red are the
+             * coordinates. */
+            assert(!ps->state.rect_tex[i]);
+            sampler_type = "sampler2D";
+            qstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%d.ar);\n",
+                               i, i, ps->input_tex[i]);
+            break;
+        case PS_TEXTUREMODES_DPNDNT_GB:
+            /* Dependent read: the input stage's green/blue are the
+             * coordinates. */
+            assert(!ps->state.rect_tex[i]);
+            sampler_type = "sampler2D";
+            qstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%d.gb);\n",
+                               i, i, ps->input_tex[i]);
+            break;
+        case PS_TEXTUREMODES_DOTPRODUCT:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(dot(pT%d.xyz, t%d.rgb));\n",
+                               i, i, ps->input_tex[i]);
+            break;
+        case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST:
+            qstring_append_fmt(vars, "vec4 t%d = vec4(0.0); /* PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST */\n",
+                               i);
+            assert(false); /* Unimplemented */
+            break;
+        default:
+            fprintf(stderr, "Unknown ps tex mode: 0x%x\n", ps->tex_modes[i]);
+            assert(false);
+            break;
+        }
+        
+        if (sampler_type != NULL) {
+            qstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i);
+
+            /* As this means a texture fetch does happen, do alphakill */
+            if (ps->state.alphakill[i]) {
+                qstring_append_fmt(vars, "if (t%d.a == 0.0) { discard; };\n",
+                                   i);
+            }
+        }
+    }
+
+    /* General combiner stages: RGB portion first, then alpha. */
+    for (i = 0; i < ps->num_stages; i++) {
+        ps->cur_stage = i;
+        qstring_append_fmt(ps->code, "// Stage %d\n", i);
+        add_stage_code(ps, ps->stage[i].rgb_input, ps->stage[i].rgb_output, "rgb", false);
+        add_stage_code(ps, ps->stage[i].alpha_input, ps->stage[i].alpha_output, "a", true);
+    }
+
+    /* The final combiner is generated as stage number 8. */
+    if (ps->final_input.enabled) {
+        ps->cur_stage = 8;
+        qstring_append(ps->code, "// Final Combiner\n");
+        add_final_stage_code(ps, ps->final_input);
+    }
+
+    /* Declare every register referenced during generation.  r0.a starts as
+     * t0.a when texture stage 0 has a mode, otherwise as 1.0. */
+    for (i = 0; i < ps->num_var_refs; i++) {
+        qstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]);
+        if (strcmp(ps->var_refs[i], "r0") == 0) {
+            if (ps->tex_modes[0] != PS_TEXTUREMODES_NONE) {
+                qstring_append(vars, "r0.a = t0.a;\n");
+            } else {
+                qstring_append(vars, "r0.a = 1.0;\n");
+            }
+        }
+    }
+    /* Every referenced constant becomes a vec4 uniform. */
+    for (i = 0; i < ps->num_const_refs; i++) {
+        qstring_append_fmt(preflight, "uniform vec4 %s;\n", ps->const_refs[i]);
+    }
+
+    /* Alpha test on r0.a, appended after all combiner code. */
+    if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) {
+        qstring_append_fmt(preflight, "uniform float alphaRef;\n");
+        if (ps->state.alpha_func == ALPHA_FUNC_NEVER) {
+            qstring_append(ps->code, "discard;\n");
+        } else {
+            /* NOTE(review): alpha_op has no value on the default path if
+             * the assert is compiled out. */
+            const char* alpha_op;
+            switch (ps->state.alpha_func) {
+            case ALPHA_FUNC_LESS: alpha_op = "<"; break;
+            case ALPHA_FUNC_EQUAL: alpha_op = "=="; break;
+            case ALPHA_FUNC_LEQUAL: alpha_op = "<="; break;
+            case ALPHA_FUNC_GREATER: alpha_op = ">"; break;
+            case ALPHA_FUNC_NOTEQUAL: alpha_op = "!="; break;
+            case ALPHA_FUNC_GEQUAL: alpha_op = ">="; break;
+            default:
+                assert(false);
+                break;
+            }
+            qstring_append_fmt(ps->code, "if (!(r0.a %s alphaRef)) discard;\n",
+                               alpha_op);
+        }
+    }
+
+    /* Assemble the shader.  NOTE(review): "fragColor = r0" is emitted
+     * unconditionally, while r0 is only declared above if something
+     * registered it via add_var_ref(). */
+    QString *final = qstring_new();
+    qstring_append(final, "#version 330\n\n");
+    qstring_append(final, qstring_get_str(preflight));
+    qstring_append(final, "void main() {\n");
+    qstring_append(final, qstring_get_str(vars));
+    qstring_append(final, qstring_get_str(ps->code));
+    qstring_append(final, "fragColor = r0;\n");
+    qstring_append(final, "}\n");
+
+    QDECREF(preflight);
+    QDECREF(vars);
+    QDECREF(ps->code);
+
+    return final;
+}
+
+/*
+ * Split one packed 8-bit combiner input into its fields.  The fields keep
+ * their bit positions (no shifting), so they compare directly against the
+ * PS_CHANNEL_* and PS_INPUTMAPPING_* constants.  var->invert is not touched.
+ */
+static void parse_input(struct InputInfo *var, int value)
+{
+    const int reg_bits  = 0x0F;  /* bits 3:0 */
+    const int chan_bits = 0x10;  /* bit 4    */
+    const int mod_bits  = 0xE0;  /* bits 7:5 */
+
+    var->mod  = value & mod_bits;
+    var->chan = value & chan_bits;
+    var->reg  = value & reg_bits;
+}
+
+/*
+ * Decode a 32-bit combiner input word into four inputs.  Input A is the most
+ * significant byte and D the least significant one; decoding runs from D up
+ * to A.
+ */
+static void parse_combiner_inputs(uint32_t value,
+                                struct InputInfo *a, struct InputInfo *b,
+                                struct InputInfo *c, struct InputInfo *d)
+{
+    struct InputInfo *by_byte[4] = { d, c, b, a };
+    int k;
+
+    for (k = 0; k < 4; k++) {
+        parse_input(by_byte[k], (value >> (8 * k)) & 0xFF);
+    }
+}
+
+/*
+ * Decode a combiner output word: three 4-bit destination registers in the
+ * low 12 bits (CD, AB, sum/mux from the bottom up) and the
+ * PS_COMBINEROUTPUT_* flag bits above them.  The per-purpose fields keep the
+ * flag bits unshifted.
+ */
+static void parse_combiner_output(uint32_t value, struct OutputInfo *out)
+{
+    const int bits = value >> 12;
+
+    /* destination registers */
+    out->muxsum = (value >> 8) & 0xF;
+    out->ab     = (value >> 4) & 0xF;
+    out->cd     = value & 0xF;
+
+    /* flag word and its sub-fields */
+    out->flags        = bits;
+    out->mapping      = bits & 0x38;
+    out->muxsum_op    = bits & 4;
+    out->ab_op        = bits & 2;
+    out->cd_op        = bits & 1;
+    out->ab_alphablue = bits & 0x80;
+    out->cd_alphablue = bits & 0x40;
+}
+
+/*
+ * Translate a register-combiner configuration into GLSL fragment shader
+ * source.
+ *
+ * state: snapshot of the combiner and texture-stage state
+ *
+ * Returns a new QString holding the shader source.
+ */
+QString *psh_translate(const PshState state)
+{
+    int i;
+    struct PixelShader ps;
+    memset(&ps, 0, sizeof(ps));
+
+    ps.state = state;
+
+    /* The stage count is an 8-bit field but only 8 stages exist, both in
+     * ps.stage[] and in the state arrays; a larger count would index past
+     * the end of all of them. */
+    const int max_stages = sizeof(ps.stage) / sizeof(ps.stage[0]);
+    ps.num_stages = state.combiner_control & 0xFF;
+    if (ps.num_stages > max_stages) {
+        fprintf(stderr, "Invalid combiner stage count %d, using %d\n",
+                ps.num_stages, max_stages);
+        ps.num_stages = max_stages;
+    }
+    ps.flags = state.combiner_control >> 8;
+
+    /* One 5-bit texture mode per texture stage. */
+    for (i = 0; i < 4; i++) {
+        ps.tex_modes[i] = (state.shader_stage_program >> (i * 5)) & 0x1F;
+    }
+
+    /* Which earlier texture stage feeds each stage: stage 0 has none,
+     * stage 1 always reads stage 0, stages 2 and 3 are configurable. */
+    ps.input_tex[0] = -1;
+    ps.input_tex[1] = 0;
+    ps.input_tex[2] = (state.other_stage_input >> 16) & 0xF;
+    ps.input_tex[3] = (state.other_stage_input >> 20) & 0xF;
+
+    /* The per-stage constant mappings (stage[i].c0 / c1) are not parsed. */
+    for (i = 0; i < ps.num_stages; i++) {
+        parse_combiner_inputs(state.rgb_inputs[i],
+            &ps.stage[i].rgb_input.a, &ps.stage[i].rgb_input.b,
+            &ps.stage[i].rgb_input.c, &ps.stage[i].rgb_input.d);
+        parse_combiner_inputs(state.alpha_inputs[i],
+            &ps.stage[i].alpha_input.a, &ps.stage[i].alpha_input.b,
+            &ps.stage[i].alpha_input.c, &ps.stage[i].alpha_input.d);
+
+        parse_combiner_output(state.rgb_outputs[i], &ps.stage[i].rgb_output);
+        parse_combiner_output(state.alpha_outputs[i], &ps.stage[i].alpha_output);
+    }
+
+    /* The final combiner counts as enabled when either input word is set. */
+    ps.final_input.enabled = state.final_inputs_0 || state.final_inputs_1;
+    if (ps.final_input.enabled) {
+        /* The low byte of final_inputs_1 holds settings, not an input; it is
+         * decoded into this throwaway slot and then read as flags. */
+        struct InputInfo blank;
+
+        parse_combiner_inputs(state.final_inputs_0,
+                              &ps.final_input.a, &ps.final_input.b,
+                              &ps.final_input.c, &ps.final_input.d);
+        parse_combiner_inputs(state.final_inputs_1,
+                              &ps.final_input.e, &ps.final_input.f,
+                              &ps.final_input.g, &blank);
+        int flags = state.final_inputs_1 & 0xFF;
+        ps.final_input.clamp_sum = flags & PS_FINALCOMBINERSETTING_CLAMP_SUM;
+        ps.final_input.inv_v1 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1;
+        ps.final_input.inv_r0 = flags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0;
+    }
+
+    return psh_convert(&ps);
+}
diff --git a/hw/xbox/nv2a_psh.h b/hw/xbox/nv2a_psh.h
new file mode 100644
index 0000000000..cb55ba775c
--- /dev/null
+++ b/hw/xbox/nv2a_psh.h
@@ -0,0 +1,59 @@
+/*
+ * QEMU Geforce NV2A pixel shader translation
+ *
+ * Copyright (c) 2013 espes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_NV2A_PSH_H
+#define HW_NV2A_PSH_H
+
+#include "qapi/qmp/qstring.h"
+
+enum PshAlphaFunc { /* selector stored in PshState.alpha_func */
+    ALPHA_FUNC_NEVER    = 0,
+    ALPHA_FUNC_LESS     = 1,
+    ALPHA_FUNC_EQUAL    = 2,
+    ALPHA_FUNC_LEQUAL   = 3,
+    ALPHA_FUNC_GREATER  = 4,
+    ALPHA_FUNC_NOTEQUAL = 5,
+    ALPHA_FUNC_GEQUAL   = 6,
+    ALPHA_FUNC_ALWAYS   = 7,
+};
+
+typedef struct PshState { /* input of psh_translate(), passed by value */
+    /* fragment shader - register combiner stuff */
+    uint32_t combiner_control;
+    uint32_t shader_stage_program;
+    uint32_t other_stage_input;
+    uint32_t final_inputs_0; /* decoded into final combiner inputs a-d */
+    uint32_t final_inputs_1; /* inputs e-g; low 8 bits hold setting flags */
+    /* indexed by combiner stage; decoded by parse_combiner_inputs/_output */
+    uint32_t rgb_inputs[8], rgb_outputs[8];
+    uint32_t alpha_inputs[8], alpha_outputs[8];
+
+    bool rect_tex[4]; /* NOTE(review): presumably per texture stage - confirm */
+    bool compare_mode[4][4];
+    bool alphakill[4];
+
+    bool alpha_test;
+    enum PshAlphaFunc alpha_func; /* one of the ALPHA_FUNC_* selectors */
+} PshState;
+
+QString *psh_translate(const PshState state);
+
+#endif
diff --git a/hw/xbox/nv2a_shaders.c b/hw/xbox/nv2a_shaders.c
new file mode 100644
index 0000000000..9eb37ec5ba
--- /dev/null
+++ b/hw/xbox/nv2a_shaders.c
@@ -0,0 +1,944 @@
+/*
+ * QEMU Geforce NV2A shader generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu-common.h"
+#include "hw/xbox/nv2a_debug.h"
+#include "hw/xbox/nv2a_shaders_common.h"
+#include "hw/xbox/nv2a_shaders.h"
+
+static QString* generate_geometry_shader(
+                                      enum ShaderPolygonMode polygon_front_mode,
+                                      enum ShaderPolygonMode polygon_back_mode,
+                                      enum ShaderPrimitiveMode primitive_mode,
+                                      GLenum *gl_primitive_mode) /* out */
+{
+    /* Stores the GL draw mode in *gl_primitive_mode; returns geometry shader
+     * source when one is needed, else NULL. FIXME: no 2-sided-poly mode */
+    assert(polygon_front_mode == polygon_back_mode);
+    enum ShaderPolygonMode polygon_mode = polygon_front_mode;
+
+    /* POINT mode shouldn't require any special work */
+    if (polygon_mode == POLY_MODE_POINT) {
+        *gl_primitive_mode = GL_POINTS;
+        return NULL;
+    }
+
+    /* Handle LINE and FILL mode; cases that `break` need a geometry shader */
+    const char *layout_in = NULL;
+    const char *layout_out = NULL;
+    const char *body = NULL;
+    switch (primitive_mode) {
+    case PRIM_TYPE_POINTS: *gl_primitive_mode = GL_POINTS; return NULL;
+    case PRIM_TYPE_LINES: *gl_primitive_mode = GL_LINES; return NULL;
+    case PRIM_TYPE_LINE_LOOP: *gl_primitive_mode = GL_LINE_LOOP; return NULL;
+    case PRIM_TYPE_LINE_STRIP: *gl_primitive_mode = GL_LINE_STRIP; return NULL;
+    case PRIM_TYPE_TRIANGLES:
+        *gl_primitive_mode = GL_TRIANGLES;
+        if (polygon_mode == POLY_MODE_FILL) { return NULL; }
+        assert(polygon_mode == POLY_MODE_LINE);
+        layout_in = "layout(triangles) in;\n";
+        layout_out = "layout(line_strip, max_vertices = 4) out;\n";
+        body = "  emit_vertex(0);\n"
+               "  emit_vertex(1);\n"
+               "  emit_vertex(2);\n"
+               "  emit_vertex(0);\n"
+               "  EndPrimitive();\n";
+        break;
+    case PRIM_TYPE_TRIANGLE_STRIP:
+        *gl_primitive_mode = GL_TRIANGLE_STRIP;
+        if (polygon_mode == POLY_MODE_FILL) { return NULL; }
+        assert(polygon_mode == POLY_MODE_LINE);
+        layout_in = "layout(triangles) in;\n";
+        layout_out = "layout(line_strip, max_vertices = 4) out;\n";
+        /* Imagine a quad made of a tristrip, the comments tell you which
+         * vertex we are using */
+        body = "  if ((gl_PrimitiveIDIn & 1) == 0) {\n"
+               "    if (gl_PrimitiveIDIn == 0) {\n"
+               "      emit_vertex(0);\n" /* bottom right */
+               "    }\n"
+               "    emit_vertex(1);\n" /* top right */
+               "    emit_vertex(2);\n" /* bottom left */
+               "    emit_vertex(0);\n" /* bottom right */
+               "  } else {\n"
+               "    emit_vertex(2);\n" /* bottom left */
+               "    emit_vertex(1);\n" /* top left */
+               "    emit_vertex(0);\n" /* top right */
+               "  }\n"
+               "  EndPrimitive();\n";
+        break;
+    case PRIM_TYPE_TRIANGLE_FAN:
+        *gl_primitive_mode = GL_TRIANGLE_FAN;
+        if (polygon_mode == POLY_MODE_FILL) { return NULL; }
+        assert(polygon_mode == POLY_MODE_LINE);
+        layout_in = "layout(triangles) in;\n";
+        layout_out = "layout(line_strip, max_vertices = 4) out;\n";
+        body = "  if (gl_PrimitiveIDIn == 0) {\n"
+               "    emit_vertex(0);\n"
+               "  }\n"
+               "  emit_vertex(1);\n"
+               "  emit_vertex(2);\n"
+               "  emit_vertex(0);\n"
+               "  EndPrimitive();\n";
+        break;
+    case PRIM_TYPE_QUADS: /* drawn as 4-vertex lines_adjacency primitives */
+        *gl_primitive_mode = GL_LINES_ADJACENCY;
+        layout_in = "layout(lines_adjacency) in;\n";
+        if (polygon_mode == POLY_MODE_LINE) {
+            layout_out = "layout(line_strip, max_vertices = 5) out;\n";
+            body = "  emit_vertex(0);\n"
+                   "  emit_vertex(1);\n"
+                   "  emit_vertex(2);\n"
+                   "  emit_vertex(3);\n"
+                   "  emit_vertex(0);\n"
+                   "  EndPrimitive();\n";
+        } else if (polygon_mode == POLY_MODE_FILL) {
+            layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
+            body = "  emit_vertex(0);\n"
+                   "  emit_vertex(1);\n"
+                   "  emit_vertex(3);\n"
+                   "  emit_vertex(2);\n"
+                   "  EndPrimitive();\n";
+        } else {
+            assert(false);
+            return NULL;
+        }
+        break;
+    case PRIM_TYPE_QUAD_STRIP: /* odd input primitives emit nothing */
+        *gl_primitive_mode = GL_LINE_STRIP_ADJACENCY;
+        layout_in = "layout(lines_adjacency) in;\n";
+        if (polygon_mode == POLY_MODE_LINE) {
+            layout_out = "layout(line_strip, max_vertices = 5) out;\n";
+            body = "  if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
+                   "  if (gl_PrimitiveIDIn == 0) {\n"
+                   "    emit_vertex(0);\n"
+                   "  }\n"
+                   "  emit_vertex(1);\n"
+                   "  emit_vertex(3);\n"
+                   "  emit_vertex(2);\n"
+                   "  emit_vertex(0);\n"
+                   "  EndPrimitive();\n";
+        } else if (polygon_mode == POLY_MODE_FILL) {
+            layout_out = "layout(triangle_strip, max_vertices = 4) out;\n";
+            body = "  if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n"
+                   "  emit_vertex(0);\n"
+                   "  emit_vertex(1);\n"
+                   "  emit_vertex(2);\n"
+                   "  emit_vertex(3);\n"
+                   "  EndPrimitive();\n";
+        } else {
+            assert(false);
+            return NULL;
+        }
+        break;
+    case PRIM_TYPE_POLYGON: /* mapped onto native GL modes, no shader needed */
+        if (polygon_mode == POLY_MODE_LINE) {
+            *gl_primitive_mode = GL_LINE_LOOP;
+        } else if (polygon_mode == POLY_MODE_FILL) {
+            *gl_primitive_mode = GL_TRIANGLE_FAN;
+        } else {
+            assert(false);
+        }
+        return NULL;
+    default:
+        assert(false);
+        return NULL;
+    }
+
+    /* generate a geometry shader to support deprecated primitive types */
+    assert(layout_in);
+    assert(layout_out);
+    assert(body);
+    QString* s = qstring_from_str("#version 330\n"
+                                  "\n");
+    qstring_append(s, layout_in);
+    qstring_append(s, layout_out);
+    qstring_append(s, "\n"
+                      STRUCT_VERTEX_DATA
+                      "noperspective in VertexData v_vtx[];\n"
+                      "noperspective out VertexData g_vtx;\n"
+                      "\n"
+                      "void emit_vertex(int index) {\n"
+                      "  gl_Position = gl_in[index].gl_Position;\n"
+                      "  gl_PointSize = gl_in[index].gl_PointSize;\n"
+                      "  g_vtx = v_vtx[index];\n"
+                      "  EmitVertex();\n"
+                      "}\n"
+                      "\n"
+                      "void main() {\n");
+    qstring_append(s, body);
+    qstring_append(s, "}\n");
+
+    return s; /* new QString; generate_shaders() releases it with QDECREF */
+}
+
+static void append_skinning_code(QString* str, bool mix,
+                                 unsigned int count, const char* type,
+                                 const char* output, const char* input,
+                                 const char* matrix, const char* swizzle)
+{
+    /* Appends GLSL to str declaring `output` (GLSL type `type`) as `input`
+     * times `matrix`N, keeping only the `swizzle` components. */
+    int idx;
+
+    if (count == 0) {
+        /* Skinning disabled: a single transform by matrix 0 */
+        qstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n",
+                           type, output, input, matrix, swizzle);
+        return;
+    }
+
+    /* The weighted contributions are accumulated onto a zeroed output */
+    qstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type);
+    if (!mix) {
+        /* Individual matrices, each scaled by one component of weight */
+        for (idx = 0; idx < count; idx++) {
+            qstring_append_fmt(str, "%s += (%s * %s%d * weight.%c).%s;\n",
+                               output, input, matrix, idx, "xyzw"[idx],
+                               swizzle);
+        }
+        assert(false); /* FIXME: Untested */
+    } else if (count == 2) {
+        /* Tweening between matrix 1 and matrix 0 by weight.x */
+        qstring_append_fmt(str,
+                           "%s += mix((%s * %s1).%s,\n"
+                           "          (%s * %s0).%s, weight.x);\n",
+                           output, input, matrix, swizzle,
+                           input, matrix, swizzle);
+    } else {
+        assert(false); /* FIXME: Not sure how blend weights are calculated */
+    }
+}
+
+#define GLSL_C(idx) "c[" stringify(idx) "]" /* GLSL text: c[<idx>] */
+#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]" /* ltctxa[<idx>] */
+/* GLSL mat4(...) constructor text built from c[idx] .. c[idx+3] */
+#define GLSL_C_MAT4(idx) \
+    "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \
+            GLSL_C(idx+2) ", " GLSL_C(idx+3) ")"
+/* One GLSL "#define a b" line; b must itself be a string literal */
+#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n"
+
+static void generate_fixed_function(const ShaderState state,
+                                    QString *header, QString *body)
+{
+    int i, j;
+    /* Appends GLSL mimicking the fixed-function vertex pipeline: #defines
+     * and uniforms are added to header, main()-level statements to body. */
+    qstring_append(header,
+"#define position      v0\n"
+"#define weight        v1\n"
+"#define normal        v2.xyz\n"
+"#define diffuse       v3\n"
+"#define specular      v4\n"
+"#define fogCoord      v5.x\n"
+"#define pointSize     v6\n"
+"#define backDiffuse   v7\n"
+"#define backSpecular  v8\n"
+"#define texture0      v9\n"
+"#define texture1      v10\n"
+"#define texture2      v11\n"
+"#define texture3      v12\n"
+"#define reserved1     v13\n"
+"#define reserved2     v14\n"
+"#define reserved3     v15\n"
+"\n"
+"uniform vec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n"
+"uniform vec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n"
+"uniform vec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n"
+"\n"
+GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0))
+GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0))
+"\n"
+GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0))
+GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1))
+GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2))
+GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3))
+"\n"
+GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0))
+GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1))
+GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2))
+GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3))
+"\n"
+GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0))
+GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1))
+GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2))
+GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3))
+"\n"
+GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0))
+GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1))
+GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2))
+GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3))
+"\n"
+GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0))
+GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1))
+GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2))
+GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3))
+"\n"
+GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0))
+GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1))
+GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2))
+GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3))
+"\n"
+GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP))
+"\n"
+"#define lightAmbientColor(i) "
+    "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n"
+"#define lightDiffuseColor(i) "
+    "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n"
+"#define lightSpecularColor(i) "
+    "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n"
+"\n"
+"#define lightSpotFalloff(i) "
+    "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n"
+"#define lightSpotDirection(i) "
+    "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n"
+"\n"
+"#define lightLocalRange(i) "
+    "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n"
+"\n"
+GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz")
+"\n"
+"uniform mat4 invViewport;\n"
+"\n");
+
+    /* Skinning; defaults match SKINNING_OFF so they are never read unset */
+    unsigned int count = 0;
+    bool mix = false;
+    switch (state.skinning) {
+    case SKINNING_OFF:
+        mix = false; count = 0; break;
+    case SKINNING_1WEIGHTS:
+        mix = true; count = 2; break;
+    case SKINNING_2WEIGHTS:
+        mix = true; count = 3; break;
+    case SKINNING_3WEIGHTS:
+        mix = true; count = 4; break;
+    case SKINNING_2WEIGHTS2MATRICES:
+        mix = false; count = 2; break;
+    case SKINNING_3WEIGHTS3MATRICES:
+        mix = false; count = 3; break;
+    case SKINNING_4WEIGHTS4MATRICES:
+        mix = false; count = 4; break;
+    default:
+        assert(false); /* with NDEBUG the initial mix/count values are used */
+        break;
+    }
+    qstring_append_fmt(body, "/* Skinning mode %d */\n",
+                       state.skinning);
+
+    append_skinning_code(body, mix, count, "vec4",
+                         "tPosition", "position",
+                         "modelViewMat", "xyzw");
+    append_skinning_code(body, mix, count, "vec3",
+                         "tNormal", "vec4(normal, 0.0)",
+                         "invModelViewMat", "xyz");
+
+    /* Normalization */
+    if (state.normalization) {
+        qstring_append(body, "tNormal = normalize(tNormal);\n");
+    }
+
+    /* Texgen */
+    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        qstring_append_fmt(body, "/* Texgen for stage %d */\n",
+                           i);
+        /* Set each component individually */
+        /* FIXME: could be nicer if some channels share the same texgen */
+        for (j = 0; j < 4; j++) { /* j selects the channel: S, T, R, Q */
+            /* TODO: TexGen View Model missing! */
+            char c = "xyzw"[j];
+            char cSuffix = "STRQ"[j];
+            switch (state.texgen[i][j]) {
+            case TEXGEN_DISABLE:
+                qstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n",
+                                   i, c, i, c);
+                break;
+            case TEXGEN_EYE_LINEAR:
+                qstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n",
+                                   i, c, cSuffix, i);
+                break;
+            case TEXGEN_OBJECT_LINEAR:
+                qstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n",
+                                   i, c, cSuffix, i);
+                assert(false); /* Untested */
+                break;
+            case TEXGEN_SPHERE_MAP:
+                assert(j < 2);  /* Channels S,T only! (j is the channel) */
+                qstring_append(body, "{\n");
+                /* FIXME: u, r and m only have to be calculated once */
+                qstring_append(body, "  vec3 u = normalize(tPosition.xyz);\n");
+                //FIXME: tNormal before or after normalization? Always normalize?
+                qstring_append(body, "  vec3 r = reflect(u, tNormal);\n");
+
+                /* FIXME: This would consume 1 division fewer and *might* be
+                 *        faster than length:
+                 *   // [z=1/(2*x) => z=1/x*0.5]
+                 *   vec3 ro = r + vec3(0.0, 0.0, 1.0);
+                 *   float m = inversesqrt(dot(ro,ro))*0.5;
+                 */
+
+                qstring_append(body, "  float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n");
+                qstring_append_fmt(body, "  oT%d.%c = r.%c * invM + 0.5;\n",
+                                   i, c, c);
+                qstring_append(body, "}\n");
+                assert(false); /* Untested */
+                break;
+            case TEXGEN_REFLECTION_MAP:
+                assert(j < 3); /* Channels S,T,R only! (j is the channel) */
+                qstring_append(body, "{\n");
+                /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */
+                qstring_append(body, "  vec3 u = normalize(tPosition.xyz);\n");
+                qstring_append(body, "  vec3 r = reflect(u, tNormal);\n");
+                qstring_append_fmt(body, "  oT%d.%c = r.%c;\n",
+                                   i, c, c);
+                qstring_append(body, "}\n");
+                break;
+            case TEXGEN_NORMAL_MAP:
+                assert(j < 3); /* Channels S,T,R only! (j is the channel) */
+                qstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n",
+                                   i, c, c);
+                break;
+            default:
+                assert(false);
+                break;
+            }
+        }
+    }
+
+    /* Apply texture matrices */
+    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        if (state.texture_matrix_enable[i]) {
+            qstring_append_fmt(body,
+                               "oT%d = oT%d * texMat%d;\n",
+                               i, i, i);
+        }
+    }
+
+    /* Lighting */
+    if (state.lighting) {
+
+        //FIXME: Do 2 passes if we want 2 sided-lighting?
+        qstring_append(body, "oD0 = vec4(sceneAmbientColor, diffuse.a);\n");
+        qstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n");
+
+        for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+            if (state.light[i] == LIGHT_OFF) {
+                continue;
+            }
+
+            /* FIXME: It seems that we only have to handle the surface colors if
+             *        they are not part of the material [= vertex colors].
+             *        If they are material the cpu will premultiply light
+             *        colors
+             */
+
+            qstring_append_fmt(body, "/* Light %d */ {\n", i);
+
+            if (state.light[i] == LIGHT_LOCAL
+                    || state.light[i] == LIGHT_SPOT) {
+
+                qstring_append_fmt(header,
+                    "uniform vec3 lightLocalPosition%d;\n"
+                    "uniform vec3 lightLocalAttenuation%d;\n",
+                    i, i);
+                qstring_append_fmt(body,
+                    "  vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n"
+                    "  float d = length(VP);\n"
+//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights?
+                    "  VP = normalize(VP);\n"
+                    "  float attenuation = 1.0 / (lightLocalAttenuation%d.x\n"
+                    "                               + lightLocalAttenuation%d.y * d\n"
+                    "                               + lightLocalAttenuation%d.z * d * d);\n"
+                    "  vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */
+                    "  float nDotVP = max(0.0, dot(tNormal, VP));\n"
+                    "  float nDotHV = max(0.0, dot(tNormal, halfVector));\n",
+                    i, i, i, i);
+
+            }
+
+            switch(state.light[i]) {
+            case LIGHT_INFINITE:
+
+                /* lightLocalRange will be 1e+30 here */
+
+                qstring_append_fmt(header,
+                    "uniform vec3 lightInfiniteHalfVector%d;\n"
+                    "uniform vec3 lightInfiniteDirection%d;\n",
+                    i, i);
+                qstring_append_fmt(body,
+                    "  float attenuation = 1.0;\n"
+                    "  float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n"
+                    "  float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n",
+                    i, i);
+
+                /* FIXME: Do specular */
+
+                /* FIXME: tBackDiffuse */
+
+                break;
+            case LIGHT_LOCAL:
+                /* Everything done already */
+                break;
+            case LIGHT_SPOT:
+                assert(false);
+                /*FIXME: calculate falloff */
+                break;
+            default:
+                assert(false);
+                break;
+            }
+
+            qstring_append_fmt(body,
+                "  float pf;\n"
+                "  if (nDotVP == 0.0) {\n"
+                "    pf = 0.0;\n"
+                "  } else {\n"
+                "    pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n"
+                "  }\n"
+                "  vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n"
+                "  vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n"
+                "  vec3 lightSpecular = lightSpecularColor(%d) * pf;\n",
+                i, i, i);
+
+            qstring_append(body,
+                "  oD0.xyz += lightAmbient;\n");
+
+            qstring_append(body,
+                "  oD0.xyz += diffuse.xyz * lightDiffuse;\n");
+
+            qstring_append(body,
+                "  oD1.xyz += specular.xyz * lightSpecular;\n");
+
+            qstring_append(body, "}\n");
+        }
+    } else {
+        qstring_append(body, "  oD0 = diffuse;\n");
+        qstring_append(body, "  oD1 = specular;\n");
+    }
+    qstring_append(body, "  oB0 = backDiffuse;\n");
+    qstring_append(body, "  oB1 = backSpecular;\n");
+
+    /* Fog: only declares fogDistance here */
+    if (state.fog_enable) {
+
+        /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */
+        switch(state.foggen) {
+        case FOGGEN_SPEC_ALPHA:
+            /* FIXME: Do we have to clamp here? */
+            qstring_append(body, "  float fogDistance = clamp(specular.a, 0.0, 1.0);\n");
+            break;
+        case FOGGEN_RADIAL:
+            qstring_append(body, "  float fogDistance = length(tPosition.xyz);\n");
+            break;
+        case FOGGEN_PLANAR:
+        case FOGGEN_ABS_PLANAR:
+            qstring_append(body, "  float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n");
+            if (state.foggen == FOGGEN_ABS_PLANAR) {
+                qstring_append(body, "  fogDistance = abs(fogDistance);\n");
+            }
+            break;
+        case FOGGEN_FOG_X:
+            qstring_append(body, "  float fogDistance = fogCoord;\n");
+            break;
+        default:
+            assert(false);
+            break;
+        }
+
+    }
+
+    /* If skinning is off the composite matrix already includes the MV matrix */
+    if (state.skinning == SKINNING_OFF) {
+        qstring_append(body, "  tPosition = position;\n");
+    }
+
+    qstring_append(body,
+    "   oPos = invViewport * (tPosition * compositeMat);\n"
+    "   oPos.z = oPos.z * 2.0 - oPos.w;\n");
+
+    qstring_append(body, "  vtx.inv_w = 1.0 / oPos.w;\n");
+
+}
+
+static QString *generate_vertex_shader(const ShaderState state,
+                                       char vtx_prefix) /* names <prefix>_vtx */
+{ /* Builds the complete GLSL vertex shader (header + main) as a new QString */
+    int i;
+    QString *header = qstring_from_str(
+"#version 330\n"
+"\n"
+"uniform vec2 clipRange;\n"
+"uniform vec2 surfaceSize;\n"
+"\n"
+/* All constants in 1 array declaration */
+"uniform vec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n"
+"\n"
+"uniform vec4 fogColor;\n"
+"uniform float fogParam[2];\n"
+"\n"
+
+GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG))
+GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT))
+GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT))
+GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT))
+GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT))
+
+"\n"
+"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n"
+/* FIXME: NV_vertex_program says: "FOGC is the transformed vertex's fog
+ * coordinate. The register's first floating-point component is interpolated
+ * across the assembled primitive during rasterization and used as the fog
+ * distance to compute per-fragment the fog factor when fog is enabled.
+ * However, if both fog and vertex program mode are enabled, but the FOGC
+ * vertex result register is not written, the fog factor is overridden to
+ * 1.0. The register's other three components are ignored."
+ *
+ * That probably means it will read back as vec4(0.0, 0.0, 0.0, 1.0) but
+ * will be set to 1.0 AFTER the VP if it was never written?
+ * We should test on real hardware..
+ *
+ * We'll force 1.0 for oFog.x for now.
+ */
+"vec4 oFog = vec4(1.0,0.0,0.0,1.0);\n"
+"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n"
+"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n"
+"\n"
+STRUCT_VERTEX_DATA);
+
+    qstring_append_fmt(header, "noperspective out VertexData %c_vtx;\n",
+                       vtx_prefix);
+    qstring_append_fmt(header, "#define vtx %c_vtx\n",
+                       vtx_prefix);
+    qstring_append(header, "\n");
+    for(i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        qstring_append_fmt(header, "in vec4 v%d;\n", i);
+    }
+    qstring_append(header, "\n");
+
+    QString *body = qstring_from_str("void main() {\n");
+
+    if (state.fixed_function) {
+        generate_fixed_function(state, header, body);
+
+    } else if (state.vertex_program) {
+        vsh_translate(VSH_VERSION_XVS,
+                      (uint32_t*)state.program_data,
+                      state.program_length,
+                      state.z_perspective,
+                      header, body);
+    } else {
+        assert(false);
+    }
+
+
+    /* Fog: turns fogDistance into the fog factor written to oFog */
+
+    if (state.fog_enable) {
+
+        if (state.vertex_program) {
+            /* FIXME: Does foggen do something here? Let's do some tracking..
+             *
+             *   "RollerCoaster Tycoon" has
+             *      state.vertex_program = true; state.foggen == FOGGEN_PLANAR
+             *      but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z
+             */
+            qstring_append(body, "  float fogDistance = oFog.x;\n");
+        }
+
+        /* FIXME: Do this per pixel? */
+
+        switch (state.fog_mode) {
+        case FOG_MODE_LINEAR:
+        case FOG_MODE_LINEAR_ABS:
+
+            /* f = (end - d) / (end - start)
+             *    fogParam[1] = 1 / (end - start)
+             *    fogParam[0] = 1 + end * fogParam[1];
+             */
+
+            qstring_append(body, "  float fogFactor = fogParam[0] + fogDistance * fogParam[1];\n");
+            qstring_append(body, "  fogFactor -= 1.0;\n"); /* FIXME: WHHYYY?!! */
+            break;
+        case FOG_MODE_EXP:
+        case FOG_MODE_EXP_ABS:
+
+            /* f = 1 / (e^(d * density))
+             *    fogParam[1] = -density / (2 * ln(256))
+             *    fogParam[0] = 1.5
+             */
+
+            qstring_append(body, "  float fogFactor = fogParam[0] + exp2(fogDistance * fogParam[1] * 16.0);\n");
+            qstring_append(body, "  fogFactor -= 1.5;\n"); /* FIXME: WHHYYY?!! */
+            break;
+        case FOG_MODE_EXP2:
+        case FOG_MODE_EXP2_ABS:
+
+            /* f = 1 / (e^((d * density)^2))
+             *    fogParam[1] = -density / (2 * sqrt(ln(256)))
+             *    fogParam[0] = 1.5
+             */
+
+            qstring_append(body, "  float fogFactor = fogParam[0] + exp2(-fogDistance * fogDistance * fogParam[1] * fogParam[1] * 32.0);\n");
+            qstring_append(body, "  fogFactor -= 1.5;\n"); /* FIXME: WHHYYY?!! */
+            break;
+        default:
+            assert(false);
+            break;
+        }
+        /* Calculate absolute for the modes which need it */
+        switch (state.fog_mode) {
+        case FOG_MODE_LINEAR_ABS:
+        case FOG_MODE_EXP_ABS:
+        case FOG_MODE_EXP2_ABS:
+            qstring_append(body, "  fogFactor = abs(fogFactor);\n");
+            break;
+        default:
+            break;
+        }
+        /* FIXME: What about fog alpha?! */
+        qstring_append(body, "  oFog.xyzw = vec4(fogFactor);\n");
+    } else {
+        /* FIXME: Is the fog still calculated / passed somehow?!
+         */
+        qstring_append(body, "  oFog.xyzw = vec4(1.0);\n");
+    }
+
+    /* Set outputs; colors are clamped, all are multiplied by vtx.inv_w */
+    qstring_append(body, "\n"
+                      "  vtx.D0 = clamp(oD0, 0.0, 1.0) * vtx.inv_w;\n"
+                      "  vtx.D1 = clamp(oD1, 0.0, 1.0) * vtx.inv_w;\n"
+                      "  vtx.B0 = clamp(oB0, 0.0, 1.0) * vtx.inv_w;\n"
+                      "  vtx.B1 = clamp(oB1, 0.0, 1.0) * vtx.inv_w;\n"
+                      "  vtx.Fog = oFog.x * vtx.inv_w;\n"
+                      "  vtx.T0 = oT0 * vtx.inv_w;\n"
+                      "  vtx.T1 = oT1 * vtx.inv_w;\n"
+                      "  vtx.T2 = oT2 * vtx.inv_w;\n"
+                      "  vtx.T3 = oT3 * vtx.inv_w;\n"
+                      "  gl_Position = oPos;\n"
+                      "  gl_PointSize = oPts.x;\n"
+                      "\n"
+                      "}\n");
+
+
+    /* Return combined header + source; body is released, header returned */
+    qstring_append(header, qstring_get_str(body));
+    QDECREF(body);
+    return header;
+
+}
+
+static GLuint create_gl_shader(GLenum gl_shader_type,
+                               const char *code,
+                               const char *name)
+{
+    /* Compiles `code` as a gl_shader_type shader and returns its GL handle;
+     * on failure prints the info log (labelled `name`) and aborts. */
+    GLint compiled = 0;
+
+    NV2A_GL_DGROUP_BEGIN("Creating new %s", name);
+    NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code);
+
+    GLuint shader = glCreateShader(gl_shader_type);
+    glShaderSource(shader, 1, &code, 0);
+    glCompileShader(shader);
+
+    /* Check it compiled */
+    glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled);
+    if (!compiled) {
+        /* The reported length may be 0 (no log); always keep room for the
+         * terminator so `log` is a valid, non-NULL string for fprintf. */
+        GLint log_length = 0;
+        glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+        if (log_length < 1) { log_length = 1; }
+        GLchar *log = g_malloc(log_length * sizeof(GLchar));
+        log[0] = '\0';
+        glGetShaderInfoLog(shader, log_length, NULL, log);
+        fprintf(stderr, "nv2a: %s compilation failed: %s\n", name, log);
+        g_free(log);
+        NV2A_GL_DGROUP_END();
+        abort();
+    }
+    NV2A_GL_DGROUP_END();
+    return shader;
+}
+
+/* Build, link and validate the GL program for `state`; aborts on any failure.
+ * Returns a g_malloc0()ed ShaderBinding with the program, its primitive mode
+ * and its uniform locations. The program is left current (glUseProgram). */
+ShaderBinding* generate_shaders(const ShaderState state)
+{
+    int i, j;
+    char tmp[64];
+
+    char vtx_prefix;
+    GLuint program = glCreateProgram();
+
+    /* Create an optional geometry shader and find the GL primitive type */
+
+    GLenum gl_primitive_mode;
+    QString* geometry_shader_code =
+        generate_geometry_shader(state.polygon_front_mode,
+                                 state.polygon_back_mode,
+                                 state.primitive_mode,
+                                 &gl_primitive_mode);
+    if (geometry_shader_code) {
+        const char* geometry_shader_code_str =
+             qstring_get_str(geometry_shader_code);
+
+        GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER,
+                                                  geometry_shader_code_str,
+                                                  "geometry shader");
+        glAttachShader(program, geometry_shader);
+
+        QDECREF(geometry_shader_code);
+
+        vtx_prefix = 'v';
+    } else {
+        vtx_prefix = 'g';
+    }
+
+    /* create the vertex shader */
+
+    QString *vertex_shader_code = generate_vertex_shader(state, vtx_prefix);
+    GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER,
+                                            qstring_get_str(vertex_shader_code),
+                                            "vertex shader");
+    glAttachShader(program, vertex_shader);
+    QDECREF(vertex_shader_code);
+
+    /* Bind vertex attribute i to the shader input named "v<i>" */
+    for(i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) {
+        snprintf(tmp, sizeof(tmp), "v%d", i);
+        glBindAttribLocation(program, i, tmp);
+    }
+
+    /* generate a fragment shader from register combiners */
+
+    QString *fragment_shader_code = psh_translate(state.psh);
+
+    const char *fragment_shader_code_str = qstring_get_str(fragment_shader_code);
+
+    GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER,
+                                              fragment_shader_code_str,
+                                              "fragment shader");
+    glAttachShader(program, fragment_shader);
+
+    QDECREF(fragment_shader_code);
+
+    /* link the program */
+    glLinkProgram(program);
+    GLint linked = 0;
+    glGetProgramiv(program, GL_LINK_STATUS, &linked);
+    if(!linked) {
+        GLchar log[2048];
+        glGetProgramInfoLog(program, 2048, NULL, log);
+        fprintf(stderr, "nv2a: shader linking failed: %s\n", log);
+        abort();
+    }
+
+    glUseProgram(program); /* glUniform1i() below acts on the current program */
+
+    /* point each sampler uniform "texSamp<i>" at texture unit i */
+    for (i = 0; i < NV2A_MAX_TEXTURES; i++) {
+        char samplerName[16];
+        snprintf(samplerName, sizeof(samplerName), "texSamp%d", i);
+        GLint texSampLoc = glGetUniformLocation(program, samplerName);
+        if (texSampLoc >= 0) { /* negative: no such active uniform */
+            glUniform1i(texSampLoc, i);
+        }
+    }
+
+    /* validate the program */
+    glValidateProgram(program);
+    GLint valid = 0;
+    glGetProgramiv(program, GL_VALIDATE_STATUS, &valid);
+    if (!valid) {
+        GLchar log[1024];
+        glGetProgramInfoLog(program, 1024, NULL, log);
+        fprintf(stderr, "nv2a: shader validation failed: %s\n", log);
+        abort();
+    }
+
+    ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding));
+    ret->gl_program = program;
+    ret->gl_primitive_mode = gl_primitive_mode;
+
+    /* lookup fragment shader uniforms: "c_<i>_<j>" for i in 0..8, j in 0..1 */
+    for (i=0; i<=8; i++) {
+        for (j=0; j<2; j++) {
+            snprintf(tmp, sizeof(tmp), "c_%d_%d", i, j);
+            ret->psh_constant_loc[i][j] = glGetUniformLocation(program, tmp);
+        }
+    }
+    ret->alpha_ref_loc = glGetUniformLocation(program, "alphaRef");
+    for (i = 1; i < NV2A_MAX_TEXTURES; i++) { /* slot 0 is skipped, stays 0 */
+        snprintf(tmp, sizeof(tmp), "bumpMat%d", i);
+        ret->bump_mat_loc[i] = glGetUniformLocation(program, tmp);
+        snprintf(tmp, sizeof(tmp), "bumpScale%d", i);
+        ret->bump_scale_loc[i] = glGetUniformLocation(program, tmp);
+        snprintf(tmp, sizeof(tmp), "bumpOffset%d", i);
+        ret->bump_offset_loc[i] = glGetUniformLocation(program, tmp);
+    }
+
+    /* lookup vertex shader uniforms */
+    for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) {
+        snprintf(tmp, sizeof(tmp), "c[%d]", i);
+        ret->vsh_constant_loc[i] = glGetUniformLocation(program, tmp);
+    }
+    ret->surface_size_loc = glGetUniformLocation(program, "surfaceSize");
+    ret->clip_range_loc = glGetUniformLocation(program, "clipRange");
+    ret->fog_color_loc = glGetUniformLocation(program, "fogColor");
+    ret->fog_param_loc[0] = glGetUniformLocation(program, "fogParam[0]");
+    ret->fog_param_loc[1] = glGetUniformLocation(program, "fogParam[1]");
+
+    ret->inv_viewport_loc = glGetUniformLocation(program, "invViewport");
+    for (i = 0; i < NV2A_LTCTXA_COUNT; i++) {
+        snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i);
+        ret->ltctxa_loc[i] = glGetUniformLocation(program, tmp);
+    }
+    for (i = 0; i < NV2A_LTCTXB_COUNT; i++) {
+        snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i);
+        ret->ltctxb_loc[i] = glGetUniformLocation(program, tmp);
+    }
+    for (i = 0; i < NV2A_LTC1_COUNT; i++) {
+        snprintf(tmp, sizeof(tmp), "ltc1[%d]", i);
+        ret->ltc1_loc[i] = glGetUniformLocation(program, tmp);
+    }
+    for (i = 0; i < NV2A_MAX_LIGHTS; i++) {
+        snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i);
+        ret->light_infinite_half_vector_loc[i] = glGetUniformLocation(program, tmp);
+        snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i);
+        ret->light_infinite_direction_loc[i] = glGetUniformLocation(program, tmp);
+
+        snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i);
+        ret->light_local_position_loc[i] = glGetUniformLocation(program, tmp);
+        snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i);
+        ret->light_local_attenuation_loc[i] = glGetUniformLocation(program, tmp);
+    }
+
+    return ret;
+}
diff --git a/hw/xbox/nv2a_shaders.h b/hw/xbox/nv2a_shaders.h
new file mode 100644
index 0000000000..b2a41aaf31
--- /dev/null
+++ b/hw/xbox/nv2a_shaders.h
@@ -0,0 +1,115 @@
+/*
+ * QEMU Geforce NV2A shader generator
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NV2A_SHADERS_H
+#define HW_NV2A_SHADERS_H
+
+#include "qapi/qmp/qstring.h"
+#include "gl/gloffscreen.h"
+
+#include "nv2a_vsh.h"
+#include "nv2a_psh.h"
+#include "nv2a_int.h"
+
+
+enum ShaderPrimitiveMode { /* type of ShaderState.primitive_mode */
+    PRIM_TYPE_NONE,
+    PRIM_TYPE_POINTS,
+    PRIM_TYPE_LINES,
+    PRIM_TYPE_LINE_LOOP,
+    PRIM_TYPE_LINE_STRIP,
+    PRIM_TYPE_TRIANGLES,
+    PRIM_TYPE_TRIANGLE_STRIP,
+    PRIM_TYPE_TRIANGLE_FAN,
+    PRIM_TYPE_QUADS,
+    PRIM_TYPE_QUAD_STRIP,
+    PRIM_TYPE_POLYGON,
+};
+
+enum ShaderPolygonMode { /* type of ShaderState.polygon_{front,back}_mode */
+    POLY_MODE_FILL,
+    POLY_MODE_POINT,
+    POLY_MODE_LINE,
+};
+
+typedef struct ShaderState { /* passed by value to generate_shaders() */
+    PshState psh; /* input to psh_translate() for the fragment shader */
+
+    bool texture_matrix_enable[4];
+    enum VshTexgen texgen[4][4];
+
+    bool fog_enable;
+    enum VshFoggen foggen;
+    enum VshFogMode fog_mode;
+
+    enum VshSkinning skinning;
+
+    bool normalization;
+
+    bool lighting;
+    enum VshLight light[NV2A_MAX_LIGHTS];
+
+    bool fixed_function;
+
+    /* vertex program */
+    bool vertex_program;
+    uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
+    int program_length; /* NOTE(review): presumably counted in slots - confirm */
+    bool z_perspective;
+
+    /* primitive format, forwarded to generate_geometry_shader() */
+    enum ShaderPolygonMode polygon_front_mode;
+    enum ShaderPolygonMode polygon_back_mode;
+    enum ShaderPrimitiveMode primitive_mode;
+} ShaderState;
+
+typedef struct ShaderBinding { /* built by generate_shaders(); *_loc = uniform locations */
+    GLuint gl_program;
+    GLenum gl_primitive_mode;
+
+    GLint psh_constant_loc[9][2]; /* "c_<i>_<j>" */
+    GLint alpha_ref_loc; /* "alphaRef" */
+
+    GLint bump_mat_loc[NV2A_MAX_TEXTURES];    /* "bumpMat<i>", i >= 1 only */
+    GLint bump_scale_loc[NV2A_MAX_TEXTURES];  /* "bumpScale<i>", i >= 1 only */
+    GLint bump_offset_loc[NV2A_MAX_TEXTURES]; /* "bumpOffset<i>", i >= 1 only */
+
+    GLint surface_size_loc; /* "surfaceSize" */
+    GLint clip_range_loc; /* "clipRange" */
+
+    GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; /* "c[<i>]" */
+
+    GLint inv_viewport_loc; /* "invViewport" */
+    GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; /* "ltctxa[<i>]" */
+    GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; /* "ltctxb[<i>]" */
+    GLint ltc1_loc[NV2A_LTC1_COUNT]; /* "ltc1[<i>]" */
+
+    GLint fog_color_loc; /* "fogColor" */
+    GLint fog_param_loc[2]; /* "fogParam[0]", "fogParam[1]" */
+    GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; /* "lightInfiniteHalfVector<i>" */
+    GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS]; /* "lightInfiniteDirection<i>" */
+    GLint light_local_position_loc[NV2A_MAX_LIGHTS]; /* "lightLocalPosition<i>" */
+    GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS]; /* "lightLocalAttenuation<i>" */
+
+} ShaderBinding;
+
+ShaderBinding* generate_shaders(const ShaderState state);
+
+#endif
diff --git a/hw/xbox/nv2a_shaders_common.h b/hw/xbox/nv2a_shaders_common.h
new file mode 100644
index 0000000000..c8e1a962ff
--- /dev/null
+++ b/hw/xbox/nv2a_shaders_common.h
@@ -0,0 +1,37 @@
+/*
+ * QEMU Geforce NV2A shader common definitions
+ *
+ * Copyright (c) 2015 espes
+ * Copyright (c) 2015 Jannik Vogel
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NV2A_SHADERS_COMMON_H
+#define HW_NV2A_SHADERS_COMMON_H
+/* GLSL text of struct VertexData; members match the generated "vtx.*" uses. */
+#define STRUCT_VERTEX_DATA "struct VertexData {\n" \
+                           "  float inv_w;\n" \
+                           "  vec4 D0;\n" \
+                           "  vec4 D1;\n" \
+                           "  vec4 B0;\n" \
+                           "  vec4 B1;\n" \
+                           "  float Fog;\n" \
+                           "  vec4 T0;\n" \
+                           "  vec4 T1;\n" \
+                           "  vec4 T2;\n" \
+                           "  vec4 T3;\n" \
+                           "};\n"
+
+#endif
diff --git a/hw/xbox/nv2a_vsh.c b/hw/xbox/nv2a_vsh.c
new file mode 100644
index 0000000000..bf3f5f3bf3
--- /dev/null
+++ b/hw/xbox/nv2a_vsh.c
@@ -0,0 +1,768 @@
+/*
+ * QEMU Geforce NV2A vertex shader translation
+ *
+ * Copyright (c) 2014 Jannik Vogel
+ * Copyright (c) 2012 espes
+ *
+ * Based on:
+ * Cxbx, VertexShader.cpp
+ * Copyright (c) 2004 Aaron Robinson <caustik@caustik.com>
+ *                    Kingofc <kingofc@freenet.de>
+ * Dxbx, uPushBuffer.pas
+ * Copyright (c) 2007 Shadow_tj, PatrickvL
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "hw/xbox/nv2a_shaders_common.h"
+#include "hw/xbox/nv2a_vsh.h"
+
+#define VSH_D3DSCM_CORRECTION 96
+
+
+typedef enum { /* operand source; decode_token() reads it from FLD_x_MUX */
+    PARAM_UNKNOWN = 0,
+    PARAM_R,
+    PARAM_V,
+    PARAM_C
+} VshParameterType;
+
+typedef enum { /* compared against FLD_OUT_ORB in decode_opcode() */
+    OUTPUT_C = 0,
+    OUTPUT_O
+} VshOutputType;
+
+typedef enum { /* compared against FLD_OUT_MUX in decode_opcode() */
+    OMUX_MAC = 0,
+    OMUX_ILU
+} VshOutputMux;
+
+typedef enum { /* FLD_ILU value; indexes ilu_opcode[] and ilu_force_scalar[] */
+    ILU_NOP = 0,
+    ILU_MOV,
+    ILU_RCP,
+    ILU_RCC,
+    ILU_RSQ,
+    ILU_EXP,
+    ILU_LOG,
+    ILU_LIT
+} VshILU;
+
+typedef enum { /* FLD_MAC value; indexes mac_opcode[] and mac_opcode_params[] */
+    MAC_NOP,
+    MAC_MOV,
+    MAC_MUL,
+    MAC_ADD,
+    MAC_MAD,
+    MAC_DP3,
+    MAC_DPH,
+    MAC_DP4,
+    MAC_DST,
+    MAC_MIN,
+    MAC_MAX,
+    MAC_SLT,
+    MAC_SGE,
+    MAC_ARL
+} VshMAC;
+
+typedef enum { /* 2-bit component selector; indexes "xyzw" in decode_swizzle() */
+    SWIZZLE_X = 0,
+    SWIZZLE_Y,
+    SWIZZLE_Z,
+    SWIZZLE_W
+} VshSwizzle;
+
+
+typedef struct VshFieldMapping { /* where one instruction field is stored */
+    VshFieldName field_name;
+    uint8_t subtoken;   /* index of the dword within the instruction token */
+    uint8_t start_bit;  /* lowest bit of the field within that dword */
+    uint8_t bit_length; /* width of the field in bits */
+} VshFieldMapping;
+
+static const VshFieldMapping field_mapping[] = { /* indexed by VshFieldName */
+    // Field Name         DWORD BitPos BitSize
+    {  FLD_ILU,              1,   25,     3 },
+    {  FLD_MAC,              1,   21,     4 },
+    {  FLD_CONST,            1,   13,     8 },
+    {  FLD_V,                1,    9,     4 },
+    // INPUT A
+    {  FLD_A_NEG,            1,    8,     1 },
+    {  FLD_A_SWZ_X,          1,    6,     2 },
+    {  FLD_A_SWZ_Y,          1,    4,     2 },
+    {  FLD_A_SWZ_Z,          1,    2,     2 },
+    {  FLD_A_SWZ_W,          1,    0,     2 },
+    {  FLD_A_R,              2,   28,     4 },
+    {  FLD_A_MUX,            2,   26,     2 },
+    // INPUT B
+    {  FLD_B_NEG,            2,   25,     1 },
+    {  FLD_B_SWZ_X,          2,   23,     2 },
+    {  FLD_B_SWZ_Y,          2,   21,     2 },
+    {  FLD_B_SWZ_Z,          2,   19,     2 },
+    {  FLD_B_SWZ_W,          2,   17,     2 },
+    {  FLD_B_R,              2,   13,     4 },
+    {  FLD_B_MUX,            2,   11,     2 },
+    // INPUT C
+    {  FLD_C_NEG,            2,   10,     1 },
+    {  FLD_C_SWZ_X,          2,    8,     2 },
+    {  FLD_C_SWZ_Y,          2,    6,     2 },
+    {  FLD_C_SWZ_Z,          2,    4,     2 },
+    {  FLD_C_SWZ_W,          2,    2,     2 },
+    {  FLD_C_R_HIGH,         2,    0,     2 },
+    {  FLD_C_R_LOW,          3,   30,     2 },
+    {  FLD_C_MUX,            3,   28,     2 },
+    // Output
+    {  FLD_OUT_MAC_MASK,     3,   24,     4 },
+    {  FLD_OUT_R,            3,   20,     4 },
+    {  FLD_OUT_ILU_MASK,     3,   16,     4 },
+    {  FLD_OUT_O_MASK,       3,   12,     4 },
+    {  FLD_OUT_ORB,          3,   11,     1 },
+    {  FLD_OUT_ADDRESS,      3,    3,     8 },
+    {  FLD_OUT_MUX,          3,    2,     1 },
+    // Other
+    {  FLD_A0X,              3,    1,     1 },
+    {  FLD_FINAL,            3,    0,     1 }
+};
+
+
+typedef struct VshOpcodeParams { /* which of inputs A, B and C an opcode reads */
+    bool A;
+    bool B;
+    bool C;
+} VshOpcodeParams;
+
+static const VshOpcodeParams ilu_opcode_params[] = { /* not consulted by decode_token() */
+    /* ILU OP       ParamA ParamB ParamC */
+    /* ILU_NOP */ { false, false, false }, // Dxbx note : Unused
+    /* ILU_MOV */ { false, false, true  },
+    /* ILU_RCP */ { false, false, true  },
+    /* ILU_RCC */ { false, false, true  },
+    /* ILU_RSQ */ { false, false, true  },
+    /* ILU_EXP */ { false, false, true  },
+    /* ILU_LOG */ { false, false, true  },
+    /* ILU_LIT */ { false, false, true  },
+};
+
+static const VshOpcodeParams mac_opcode_params[] = { /* indexed by VshMAC in decode_token() */
+    /* MAC OP      ParamA  ParamB ParamC */
+    /* MAC_NOP */ { false, false, false }, // Dxbx note : Unused
+    /* MAC_MOV */ { true,  false, false },
+    /* MAC_MUL */ { true,  true,  false },
+    /* MAC_ADD */ { true,  false, true  }, // second ADD operand is input C, not B
+    /* MAC_MAD */ { true,  true,  true  },
+    /* MAC_DP3 */ { true,  true,  false },
+    /* MAC_DPH */ { true,  true,  false },
+    /* MAC_DP4 */ { true,  true,  false },
+    /* MAC_DST */ { true,  true,  false },
+    /* MAC_MIN */ { true,  true,  false },
+    /* MAC_MAX */ { true,  true,  false },
+    /* MAC_SLT */ { true,  true,  false },
+    /* MAC_SGE */ { true,  true,  false },
+    /* MAC_ARL */ { true,  false, false },
+};
+
+
+static const char* mask_str[] = { /* the ',' makes the mask its own macro argument */
+            // xyzw xyzw
+    ",",     // 0000 ____
+    ",w",   // 0001 ___w
+    ",z",   // 0010 __z_
+    ",zw",  // 0011 __zw
+    ",y",   // 0100 _y__
+    ",yw",  // 0101 _y_w
+    ",yz",  // 0110 _yz_
+    ",yzw", // 0111 _yzw
+    ",x",   // 1000 x___
+    ",xw",  // 1001 x__w
+    ",xz",  // 1010 x_z_
+    ",xzw", // 1011 x_zw
+    ",xy",  // 1100 xy__
+    ",xyw", // 1101 xy_w
+    ",xyz", // 1110 xyz_
+    ",xyzw" // 1111 xyzw
+};
+
+/* Indexed by VshMAC. Emitted verbatim as upper-case macro names (vsh_header). */
+static const char* mac_opcode[] = {
+    "NOP",
+    "MOV",
+    "MUL",
+    "ADD",
+    "MAD",
+    "DP3",
+    "DPH",
+    "DP4",
+    "DST",
+    "MIN",
+    "MAX",
+    "SLT",
+    "SGE",
+    "ARL A0.x", // Dxbx note : Alias for "mov a0.x"; matched by strcmp() in decode_opcode()
+};
+
+static const char* ilu_opcode[] = { /* indexed by VshILU; vsh_header macro names */
+    "NOP",
+    "MOV",
+    "RCP",
+    "RCC",
+    "RSQ",
+    "EXP",
+    "LOG",
+    "LIT",
+};
+
+static bool ilu_force_scalar[] = { /* indexed by VshILU; read by decode_swizzle() */
+    false, /* ILU_NOP */
+    false, /* ILU_MOV */
+    true,  /* ILU_RCP */
+    true,  /* ILU_RCC */
+    true,  /* ILU_RSQ */
+    true,  /* ILU_EXP */
+    true,  /* ILU_LOG */
+    false, /* ILU_LIT */
+};
+
+static const char* out_reg_name[] = { /* indexed by FLD_OUT_ADDRESS & 0xF */
+    "oPos",  /*  0 */
+    "???",   /*  1 */
+    "???",   /*  2 */
+    "oD0",   /*  3 */
+    "oD1",   /*  4 */
+    "oFog",  /*  5 */
+    "oPts",  /*  6 */
+    "oB0",   /*  7 */
+    "oB1",   /*  8 */
+    "oT0",   /*  9 */
+    "oT1",   /* 10 */
+    "oT2",   /* 11 */
+    "oT3",   /* 12 */
+    "???",   /* 13 */
+    "???",   /* 14 */
+    "A0.x",  /* 15 */
+};
+
+
+
+/* Extract `bit_length` bits, starting at bit `start_bit`, from dword
+ * `subtoken` of the instruction token. */
+static int vsh_get_from_token(const uint32_t *shader_token, uint8_t subtoken,
+                              uint8_t start_bit, uint8_t bit_length)
+{
+    const uint32_t keep = ~(0xFFFFFFFF << bit_length);
+    return (shader_token[subtoken] >> start_bit) & keep;
+}
+
+/* Look up where `field_name` lives via field_mapping[] and return its bits. */
+uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name)
+{
+    const VshFieldMapping *m = &field_mapping[field_name];
+    int bits = vsh_get_from_token(shader_token, m->subtoken,
+                                  m->start_bit, m->bit_length);
+    return (uint8_t)bits;
+}
+
+
+/* Rebase a constant index as (bits 5..7 - 3) * 32 + bits 0..4, then add
+ * VSH_D3DSCM_CORRECTION; for inputs 0..255 this returns the input itself. */
+static int16_t convert_c_register(const int16_t c_reg)
+{
+    const int16_t bank = ((c_reg >> 5) & 7) - 3;
+    const int16_t offset = c_reg & 31;
+    return bank * 32 + offset + VSH_D3DSCM_CORRECTION; /* FIXME: = c_reg?! */
+}
+
+
+
+/* Render the four 2-bit selectors starting at `swizzle_field` as a GLSL
+ * swizzle suffix. The identity swizzle yields "" and a repeated tail is
+ * dropped (".xyyy" -> ".xy"). Returns a new QString owned by the caller. */
+static QString* decode_swizzle(const uint32_t *shader_token,
+                               VshFieldName swizzle_field)
+{
+    static const char comp_name[] = "xyzw";
+    VshSwizzle sel[4];
+    int i, count;
+
+    /* some microcode instructions force a scalar value */
+    if (swizzle_field == FLD_C_SWZ_X
+        && ilu_force_scalar[vsh_get_field(shader_token, FLD_ILU)]) {
+        sel[0] = sel[1] = sel[2] = sel[3] =
+            vsh_get_field(shader_token, swizzle_field);
+    } else {
+        for (i = 0; i < 4; i++) {
+            sel[i] = vsh_get_field(shader_token, swizzle_field + i);
+        }
+    }
+
+    if (sel[0] == SWIZZLE_X && sel[1] == SWIZZLE_Y
+        && sel[2] == SWIZZLE_Z && sel[3] == SWIZZLE_W) {
+        /* Don't print the swizzle if it's .xyzw */
+        return qstring_from_str("");
+    }
+
+    /* Drop trailing components that merely repeat their predecessor */
+    for (count = 4; count > 1 && sel[count - 1] == sel[count - 2]; count--) {
+    }
+
+    char text[6] = { '.' };
+    for (i = 0; i < count; i++) {
+        text[1 + i] = comp_name[sel[i]];
+    }
+    return qstring_from_str(text);
+}
+
+/* Decode one instruction operand (input A, B or C) into GLSL source text.
+ *
+ * param     - operand source: PARAM_R, PARAM_V or PARAM_C
+ * neg_field - the operand's NEG field; its swizzle fields follow it directly
+ * reg_num   - R-register number, only used for PARAM_R
+ *
+ * Returns a new QString that the caller must QDECREF. */
+static QString* decode_opcode_input(const uint32_t *shader_token,
+                                    VshParameterType param,
+                                    VshFieldName neg_field,
+                                    int reg_num)
+{
+    QString *ret_str = qstring_new();
+    char tmp[40] = ""; /* keeps the append below defined if assert() is a no-op */
+
+    if (vsh_get_field(shader_token, neg_field) > 0) {
+        qstring_append_chr(ret_str, '-');
+    }
+
+    /* PARAM_R uses the supplied reg_num, but the other two need to be
+     * determined */
+    switch (param) {
+    case PARAM_R:
+        snprintf(tmp, sizeof(tmp), "R%d", reg_num);
+        break;
+    case PARAM_V:
+        reg_num = vsh_get_field(shader_token, FLD_V);
+        snprintf(tmp, sizeof(tmp), "v%d", reg_num);
+        break;
+    case PARAM_C:
+        reg_num = convert_c_register(vsh_get_field(shader_token, FLD_CONST));
+        if (vsh_get_field(shader_token, FLD_A0X) > 0) {
+            /* FIXME: does this really require the "correction" done in
+             * convert_c_register?! */
+            snprintf(tmp, sizeof(tmp), "c[A0+%d]", reg_num);
+        } else {
+            snprintf(tmp, sizeof(tmp), "c[%d]", reg_num);
+        }
+        break;
+    default:
+        fprintf(stderr, "Unknown vs param: 0x%x\n", param);
+        assert(false);
+        break;
+    }
+    qstring_append(ret_str, tmp);
+
+    /* swizzle bits are next to the neg bit */
+    QString *swizzle_str = decode_swizzle(shader_token, neg_field+1);
+    qstring_append(ret_str, qstring_get_str(swizzle_str));
+    QDECREF(swizzle_str);
+    return ret_str;
+}
+
+
+/* Emit the GLSL macro call(s) for one half (MAC or ILU) of an instruction.
+ * The first statement targets temporary R<n> (or A0 for ARL); the second,
+ * emitted only when FLD_OUT_MUX selects this half, targets an o/c register.
+ *
+ * out_mux - which half `opcode` belongs to (OMUX_MAC or OMUX_ILU)
+ * mask    - write mask for the temporary-register destination
+ * opcode  - macro name taken from mac_opcode[] / ilu_opcode[]
+ * inputs  - already formatted operand list, each entry preceded by ", "
+ *
+ * Returns a new QString (possibly empty); the caller must QDECREF it. */
+static QString* decode_opcode(const uint32_t *shader_token,
+                              VshOutputMux out_mux,
+                              uint32_t mask,
+                              const char *opcode,
+                              const char *inputs)
+{
+    QString *ret = qstring_new();
+    int reg_num = vsh_get_field(shader_token, FLD_OUT_R);
+    const bool has_ilu = vsh_get_field(shader_token, FLD_ILU) != ILU_NOP;
+    const bool has_mac = vsh_get_field(shader_token, FLD_MAC) != MAC_NOP;
+
+    /* Test for paired opcodes (in other words : Are both <> NOP?) */
+    if (out_mux == OMUX_MAC && has_ilu && reg_num == 1) {
+        /* Ignore paired MAC opcodes that write to R1 */
+        mask = 0;
+    } else if (out_mux == OMUX_ILU && has_mac) {
+        /* Paired ILU opcodes can only write to R1 */
+        reg_num = 1;
+    }
+
+    if (strcmp(opcode, mac_opcode[MAC_ARL]) == 0) {
+        qstring_append_fmt(ret, "  ARL(A0%s);\n", inputs);
+    } else if (mask > 0) {
+        qstring_append_fmt(ret, "  %s(R%d%s%s);\n",
+                           opcode, reg_num, mask_str[mask], inputs);
+    }
+
+    /* See if we must add a muxed opcode too; only if it's not masked away */
+    uint8_t o_mask = vsh_get_field(shader_token, FLD_OUT_O_MASK);
+    if (vsh_get_field(shader_token, FLD_OUT_MUX) != out_mux || o_mask == 0) {
+        return ret;
+    }
+
+    uint8_t out_addr = vsh_get_field(shader_token, FLD_OUT_ADDRESS);
+    if (vsh_get_field(shader_token, FLD_OUT_ORB) == OUTPUT_C) {
+        /* TODO : Emulate writeable const registers */
+        qstring_append_fmt(ret, "  %s(c%d%s%s);\n", opcode,
+                           (int)convert_c_register(out_addr),
+                           mask_str[o_mask], inputs);
+    } else {
+        qstring_append_fmt(ret, "  %s(%s%s%s);\n", opcode,
+                           out_reg_name[out_addr & 0xF],
+                           mask_str[o_mask], inputs);
+    }
+
+    return ret;
+}
+
+
+/* Translate one microcode instruction into GLSL macro invocations: the MAC
+ * half first (if any), then the ILU half (if any). Returns a new QString
+ * that the caller must QDECREF. */
+static QString* decode_token(const uint32_t *shader_token)
+{
+    QString *ret;
+    /* Since it's potentially used twice, decode input C once: */
+    QString *input_c =
+        decode_opcode_input(shader_token,
+                            vsh_get_field(shader_token, FLD_C_MUX),
+                            FLD_C_NEG,
+                            (vsh_get_field(shader_token, FLD_C_R_HIGH) << 2)
+                                | vsh_get_field(shader_token, FLD_C_R_LOW));
+
+    /* See what MAC opcode is written to (if not masked away). FLD_MAC is 4
+     * bits wide but the MAC tables end at MAC_ARL: never index past that. */
+    VshMAC mac = vsh_get_field(shader_token, FLD_MAC);
+    assert(mac <= MAC_ARL);
+    if (mac != MAC_NOP && mac <= MAC_ARL) {
+        QString *inputs_mac = qstring_new();
+        if (mac_opcode_params[mac].A) {
+            QString *input_a =
+                decode_opcode_input(shader_token,
+                                    vsh_get_field(shader_token, FLD_A_MUX),
+                                    FLD_A_NEG,
+                                    vsh_get_field(shader_token, FLD_A_R));
+            qstring_append(inputs_mac, ", ");
+            qstring_append(inputs_mac, qstring_get_str(input_a));
+            QDECREF(input_a);
+        }
+        if (mac_opcode_params[mac].B) {
+            QString *input_b =
+                decode_opcode_input(shader_token,
+                                    vsh_get_field(shader_token, FLD_B_MUX),
+                                    FLD_B_NEG,
+                                    vsh_get_field(shader_token, FLD_B_R));
+            qstring_append(inputs_mac, ", ");
+            qstring_append(inputs_mac, qstring_get_str(input_b));
+            QDECREF(input_b);
+        }
+        if (mac_opcode_params[mac].C) {
+            qstring_append(inputs_mac, ", ");
+            qstring_append(inputs_mac, qstring_get_str(input_c));
+        }
+        /* Then prepend these inputs with the actual opcode, mask, and input : */
+        ret = decode_opcode(shader_token,
+                            OMUX_MAC,
+                            vsh_get_field(shader_token, FLD_OUT_MAC_MASK),
+                            mac_opcode[mac],
+                            qstring_get_str(inputs_mac));
+        QDECREF(inputs_mac);
+    } else {
+        ret = qstring_new();
+    }
+
+    /* See if a ILU opcode is present too: */
+    VshILU ilu = vsh_get_field(shader_token, FLD_ILU);
+    if (ilu != ILU_NOP) {
+        QString *inputs_c = qstring_from_str(", ");
+        qstring_append(inputs_c, qstring_get_str(input_c));
+        /* Append the ILU opcode, mask and (the already determined) input C: */
+        QString *ilu_op =
+            decode_opcode(shader_token,
+                          OMUX_ILU,
+                          vsh_get_field(shader_token, FLD_OUT_ILU_MASK),
+                          ilu_opcode[ilu],
+                          qstring_get_str(inputs_c));
+        qstring_append(ret, qstring_get_str(ilu_op));
+        QDECREF(inputs_c);
+        QDECREF(ilu_op);
+    }
+
+    QDECREF(input_c);
+
+    return ret;
+}
+
+/* GLSL prelude for translated programs: A0, R0..R12, one macro per opcode. */
+static const char* vsh_header =
+    "\n"
+    "int A0 = 0;\n"
+    "\n"
+    "vec4 R0 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R1 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R2 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R3 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R4 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R5 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R6 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R7 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R8 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R9 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R10 = vec4(0.0,0.0,0.0,0.0);\n"
+    "vec4 R11 = vec4(0.0,0.0,0.0,0.0);\n"
+    "#define R12 oPos\n" /* R12 is a mirror of oPos */
+    "\n"
+    /* See:
+     * http://msdn.microsoft.com/en-us/library/windows/desktop/bb174703%28v=vs.85%29.aspx
+     * https://www.opengl.org/registry/specs/NV/vertex_program1_1.txt
+     */
+    "\n"
+    // "NICE_CODE" variant: _in() overloads that widen any operand to vec4
+    "/* Converts the input to vec4, pads with last component */\n"
+    "vec4 _in(float v) { return vec4(v); }\n"
+    "vec4 _in(vec2 v) { return v.xyyy; }\n"
+    "vec4 _in(vec3 v) { return v.xyzz; }\n"
+    "vec4 _in(vec4 v) { return v.xyzw; }\n"
+    // Disabled alternative to the overloads above (former "#else" branch of
+    // "#ifdef NICE_CODE"), kept for reference:
+    //     "/* Make sure input is always a vec4 */\n"
+    //     "#define _in(v) vec4(v)\n"
+    "\n"
+    "#define INFINITY (1.0 / 0.0)\n"
+    "\n"
+    "#define MOV(dest, mask, src) dest.mask = _MOV(_in(src)).mask\n"
+    "vec4 _MOV(vec4 src)\n"
+    "{\n"
+    "  return src;\n"
+    "}\n"
+    "\n"
+    "#define MUL(dest, mask, src0, src1) dest.mask = _MUL(_in(src0), _in(src1)).mask\n"
+    "vec4 _MUL(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return src0 * src1;\n"
+    "}\n"
+    "\n"
+    "#define ADD(dest, mask, src0, src1) dest.mask = _ADD(_in(src0), _in(src1)).mask\n"
+    "vec4 _ADD(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return src0 + src1;\n"
+    "}\n"
+    "\n"
+    "#define MAD(dest, mask, src0, src1, src2) dest.mask = _MAD(_in(src0), _in(src1), _in(src2)).mask\n"
+    "vec4 _MAD(vec4 src0, vec4 src1, vec4 src2)\n"
+    "{\n"
+    "  return src0 * src1 + src2;\n"
+    "}\n"
+    "\n"
+    "#define DP3(dest, mask, src0, src1) dest.mask = _DP3(_in(src0), _in(src1)).mask\n"
+    "vec4 _DP3(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return vec4(dot(src0.xyz, src1.xyz));\n"
+    "}\n"
+    "\n"
+    "#define DPH(dest, mask, src0, src1) dest.mask = _DPH(_in(src0), _in(src1)).mask\n"
+    "vec4 _DPH(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return vec4(dot(vec4(src0.xyz, 1.0), src1));\n"
+    "}\n"
+    "\n"
+    "#define DP4(dest, mask, src0, src1) dest.mask = _DP4(_in(src0), _in(src1)).mask\n"
+    "vec4 _DP4(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return vec4(dot(src0, src1));\n"
+    "}\n"
+    "\n"
+    "#define DST(dest, mask, src0, src1) dest.mask = _DST(_in(src0), _in(src1)).mask\n"
+    "vec4 _DST(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return vec4(1.0,\n"
+    "              src0.y * src1.y,\n"
+    "              src0.z,\n"
+    "              src1.w);\n"
+    "}\n"
+    "\n"
+    "#define MIN(dest, mask, src0, src1) dest.mask = _MIN(_in(src0), _in(src1)).mask\n"
+    "vec4 _MIN(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return min(src0, src1);\n"
+    "}\n"
+    "\n"
+    "#define MAX(dest, mask, src0, src1) dest.mask = _MAX(_in(src0), _in(src1)).mask\n"
+    "vec4 _MAX(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return max(src0, src1);\n"
+    "}\n"
+    "\n"
+    "#define SLT(dest, mask, src0, src1) dest.mask = _SLT(_in(src0), _in(src1)).mask\n"
+    "vec4 _SLT(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return vec4(lessThan(src0, src1));\n"
+    "}\n"
+    "\n"
+    "#define ARL(dest, src) dest = _ARL(_in(src).x)\n"
+    "int _ARL(float src)\n"
+    "{\n"
+    "  return int(floor(src));\n"
+    "}\n"
+    "\n"
+    "#define SGE(dest, mask, src0, src1) dest.mask = _SGE(_in(src0), _in(src1)).mask\n"
+    "vec4 _SGE(vec4 src0, vec4 src1)\n"
+    "{\n"
+    "  return vec4(greaterThanEqual(src0, src1));\n"
+    "}\n"
+    "\n"
+    "#define RCP(dest, mask, src) dest.mask = _RCP(_in(src).x).mask\n"
+    "vec4 _RCP(float src)\n"
+    "{\n"
+    "  return vec4(1.0 / src);\n"
+    "}\n"
+    "\n"
+    "#define RCC(dest, mask, src) dest.mask = _RCC(_in(src).x).mask\n"
+    "vec4 _RCC(float src)\n"
+    "{\n"
+    "  float t = 1.0 / src;\n"
+    "  if (t > 0.0) {\n"
+    "    t = clamp(t, 5.42101e-020, 1.884467e+019);\n"
+    "  } else {\n"
+    "    t = clamp(t, -1.884467e+019, -5.42101e-020);\n"
+    "  }\n"
+    "  return vec4(t);\n"
+    "}\n"
+    "\n"
+    "#define RSQ(dest, mask, src) dest.mask = _RSQ(_in(src).x).mask\n"
+    "vec4 _RSQ(float src)\n"
+    "{\n"
+    "  if (src == 0.0) { return vec4(INFINITY); }\n"
+    "  if (isinf(src)) { return vec4(0.0); }\n"
+    "  return vec4(inversesqrt(abs(src)));\n"
+    "}\n"
+    "\n"
+    "#define EXP(dest, mask, src) dest.mask = _EXP(_in(src).x).mask\n"
+    "vec4 _EXP(float src)\n"
+    "{\n"
+    "  return vec4(exp2(src));\n"
+    "}\n"
+    "\n"
+    "#define LOG(dest, mask, src) dest.mask = _LOG(_in(src).x).mask\n"
+    "vec4 _LOG(float src)\n"
+    "{\n"
+    "  return vec4(log2(src));\n"
+    "}\n"
+    "\n"
+    "#define LIT(dest, mask, src) dest.mask = _LIT(_in(src)).mask\n"
+    "vec4 _LIT(vec4 src)\n"
+    "{\n"
+    "  vec4 s = src;\n"
+    "  float epsilon = 1.0 / 256.0;\n"
+    "  s.w = clamp(s.w, -(128.0 - epsilon), 128.0 - epsilon);\n"
+    "  s.x = max(s.x, 0.0);\n"
+    "  s.y = max(s.y, 0.0);\n"
+    "  vec4 t = vec4(1.0, 0.0, 0.0, 1.0);\n"
+    "  t.y = s.x;\n"
+#if 1
+    "  t.z = (s.x > 0.0) ? exp2(s.w * log2(s.y)) : 0.0;\n"
+#else
+    "  t.z = (s.x > 0.0) ? pow(s.y, s.w) : 0.0;\n"
+#endif
+    "  return t;\n"
+    "}\n";
+
+void vsh_translate(uint16_t version,        /* not used by this function */
+                   const uint32_t *tokens,  /* VSH_TOKEN_SIZE words per slot */
+                   unsigned int length,     /* maximum number of slots to read */
+                   bool z_perspective,      /* if set, emit "oPos.z = oPos.w" */
+                   QString *header, QString *body)
+{
+    /* vsh_header goes to `header`; the translated program goes to `body`. */
+    qstring_append(header, vsh_header);
+
+    bool has_final = false;
+    unsigned int slot; /* unsigned so the comparison with `length` is exact */
+    for (slot = 0; slot < length; slot++) {
+        const uint32_t *cur_token = &tokens[slot * VSH_TOKEN_SIZE];
+        QString *token_str = decode_token(cur_token);
+        qstring_append_fmt(body,
+                           "  /* Slot %u: 0x%08X 0x%08X 0x%08X 0x%08X */",
+                           slot,
+                           cur_token[0],cur_token[1],cur_token[2],cur_token[3]);
+        qstring_append(body, "\n");
+        qstring_append(body, qstring_get_str(token_str));
+        qstring_append(body, "\n");
+        QDECREF(token_str);
+
+        if (vsh_get_field(cur_token, FLD_FINAL)) {
+            has_final = true;
+            break;
+        }
+    }
+    assert(has_final);
+
+    /* pre-divide and output the generated W so we can do perspective correct
+     * interpolation manually. OpenGL can't, since we give it a W of 1 to work
+     * around the perspective divide */
+    qstring_append(body,
+        "  if (oPos.w == 0.0 || isinf(oPos.w)) {\n"
+        "    vtx.inv_w = 1.0;\n"
+        "  } else {\n"
+        "    vtx.inv_w = 1.0 / oPos.w;\n"
+        "  }\n"
+    );
+
+    qstring_append(body,
+        /* the shaders leave the result in screen space, while
+         * opengl expects it in clip space.
+         * TODO: the pixel-center co-ordinate differences should be handled
+         */
+        "  oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n"
+        "  oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;\n"
+    );
+    if (z_perspective) {
+        qstring_append(body, "  oPos.z = oPos.w;\n");
+    }
+    qstring_append(body,
+        /* Map the clip range into clip space so z is clipped correctly.
+         * Note this makes the values in the depth buffer wrong. This should be
+         * handled with gl_ClipDistance instead, but that has performance issues
+         * on OS X.
+         */
+        "  if (clipRange.y != clipRange.x) {\n"
+        "    oPos.z = (oPos.z - 0.5 * (clipRange.x + clipRange.y)) / (0.5 * (clipRange.y - clipRange.x));\n"
+        "  }\n"
+
+        /* Correct for the perspective divide */
+        "  if (oPos.w < 0.0) {\n"
+            /* undo the perspective divide in the case where the point would be
+             * clipped so opengl can clip it correctly */
+        "    oPos.xyz *= oPos.w;\n"
+        "  } else {\n"
+            /* we don't want the OpenGL perspective divide to happen, but we
+             * can't multiply by W because it could be meaningless here */
+        "    oPos.w = 1.0;\n"
+        "  }\n"
+    );
+
+}
+
diff --git a/hw/xbox/nv2a_vsh.h b/hw/xbox/nv2a_vsh.h
new file mode 100644
index 0000000000..f3a9e662d2
--- /dev/null
+++ b/hw/xbox/nv2a_vsh.h
@@ -0,0 +1,142 @@
+/*
+ * QEMU Geforce NV2A vertex shader translation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_NV2A_VSH_H
+#define HW_NV2A_VSH_H
+
+#include <stdbool.h>
+#include "qapi/qmp/qstring.h"
+
+enum VshLight {      /* enumerators take the implicit values 0..3 */
+    LIGHT_OFF,       /* 0 */
+    LIGHT_INFINITE,  /* 1 */
+    LIGHT_LOCAL,     /* 2 */
+    LIGHT_SPOT       /* 3 */
+};
+
+enum VshTexgen {             /* enumerators take the implicit values 0..5 */
+    TEXGEN_DISABLE,          /* 0 */
+    TEXGEN_EYE_LINEAR,       /* 1 */
+    TEXGEN_OBJECT_LINEAR,    /* 2 */
+    TEXGEN_SPHERE_MAP,       /* 3 */
+    TEXGEN_NORMAL_MAP,       /* 4 */
+    TEXGEN_REFLECTION_MAP,   /* 5 */
+};
+
+enum VshFogMode {        /* each *_ABS enumerator is its base mode + 4 */
+    FOG_MODE_LINEAR,     /* 0 */
+    FOG_MODE_EXP,        /* 1 */
+    FOG_MODE_ERROR2, /* Doesn't exist; placeholder so FOG_MODE_EXP2 == 3 */
+    FOG_MODE_EXP2,       /* 3 */
+    FOG_MODE_LINEAR_ABS, /* 4 */
+    FOG_MODE_EXP_ABS,    /* 5 */
+    FOG_MODE_ERROR6, /* Doesn't exist; placeholder so FOG_MODE_EXP2_ABS == 7 */
+    FOG_MODE_EXP2_ABS    /* 7 */
+};
+
+enum VshFoggen {         /* enumerators take the implicit values 0..6 */
+    FOGGEN_SPEC_ALPHA,   /* 0 */
+    FOGGEN_RADIAL,       /* 1 */
+    FOGGEN_PLANAR,       /* 2 */
+    FOGGEN_ABS_PLANAR,   /* 3 */
+    FOGGEN_ERROR4,       /* 4; NOTE(review): presumably an unused slot - confirm */
+    FOGGEN_ERROR5,       /* 5; NOTE(review): presumably an unused slot - confirm */
+    FOGGEN_FOG_X         /* 6 */
+};
+
+enum VshSkinning {                /* enumerators take the implicit values 0..6 */
+    SKINNING_OFF,                 /* 0 */
+    SKINNING_1WEIGHTS,            /* 1 */
+    SKINNING_2WEIGHTS,            /* 2 */
+    SKINNING_3WEIGHTS,            /* 3 */
+    SKINNING_2WEIGHTS2MATRICES,   /* 4 */
+    SKINNING_3WEIGHTS3MATRICES,   /* 5 */
+    SKINNING_4WEIGHTS4MATRICES,   /* 6 */
+};
+
+// vs.1.1, not an official value
+#define VSH_VERSION_VS                     0xF078
+
+// Xbox vertex shader
+#define VSH_VERSION_XVS                    0x2078
+
+// Xbox vertex state shader
+#define VSH_VERSION_XVSS                   0x7378
+
+// Xbox vertex read/write shader
+#define VSH_VERSION_XVSW                   0x7778
+
+#define VSH_TOKEN_SIZE 4
+
+typedef enum { // Field selectors accepted by vsh_get_field() for one shader token
+    FLD_ILU = 0,
+    FLD_MAC,
+    FLD_CONST,
+    FLD_V,
+    // Input A
+    FLD_A_NEG,
+    FLD_A_SWZ_X,
+    FLD_A_SWZ_Y,
+    FLD_A_SWZ_Z,
+    FLD_A_SWZ_W,
+    FLD_A_R,
+    FLD_A_MUX,
+    // Input B
+    FLD_B_NEG,
+    FLD_B_SWZ_X,
+    FLD_B_SWZ_Y,
+    FLD_B_SWZ_Z,
+    FLD_B_SWZ_W,
+    FLD_B_R,
+    FLD_B_MUX,
+    // Input C
+    FLD_C_NEG,
+    FLD_C_SWZ_X,
+    FLD_C_SWZ_Y,
+    FLD_C_SWZ_Z,
+    FLD_C_SWZ_W,
+    FLD_C_R_HIGH,
+    FLD_C_R_LOW,
+    FLD_C_MUX,
+    // Output
+    FLD_OUT_MAC_MASK,
+    FLD_OUT_R,
+    FLD_OUT_ILU_MASK,
+    FLD_OUT_O_MASK,
+    FLD_OUT_ORB,
+    FLD_OUT_ADDRESS,
+    FLD_OUT_MUX,
+    // Relative addressing
+    FLD_A0X,
+    // Final instruction (vsh_translate() stops after a token with this set)
+    FLD_FINAL
+} VshFieldName;
+
+uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name);
+
+void vsh_translate(uint16_t version,        // not read by the implementation
+                   const uint32_t *tokens,  // VSH_TOKEN_SIZE words per slot
+                   unsigned int length,     // maximum number of slots to read
+                   bool z_perspective,      // if set, emit "oPos.z = oPos.w"
+                   QString *header, QString *body); // both are appended to
+
+
+#endif
diff --git a/hw/xbox/nvnet.c b/hw/xbox/nvnet.c
new file mode 100644
index 0000000000..ce5de502da
--- /dev/null
+++ b/hw/xbox/nvnet.c
@@ -0,0 +1,1033 @@
+/*
+ * QEMU nForce Ethernet Controller implementation
+ *
+ * Copyright (c) 2013 espes
+ * Copyright (c) 2015 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+#include "net/net.h"
+#include "qemu/iov.h"
+
+#define IOPORT_SIZE 0x8
+#define MMIO_SIZE   0x400
+
+#ifdef DEBUG
+#   define NVNET_DPRINTF(format, ...) printf(format, ## __VA_ARGS__)
+#   define NVNET_DUMP_PACKETS_TO_SCREEN
+#else
+#   define NVNET_DPRINTF(format, ...) do { } while (0)
+#endif
+
+static NetClientInfo net_nvnet_info;
+static Property nvnet_properties[];
+
+/*******************************************************************************
+ * Various Device Register Definitions (Derived from forcedeth.c)
+ ******************************************************************************/
+
+#define DEV_NEED_LASTPACKET1 0x0001
+#define DEV_IRQMASK_1        0x0002
+#define DEV_IRQMASK_2        0x0004
+#define DEV_NEED_TIMERIRQ    0x0008
+
+enum { /* MMIO register byte offsets; indented #defines are values used with the register above */
+    NvRegIrqStatus = 0x000,
+#       define NVREG_IRQSTAT_BIT1     0x002
+#       define NVREG_IRQSTAT_BIT4     0x010
+#       define NVREG_IRQSTAT_MIIEVENT 0x040
+#       define NVREG_IRQSTAT_MASK     0x1ff
+    NvRegIrqMask = 0x004,
+#       define NVREG_IRQ_RX           0x0002
+#       define NVREG_IRQ_RX_NOBUF     0x0004
+#       define NVREG_IRQ_TX_ERR       0x0008
+#       define NVREG_IRQ_TX2          0x0010
+#       define NVREG_IRQ_TIMER        0x0020
+#       define NVREG_IRQ_LINK         0x0040
+#       define NVREG_IRQ_TX1          0x0100
+#       define NVREG_IRQMASK_WANTED_1 0x005f
+#       define NVREG_IRQMASK_WANTED_2 0x0147
+#       define NVREG_IRQ_UNKNOWN      (~(NVREG_IRQ_RX|NVREG_IRQ_RX_NOBUF|\
+    NVREG_IRQ_TX_ERR|NVREG_IRQ_TX2|NVREG_IRQ_TIMER|NVREG_IRQ_LINK|\
+    NVREG_IRQ_TX1))
+    NvRegUnknownSetupReg6 = 0x008,
+#       define NVREG_UNKSETUP6_VAL 3
+/*
+ * NVREG_POLL_DEFAULT is the interval length of the timer source on the nic
+ * NVREG_POLL_DEFAULT=97 would result in an interval length of 1 ms
+ */
+    NvRegPollingInterval = 0x00c,
+#       define NVREG_POLL_DEFAULT 970
+    NvRegMisc1 = 0x080,
+#       define NVREG_MISC1_HD    0x02
+#       define NVREG_MISC1_FORCE 0x3b0f3c
+    NvRegTransmitterControl = 0x084,
+#       define NVREG_XMITCTL_START 0x01
+    NvRegTransmitterStatus = 0x088,
+#       define NVREG_XMITSTAT_BUSY 0x01
+    NvRegPacketFilterFlags = 0x8c,
+#       define NVREG_PFF_ALWAYS  0x7F0008
+#       define NVREG_PFF_PROMISC 0x80
+#       define NVREG_PFF_MYADDR  0x20
+    NvRegOffloadConfig = 0x90,
+#       define NVREG_OFFLOAD_HOMEPHY 0x601
+#       define NVREG_OFFLOAD_NORMAL  0x5ee
+    NvRegReceiverControl = 0x094,
+#       define NVREG_RCVCTL_START 0x01
+    NvRegReceiverStatus = 0x98,
+#       define NVREG_RCVSTAT_BUSY  0x01
+    NvRegRandomSeed = 0x9c,
+#       define NVREG_RNDSEED_MASK  0x00ff
+#       define NVREG_RNDSEED_FORCE 0x7f00
+    NvRegUnknownSetupReg1 = 0xA0,
+#       define NVREG_UNKSETUP1_VAL 0x16070f
+    NvRegUnknownSetupReg2 = 0xA4,
+#       define NVREG_UNKSETUP2_VAL 0x16
+    NvRegMacAddrA = 0xA8,
+    NvRegMacAddrB = 0xAC,
+    NvRegMulticastAddrA = 0xB0,
+#       define NVREG_MCASTADDRA_FORCE  0x01
+    NvRegMulticastAddrB = 0xB4,
+    NvRegMulticastMaskA = 0xB8,
+    NvRegMulticastMaskB = 0xBC,
+    NvRegTxRingPhysAddr = 0x100,
+    NvRegRxRingPhysAddr = 0x104,
+    NvRegRingSizes = 0x108,
+#       define NVREG_RINGSZ_TXSHIFT 0
+#       define NVREG_RINGSZ_RXSHIFT 16
+    NvRegUnknownTransmitterReg = 0x10c,
+    NvRegLinkSpeed = 0x110,
+#       define NVREG_LINKSPEED_FORCE 0x10000
+#       define NVREG_LINKSPEED_10    10
+#       define NVREG_LINKSPEED_100   100
+#       define NVREG_LINKSPEED_1000  1000
+    NvRegUnknownSetupReg5 = 0x130,
+#       define NVREG_UNKSETUP5_BIT31 (1U<<31) /* unsigned: 1<<31 overflows int */
+    NvRegUnknownSetupReg3 = 0x134,
+#       define NVREG_UNKSETUP3_VAL1 0x200010
+    NvRegUnknownSetupReg8 = 0x13C,
+#       define NVREG_UNKSETUP8_VAL1 0x300010
+    NvRegUnknownSetupReg7 = 0x140,
+#       define NVREG_UNKSETUP7_VAL 0x300010
+    NvRegTxRxControl = 0x144,
+#       define NVREG_TXRXCTL_KICK  0x0001
+#       define NVREG_TXRXCTL_BIT1  0x0002
+#       define NVREG_TXRXCTL_BIT2  0x0004
+#       define NVREG_TXRXCTL_IDLE  0x0008
+#       define NVREG_TXRXCTL_RESET 0x0010
+    NvRegMIIStatus = 0x180,
+#       define NVREG_MIISTAT_ERROR      0x0001
+#       define NVREG_MIISTAT_LINKCHANGE 0x0008
+#       define NVREG_MIISTAT_MASK       0x000f
+#       define NVREG_MIISTAT_MASK2      0x000f
+    NvRegUnknownSetupReg4 = 0x184,
+#       define NVREG_UNKSETUP4_VAL 8
+    NvRegAdapterControl = 0x188,
+#       define NVREG_ADAPTCTL_START    0x02
+#       define NVREG_ADAPTCTL_LINKUP   0x04
+#       define NVREG_ADAPTCTL_PHYVALID 0x4000
+#       define NVREG_ADAPTCTL_RUNNING  0x100000
+#       define NVREG_ADAPTCTL_PHYSHIFT 24
+    NvRegMIISpeed = 0x18c,
+#       define NVREG_MIISPEED_BIT8 (1<<8)
+#       define NVREG_MIIDELAY  5
+    NvRegMIIControl = 0x190,
+#       define NVREG_MIICTL_INUSE 0x10000
+#       define NVREG_MIICTL_WRITE 0x08000
+#       define NVREG_MIICTL_ADDRSHIFT  5
+    NvRegMIIData = 0x194,
+    NvRegWakeUpFlags = 0x200,
+#       define NVREG_WAKEUPFLAGS_VAL               0x7770
+#       define NVREG_WAKEUPFLAGS_BUSYSHIFT         24
+#       define NVREG_WAKEUPFLAGS_ENABLESHIFT       16
+#       define NVREG_WAKEUPFLAGS_D3SHIFT           12
+#       define NVREG_WAKEUPFLAGS_D2SHIFT           8
+#       define NVREG_WAKEUPFLAGS_D1SHIFT           4
+#       define NVREG_WAKEUPFLAGS_D0SHIFT           0
+#       define NVREG_WAKEUPFLAGS_ACCEPT_MAGPAT     0x01
+#       define NVREG_WAKEUPFLAGS_ACCEPT_WAKEUPPAT  0x02
+#       define NVREG_WAKEUPFLAGS_ACCEPT_LINKCHANGE 0x04
+    NvRegPatternCRC = 0x204,
+    NvRegPatternMask = 0x208,
+    NvRegPowerCap = 0x268,
+#       define NVREG_POWERCAP_D3SUPP (1<<30)
+#       define NVREG_POWERCAP_D2SUPP (1<<26)
+#       define NVREG_POWERCAP_D1SUPP (1<<25)
+    NvRegPowerState = 0x26c,
+#       define NVREG_POWERSTATE_POWEREDUP 0x8000
+#       define NVREG_POWERSTATE_VALID     0x0100
+#       define NVREG_POWERSTATE_MASK      0x0003
+#       define NVREG_POWERSTATE_D0        0x0000
+#       define NVREG_POWERSTATE_D1        0x0001
+#       define NVREG_POWERSTATE_D2        0x0002
+#       define NVREG_POWERSTATE_D3        0x0003
+};
+
+#define NV_TX_LASTPACKET      (1<<0)
+#define NV_TX_RETRYERROR      (1<<3)
+#define NV_TX_LASTPACKET1     (1<<8)
+#define NV_TX_DEFERRED        (1<<10)
+#define NV_TX_CARRIERLOST     (1<<11)
+#define NV_TX_LATECOLLISION   (1<<12)
+#define NV_TX_UNDERFLOW       (1<<13)
+#define NV_TX_ERROR           (1<<14)
+#define NV_TX_VALID           (1<<15)
+#define NV_RX_DESCRIPTORVALID (1<<0)
+#define NV_RX_MISSEDFRAME     (1<<1)
+#define NV_RX_SUBSTRACT1      (1<<3)
+#define NV_RX_BIT4            (1<<4)
+#define NV_RX_ERROR1          (1<<7)
+#define NV_RX_ERROR2          (1<<8)
+#define NV_RX_ERROR3          (1<<9)
+#define NV_RX_ERROR4          (1<<10)
+#define NV_RX_CRCERR          (1<<11)
+#define NV_RX_OVERFLOW        (1<<12)
+#define NV_RX_FRAMINGERR      (1<<13)
+#define NV_RX_ERROR           (1<<14)
+#define NV_RX_AVAIL           (1<<15)
+
+/* Miscellaneous hardware related defines: */
+#define NV_PCI_REGSZ          0x270
+
+/* various timeout delays: all in usec */
+#define NV_TXRX_RESET_DELAY   4
+#define NV_TXSTOP_DELAY1      10
+#define NV_TXSTOP_DELAY1MAX   500000
+#define NV_TXSTOP_DELAY2      100
+#define NV_RXSTOP_DELAY1      10
+#define NV_RXSTOP_DELAY1MAX   500000
+#define NV_RXSTOP_DELAY2      100
+#define NV_SETUP5_DELAY       5
+#define NV_SETUP5_DELAYMAX    50000
+#define NV_POWERUP_DELAY      5
+#define NV_POWERUP_DELAYMAX   5000
+#define NV_MIIBUSY_DELAY      50
+#define NV_MIIPHY_DELAY       10
+#define NV_MIIPHY_DELAYMAX    10000
+#define NV_WAKEUPPATTERNS     5
+#define NV_WAKEUPMASKENTRIES  4
+
+/* General driver defaults */
+#define NV_WATCHDOG_TIMEO     (2*HZ)
+#define DEFAULT_MTU           1500
+
+#define RX_RING               4
+#define TX_RING               2
+/* limited to 1 packet until we understand NV_TX_LASTPACKET */
+#define TX_LIMIT_STOP         10
+#define TX_LIMIT_START        5
+
+/* rx/tx mac addr + type + vlan + align + slack*/
+#define RX_NIC_BUFSIZE        (DEFAULT_MTU + 64)
+/* even more slack */
+#define RX_ALLOC_BUFSIZE      (DEFAULT_MTU + 128)
+
+#define OOM_REFILL            (1+HZ/20)
+#define POLL_WAIT             (1+HZ/100)
+
+#define MII_READ      (-1)
+#define MII_PHYSID1   0x02    /* PHYS ID 1                   */
+#define MII_PHYSID2   0x03    /* PHYS ID 2                   */
+#define MII_BMCR      0x00    /* Basic mode control register */
+#define MII_BMSR      0x01    /* Basic mode status register  */
+#define MII_ADVERTISE 0x04    /* Advertisement control reg   */
+#define MII_LPA       0x05    /* Link partner ability reg    */
+
+#define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete   */
+#define BMSR_BIT2         0x0004 /* Unknown... */
+
+/* Link partner ability register. */
+#define LPA_SLCT     0x001f  /* Same as advertise selector  */
+#define LPA_10HALF   0x0020  /* Can do 10mbps half-duplex   */
+#define LPA_10FULL   0x0040  /* Can do 10mbps full-duplex   */
+#define LPA_100HALF  0x0080  /* Can do 100mbps half-duplex  */
+#define LPA_100FULL  0x0100  /* Can do 100mbps full-duplex  */
+#define LPA_100BASE4 0x0200  /* Can do 100mbps 4k packets   */
+#define LPA_RESV     0x1c00  /* Unused...                   */
+#define LPA_RFAULT   0x2000  /* Link partner faulted        */
+#define LPA_LPACK    0x4000  /* Link partner acked us       */
+#define LPA_NPAGE    0x8000  /* Next page bit               */
+
+/*******************************************************************************
+ * Primary State Structure
+ ******************************************************************************/
+
+typedef struct NvNetState {       /* per-device state of the emulated NIC */
+    PCIDevice    dev;
+    NICState     *nic;
+    NICConf      conf;
+    MemoryRegion mmio, io;
+    uint8_t      regs[MMIO_SIZE];  /* indexed by register byte offset (up to 0x26c) */
+    uint32_t     phy_regs[6];
+    uint8_t      tx_ring_index;    /* next TX descriptor to examine */
+    uint8_t      tx_ring_size;     /* set on NvRegRingSizes writes; 8-bit, so >255 wraps */
+    uint8_t      rx_ring_index;    /* next RX descriptor to examine */
+    uint8_t      rx_ring_size;     /* set on NvRegRingSizes writes; 8-bit, so >255 wraps */
+    uint8_t      txrx_dma_buf[RX_ALLOC_BUFSIZE]; /* bounce buffer shared by TX and RX */
+    FILE         *packet_dump_file;
+    char         *packet_dump_path;
+} NvNetState;
+
+struct RingDesc {           /* ring entry, DMA-copied to and from guest memory as-is */
+    uint32_t packet_buffer; /* guest DMA address of the packet data */
+    uint16_t length;        /* RX: capacity in, size out; TX: length + 1 bytes are sent */
+    uint16_t flags;         /* NV_RX_* or NV_TX_* bits */
+};
+
+/*******************************************************************************
+ * Helper Macros
+ ******************************************************************************/
+
+#define NVNET_DEVICE(obj) \
+    OBJECT_CHECK(NvNetState, (obj), "nvnet")
+
+/*******************************************************************************
+ * Prototypes
+ ******************************************************************************/
+
+/* Init */
+static int nvnet_initfn(PCIDevice *dev);
+static void nvnet_uninit(PCIDevice *dev);
+static void nvnet_class_init(ObjectClass *klass, void *data);
+static void nvnet_cleanup(NetClientState *nc);
+static void nvnet_reset(void *opaque);
+static void qdev_nvnet_reset(DeviceState *dev);
+static void nvnet_class_init(ObjectClass *klass, void *data);
+static void nvnet_register(void);
+
+/* MMIO / IO / Phy / Device Register Access */
+static uint64_t nvnet_mmio_read(void *opaque,
+    hwaddr addr, unsigned int size);
+static void nvnet_mmio_write(void *opaque,
+    hwaddr addr, uint64_t val, unsigned int size);
+static uint32_t nvnet_get_reg(NvNetState *s,
+    hwaddr addr, unsigned int size);
+static void nvnet_set_reg(NvNetState *s,
+    hwaddr addr, uint32_t val, unsigned int size);
+static uint64_t nvnet_io_read(void *opaque,
+    hwaddr addr, unsigned int size);
+static void nvnet_io_write(void *opaque,
+    hwaddr addr, uint64_t val, unsigned int size);
+static int nvnet_mii_rw(NvNetState *s,
+    uint64_t val);
+
+/* Link State */
+static void nvnet_link_down(NvNetState *s);
+static void nvnet_link_up(NvNetState *s);
+static void nvnet_set_link_status(NetClientState *nc);
+
+/* Interrupts */
+static void nvnet_update_irq(NvNetState *s);
+
+/* Packet Tx/Rx */
+static void nvnet_send_packet(NvNetState *s,
+    const uint8_t *buf, int size);
+static ssize_t nvnet_dma_packet_to_guest(NvNetState *s,
+    const uint8_t *buf, size_t size);
+static ssize_t nvnet_dma_packet_from_guest(NvNetState *s);
+static int nvnet_can_receive(NetClientState *nc);
+static ssize_t nvnet_receive(NetClientState *nc,
+    const uint8_t *buf, size_t size);
+static ssize_t nvnet_receive_iov(NetClientState *nc,
+    const struct iovec *iov, int iovcnt);
+
+/* Utility Functions */
+static void nvnet_hex_dump(NvNetState *s, const uint8_t *buf, int size);
+
+#ifdef DEBUG
+static const char *nvnet_get_reg_name(hwaddr addr);
+static const char *nvnet_get_mii_reg_name(uint8_t reg);
+#endif
+
+/*******************************************************************************
+ * IRQ
+ ******************************************************************************/
+
+/*
+ * Drive the PCI interrupt line: asserted while an unmasked cause is pending.
+ */
+static void nvnet_update_irq(NvNetState *s)
+{
+    if (nvnet_get_reg(s, NvRegIrqMask,   4) &  /* bitwise: enabled causes only */
+        nvnet_get_reg(s, NvRegIrqStatus, 4)) {
+        NVNET_DPRINTF("Asserting IRQ\n");
+        pci_irq_assert(&s->dev);
+    } else {
+        pci_irq_deassert(&s->dev);
+    }
+}
+
+/*******************************************************************************
+ * Register Control
+ ******************************************************************************/
+
+/*
+ * Read `size` bytes (1, 2 or 4) from the register backing store at byte
+ * offset `addr`; 2- and 4-byte accesses must be naturally aligned.
+ */
+static uint32_t nvnet_get_reg(NvNetState *s, hwaddr addr, unsigned int size)
+{
+    switch (size) {
+    case 4:
+        assert((addr & 3) == 0); /* Unaligned register access. */
+        return ((uint32_t *)s->regs)[addr>>2];
+    case 2:
+        assert((addr & 1) == 0); /* Unaligned register access. */
+        return ((uint16_t *)s->regs)[addr>>1];
+    case 1:
+        return s->regs[addr];
+
+    default:
+        assert(0); /* Unsupported register access. */
+        return 0;  /* defined result when NDEBUG compiles the assert out */
+    }
+}
+
+/*
+ * Store the low `size` bytes (1, 2 or 4) of `val` into the register backing
+ * store at byte offset `addr`. Wider accesses must be naturally aligned.
+ */
+static void nvnet_set_reg(NvNetState *s,
+                          hwaddr addr, uint32_t val, unsigned int size)
+{
+    if (size == 4) {
+        /* Whole 32-bit word. */
+        uint32_t *words = (uint32_t *)s->regs;
+
+        assert((addr & 3) == 0); /* Unaligned register access. */
+        words[addr >> 2] = val;
+    } else if (size == 2) {
+        /* 16-bit half word. */
+        uint16_t *halves = (uint16_t *)s->regs;
+
+        assert((addr & 1) == 0); /* Unaligned register access. */
+        halves[addr >> 1] = (uint16_t)val;
+    } else if (size == 1) {
+        s->regs[addr] = (uint8_t)val;
+    } else {
+        assert(0); /* Unsupported register access. */
+    }
+}
+
+/*******************************************************************************
+ * PHY Control
+ ******************************************************************************/
+
+/*
+ * Service the MII access currently described by NvRegMIIControl.
+ *
+ * The access parameters are decoded from the control register, not `val`.
+ * Returns -1 unless PHY address 1 is selected. Writes are ignored and return
+ * 0, so `val` is unused. Reads return a canned value for MII_BMSR,
+ * MII_ADVERTISE and MII_LPA, and 0 for every other register.
+ */
+static int nvnet_mii_rw(NvNetState *s, uint64_t val)
+{
+    /* Control word: register number in the low NVREG_MIICTL_ADDRSHIFT bits,
+     * a 5-bit PHY address above them, and the NVREG_MIICTL_WRITE flag. */
+    const uint32_t ctl = nvnet_get_reg(s, NvRegMIIControl, 4);
+    const int is_write = ctl & NVREG_MIICTL_WRITE;
+    const int phy      = (ctl >> NVREG_MIICTL_ADDRSHIFT) & 0x1f;
+    const int mii_reg  = ctl & ((1 << NVREG_MIICTL_ADDRSHIFT) - 1);
+
+    NVNET_DPRINTF("nvnet mii %s: phy 0x%x %s [0x%x]\n",
+        is_write ? "write" : "read", phy, nvnet_get_mii_reg_name(mii_reg),
+        mii_reg);
+
+    /* Accesses to any PHY address other than 1 fail. */
+    if (phy != 1) {
+        return -1;
+    }
+
+    /* Writes are accepted but not stored. */
+    if (is_write) {
+        return 0;
+    }
+
+    if (mii_reg == MII_BMSR) {
+        /* Phy initialization code waits for BIT2 to be set.. If not set,
+         * software may report controller as not running */
+        return BMSR_ANEGCOMPLETE | BMSR_BIT2;
+    }
+
+    if (mii_reg == MII_ADVERTISE || mii_reg == MII_LPA) {
+        /* Both registers report the same 10/100 capability bits. */
+        return LPA_10HALF | LPA_10FULL |
+               LPA_100HALF | LPA_100FULL | LPA_100BASE4;
+    }
+
+    /* Any other register reads back as zero. */
+    return 0;
+}
+
+/*******************************************************************************
+ * MMIO Read/Write
+ ******************************************************************************/
+
+/*
+ * Handle a guest MMIO read: PHY registers are synthesised, others are stored.
+ */
+static uint64_t nvnet_mmio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NvNetState *s;
+    uint64_t retval;
+
+    s = NVNET_DEVICE(opaque);
+
+    switch (addr) {
+    case NvRegMIIData:
+        assert(size == 4);
+        retval = nvnet_mii_rw(s, MII_READ); /* reading the data reg runs the PHY access */
+        break;
+
+    case NvRegMIIControl:
+        retval = nvnet_get_reg(s, addr, size);
+        retval &= ~NVREG_MIICTL_INUSE; /* never report the INUSE bit as set */
+        break;
+
+    case NvRegMIIStatus:
+        retval = 0; /* always reads as zero */
+        break;
+
+    default:
+        retval = nvnet_get_reg(s, addr, size);
+        break;
+    }
+
+    NVNET_DPRINTF("nvnet mmio: read %s [0x%" HWADDR_PRIx "] <- 0x%" PRIx64 "\n",
+        nvnet_get_reg_name(addr & ~3), addr, retval);
+
+    return retval;
+}
+
+/*
+ * Handle a guest MMIO write; some registers have side effects, others store.
+ */
+static void nvnet_mmio_write(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned int size)
+{
+    NvNetState *s;
+    uint32_t temp;
+
+    s = NVNET_DEVICE(opaque);
+
+    NVNET_DPRINTF("nvnet mmio: write %s [0x%" HWADDR_PRIx "] = 0x%" PRIx64 "\n",
+        nvnet_get_reg_name(addr & ~3), addr, val);
+
+    switch (addr) {
+    case NvRegRingSizes:
+        /* Each 16-bit field holds (entries - 1); cache the decoded counts. */
+        nvnet_set_reg(s, addr, val, size);
+        s->rx_ring_size = ((val >> NVREG_RINGSZ_RXSHIFT) & 0xffff)+1;
+        s->tx_ring_size = ((val >> NVREG_RINGSZ_TXSHIFT) & 0xffff)+1;
+        break;
+    case NvRegMIIData:
+        nvnet_mii_rw(s, val);
+        break;
+
+    case NvRegTxRxControl:
+        if (val == NVREG_TXRXCTL_KICK) {
+            NVNET_DPRINTF("NvRegTxRxControl = NVREG_TXRXCTL_KICK!\n");
+            nvnet_dma_packet_from_guest(s); /* transmit what the guest queued */
+        }
+
+        if (val & NVREG_TXRXCTL_BIT2) {
+            nvnet_set_reg(s, NvRegTxRxControl, NVREG_TXRXCTL_IDLE, 4);
+            break;
+        }
+
+        if (val & NVREG_TXRXCTL_BIT1) {
+            nvnet_set_reg(s, NvRegIrqStatus, 0, 4);
+            break;
+        } else if (val == 0) {
+            temp = nvnet_get_reg(s, NvRegUnknownSetupReg3, 4);
+            if (temp == NVREG_UNKSETUP3_VAL1) {
+                /* forcedeth waits for this bit to be set... */
+                nvnet_set_reg(s, NvRegUnknownSetupReg5,
+                                 NVREG_UNKSETUP5_BIT31, 4);
+                break;
+            }
+        }
+
+        nvnet_set_reg(s, NvRegTxRxControl, val, size);
+        break;
+
+    case NvRegIrqMask:
+        nvnet_set_reg(s, addr, val, size);
+        nvnet_update_irq(s);
+        break;
+
+    case NvRegIrqStatus:
+        /* Write-one-to-clear: bits set in val are removed from the status. */
+        nvnet_set_reg(s, addr, nvnet_get_reg(s, addr, size) & ~val, size);
+        nvnet_update_irq(s);
+        break;
+    default:
+        nvnet_set_reg(s, addr, val, size);
+        break;
+    }
+}
+
+static const MemoryRegionOps nvnet_mmio_ops = { /* callbacks for the MMIO register handlers above */
+    .read = nvnet_mmio_read,
+    .write = nvnet_mmio_write,
+};
+
+/*******************************************************************************
+ * Packet TX/RX
+ ******************************************************************************/
+
+static void nvnet_send_packet(NvNetState *s, const uint8_t *buf, int size)
+{
+    /* Log and dump the frame, then pass it to the NIC's queue for sending. */
+    NVNET_DPRINTF("nvnet: Sending packet!\n");
+    nvnet_hex_dump(s, buf, size);
+
+    qemu_send_packet(qemu_get_queue(s->nic), buf, size);
+}
+
+static int nvnet_can_receive(NetClientState *nc)
+{
+    NVNET_DPRINTF("nvnet_can_receive called\n");
+    return 1; /* unconditionally reports that a packet can be accepted */
+}
+
+static ssize_t nvnet_receive(NetClientState *nc,
+                             const uint8_t *buf, size_t size)
+{
+    /* Present the flat buffer as a one-element scatter list so that the
+     * iovec receive path does all of the work. */
+    const struct iovec single = { .iov_base = (uint8_t *)buf, .iov_len = size };
+
+    NVNET_DPRINTF("nvnet_receive called\n");
+
+    return nvnet_receive_iov(nc, &single, 1);
+}
+
+/* Linearise an incoming frame into txrx_dma_buf and DMA it to the guest. */
+static ssize_t nvnet_receive_iov(NetClientState *nc,
+                                 const struct iovec *iov, int iovcnt)
+{
+    NvNetState *s = qemu_get_nic_opaque(nc);
+    size_t size = iov_size(iov, iovcnt);
+
+    NVNET_DPRINTF("nvnet: Packet received!\n");
+    /* Frame size is set by the sender: drop oversized frames, don't abort. */
+    if (size > sizeof(s->txrx_dma_buf)) {
+        NVNET_DPRINTF("nvnet_receive_iov packet too large!\n");
+        return -1;
+    }
+
+    iov_to_buf(iov, iovcnt, 0, s->txrx_dma_buf, size);
+    nvnet_hex_dump(s, s->txrx_dma_buf, size);
+    return nvnet_dma_packet_to_guest(s, s->txrx_dma_buf, size);
+}
+
+static ssize_t nvnet_dma_packet_to_guest(NvNetState *s,
+                                         const uint8_t *buf, size_t size)
+{
+    struct RingDesc desc;
+    int i;
+
+    /* Scan at most one lap of the RX ring for an available descriptor that is
+     * large enough. Returns `size` once delivered, -1 if none was found. */
+    for (i = 0; i < s->rx_ring_size; i++) {
+        /* Read current ring descriptor */
+        s->rx_ring_index %= s->rx_ring_size;
+        dma_addr_t rx_ring_addr = nvnet_get_reg(s, NvRegRxRingPhysAddr, 4);
+        rx_ring_addr += s->rx_ring_index*sizeof(desc);
+        pci_dma_read(&s->dev, rx_ring_addr, &desc, sizeof(desc));
+        NVNET_DPRINTF("Looking at ring descriptor %d (0x%" PRIx64 "): ",
+                      s->rx_ring_index, (uint64_t)rx_ring_addr);
+        NVNET_DPRINTF("Buffer: 0x%x, Length: 0x%x, Flags: 0x%x\n",
+                      desc.packet_buffer, desc.length, desc.flags);
+        s->rx_ring_index += 1;
+
+        if (!(desc.flags & NV_RX_AVAIL) || !(desc.length >= size)) {
+            continue;
+        }
+
+        /* Transfer packet from device to memory */
+        NVNET_DPRINTF("Transferring packet, size 0x%zx, to memory at 0x%x\n",
+                      size, desc.packet_buffer);
+        pci_dma_write(&s->dev, desc.packet_buffer, buf, size);
+
+        /* Update descriptor indicating the packet is waiting */
+        desc.length = size;
+        desc.flags  = NV_RX_BIT4 | NV_RX_DESCRIPTORVALID;
+        pci_dma_write(&s->dev, rx_ring_addr, &desc, sizeof(desc));
+        NVNET_DPRINTF("Updated ring descriptor: Length: 0x%x, Flags: 0x%x\n",
+                      desc.length, desc.flags);
+
+        /* Trigger interrupt; OR the bit in so unacknowledged causes survive */
+        NVNET_DPRINTF("Triggering interrupt\n");
+        nvnet_set_reg(s, NvRegIrqStatus,
+            nvnet_get_reg(s, NvRegIrqStatus, 4) | NVREG_IRQSTAT_BIT1, 4);
+        nvnet_update_irq(s);
+        return size;
+    }
+
+    /* Could not find free buffer, or packet too large. */
+    NVNET_DPRINTF("Could not find free buffer!\n");
+    return -1;
+}
+
+static ssize_t nvnet_dma_packet_from_guest(NvNetState *s)
+{
+    struct RingDesc desc;
+    bool is_last_packet;
+    int i;
+
+    /* Walk at most one lap of the TX ring, sending valid descriptors. Returns 0. */
+    for (i = 0; i < s->tx_ring_size; i++) {
+        /* Read ring descriptor */
+        s->tx_ring_index %= s->tx_ring_size;
+        dma_addr_t tx_ring_addr = nvnet_get_reg(s, NvRegTxRingPhysAddr, 4);
+        tx_ring_addr += s->tx_ring_index * sizeof(desc);
+        pci_dma_read(&s->dev, tx_ring_addr, &desc, sizeof(desc));
+        NVNET_DPRINTF("Looking at ring desc %d (%" PRIx64 "): ",
+                      s->tx_ring_index, (uint64_t)tx_ring_addr);
+        NVNET_DPRINTF("Buffer: 0x%x, Length: 0x%x, Flags: 0x%x\n",
+                      desc.packet_buffer, desc.length, desc.flags);
+        s->tx_ring_index += 1;
+
+        if (!(desc.flags & NV_TX_VALID)) {
+            continue;
+        }
+
+        /* Transfer packet from guest memory; drop it if it exceeds the buffer */
+        size_t tx_len = (size_t)desc.length + 1; /* length is guest-controlled */
+        if (tx_len <= sizeof(s->txrx_dma_buf)) {
+            NVNET_DPRINTF("Sending packet...\n");
+            pci_dma_read(&s->dev, desc.packet_buffer, s->txrx_dma_buf, tx_len);
+            nvnet_send_packet(s, s->txrx_dma_buf, tx_len);
+        }
+
+        /* Update descriptor */
+        is_last_packet = desc.flags & NV_TX_LASTPACKET;
+        desc.flags &= ~(NV_TX_VALID | NV_TX_RETRYERROR | NV_TX_DEFERRED |
+            NV_TX_CARRIERLOST | NV_TX_LATECOLLISION | NV_TX_UNDERFLOW |
+            NV_TX_ERROR);
+        desc.length = desc.length+5;
+        pci_dma_write(&s->dev, tx_ring_addr, &desc, sizeof(desc));
+        if (is_last_packet) {
+            NVNET_DPRINTF("  -- Last packet\n");
+            break;
+        }
+    }
+
+    /* Trigger interrupt; OR the bit in so unacknowledged causes survive */
+    NVNET_DPRINTF("Triggering interrupt\n");
+    nvnet_set_reg(s, NvRegIrqStatus,
+        nvnet_get_reg(s, NvRegIrqStatus, 4) | NVREG_IRQSTAT_BIT4, 4);
+    nvnet_update_irq(s);
+    return 0;
+}
+
+/*******************************************************************************
+ * Link Status Control
+ ******************************************************************************/
+
+static void nvnet_link_down(NvNetState *s)
+{
+    NVNET_DPRINTF("nvnet_link_down called\n"); /* no other action is taken */
+}
+
+static void nvnet_link_up(NvNetState *s)
+{
+    NVNET_DPRINTF("nvnet_link_up called\n"); /* no other action is taken */
+}
+
+static void nvnet_set_link_status(NetClientState *nc)
+{
+    NvNetState *s = qemu_get_nic_opaque(nc);
+    if (nc->link_down) { /* dispatch on the link state stored in nc */
+        nvnet_link_down(s);
+    } else {
+        nvnet_link_up(s);
+    }
+}
+
+/*******************************************************************************
+ * IO Read/Write
+ ******************************************************************************/
+
+static uint64_t nvnet_io_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NVNET_DPRINTF("nvnet io: read [0x%"HWADDR_PRIx"]\n", addr);
+    return 0; /* the I/O region is a stub: every read yields 0 */
+}
+
+static void nvnet_io_write(void *opaque,
+                           hwaddr addr, uint64_t val, unsigned int size)
+{ /* the I/O region is a stub: the write is only traced, never stored */
+    NVNET_DPRINTF("nvnet io: [0x%"HWADDR_PRIx"] = 0x%"PRIx64"\n", addr, val);
+}
+
+static const MemoryRegionOps nvnet_io_ops = {
+    .read  = nvnet_io_read,  /* handlers for the "nvnet-io" region (BAR 1) */
+    .write = nvnet_io_write,
+};
+
+/*******************************************************************************
+ * Init
+ ******************************************************************************/
+
+static int nvnet_initfn(PCIDevice *pci_dev)
+{
+    DeviceState *dev = DEVICE(pci_dev);
+    NvNetState *s = NVNET_DEVICE(pci_dev);
+
+    pci_dev->config[PCI_INTERRUPT_PIN] = 0x01;
+    /* Optional packet dump: opened only when the "dump" path is non-empty */
+    s->packet_dump_file = NULL;
+    if (s->packet_dump_path && *s->packet_dump_path != '\x00') {
+        s->packet_dump_file = fopen(s->packet_dump_path, "wb");
+        if (!s->packet_dump_file) {
+            fprintf(stderr, "Failed to open %s for writing!\n",
+                            s->packet_dump_path);
+            return -1; /* initialisation fails if the file cannot be opened */
+        }
+    }
+    /* Start with all registers cleared and both rings empty */
+    memset(s->regs, 0, sizeof(s->regs));
+
+    s->rx_ring_index = 0;
+    s->rx_ring_size  = 0;
+    s->tx_ring_index = 0;
+    s->tx_ring_size  = 0;
+    /* BAR 0 is the MMIO register window, BAR 1 the I/O port window */
+    memory_region_init_io(&s->mmio, OBJECT(dev), &nvnet_mmio_ops, s,
+        "nvnet-mmio", MMIO_SIZE);
+    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
+
+    memory_region_init_io(&s->io, OBJECT(dev), &nvnet_io_ops, s,
+        "nvnet-io", IOPORT_SIZE);
+    pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &s->io);
+    /* Create the NIC; the device state is passed as its opaque pointer */
+    qemu_macaddr_default_if_unset(&s->conf.macaddr);
+    s->nic = qemu_new_nic(&net_nvnet_info, &s->conf,
+        object_get_typename(OBJECT(s)), dev->id, s);
+    assert(s->nic);
+    /* Expose the six MAC bytes through NvRegMacAddrA (4) and B (2) */
+    s->regs[NvRegMacAddrA+0x00] = s->conf.macaddr.a[0];
+    s->regs[NvRegMacAddrA+0x01] = s->conf.macaddr.a[1];
+    s->regs[NvRegMacAddrA+0x02] = s->conf.macaddr.a[2];
+    s->regs[NvRegMacAddrA+0x03] = s->conf.macaddr.a[3];
+    s->regs[NvRegMacAddrB+0x00] = s->conf.macaddr.a[4];
+    s->regs[NvRegMacAddrB+0x01] = s->conf.macaddr.a[5];
+
+    return 0;
+}
+
+static void nvnet_uninit(PCIDevice *dev)
+{
+    NvNetState *s = NVNET_DEVICE(dev);
+    /* Release the dump file (if any), both memory regions and the NIC */
+    if (s->packet_dump_file) {
+        fclose(s->packet_dump_file);
+    }
+
+    memory_region_destroy(&s->mmio);
+    memory_region_destroy(&s->io);
+    qemu_del_nic(s->nic);
+}
+
+void nvnet_cleanup(NetClientState *nc) /* intentionally empty callback */
+{
+}
+
+static void nvnet_reset(void *opaque)
+{
+    NvNetState *s = opaque;
+    /* Only the link-down hook is re-run; no register state is touched here */
+    if (qemu_get_queue(s->nic)->link_down) {
+        nvnet_link_down(s);
+    }
+}
+
+static void qdev_nvnet_reset(DeviceState *dev)
+{
+    NvNetState *s = NVNET_DEVICE(dev); /* DeviceState adapter for nvnet_reset */
+    nvnet_reset(s);
+}
+
+/*******************************************************************************
+ * Utility Functions
+ ******************************************************************************/
+
+static void hex_dump(FILE *f, const uint8_t *buf, int size)
+{
+    int len, i, j, c;
+    /* One output row per 16 input bytes: offset, hex column, ASCII column */
+    for (i = 0; i < size; i += 16) {
+        len = size - i; /* bytes on this row, capped at 16 below */
+        if (len > 16) {
+            len = 16;
+        }
+        fprintf(f, "%08x ", i);
+        for (j = 0; j < 16; j++) {
+            if (j < len) {
+                fprintf(f, " %02x", buf[i+j]);
+            } else {
+                fprintf(f, "   "); /* pad a short final row to full width */
+            }
+        }
+        fprintf(f, " ");
+        for (j = 0; j < len; j++) {
+            c = buf[i+j];
+            if (c < ' ' || c > '~') { /* bytes outside ' '..'~' print as '.' */
+                c = '.';
+            }
+            fprintf(f, "%c", c);
+        }
+        fprintf(f, "\n");
+    }
+}
+
+static void nvnet_hex_dump(NvNetState *s, const uint8_t *buf, int size)
+{
+#ifdef NVNET_DUMP_PACKETS_TO_SCREEN
+    hex_dump(stdout, buf, size); /* compile-time opt-in copy to stdout */
+#endif
+    if (s->packet_dump_file) { /* non-NULL only if a dump file was opened */
+        hex_dump(s->packet_dump_file, buf, size);
+    }
+}
+
+#ifdef DEBUG
+/*
+ * Map a device register offset to its symbolic name ("Unknown" otherwise).
+ */
+static const char *nvnet_get_reg_name(hwaddr addr)
+{
+    switch (addr) {
+    case NvRegIrqStatus:             return "NvRegIrqStatus";
+    case NvRegIrqMask:               return "NvRegIrqMask";
+    case NvRegUnknownSetupReg6:      return "NvRegUnknownSetupReg6";
+    case NvRegPollingInterval:       return "NvRegPollingInterval";
+    case NvRegMisc1:                 return "NvRegMisc1";
+    case NvRegTransmitterControl:    return "NvRegTransmitterControl";
+    case NvRegTransmitterStatus:     return "NvRegTransmitterStatus";
+    case NvRegPacketFilterFlags:     return "NvRegPacketFilterFlags";
+    case NvRegOffloadConfig:         return "NvRegOffloadConfig";
+    case NvRegReceiverControl:       return "NvRegReceiverControl";
+    case NvRegReceiverStatus:        return "NvRegReceiverStatus";
+    case NvRegRandomSeed:            return "NvRegRandomSeed";
+    case NvRegUnknownSetupReg1:      return "NvRegUnknownSetupReg1";
+    case NvRegUnknownSetupReg2:      return "NvRegUnknownSetupReg2";
+    case NvRegMacAddrA:              return "NvRegMacAddrA";
+    case NvRegMacAddrB:              return "NvRegMacAddrB";
+    case NvRegMulticastAddrA:        return "NvRegMulticastAddrA";
+    case NvRegMulticastAddrB:        return "NvRegMulticastAddrB";
+    case NvRegMulticastMaskA:        return "NvRegMulticastMaskA";
+    case NvRegMulticastMaskB:        return "NvRegMulticastMaskB";
+    case NvRegTxRingPhysAddr:        return "NvRegTxRingPhysAddr";
+    case NvRegRxRingPhysAddr:        return "NvRegRxRingPhysAddr";
+    case NvRegRingSizes:             return "NvRegRingSizes";
+    case NvRegUnknownTransmitterReg: return "NvRegUnknownTransmitterReg";
+    case NvRegLinkSpeed:             return "NvRegLinkSpeed";
+    case NvRegUnknownSetupReg5:      return "NvRegUnknownSetupReg5";
+    case NvRegUnknownSetupReg3:      return "NvRegUnknownSetupReg3";
+    case NvRegUnknownSetupReg8:      return "NvRegUnknownSetupReg8";
+    case NvRegUnknownSetupReg7:      return "NvRegUnknownSetupReg7";
+    case NvRegTxRxControl:           return "NvRegTxRxControl";
+    case NvRegMIIStatus:             return "NvRegMIIStatus";
+    case NvRegUnknownSetupReg4:      return "NvRegUnknownSetupReg4";
+    case NvRegAdapterControl:        return "NvRegAdapterControl";
+    case NvRegMIISpeed:              return "NvRegMIISpeed";
+    case NvRegMIIControl:            return "NvRegMIIControl";
+    case NvRegMIIData:               return "NvRegMIIData";
+    case NvRegWakeUpFlags:           return "NvRegWakeUpFlags";
+    case NvRegPatternCRC:            return "NvRegPatternCRC";
+    case NvRegPatternMask:           return "NvRegPatternMask";
+    case NvRegPowerCap:              return "NvRegPowerCap";
+    case NvRegPowerState:            return "NvRegPowerState";
+    default:                         return "Unknown";
+    }
+}
+#endif
+
+
+#ifdef DEBUG
+/*
+ * Map a PHY (MII) register number to its symbolic name ("Unknown" otherwise).
+ */
+static const char *nvnet_get_mii_reg_name(uint8_t reg)
+{
+    switch (reg) {
+    case MII_PHYSID1:   return "MII_PHYSID1";
+    case MII_PHYSID2:   return "MII_PHYSID2";
+    case MII_BMCR:      return "MII_BMCR";
+    case MII_BMSR:      return "MII_BMSR";
+    case MII_ADVERTISE: return "MII_ADVERTISE";
+    case MII_LPA:       return "MII_LPA";
+    default:            return "Unknown";
+    }
+}
+#endif
+
+/*******************************************************************************
+ * Properties
+ ******************************************************************************/
+
+static void nvnet_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    /* PCI identity plus the init/exit callbacks defined in this file */
+    k->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    k->device_id = PCI_DEVICE_ID_NVIDIA_NVENET_1;
+    k->revision  = 210;
+    k->class_id  = PCI_CLASS_NETWORK_ETHERNET;
+    k->init      = nvnet_initfn;
+    k->exit      = nvnet_uninit;
+    /* Generic device description, reset hook and property table */
+    dc->desc  = "nForce Ethernet Controller";
+    dc->reset = qdev_nvnet_reset;
+    dc->props = nvnet_properties;
+}
+
+static Property nvnet_properties[] = {
+    DEFINE_NIC_PROPERTIES(NvNetState, conf),
+    DEFINE_PROP_STRING("dump", NvNetState, packet_dump_path), /* dump file */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static NetClientInfo net_nvnet_info = { /* passed to qemu_new_nic() */
+    .type                = NET_CLIENT_OPTIONS_KIND_NIC,
+    .size                = sizeof(NICState),
+    .can_receive         = nvnet_can_receive,
+    .receive             = nvnet_receive,
+    .receive_iov         = nvnet_receive_iov,
+    .cleanup             = nvnet_cleanup,
+    .link_status_changed = nvnet_set_link_status,
+};
+
+static const TypeInfo nvnet_info = {
+    .name                = "nvnet", /* QOM type name of this device */
+    .parent              = TYPE_PCI_DEVICE,
+    .instance_size       = sizeof(NvNetState),
+    .class_init          = nvnet_class_init,
+};
+
+static void nvnet_register(void)
+{
+    type_register_static(&nvnet_info); /* registers the "nvnet" type */
+}
+type_init(nvnet_register);
+
diff --git a/hw/xbox/smbus_adm1032.c b/hw/xbox/smbus_adm1032.c
new file mode 100644
index 0000000000..b6f6ffd25d
--- /dev/null
+++ b/hw/xbox/smbus_adm1032.c
@@ -0,0 +1,86 @@
+/*
+ * QEMU SMBus ADM1032 Temperature Monitor
+ *
+ * Copyright (c) 2012 espes
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw/hw.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus.h"
+
+//#define DEBUG
+
+static uint8_t tm_read_data(SMBusDevice *dev, uint8_t cmd, int n)
+{
+    #ifdef DEBUG
+        printf("tm_read_data: addr=0x%02x cmd=0x%02x n=%d\n",
+               dev->i2c.address, cmd, n);
+    #endif
+    /* Commands 0x0 and 0x1 report a constant 50; all others read as 0 */
+    switch (cmd) {
+        case 0x0:
+        case 0x1:
+            return 50; /* presumably a temperature in degrees C -- confirm */
+        default:
+            break;
+    }
+
+    return 0;
+}
+
+static int tm_init(SMBusDevice *dev)
+{
+    return 0; /* nothing to set up; always reports success */
+}
+
+
+static void smbus_adm1032_class_initfn(ObjectClass *klass, void *data)
+{
+    SMBusDeviceClass *sc = SMBUS_DEVICE_CLASS(klass);
+
+    /* Only init and read_data are provided; other callbacks stay unset */
+    sc->init = tm_init;
+    sc->read_data = tm_read_data;
+}
+
+static TypeInfo smbus_adm1032_info = {
+    .name = "smbus-adm1032", /* name used by smbus_adm1032_init() */
+    .parent = TYPE_SMBUS_DEVICE,
+    .instance_size = sizeof(SMBusDevice), /* no state beyond the base type */
+    .class_init = smbus_adm1032_class_initfn,
+};
+
+
+static void smbus_adm1032_register_devices(void)
+{
+    type_register_static(&smbus_adm1032_info); /* "smbus-adm1032" type */
+}
+
+type_init(smbus_adm1032_register_devices)
+
+
+void smbus_adm1032_init(i2c_bus *smbus, int address)
+{
+    DeviceState *tm;
+    tm = qdev_create((BusState *)smbus, "smbus-adm1032"); /* attach to smbus */
+    qdev_prop_set_uint8(tm, "address", address); /* passed as a uint8 prop */
+    qdev_init_nofail(tm);
+}
diff --git a/hw/xbox/smbus_cx25871.c b/hw/xbox/smbus_cx25871.c
new file mode 100644
index 0000000000..9c1d78f26f
--- /dev/null
+++ b/hw/xbox/smbus_cx25871.c
@@ -0,0 +1,117 @@
+/*
+ * QEMU SMBus Conexant CX25871 Video Encoder
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus.h"
+
+typedef struct SMBusCX25871Device {
+    SMBusDevice smbusdev; /* first member: callbacks cast SMBusDevice * to us */
+
+    uint8_t registers[256]; /* indexed by the 8-bit command byte */
+} SMBusCX25871Device;
+
+//#define DEBUG
+
+static void cx_quick_cmd(SMBusDevice *dev, uint8_t read)
+{
+#ifdef DEBUG
+    printf("cx_quick_cmd: addr=0x%02x read=%d\n", dev->i2c.address, read);
+#endif
+} /* quick commands are only traced (when DEBUG is defined) */
+
+static void cx_send_byte(SMBusDevice *dev, uint8_t val)
+{
+#ifdef DEBUG
+    printf("cx_send_byte: addr=0x%02x val=0x%02x\n",
+           dev->i2c.address, val);
+#endif
+} /* the byte is only traced (when DEBUG is defined), never stored */
+
+static uint8_t cx_receive_byte(SMBusDevice *dev)
+{
+#ifdef DEBUG
+    printf("cx_receive_byte: addr=0x%02x\n",
+           dev->i2c.address);
+#endif
+    return 0; /* always reads as 0 */
+}
+
+static void cx_write_data(SMBusDevice *dev, uint8_t cmd, uint8_t *buf, int len)
+{
+    SMBusCX25871Device *cx = (SMBusCX25871Device *) dev;
+#ifdef DEBUG
+    printf("cx_write_byte: addr=0x%02x cmd=0x%02x val=0x%02x\n",
+           dev->i2c.address, cmd, buf[0]);
+#endif
+    /* Store buf starting at index cmd, truncated at the end of the array */
+    memcpy(cx->registers+cmd, buf, MIN(len, 256-cmd));
+}
+
+static uint8_t cx_read_data(SMBusDevice *dev, uint8_t cmd, int n)
+{
+    SMBusCX25871Device *cx = (SMBusCX25871Device *) dev;
+    #ifdef DEBUG
+        printf("cx_read_data: addr=0x%02x cmd=0x%02x n=%d\n",
+               dev->i2c.address, cmd, n);
+    #endif
+    /* n is not used: the byte stored at index cmd is returned for every n */
+    return cx->registers[cmd];
+}
+
+static int smbus_cx_init(SMBusDevice *dev)
+{
+    return 0; /* registers[] is not initialised here; always succeeds */
+}
+
+static void smbus_cx25871_class_initfn(ObjectClass *klass, void *data)
+{
+    SMBusDeviceClass *sc = SMBUS_DEVICE_CLASS(klass);
+    /* Install the six SMBus callbacks defined in this file */
+    sc->init = smbus_cx_init;
+    sc->quick_cmd = cx_quick_cmd;
+    sc->send_byte = cx_send_byte;
+    sc->receive_byte = cx_receive_byte;
+    sc->write_data = cx_write_data;
+    sc->read_data = cx_read_data;
+}
+
+static TypeInfo smbus_cx25871_info = {
+    .name = "smbus-cx25871", /* name used by smbus_cx25871_init() */
+    .parent = TYPE_SMBUS_DEVICE,
+    .instance_size = sizeof(SMBusCX25871Device), /* includes registers[] */
+    .class_init = smbus_cx25871_class_initfn,
+};
+
+
+static void smbus_cx25871_register_devices(void)
+{
+    type_register_static(&smbus_cx25871_info); /* "smbus-cx25871" type */
+}
+
+type_init(smbus_cx25871_register_devices)
+
+
+void smbus_cx25871_init(i2c_bus *smbus, int address)
+{
+    DeviceState *cx;
+    cx = qdev_create((BusState *)smbus, "smbus-cx25871"); /* attach to smbus */
+    qdev_prop_set_uint8(cx, "address", address); /* passed as a uint8 prop */
+    qdev_init_nofail(cx);
+}
diff --git a/hw/xbox/smbus_xbox_smc.c b/hw/xbox/smbus_xbox_smc.c
new file mode 100644
index 0000000000..00a8b41a65
--- /dev/null
+++ b/hw/xbox/smbus_xbox_smc.c
@@ -0,0 +1,227 @@
+/*
+ * QEMU SMBus Xbox System Management Controller
+ *
+ * Copyright (c) 2011 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "hw/i2c/i2c.h"
+#include "hw/i2c/smbus.h"
+#include "qemu/config-file.h"
+#include "sysemu/sysemu.h"
+
+/*
+ * Hardware is a PIC16LC
+ * http://www.xbox-linux.org/wiki/PIC
+ */
+
+#define SMC_REG_VER                 0x01 /* read: next version-string char */
+#define SMC_REG_POWER               0x02 /* write: reset/shutdown request */
+#define     SMC_REG_POWER_RESET         0x01
+#define     SMC_REG_POWER_CYCLE         0x40
+#define     SMC_REG_POWER_SHUTDOWN      0x80
+#define SMC_REG_TRAYSTATE           0x03
+#define SMC_REG_AVPACK              0x04 /* read: always COMPOSITE here */
+#define     SMC_REG_AVPACK_SCART        0x00
+#define     SMC_REG_AVPACK_HDTV         0x01
+#define     SMC_REG_AVPACK_VGA_SOG      0x02
+#define     SMC_REG_AVPACK_SVIDEO       0x04
+#define     SMC_REG_AVPACK_COMPOSITE    0x06
+#define     SMC_REG_AVPACK_VGA          0x07
+#define SMC_REG_FANMODE             0x05
+#define SMC_REG_FANSPEED            0x06
+#define SMC_REG_LEDMODE             0x07
+#define SMC_REG_LEDSEQ              0x08
+#define SMC_REG_CPUTEMP             0x09
+#define SMC_REG_BOARDTEMP           0x0a
+#define SMC_REG_TRAYEJECT           0x0c
+#define SMC_REG_INTACK              0x0d
+#define SMC_REG_INTSTATUS           0x11
+#define     SMC_REG_INTSTATUS_POWER         0x01
+#define     SMC_REG_INTSTATUS_TRAYCLOSED    0x02
+#define     SMC_REG_INTSTATUS_TRAYOPENING   0x04
+#define     SMC_REG_INTSTATUS_AVPACK_PLUG   0x08
+#define     SMC_REG_INTSTATUS_AVPACK_UNPLUG 0x10
+#define     SMC_REG_INTSTATUS_EJECT_BUTTON  0x20
+#define     SMC_REG_INTSTATUS_TRAYCLOSING   0x40
+#define SMC_REG_RESETONEJECT        0x19
+#define SMC_REG_INTEN               0x1a
+#define SMC_REG_SCRATCH             0x1b /* read/write: one stored byte */
+#define     SMC_REG_SCRATCH_SHORT_ANIMATION 0x04
+
+static const char* smc_version_string = "P01"; /* served by SMC_REG_VER reads */
+
+
+//#define DEBUG
+
+typedef struct SMBusSMCDevice {
+    SMBusDevice smbusdev; /* first member: callbacks cast SMBusDevice * to us */
+    int version_string_index; /* cursor for SMC_REG_VER reads */
+    uint8_t scratch_reg; /* value behind SMC_REG_SCRATCH */
+} SMBusSMCDevice;
+
+static void smc_quick_cmd(SMBusDevice *dev, uint8_t read)
+{
+#ifdef DEBUG
+    printf("smc_quick_cmd: addr=0x%02x read=%d\n", dev->i2c.address, read);
+#endif
+} /* quick commands are only traced (when DEBUG is defined) */
+
+static void smc_send_byte(SMBusDevice *dev, uint8_t val)
+{
+#ifdef DEBUG
+    printf("smc_send_byte: addr=0x%02x val=0x%02x\n",
+           dev->i2c.address, val);
+#endif
+} /* the byte is only traced (when DEBUG is defined), never stored */
+
+static uint8_t smc_receive_byte(SMBusDevice *dev)
+{
+#ifdef DEBUG
+    printf("smc_receive_byte: addr=0x%02x\n",
+           dev->i2c.address);
+#endif
+    return 0; /* always reads as 0 */
+}
+
+static void smc_write_data(SMBusDevice *dev, uint8_t cmd, uint8_t *buf, int len)
+{
+    SMBusSMCDevice *smc = (SMBusSMCDevice *) dev;
+#ifdef DEBUG
+    printf("smc_write_byte: addr=0x%02x cmd=0x%02x val=0x%02x\n",
+           dev->i2c.address, cmd, buf[0]);
+#endif
+    /* Only buf[0] is interpreted; len is not consulted */
+    switch(cmd) {
+    case SMC_REG_VER:
+        /* version string reset: the read cursor is set to buf[0] */
+        smc->version_string_index = buf[0];
+        break;
+
+    case SMC_REG_POWER: /* reset/cycle bits are tested before shutdown */
+        if (buf[0] & (SMC_REG_POWER_RESET | SMC_REG_POWER_CYCLE))
+            qemu_system_reset_request();
+        else if (buf[0] & SMC_REG_POWER_SHUTDOWN)
+            qemu_system_shutdown_request();
+        break;
+
+    case SMC_REG_SCRATCH:
+        smc->scratch_reg = buf[0];
+        break;
+
+    /* challenge response: writes to 0x20/0x21 are accepted and discarded
+     * (http://www.xbox-linux.org/wiki/PIC_Challenge_Handshake_Sequence) */
+    case 0x20:
+        break;
+    case 0x21:
+        break;
+
+    default:
+        break;
+    }
+}
+
+static uint8_t smc_read_data(SMBusDevice *dev, uint8_t cmd, int n)
+{
+    SMBusSMCDevice *smc = (SMBusSMCDevice *) dev;
+    #ifdef DEBUG
+        printf("smc_read_data: addr=0x%02x cmd=0x%02x n=%d\n",
+               dev->i2c.address, cmd, n);
+    #endif
+    /* strlen, not sizeof: smc_version_string is a pointer, not an array */
+    switch(cmd) {
+    case SMC_REG_VER: /* each read returns the next character, wrapping */
+        return smc_version_string[
+            smc->version_string_index++ % strlen(smc_version_string)];
+
+    case SMC_REG_AVPACK:
+        /* pretend to have a composite av pack plugged in */
+        return SMC_REG_AVPACK_COMPOSITE;
+
+    case SMC_REG_SCRATCH:
+        return smc->scratch_reg;
+
+    /* challenge request:
+     * must be non-0 */
+    case 0x1c:
+        return 0x52;
+    case 0x1d:
+        return 0x72;
+    case 0x1e:
+        return 0xea;
+    case 0x1f:
+        return 0x46;
+
+    default:
+        break;
+    }
+
+    return 0; /* every other register reads as 0 */
+}
+
+static int smbus_smc_init(SMBusDevice *dev)
+{
+    QemuOpts *opts;
+    SMBusSMCDevice *smc = (SMBusSMCDevice *)dev;
+
+    smc->version_string_index = 0;
+    smc->scratch_reg = 0;
+    /* The "short_animation" machine option pre-sets the scratch register */
+    opts = qemu_opts_find(qemu_find_opts("machine"), NULL);
+    if (opts && qemu_opt_get_bool(opts, "short_animation", 0)) {
+        smc->scratch_reg = SMC_REG_SCRATCH_SHORT_ANIMATION;
+    }
+
+    return 0;
+}
+
+
+static void smbus_smc_class_initfn(ObjectClass *klass, void *data)
+{
+    SMBusDeviceClass *sc = SMBUS_DEVICE_CLASS(klass);
+    /* Install the six SMBus callbacks defined in this file */
+    sc->init = smbus_smc_init;
+    sc->quick_cmd = smc_quick_cmd;
+    sc->send_byte = smc_send_byte;
+    sc->receive_byte = smc_receive_byte;
+    sc->write_data = smc_write_data;
+    sc->read_data = smc_read_data;
+}
+
+static TypeInfo smbus_smc_info = {
+    .name = "smbus-xbox-smc", /* name used by smbus_xbox_smc_init() */
+    .parent = TYPE_SMBUS_DEVICE,
+    .instance_size = sizeof(SMBusSMCDevice),
+    .class_init = smbus_smc_class_initfn,
+};
+
+
+
+static void smbus_smc_register_devices(void)
+{
+    type_register_static(&smbus_smc_info); /* "smbus-xbox-smc" type */
+}
+
+type_init(smbus_smc_register_devices)
+
+
+void smbus_xbox_smc_init(i2c_bus *smbus, int address)
+{
+    DeviceState *smc;
+    smc = qdev_create((BusState *)smbus, "smbus-xbox-smc"); /* attach to bus */
+    qdev_prop_set_uint8(smc, "address", address); /* passed as a uint8 prop */
+    qdev_init_nofail(smc);
+}
diff --git a/hw/xbox/swizzle.c b/hw/xbox/swizzle.c
new file mode 100644
index 0000000000..b3180d95f7
--- /dev/null
+++ b/hw/xbox/swizzle.c
@@ -0,0 +1,165 @@
+/*
+ * QEMU texture swizzling routines
+ *
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2013 espes
+ * Copyright (c) 2007-2010 The Nouveau Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include "qemu/osdep.h"
+
+#include "hw/xbox/swizzle.h"
+
+/* Compute which bits of a swizzled index come from x, y and z.
+ * It creates a bit pattern like ..zyxzyxzyx from ..xxx, ..yyy and ..zzz
+ * If there are no bits left from any component it will pack the other masks
+ * more tightly (Example: zzxzxzyx = Fewer x than z and even fewer y).
+ * The final assert is parenthesized because == binds tighter than ^. */
+static void generate_swizzle_masks(unsigned int width,
+                                   unsigned int height,
+                                   unsigned int depth,
+                                   uint32_t* mask_x,
+                                   uint32_t* mask_y,
+                                   uint32_t* mask_z)
+{
+    uint32_t x = 0, y = 0, z = 0;
+    uint32_t bit = 1;      /* coordinate bit currently being placed */
+    uint32_t mask_bit = 1; /* next free bit of the swizzled index */
+    bool done;
+    do {
+        done = true;
+        if (bit < width) { x |= mask_bit; mask_bit <<= 1; done = false; }
+        if (bit < height) { y |= mask_bit; mask_bit <<= 1; done = false; }
+        if (bit < depth) { z |= mask_bit; mask_bit <<= 1; done = false; }
+        bit <<= 1;
+    } while(!done);
+    assert((x ^ y ^ z) == (mask_bit - 1)); /* each low bit is used exactly once */
+    *mask_x = x;
+    *mask_y = y;
+    *mask_z = z;
+}
+
+/* Deposit the low bits of value, LSB first, into the set bits of pattern:
+ * value abcd with pattern 11010100100 gives 0a0b0c00d00. Value bits that
+ * do not fit are dropped instead of spinning forever once bit shifts out. */
+static uint32_t fill_pattern(uint32_t pattern, uint32_t value)
+{
+    uint32_t result = 0;
+    uint32_t bit = 1;
+    while (value && bit) { /* bit == 0 means all 32 positions were visited */
+        if (pattern & bit) {
+            /* Copy bit to result */
+            result |= value & 1 ? bit : 0;
+            value >>= 1;
+        }
+        bit <<= 1;
+    }
+    return result;
+}
+
+static unsigned int get_swizzled_offset( /* byte offset of texel (x, y, z) */
+    unsigned int x, unsigned int y, unsigned int z,
+    uint32_t mask_x, uint32_t mask_y, uint32_t mask_z,
+    unsigned int bytes_per_pixel)
+{
+    return bytes_per_pixel * (fill_pattern(mask_x, x) /* texel index ... */
+                           | fill_pattern(mask_y, y)  /* ... is the OR of */
+                           | fill_pattern(mask_z, z)); /* the three parts */
+}
+
+void swizzle_box(
+    const uint8_t *src_buf,       /* linear source texels */
+    unsigned int width,
+    unsigned int height,
+    unsigned int depth,
+    uint8_t *dst_buf,             /* swizzled destination */
+    unsigned int row_pitch,       /* source bytes per row */
+    unsigned int slice_pitch,     /* source bytes per depth slice */
+    unsigned int bytes_per_pixel)
+{
+    uint32_t mask_x, mask_y, mask_z;
+    generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z);
+    /* z takes part in the offset so each slice gets its own destination */
+    int x, y, z;
+    for (z = 0; z < depth; z++) {
+        for (y = 0; y < height; y++) {
+            for (x = 0; x < width; x++) {
+                const uint8_t *src = src_buf
+                                         + y * row_pitch + x * bytes_per_pixel;
+                uint8_t *dst = dst_buf
+                    + get_swizzled_offset(x, y, z, mask_x, mask_y, mask_z,
+                                          bytes_per_pixel);
+                memcpy(dst, src, bytes_per_pixel);
+            }
+        }
+        src_buf += slice_pitch;
+    }
+}
+
+void unswizzle_box(
+    const uint8_t *src_buf,       /* swizzled source texels */
+    unsigned int width,
+    unsigned int height,
+    unsigned int depth,
+    uint8_t *dst_buf,             /* linear destination */
+    unsigned int row_pitch,       /* destination bytes per row */
+    unsigned int slice_pitch,     /* destination bytes per depth slice */
+    unsigned int bytes_per_pixel)
+{
+    uint32_t mask_x, mask_y, mask_z;
+    generate_swizzle_masks(width, height, depth, &mask_x, &mask_y, &mask_z);
+    /* Copy texel by texel from the swizzled offset to its linear position */
+    int x, y, z;
+    for (z = 0; z < depth; z++) {
+        for (y = 0; y < height; y++) {
+            for (x = 0; x < width; x++) {
+                const uint8_t *src = src_buf
+                    + get_swizzled_offset(x, y, z, mask_x, mask_y, mask_z,
+                                          bytes_per_pixel);
+                uint8_t *dst = dst_buf + y * row_pitch + x * bytes_per_pixel;
+                memcpy(dst, src, bytes_per_pixel);
+            }
+        }
+        dst_buf += slice_pitch; /* advance to the next linear slice */
+    }
+}
+
+void unswizzle_rect(
+    const uint8_t *src_buf,
+    unsigned int width,
+    unsigned int height,
+    uint8_t *dst_buf,
+    unsigned int pitch,           /* forwarded as row_pitch */
+    unsigned int bytes_per_pixel)
+{ /* 2D case: unswizzle_box() with depth 1 and slice pitch 0 */
+    unswizzle_box(src_buf, width, height, 1, dst_buf, pitch, 0, bytes_per_pixel);
+}
+
+void swizzle_rect(
+    const uint8_t *src_buf,
+    unsigned int width,
+    unsigned int height,
+    uint8_t *dst_buf,
+    unsigned int pitch,           /* forwarded as row_pitch */
+    unsigned int bytes_per_pixel)
+{ /* 2D case: swizzle_box() with depth 1 and slice pitch 0 */
+    swizzle_box(src_buf, width, height, 1, dst_buf, pitch, 0, bytes_per_pixel);
+}
diff --git a/hw/xbox/swizzle.h b/hw/xbox/swizzle.h
new file mode 100644
index 0000000000..14528a4865
--- /dev/null
+++ b/hw/xbox/swizzle.h
@@ -0,0 +1,62 @@
+/*
+ * QEMU texture swizzling routines
+ *
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2013 espes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_XBOX_SWIZZLE_H
+#define HW_XBOX_SWIZZLE_H
+
+void swizzle_box(
+    const uint8_t *src_buf,       /* linear source texels */
+    unsigned int width,
+    unsigned int height,
+    unsigned int depth,
+    uint8_t *dst_buf,             /* swizzled destination */
+    unsigned int row_pitch,       /* source bytes per row */
+    unsigned int slice_pitch,     /* source bytes per depth slice */
+    unsigned int bytes_per_pixel);
+
+void unswizzle_box(
+    const uint8_t *src_buf,       /* swizzled source texels */
+    unsigned int width,
+    unsigned int height,
+    unsigned int depth,
+    uint8_t *dst_buf,             /* linear destination */
+    unsigned int row_pitch,       /* destination bytes per row */
+    unsigned int slice_pitch,     /* destination bytes per depth slice */
+    unsigned int bytes_per_pixel);
+
+void unswizzle_rect(
+    const uint8_t *src_buf,       /* swizzled source texels */
+    unsigned int width,
+    unsigned int height,
+    uint8_t *dst_buf,             /* linear destination */
+    unsigned int pitch,           /* destination bytes per row */
+    unsigned int bytes_per_pixel);
+
+void swizzle_rect(
+    const uint8_t *src_buf,       /* linear source texels */
+    unsigned int width,
+    unsigned int height,
+    uint8_t *dst_buf,             /* swizzled destination */
+    unsigned int pitch,           /* source bytes per row */
+    unsigned int bytes_per_pixel);
+
+#endif
diff --git a/hw/xbox/xbox.c b/hw/xbox/xbox.c
new file mode 100644
index 0000000000..8efc9f82a9
--- /dev/null
+++ b/hw/xbox/xbox.c
@@ -0,0 +1,358 @@
+/*
+ * QEMU Xbox System Emulator
+ *
+ * Copyright (c) 2012 espes
+ *
+ * Based on pc.c
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "sysemu/arch_init.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+#include "hw/boards.h"
+#include "hw/ide.h"
+#include "hw/timer/mc146818rtc.h"
+#include "hw/timer/i8254.h"
+#include "hw/audio/pcspk.h"
+#include "sysemu/sysemu.h"
+#include "hw/cpu/icc_bus.h"
+#include "hw/sysbus.h"
+#include "hw/i2c/smbus.h"
+#include "sysemu/blockdev.h"
+#include "hw/loader.h"
+#include "exec/address-spaces.h"
+
+#include "hw/xbox/xbox_pci.h"
+#include "hw/xbox/nv2a.h"
+
+#include "hw/xbox/xbox.h"
+
+#include "net/net.h"
+
+/*
+ * Create guest RAM and map the BIOS image (mostly from pc_memory_init).
+ *
+ * system_memory: region that receives the RAM at offset 0
+ * mem_size:      size of the RAM region
+ * rom_memory:    region that receives the BIOS mirrors
+ * ram_memory:    out parameter, set to the newly created RAM region
+ *
+ * Prints an error and exits if the BIOS image cannot be found, is empty,
+ * is not a multiple of 64 KiB in size, or cannot be added as a ROM.
+ */
+static void xbox_memory_init(MemoryRegion *system_memory,
+                             ram_addr_t mem_size,
+                             MemoryRegion *rom_memory,
+                             MemoryRegion **ram_memory)
+{
+    MemoryRegion *main_ram;
+    MemoryRegion *flash;
+    MemoryRegion *mirror;
+    char *bios_path;
+    int image_size = -1;
+    uint32_t base;
+
+    /* Allocate RAM.  It is allocated as a single memory region; aliases are
+     * used to address portions of it, mostly for backwards compatibility
+     * with older qemus that used qemu_ram_alloc().
+     */
+    main_ram = g_malloc(sizeof(*main_ram));
+    memory_region_init_ram(main_ram, NULL, "xbox.ram", mem_size);
+    vmstate_register_ram_global(main_ram);
+    *ram_memory = main_ram;
+    memory_region_add_subregion(system_memory, 0, main_ram);
+
+    /* Locate the BIOS image (mostly from pc_sysfw). That code can't be used
+     * verbatim, since the BIOS has to be repeated over the top of memory.
+     */
+    if (!bios_name) {
+        bios_name = "bios.bin";
+    }
+    bios_path = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+    if (bios_path != NULL) {
+        image_size = get_image_size(bios_path);
+    }
+    if (image_size <= 0 || (image_size % 65536) != 0) {
+        fprintf(stderr, "qemu: could not load xbox BIOS '%s'\n", bios_name);
+        exit(1);
+    }
+
+    /* Backing region for the image; marked read-only. */
+    flash = g_malloc(sizeof(*flash));
+    memory_region_init_ram(flash, NULL, "xbox.bios", image_size);
+    vmstate_register_ram_global(flash);
+    memory_region_set_readonly(flash, true);
+
+    if (rom_add_file_fixed(bios_name, (uint32_t)(-image_size), -1) != 0) {
+        fprintf(stderr, "qemu: could not load xbox BIOS '%s'\n", bios_name);
+        exit(1);
+    }
+    g_free(bios_path);
+
+    /* Map read-only aliases of the BIOS, starting image_size bytes below
+     * 4 GiB and stepping down by image_size while the start address stays
+     * at or above 0xff000000.
+     */
+    base = (uint32_t)(-image_size);
+    while (base >= 0xff000000) {
+        mirror = g_malloc(sizeof(*mirror));
+        memory_region_init_alias(mirror, NULL, NULL, flash, 0, image_size);
+
+        memory_region_add_subregion(rom_memory, base, mirror);
+        memory_region_set_readonly(mirror, true);
+        base -= image_size;
+    }
+}
+
+/*
+ * Create the devices shared by the Xbox-family machines (mostly from
+ * pc_init1).
+ *
+ * args:           machine init arguments; ram_size and cpu_model are used
+ * default_eeprom: 256 bytes copied into the SMBus EEPROM at address 0x54
+ * out_isa_bus:    set to the ISA bus returned by xbox_pci_init()
+ */
+void xbox_init_common(QEMUMachineInitArgs *args,
+                      const uint8_t *default_eeprom,
+                      ISABus **out_isa_bus)
+{
+    int i;
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+
+    PCIBus *host_bus;
+    ISABus *isa_bus;
+
+    MemoryRegion *ram_memory;
+    MemoryRegion *pci_memory;
+
+    qemu_irq *cpu_irq;
+    qemu_irq *gsi;
+    qemu_irq *i8259;
+    GSIState *gsi_state;
+
+    ISADevice *rtc_state;
+    ISADevice *pit;
+    i2c_bus *smbus;
+    PCIBus *agp_bus;
+
+    DeviceState *icc_bridge;
+    icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE);
+    object_property_add_child(qdev_get_machine(), "icc-bridge",
+                              OBJECT(icc_bridge), NULL);
+
+    pc_cpus_init(cpu_model, icc_bridge);
+
+    pci_memory = g_new(MemoryRegion, 1);
+    memory_region_init(pci_memory, NULL, "pci", INT64_MAX);
+
+    /* allocate ram and load rom/bios */
+    xbox_memory_init(get_system_memory(), ram_size,
+                     pci_memory, &ram_memory);
+
+    gsi_state = g_malloc0(sizeof(*gsi_state));
+    gsi = qemu_allocate_irqs(gsi_handler, gsi_state, GSI_NUM_PINS);
+
+    /* init buses */
+    xbox_pci_init(gsi,
+                  get_system_memory(), get_system_io(),
+                  pci_memory, ram_memory,
+                  &host_bus,
+                  &isa_bus,
+                  &smbus,
+                  &agp_bus);
+
+    /* interrupt wiring */
+    isa_bus_irqs(isa_bus, gsi);
+    cpu_irq = pc_allocate_cpu_irq();
+    i8259 = i8259_init(isa_bus, cpu_irq[0]);
+
+    for (i = 0; i < ISA_NUM_IRQS; i++) {
+        gsi_state->i8259_irq[i] = i8259[i];
+    }
+
+    /* basic device init */
+    rtc_state = rtc_init(isa_bus, 2000, NULL);
+    pit = pit_init(isa_bus, 0x40, 0, NULL);
+
+    /* does apparently have a pc speaker, though not used? */
+    pcspk_init(isa_bus, pit);
+
+    /* IDE: piix3's ide be right for now, maybe. Neither the returned device
+     * nor its "ide.0"/"ide.1" child buses are needed afterwards, so they are
+     * not stored.
+     */
+    DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
+    ide_drive_get(hd, MAX_IDE_BUS);
+    pci_piix3_ide_init(host_bus, hd, PCI_DEVFN(9, 0));
+
+    // xbox bios wants this bit pattern set to mark the data as valid
+    uint8_t bits = 0x55;
+    for (i = 0x10; i < 0x70; i++) {
+        rtc_set_memory(rtc_state, i, bits);
+        bits = ~bits;
+    }
+    bits = 0x55;
+    for (i = 0x80; i < 0x100; i++) {
+        rtc_set_memory(rtc_state, i, bits);
+        bits = ~bits;
+    }
+
+    /* smbus devices */
+    uint8_t *eeprom_buf = g_malloc0(256);
+    memcpy(eeprom_buf, default_eeprom, 256);
+    smbus_eeprom_init_single(smbus, 0x54, eeprom_buf);
+
+    smbus_xbox_smc_init(smbus, 0x10);
+    smbus_cx25871_init(smbus, 0x45);
+    smbus_adm1032_init(smbus, 0x4c);
+
+    /* USB: two 4-port OHCI controllers; slot 3 is created before slot 2 */
+    PCIDevice *usb1 = pci_create(host_bus, PCI_DEVFN(3, 0), "pci-ohci");
+    qdev_prop_set_uint32(&usb1->qdev, "num-ports", 4);
+    qdev_init_nofail(&usb1->qdev);
+
+    PCIDevice *usb0 = pci_create(host_bus, PCI_DEVFN(2, 0), "pci-ohci");
+    qdev_prop_set_uint32(&usb0->qdev, "num-ports", 4);
+    qdev_init_nofail(&usb0->qdev);
+
+    /* Ethernet: the machine has a single nvnet device, so attach at most one
+     * NIC (nd_table[0]) and initialise the device exactly once; calling
+     * qdev_init_nofail() once per configured NIC would initialise the same
+     * device repeatedly when nb_nics > 1. With no NIC configured the device
+     * is created but left uninitialised.
+     */
+    PCIDevice *nvnet = pci_create(host_bus, PCI_DEVFN(4, 0), "nvnet");
+    if (nb_nics > 0) {
+        NICInfo *nd = &nd_table[0];
+        qemu_check_nic_model(nd, "nvnet");
+        qdev_set_nic_properties(&nvnet->qdev, nd);
+        qdev_init_nofail(&nvnet->qdev);
+    }
+
+    /* APU */
+    pci_create_simple(host_bus, PCI_DEVFN(5, 0), "mcpx-apu");
+
+    /* ACI */
+    pci_create_simple(host_bus, PCI_DEVFN(6, 0), "mcpx-aci");
+
+    /* GPU */
+    nv2a_init(agp_bus, PCI_DEVFN(0, 0), ram_memory);
+
+    *out_isa_bus = isa_bus;
+}
+
+static void xbox_init(QEMUMachineInitArgs *args) /* .init hook of the "xbox" machine */
+{
+#if 0 /* compiled out: alternative blank EEPROM image */
+    /* Placeholder blank eeprom for xbox 1.0:
+     *   Serial number 000000000000
+     *   Mac address 00:00:00:00:00:00
+     *   ...etc.
+     */
+    const uint8_t eeprom[] = {
+        0x25, 0x42, 0x88, 0x24, 0xA3, 0x1A, 0x7D, 0xF4,
+        0xEE, 0x53, 0x3F, 0x39, 0x5D, 0x27, 0x98, 0x0E,
+        0x58, 0xB3, 0x26, 0xC3, 0x70, 0x82, 0xE5, 0xC6,
+        0xF7, 0xC5, 0x54, 0x38, 0xA0, 0x58, 0xB9, 0x5D,
+        0xB7, 0x27, 0xC7, 0xB1, 0x67, 0xCF, 0x99, 0x3E,
+        0xC8, 0x6E, 0xC8, 0x53, 0xEF, 0x7C, 0x01, 0x37,
+        0x6F, 0x6E, 0x2F, 0x6F, 0x30, 0x30, 0x30, 0x30,
+        0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+#endif /* end of the compiled-out blank image */
+    /* bunnie's eeprom: the 256-byte image that is passed to xbox_init_common() */
+    const uint8_t eeprom[] = {
+        0xe3, 0x1c, 0x5c, 0x23, 0x6a, 0x58, 0x68, 0x37,
+        0xb7, 0x12, 0x26, 0x6c, 0x99, 0x11, 0x30, 0xd1,
+        0xe2, 0x3e, 0x4d, 0x56, 0xf7, 0x73, 0x2b, 0x73,
+        0x85, 0xfe, 0x7f, 0x0a, 0x08, 0xef, 0x15, 0x3c,
+        0x77, 0xee, 0x6d, 0x4e, 0x93, 0x2f, 0x28, 0xee,
+        0xf8, 0x61, 0xf7, 0x94, 0x17, 0x1f, 0xfc, 0x11,
+        0x0b, 0x84, 0x44, 0xed, 0x31, 0x30, 0x35, 0x35,
+        0x38, 0x31, 0x31, 0x31, 0x34, 0x30, 0x30, 0x33,
+        0x00, 0x50, 0xf2, 0x4f, 0x65, 0x52, 0x00, 0x00,
+        0x0a, 0x1e, 0x35, 0x33, 0x71, 0x85, 0x31, 0x4d,
+        0x59, 0x12, 0x38, 0x48, 0x1c, 0x91, 0x53, 0x60,
+        0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x75, 0x61, 0x57, 0xfb, 0x2c, 0x01, 0x00, 0x00,
+        0x45, 0x53, 0x54, 0x00, 0x45, 0x44, 0x54, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x0a, 0x05, 0x00, 0x02, 0x04, 0x01, 0x00, 0x02,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0xc4, 0xff, 0xff, 0xff,
+        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+
+    ISABus *isa_bus; /* required out-parameter; not used afterwards in this function */
+    xbox_init_common(args, eeprom, &isa_bus);
+}
+
+static QEMUMachine xbox_machine = {
+    .init = xbox_init,
+    .name = "xbox",
+    .desc = "Microsoft Xbox",
+    .no_sdcard = 1,
+    .no_cdrom = 1,
+    .no_floppy = 1,
+    .max_cpus = 1,
+    PC_DEFAULT_MACHINE_OPTIONS /* kept last so its initializers keep their precedence */
+};
+
+static void xbox_machine_init(void)
+{
+    qemu_register_machine(&xbox_machine); /* adds the "xbox" machine definition */
+}
+machine_init(xbox_machine_init);
\ No newline at end of file
diff --git a/hw/xbox/xbox.h b/hw/xbox/xbox.h
new file mode 100644
index 0000000000..9c0c6b1346
--- /dev/null
+++ b/hw/xbox/xbox.h
@@ -0,0 +1,32 @@
+/*
+ * QEMU Xbox System Emulator
+ *
+ * Copyright (c) 2013 espes
+ *
+ * Based on pc.c
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_XBOX_H
+#define HW_XBOX_H
+
+#define MAX_IDE_BUS 2 /* number of IDE buses xbox.c passes to ide_drive_get() */
+
+void xbox_init_common(QEMUMachineInitArgs *args,      /* machine init arguments */
+                      const uint8_t *default_eeprom,  /* xbox.c copies 256 bytes from this */
+                      ISABus **out_isa_bus);          /* set to the machine's ISA bus on return */
+
+#endif
\ No newline at end of file
diff --git a/hw/xbox/xbox_pci.c b/hw/xbox/xbox_pci.c
new file mode 100644
index 0000000000..8e119536aa
--- /dev/null
+++ b/hw/xbox/xbox_pci.c
@@ -0,0 +1,555 @@
+/*
+ * QEMU Xbox PCI buses implementation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "hw/hw.h"
+#include "qemu/range.h"
+#include "hw/isa/isa.h"
+#include "hw/sysbus.h"
+#include "hw/loader.h"
+#include "qemu/config-file.h"
+#include "hw/i386/pc.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_bridge.h"
+#include "exec/address-spaces.h"
+#include "qemu-common.h"
+
+#include "hw/xbox/acpi_xbox.h"
+#include "hw/xbox/amd_smbus.h"
+
+#include "hw/xbox/xbox_pci.h"
+
+
+ /*
+  * The Xbox chipset is based on the nForce 420, which in turn was based on the AMD-760.
+  * 
+  * http://support.amd.com/us/ChipsetMotherboard_TechDocs/24494.pdf
+  * http://support.amd.com/us/ChipsetMotherboard_TechDocs/24416.pdf
+  * http://support.amd.com/us/ChipsetMotherboard_TechDocs/24467.pdf
+  *
+  * http://support.amd.com/us/ChipsetMotherboard_TechDocs/24462.pdf
+  *
+  * - 'NV2A' combination northbridge/gpu
+  * - 'MCPX' combination southbridge/apu
+  */
+
+
+//#define DEBUG  (uncomment to make XBOXPCI_DPRINTF print)
+
+#ifdef DEBUG /* debug build: forward to printf */
+# define XBOXPCI_DPRINTF(format, ...)     printf(format, ## __VA_ARGS__)
+#else /* normal build: expands to an empty statement */
+# define XBOXPCI_DPRINTF(format, ...)     do { } while (0)
+#endif
+
+
+
+#define XBOX_NUM_INT_IRQS 8 /* IRQ lines 0-7: devices on the internal bus */
+#define XBOX_NUM_PIRQS    4 /* IRQ lines 8-11: PIRQs */
+
+#define XBOX_NUM_PIC_IRQS 16 /* routing targets at or above this are ignored */
+
+#define XBOX_LPC_ACPI_IRQ_ROUT 0x64 /* LPC config offset: 8 routing bits per ACPI irq */
+#define XBOX_LPC_PIRQ_ROUT     0x68 /* LPC config offset: one routing byte per PIRQ */
+#define XBOX_LPC_INT_IRQ_ROUT  0x6C /* LPC config offset: 4 routing bits per internal irq */
+
+/* PCI bus IRQ handler: forwards line 'pirq' (numbered as xbox_lpc_map_irq()
+ * returns it) to the PIC input selected by the LPC routing registers.
+ */
+static void xbox_lpc_set_irq(void *opaque, int pirq, int level)
+{
+    XBOX_LPCState *s = opaque;
+    int target;
+
+    assert(pirq >= 0);
+    assert(pirq < XBOX_NUM_INT_IRQS + XBOX_NUM_PIRQS);
+
+    if (pirq >= XBOX_NUM_INT_IRQS) {
+        /* pirqs: one routing byte each */
+        target = s->dev.config[XBOX_LPC_PIRQ_ROUT + pirq - XBOX_NUM_INT_IRQS];
+        if (target >= XBOX_NUM_PIC_IRQS) {
+            return;
+        }
+    } else {
+        /* devices on the internal bus: one routing nibble each, 0 is dropped */
+        uint32_t nibbles = pci_get_long(s->dev.config + XBOX_LPC_INT_IRQ_ROUT);
+        target = (nibbles >> (pirq * 4)) & 0xF;
+        if (target == 0) {
+            return;
+        }
+    }
+
+    qemu_set_irq(s->pic[target], level);
+}
+
+static int xbox_lpc_map_irq(PCIDevice *pci_dev, int intx)
+{
+    /* internal-bus line per slot; -1 marks slots without a fixed line */
+    static const int8_t internal_line[10] = {
+        -1, -1, 0 /* usb0 */, 1 /* usb1 */, 2 /* nic */,
+        3 /* apu */, 4 /* aci */, -1, -1, 6 /* ide */
+    };
+    const int slot = PCI_SLOT(pci_dev->devfn);
+
+    if (slot < 10 && internal_line[slot] >= 0) {
+        return internal_line[slot];
+    }
+    if (slot == 30) {
+        /* agp bridge -> PIRQC? */
+        return XBOX_NUM_INT_IRQS + 2;
+    }
+    /* don't actually know how this should work */
+    assert(false);
+    return XBOX_NUM_INT_IRQS + ((slot + intx) & 3);
+}
+
+/* ACPI irq 0/1 -> PIC input taken from the ACPI routing register. */
+static void xbox_lpc_set_acpi_irq(void *opaque, int irq_num, int level)
+{
+    XBOX_LPCState *s = opaque;
+    uint32_t route_reg;
+    int target;
+    assert(irq_num == 0 || irq_num == 1);
+    route_reg = pci_get_long(s->dev.config + XBOX_LPC_ACPI_IRQ_ROUT);
+    target = (route_reg >> (irq_num * 8)) & 0xff;
+    if (target != 0 && target < XBOX_NUM_PIC_IRQS) {
+        qemu_set_irq(s->pic[target], level);
+    }
+}
+
+
+
+void xbox_pci_init(qemu_irq *pic,                      /* PIC inputs; stored as lpc_state->pic */
+                   MemoryRegion *address_space_mem,    /* system memory; receives the PCI hole alias */
+                   MemoryRegion *address_space_io,     /* I/O space handed to pci_bus_new() */
+                   MemoryRegion *pci_memory,           /* PCI memory space handed to pci_bus_new() */
+                   MemoryRegion *ram_memory,           /* recorded in the xbox-pci device state */
+                   PCIBus **out_host_bus,              /* out: the host PCI bus */
+                   ISABus **out_isa_bus,               /* out: ISA bus owned by the xbox-lpc device */
+                   i2c_bus **out_smbus,                /* out: SMBus of the xbox-smbus device */
+                   PCIBus **out_agp_bus)               /* out: secondary bus of the xbox-agp bridge */
+{
+    DeviceState *host;
+    PCIHostState *host_state;
+    PCIBus *host_bus;
+    PCIDevice *bridge;
+    XBOX_PCIState *bridge_state;
+
+    /* pci host bus: created first, every other device below sits on it */
+    host = qdev_create(NULL, "xbox-pcihost");
+    host_state = PCI_HOST_BRIDGE(host);
+
+    host_bus = pci_bus_new(host, NULL,
+                           pci_memory, address_space_io, 0, TYPE_PCI_BUS);
+    host_state->bus = host_bus;
+    qdev_init_nofail(host);
+
+    bridge = pci_create_simple_multifunction(host_bus, PCI_DEVFN(0, 0),
+                                             true, "xbox-pci");
+    bridge_state = XBOX_PCI_DEVICE(bridge);
+    bridge_state->ram_memory = ram_memory;
+    bridge_state->pci_address_space = pci_memory;
+    bridge_state->system_memory = address_space_mem;
+
+    /* PCI hole: system memory from ram_size up to 4 GiB aliases the same range of PCI memory */
+    /* TODO: move to xbox-pci init */
+    memory_region_init_alias(&bridge_state->pci_hole, OBJECT(bridge),
+                             "pci-hole",
+                             bridge_state->pci_address_space,
+                             ram_size, /* NOTE(review): not a parameter or local; presumably QEMU's global ram_size -- confirm */
+                             0x100000000ULL - ram_size);
+    memory_region_add_subregion(bridge_state->system_memory, ram_size,
+                                &bridge_state->pci_hole);
+
+
+    /* lpc bridge: its state is also the opaque for the PCI IRQ routing callbacks */
+    PCIDevice *lpc = pci_create_simple_multifunction(host_bus, PCI_DEVFN(1, 0),
+                                                     true, "xbox-lpc");
+    XBOX_LPCState *lpc_state = XBOX_LPC_DEVICE(lpc);
+    lpc_state->pic = pic;
+
+    pci_bus_irqs(host_bus, xbox_lpc_set_irq, xbox_lpc_map_irq, lpc_state,
+                 XBOX_NUM_INT_IRQS + XBOX_NUM_PIRQS);
+
+    qemu_irq *acpi_irq = qemu_allocate_irqs(xbox_lpc_set_acpi_irq,
+                                            lpc_state, 2);
+    xbox_pm_init(lpc, &lpc_state->pm, acpi_irq[0]); /* ACPI irq 0: power management */
+    //xbox_lpc_reset(&s->dev.qdev);
+
+
+    /* smbus: function 1 of slot 1, uses ACPI irq 1 */
+    PCIDevice *smbus = pci_create_simple_multifunction(host_bus, PCI_DEVFN(1, 1),
+                                                       true, "xbox-smbus");
+
+    XBOX_SMBState *smbus_state = XBOX_SMBUS_DEVICE(smbus);
+    amd756_smbus_init(&smbus->qdev, &smbus_state->smb, acpi_irq[1]);
+
+
+    /* AGP bus: bridge at slot 30; its secondary bus is returned to the caller */
+    PCIDevice *agp = pci_create_simple(host_bus, PCI_DEVFN(30, 0), "xbox-agp");
+    //qdev = &br->dev.qdev;
+    //qdev_init_nofail(qdev);
+    PCIBus *agp_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(agp));
+
+
+
+    *out_host_bus = host_bus;
+    *out_isa_bus = lpc_state->isa_bus;
+    *out_smbus = smbus_state->smb.smbus;
+    *out_agp_bus = agp_bus;
+}
+
+
+#define XBOX_SMBUS_BASE_BAR 1
+
+static void xbox_smb_ioport_writeb(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size)
+{
+    XBOX_SMBState *smb_dev = opaque;
+    /* NOTE(review): confirm addr is absolute here, not region-relative */
+    const uint64_t bar_base = smb_dev->dev.io_regions[XBOX_SMBUS_BASE_BAR].addr;
+    amd756_smb_ioport_writeb(&smb_dev->smb, addr - bar_base, val);
+}
+
+static uint64_t xbox_smb_ioport_readb(void *opaque, hwaddr addr,
+                                      unsigned size)
+{
+    XBOX_SMBState *smb_dev = opaque;
+    /* NOTE(review): confirm addr is absolute here, not region-relative */
+    const uint64_t bar_base = smb_dev->dev.io_regions[XBOX_SMBUS_BASE_BAR].addr;
+    return amd756_smb_ioport_readb(&smb_dev->smb, addr - bar_base);
+}
+
+static const MemoryRegionOps xbox_smbus_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .write = xbox_smb_ioport_writeb,
+    .read = xbox_smb_ioport_readb,
+    .impl = {
+        .max_access_size = 1, /* the handlers are implemented byte-wide */
+        .min_access_size = 1,
+    },
+};
+
+/* Init callback for "xbox-smbus": registers a 32-byte I/O BAR (BAR 1). */
+static int xbox_smbus_initfn(PCIDevice *dev)
+{
+    XBOX_SMBState *smb_dev = XBOX_SMBUS_DEVICE(dev);
+    MemoryRegion *bar = &smb_dev->smb_bar;
+
+    memory_region_init_io(bar, OBJECT(dev), &xbox_smbus_ops, smb_dev,
+                          "xbox-smbus-bar", 32);
+    pci_register_bar(dev, XBOX_SMBUS_BASE_BAR, PCI_BASE_ADDRESS_SPACE_IO, bar);
+    return 0;
+}
+
+
+/* Class setup for "xbox-smbus": description, PCI identity and init hook. */
+static void xbox_smbus_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->no_user = 1;
+    dev_class->desc = "nForce PCI System Management";
+    pci_class->class_id = PCI_CLASS_SERIAL_SMBUS;
+    pci_class->revision = 161;
+    pci_class->device_id = PCI_DEVICE_ID_NVIDIA_NFORCE_SMBUS;
+    pci_class->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    pci_class->init = xbox_smbus_initfn;
+}
+
+static const TypeInfo xbox_smbus_info = {
+    .class_init = xbox_smbus_class_init,
+    .instance_size = sizeof(XBOX_SMBState),
+    .parent = TYPE_PCI_DEVICE,
+    .name = "xbox-smbus", /* type name used by XBOX_SMBUS_DEVICE() */
+};
+
+
+
+/* Init callback for "xbox-lpc": creates the ISA bus and, when the machine
+ * option "bootrom" is set, loads the 512-byte MCPX boot ROM image into
+ * s->bootrom_data for xbox_lpc_reset(). The image can't go through loader.c
+ * because it overlaps with the bios. Returns 0 on success or when no bootrom
+ * was requested, -1 if the image is missing, mis-sized or unreadable. */
+static int xbox_lpc_initfn(PCIDevice *d)
+{
+    XBOX_LPCState *s = XBOX_LPC_DEVICE(d);
+    QemuOpts *machine_opts;
+    const char *bootrom_file = NULL;
+    char *filename;
+    int size, fd, ret = -1;
+
+    s->isa_bus = isa_bus_new(&d->qdev, get_system_io());
+    machine_opts = qemu_opts_find(qemu_find_opts("machine"), NULL);
+    if (machine_opts) {
+        bootrom_file = qemu_opt_get(machine_opts, "bootrom");
+    }
+    if (!bootrom_file) {
+        return 0;
+    }
+    filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bootrom_file);
+    size = filename ? get_image_size(filename) : -1; /* a missing file reports -1 */
+    if (size != (int)sizeof(s->bootrom_data)) {
+        fprintf(stderr, "MCPX bootrom should be 512 bytes, got %d\n", size);
+        goto out;
+    }
+    fd = open(filename, O_RDONLY | O_BINARY);
+    if (fd >= 0 && read(fd, s->bootrom_data, size) == size) {
+        s->bootrom_size = size; /* only published once the image is fully read */
+        ret = 0;
+    } else {
+        fprintf(stderr, "could not read MCPX bootrom '%s'\n", filename);
+    }
+    if (fd >= 0) {
+        close(fd);
+    }
+out:
+    g_free(filename); /* g_free(NULL) is a no-op, so this covers the not-found case */
+    return ret;
+}
+
+
+
+/*
+ * Reset handler for "xbox-lpc".
+ *
+ * When a boot ROM image was loaded, write it over the last bootrom_size
+ * bytes below 4 GiB, i.e. on top of the BIOS.
+ *
+ * Executing from a memory region that is not page aligned does not work
+ * in QEMU, so the boot ROM can't simply be mapped in. It also needs to be
+ * possible to disable it at runtime, and it shouldn't be visible on top of
+ * the BIOS mirrors. Writing it on top of the BIOS is the lazy stop-gap.
+ *
+ * This is done here because loader.c loads ROMs into memory in a reset
+ * handler, and this handler /should/ be run after that one.
+ */
+static void xbox_lpc_reset(DeviceState *dev)
+{
+    XBOX_LPCState *lpc = XBOX_LPC_DEVICE(PCI_DEVICE(dev));
+    hwaddr top;
+
+    if (!lpc->bootrom_size) {
+        return;
+    }
+
+    top = (uint32_t)(-lpc->bootrom_size);
+    cpu_physical_memory_write_rom(top,
+                                  lpc->bootrom_data,
+                                  lpc->bootrom_size);
+}
+
+
+#if 0 /* compiled out: PM base handling via a config register, see below */
+/* Xbox 1.1 uses a config register instead of a bar to set the pm base address */
+#define XBOX_LPC_PMBASE 0x84
+#define XBOX_LPC_PMBASE_ADDRESS_MASK 0xff00
+#define XBOX_LPC_PMBASE_DEFAULT 0x1
+
+static void xbox_lpc_pmbase_update(XBOX_LPCState *s) /* re-reads PMBASE and updates the PM I/O space */
+{
+    uint32_t pm_io_base = pci_get_long(s->dev.config + XBOX_LPC_PMBASE);
+    pm_io_base &= XBOX_LPC_PMBASE_ADDRESS_MASK;
+
+    xbox_pm_iospace_update(&s->pm, pm_io_base);
+}
+
+static void xbox_lpc_reset(DeviceState *dev) /* same name as the active reset handler in this file */
+{
+    PCIDevice *d = PCI_DEVICE(dev);
+    XBOX_LPCState *s = XBOX_LPC_DEVICE(d);
+
+    pci_set_long(s->dev.config + XBOX_LPC_PMBASE, XBOX_LPC_PMBASE_DEFAULT);
+    xbox_lpc_pmbase_update(s);
+}
+
+static void xbox_lpc_config_write(PCIDevice *dev,
+                                    uint32_t addr, uint32_t val, int len)
+{
+    XBOX_LPCState *s = XBOX_LPC_DEVICE(dev);
+
+    pci_default_write_config(dev, addr, val, len);
+    if (ranges_overlap(addr, len, XBOX_LPC_PMBASE, 2)) { /* only react to writes touching PMBASE */
+        xbox_lpc_pmbase_update(s);
+    }
+}
+
+static int xbox_lpc_post_load(void *opaque, int version_id)
+{
+    XBOX_LPCState *s = opaque;
+    xbox_lpc_pmbase_update(s);
+    return 0;
+}
+
+static const VMStateDescription vmstate_xbox_lpc = {
+    .name = "XBOX LPC",
+    .version_id = 1,
+    .post_load = xbox_lpc_post_load,
+};
+#endif /* end of the compiled-out PMBASE code */
+
+static void xbox_lpc_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* qdev-level settings (no vmsd is set here) */
+    dev_class->reset = xbox_lpc_reset;
+    dev_class->no_user = 1;
+    dev_class->desc = "nForce LPC Bridge";
+
+    /* PCI identity and hooks (config_write is not overridden here) */
+    pci_class->class_id = PCI_CLASS_BRIDGE_ISA;
+    pci_class->revision = 212;
+    pci_class->device_id = PCI_DEVICE_ID_NVIDIA_NFORCE_LPC;
+    pci_class->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    pci_class->init = xbox_lpc_initfn;
+    pci_class->no_hotplug = 1;
+}
+
+static const TypeInfo xbox_lpc_info = {
+    .class_init = xbox_lpc_class_init,
+    .instance_size = sizeof(XBOX_LPCState),
+    .parent = TYPE_PCI_DEVICE,
+    .name = "xbox-lpc", /* type name used by XBOX_LPC_DEVICE() */
+};
+
+
+
+
+static int xbox_agp_initfn(PCIDevice *d)
+{
+    pci_set_word(d->config + PCI_PREF_MEMORY_LIMIT, PCI_PREF_RANGE_TYPE_32);
+    pci_set_word(d->config + PCI_PREF_MEMORY_BASE, PCI_PREF_RANGE_TYPE_32);
+    return pci_bridge_initfn(d, TYPE_PCI_BUS); /* generic bridge init runs after the config tweaks */
+}
+
+static void xbox_agp_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->reset = pci_bridge_reset;
+    dev_class->desc = "nForce AGP to PCI Bridge";
+    /* PCI identity plus the generic pci_bridge_* hooks */
+    pci_class->revision = 161;
+    pci_class->device_id = PCI_DEVICE_ID_NVIDIA_NFORCE_AGP;
+    pci_class->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    pci_class->is_bridge = 1;
+    pci_class->config_write = pci_bridge_write_config;
+    pci_class->exit = pci_bridge_exitfn;
+    pci_class->init = xbox_agp_initfn;
+}
+
+static const TypeInfo xbox_agp_info = {
+    .class_init    = xbox_agp_class_init,
+    .instance_size = sizeof(PCIBridge),
+    .parent        = TYPE_PCI_BRIDGE,
+    .name          = "xbox-agp", /* type name used when xbox_pci_init() creates the bridge */
+};
+
+
+
+
+
+
+/* Init callback for "xbox-pci": nothing to set up here, always returns 0. */
+static int xbox_pci_initfn(PCIDevice *d)
+{
+    (void)d;
+    return 0;
+}
+
+static void xbox_pci_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->no_user = 1;
+    dev_class->desc = "Xbox PCI Host";
+
+    /* PCI identity and hooks (config_write is not overridden here) */
+    pci_class->class_id = PCI_CLASS_BRIDGE_HOST;
+    pci_class->revision = 161;
+    pci_class->device_id = PCI_DEVICE_ID_NVIDIA_XBOX_PCHB;
+    pci_class->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    pci_class->init = xbox_pci_initfn;
+    pci_class->no_hotplug = 1;
+}
+
+static const TypeInfo xbox_pci_info = {
+    .class_init    = xbox_pci_class_init,
+    .instance_size = sizeof(XBOX_PCIState),
+    .parent        = TYPE_PCI_DEVICE,
+    .name          = "xbox-pci", /* type name used by XBOX_PCI_DEVICE() */
+};
+
+
+
+#define CONFIG_ADDR 0xcf8 /* I/O port of the "pci-conf-idx" window */
+#define CONFIG_DATA 0xcfc /* I/O port of the "pci-conf-data" window */
+
+/* Init callback for "xbox-pcihost": registers the two 4-byte I/O windows. */
+static int xbox_pcihost_initfn(SysBusDevice *dev)
+{
+    PCIHostState *host = PCI_HOST_BRIDGE(dev);
+    Object *owner = OBJECT(dev);
+
+    memory_region_init_io(&host->conf_mem, owner, &pci_host_conf_le_ops,
+                          host, "pci-conf-idx", 4);
+    sysbus_add_io(dev, CONFIG_ADDR, &host->conf_mem);
+    sysbus_init_ioports(&host->busdev, CONFIG_ADDR, 4);
+
+    memory_region_init_io(&host->data_mem, owner, &pci_host_data_le_ops,
+                          host, "pci-conf-data", 4);
+    sysbus_add_io(dev, CONFIG_DATA, &host->data_mem);
+    sysbus_init_ioports(&host->busdev, CONFIG_DATA, 4);
+
+    return 0;
+}
+
+
+static void xbox_pcihost_class_init(ObjectClass *klass, void *data)
+{
+    SysBusDeviceClass *sysbus_class = SYS_BUS_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->no_user = 1;
+    sysbus_class->init = xbox_pcihost_initfn;
+}
+
+static const TypeInfo xbox_pcihost_info = {
+    .class_init    = xbox_pcihost_class_init,
+    .instance_size = sizeof(PCIHostState),
+    .parent        = TYPE_PCI_HOST_BRIDGE,
+    .name          = "xbox-pcihost", /* type name passed to qdev_create() in xbox_pci_init() */
+};
+
+
+static void xboxpci_register_types(void)
+{
+    static const TypeInfo *const all_types[] = { &xbox_pcihost_info,
+        &xbox_pci_info, &xbox_agp_info, &xbox_lpc_info, &xbox_smbus_info };
+    unsigned int n;
+    for (n = 0; n < sizeof(all_types) / sizeof(all_types[0]); n++) {
+        type_register(all_types[n]);
+    }
+}
+
+type_init(xboxpci_register_types)
diff --git a/hw/xbox/xbox_pci.h b/hw/xbox/xbox_pci.h
new file mode 100644
index 0000000000..8cd24670c6
--- /dev/null
+++ b/hw/xbox/xbox_pci.h
@@ -0,0 +1,83 @@
+/*
+ * QEMU Xbox PCI buses implementation
+ *
+ * Copyright (c) 2012 espes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#ifndef HW_XBOX_PCI_H
+#define HW_XBOX_PCI_H
+
+#include "hw/hw.h"
+#include "hw/isa/isa.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/xbox/amd_smbus.h"
+#include "hw/acpi/acpi.h"
+#include "hw/xbox/acpi_xbox.h"
+
+
+typedef struct XBOX_PCIState { /* instance state of the "xbox-pci" device */
+    PCIDevice dev;
+
+    MemoryRegion *ram_memory;        /* set by xbox_pci_init() from its ram_memory argument */
+    MemoryRegion *pci_address_space; /* set by xbox_pci_init() from its pci_memory argument */
+    MemoryRegion *system_memory;     /* set by xbox_pci_init() from its address_space_mem argument */
+    MemoryRegion pci_hole;           /* alias into pci_address_space, mapped into system_memory */
+} XBOX_PCIState;
+
+typedef struct XBOX_SMBState { /* instance state of the "xbox-smbus" device */
+    PCIDevice dev;
+
+    AMD756SMBus smb;      /* SMBus controller model driven by the BAR handlers */
+    MemoryRegion smb_bar; /* 32-byte I/O region registered as BAR 1 */
+} XBOX_SMBState;
+
+typedef struct XBOX_LPCState { /* instance state of the "xbox-lpc" device */
+    PCIDevice dev;
+
+    ISABus *isa_bus; /* created in xbox_lpc_initfn() */
+    XBOX_PMRegs pm;  /* passed to xbox_pm_init() by xbox_pci_init() */
+    qemu_irq *pic;   /* PIC inputs used by the IRQ routing callbacks */
+
+    int bootrom_size;          /* non-zero makes xbox_lpc_reset() write bootrom_data */
+    uint8_t bootrom_data[512]; /* MCPX boot ROM image read from the "bootrom" file */
+} XBOX_LPCState;
+
+#define XBOX_PCI_DEVICE(obj) /* OBJECT_CHECK cast to XBOX_PCIState, type "xbox-pci" */ \
+    OBJECT_CHECK(XBOX_PCIState, (obj), "xbox-pci")
+
+#define XBOX_SMBUS_DEVICE(obj) /* OBJECT_CHECK cast to XBOX_SMBState, type "xbox-smbus" */ \
+    OBJECT_CHECK(XBOX_SMBState, (obj), "xbox-smbus")
+
+#define XBOX_LPC_DEVICE(obj) /* OBJECT_CHECK cast to XBOX_LPCState, type "xbox-lpc" */ \
+    OBJECT_CHECK(XBOX_LPCState, (obj), "xbox-lpc")
+
+
+
+void xbox_pci_init(qemu_irq *pic,                      /* PIC inputs for the LPC IRQ routing */
+                   MemoryRegion *address_space_mem,    /* system memory */
+                   MemoryRegion *address_space_io,     /* I/O address space */
+                   MemoryRegion *pci_memory,           /* PCI memory address space */
+                   MemoryRegion *ram_memory,           /* guest RAM region */
+                   PCIBus **out_host_bus,              /* out: host PCI bus */
+                   ISABus **out_isa_bus,               /* out: ISA bus */
+                   i2c_bus **out_smbus,                /* out: SMBus */
+                   PCIBus **out_agp_bus);              /* out: AGP bus */
+
+#endif
\ No newline at end of file
diff --git a/hw/xbox/xid.c b/hw/xbox/xid.c
new file mode 100644
index 0000000000..df58c2a314
--- /dev/null
+++ b/hw/xbox/xid.c
@@ -0,0 +1,445 @@
+/*
+ * QEMU USB XID Devices
+ *
+ * Copyright (c) 2013 espes
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/hw.h"
+#include "ui/console.h"
+#include "hw/usb.h"
+#include "hw/usb/desc.h"
+
+//#define DEBUG_XID    /* uncomment to route DPRINTF to printf */
+#ifdef DEBUG_XID
+#define DPRINTF printf
+#else
+#define DPRINTF(...)   /* expands to nothing; the arguments are discarded */
+#endif
+
+/*
+ * http://xbox-linux.cvs.sourceforge.net/viewvc/xbox-linux/kernel-2.6/drivers/usb/input/xpad.c
+ * http://euc.jp/periphs/xbox-controller.en.html
+ * http://euc.jp/periphs/xbox-pad-desc.txt
+ */
+
+#define USB_CLASS_XID  0x58  /* bInterfaceClass of the gamepad interface */
+#define USB_DT_XID     0x42  /* bDescriptorType of the XID descriptor */
+
+/* bRequest codes matched in usb_xid_handle_control() */
+#define HID_GET_REPORT       0x01
+#define HID_SET_REPORT       0x09
+#define XID_GET_CAPABILITIES 0x01
+
+
+/* Packed layout of the XID descriptor; it is memcpy'd to the guest verbatim. */
+typedef struct XIDDesc {
+    uint8_t bLength;                  /* bytes copied out on a descriptor read */
+    uint8_t bDescriptorType;          /* USB_DT_XID */
+    uint16_t bcdXid;
+    uint8_t bType;
+    uint8_t bSubType;
+    uint8_t bMaxInputReportSize;
+    uint8_t bMaxOutputReportSize;
+    uint16_t wAlternateProductIds[4];
+} QEMU_PACKED XIDDesc;
+
+typedef struct XIDGamepadReport {   /* input report; sent to the guest as-is */
+    uint8_t bReportId;
+    uint8_t bLength;                /* bytes sent; set to sizeof() at init */
+    uint16_t wButtons;              /* digital buttons, one bit each */
+    uint8_t bAnalogButtons[8];      /* written as 0 (released) or 0xff (held) */
+    int16_t sThumbLX;               /* sticks: 0 when idle, -32768 or 32767 */
+    int16_t sThumbLY;               /* while a mapped key is held           */
+    int16_t sThumbRX;
+    int16_t sThumbRY;
+} QEMU_PACKED XIDGamepadReport;
+
+typedef struct XIDGamepadOutputReport {  /* report received via HID SET_REPORT */
+    uint8_t report_id; //FIXME: is this correct?
+    uint8_t length;                      /* expected to equal sizeof(this struct) */
+    uint16_t left_actuator_strength;     /* "rumble power" in the debug trace */
+    uint16_t right_actuator_strength;
+} QEMU_PACKED XIDGamepadOutputReport;
+
+/* Per-device state of the emulated XID gamepad. */
+typedef struct USBXIDState {
+    USBDevice dev;
+    USBEndpoint *intr;              /* IN endpoint 2, looked up at init */
+
+    const XIDDesc *xid_desc;        /* descriptor returned for value 0x4200 */
+
+    QEMUPutKbdEntry *kbd_entry;     /* handle from qemu_add_kbd_event_handler() */
+    bool in_dirty;                  /* set on key events, cleared once sent */
+    XIDGamepadReport in_state;      /* current input report */
+    XIDGamepadOutputReport out_state; /* last accepted output report */
+} USBXIDState;
+/* The gamepad's only interface: interrupt IN and OUT endpoints, both number 2. */
+static const USBDescIface desc_iface_xbox_gamepad = {
+    .bInterfaceNumber              = 0,
+    .bNumEndpoints                 = 2,
+    .bInterfaceClass               = USB_CLASS_XID,
+    .bInterfaceSubClass            = 0x42,
+    .bInterfaceProtocol            = 0x00,
+    .eps = (USBDescEndpoint[]) {
+        {   /* input reports are delivered here by usb_xid_handle_data() */
+            .bEndpointAddress      = USB_DIR_IN | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_INT,
+            .wMaxPacketSize        = 0x20,
+            .bInterval             = 4,
+        },
+        {   /* OUT tokens are answered with a STALL by usb_xid_handle_data() */
+            .bEndpointAddress      = USB_DIR_OUT | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_INT,
+            .wMaxPacketSize        = 0x20,
+            .bInterval             = 4,
+        },
+    },
+};
+/* Device-level descriptor data: one configuration holding the one interface. */
+static const USBDescDevice desc_device_xbox_gamepad = {
+    .bcdUSB                        = 0x0110,
+    .bMaxPacketSize0               = 0x40,
+    .bNumConfigurations            = 1,
+    .confs = (USBDescConfig[]) {
+        {
+            .bNumInterfaces        = 1,
+            .bConfigurationValue   = 1,
+            .bmAttributes          = 0x80,
+            .bMaxPower             = 50,
+            .nif = 1,
+            .ifs = &desc_iface_xbox_gamepad,
+        },
+    },
+};
+/* Descriptor set installed as uc->usb_desc; only a .full entry is provided. */
+static const USBDesc desc_xbox_gamepad = {
+    .id = {
+        .idVendor          = 0x045e,
+        .idProduct         = 0x0202,
+        .bcdDevice         = 0x0100,
+    },
+    .full = &desc_device_xbox_gamepad,
+};
+/* XID descriptor; sizes come from the structs they describe, version is BCD. */
+static const XIDDesc desc_xid_xbox_gamepad = {
+    .bLength = sizeof(XIDDesc),
+    .bDescriptorType = USB_DT_XID,
+    .bcdXid = 0x0100, /* 1.00, same BCD form as bcdDevice */
+    .bType = 1,
+    .bSubType = 1,
+    .bMaxInputReportSize = sizeof(XIDGamepadReport), /* 20, was 0x20 */
+    .bMaxOutputReportSize = sizeof(XIDGamepadOutputReport),
+    .wAlternateProductIds = {0xffff, 0xffff, 0xffff, 0xffff},
+};
+
+/* Logical button ids; 0..7 index XIDGamepadReport.bAnalogButtons[] directly. */
+#define GAMEPAD_A                0
+#define GAMEPAD_B                1
+#define GAMEPAD_X                2
+#define GAMEPAD_Y                3
+#define GAMEPAD_BLACK            4
+#define GAMEPAD_WHITE            5
+#define GAMEPAD_LEFT_TRIGGER     6
+#define GAMEPAD_RIGHT_TRIGGER    7
+/* 8..15: digital buttons, stored as bit (id - GAMEPAD_DPAD_UP) of wButtons. */
+#define GAMEPAD_DPAD_UP          8
+#define GAMEPAD_DPAD_DOWN        9
+#define GAMEPAD_DPAD_LEFT        10
+#define GAMEPAD_DPAD_RIGHT       11
+#define GAMEPAD_START            12
+#define GAMEPAD_BACK             13
+#define GAMEPAD_LEFT_THUMB       14
+#define GAMEPAD_RIGHT_THUMB      15
+/* 16..23: pseudo-buttons that drive a thumbstick axis to one extreme. */
+#define GAMEPAD_LEFT_THUMB_UP    16
+#define GAMEPAD_LEFT_THUMB_DOWN  17
+#define GAMEPAD_LEFT_THUMB_LEFT  18
+#define GAMEPAD_LEFT_THUMB_RIGHT 19
+
+#define GAMEPAD_RIGHT_THUMB_UP    20
+#define GAMEPAD_RIGHT_THUMB_DOWN  21
+#define GAMEPAD_RIGHT_THUMB_LEFT  22
+#define GAMEPAD_RIGHT_THUMB_RIGHT 23
+/* Host key (QKeyCode index) -> GAMEPAD_* id; -1 marks a key with no binding. */
+static const int gamepad_mapping[] = {
+    [0 ... Q_KEY_CODE_MAX] = -1,
+    /* Bound keys follow; these later designators replace the -1 default. */
+    [Q_KEY_CODE_UP]    = GAMEPAD_DPAD_UP,
+    [Q_KEY_CODE_KP_8]  = GAMEPAD_DPAD_UP,
+    [Q_KEY_CODE_DOWN]  = GAMEPAD_DPAD_DOWN,
+    [Q_KEY_CODE_KP_2]  = GAMEPAD_DPAD_DOWN,
+    [Q_KEY_CODE_LEFT]  = GAMEPAD_DPAD_LEFT,
+    [Q_KEY_CODE_KP_4]  = GAMEPAD_DPAD_LEFT,
+    [Q_KEY_CODE_RIGHT] = GAMEPAD_DPAD_RIGHT,
+    [Q_KEY_CODE_KP_6]  = GAMEPAD_DPAD_RIGHT,
+
+    [Q_KEY_CODE_RET]   = GAMEPAD_START,
+    [Q_KEY_CODE_BACKSPACE] = GAMEPAD_BACK,
+    /* face buttons */
+    [Q_KEY_CODE_W]     = GAMEPAD_X,
+    [Q_KEY_CODE_E]     = GAMEPAD_Y,
+    [Q_KEY_CODE_S]     = GAMEPAD_A,
+    [Q_KEY_CODE_D]     = GAMEPAD_B,
+    [Q_KEY_CODE_X]     = GAMEPAD_WHITE,
+    [Q_KEY_CODE_C]     = GAMEPAD_BLACK,
+    /* triggers */
+    [Q_KEY_CODE_Q]     = GAMEPAD_LEFT_TRIGGER,
+    [Q_KEY_CODE_R]     = GAMEPAD_RIGHT_TRIGGER,
+    /* left stick: V clicks it, T/F/G/H push it up/left/down/right */
+    [Q_KEY_CODE_V]     = GAMEPAD_LEFT_THUMB,
+    [Q_KEY_CODE_T]     = GAMEPAD_LEFT_THUMB_UP,
+    [Q_KEY_CODE_F]     = GAMEPAD_LEFT_THUMB_LEFT,
+    [Q_KEY_CODE_G]     = GAMEPAD_LEFT_THUMB_DOWN,
+    [Q_KEY_CODE_H]     = GAMEPAD_LEFT_THUMB_RIGHT,
+    /* right stick: M clicks it, I/J/K/L push it up/left/down/right */
+    [Q_KEY_CODE_M]     = GAMEPAD_RIGHT_THUMB,
+    [Q_KEY_CODE_I]     = GAMEPAD_RIGHT_THUMB_UP,
+    [Q_KEY_CODE_J]     = GAMEPAD_RIGHT_THUMB_LEFT,
+    [Q_KEY_CODE_K]     = GAMEPAD_RIGHT_THUMB_DOWN,
+    [Q_KEY_CODE_L]     = GAMEPAD_RIGHT_THUMB_RIGHT,
+};
+/* Keyboard hook: folds one legacy key event into the pending input report. */
+static void xbox_gamepad_keyboard_event(void *opaque, int keycode)
+{
+    USBXIDState *s = opaque;
+    XIDGamepadReport *report = &s->in_state;
+
+    /* Bit 7 of the keycode flags a release; the low seven bits name the key. */
+    bool up = keycode & 0x80;
+    QKeyCode code = index_from_keycode(keycode & 0x7f);
+    if (code >= Q_KEY_CODE_MAX) {
+        return;
+    }
+
+    int button = gamepad_mapping[code];
+    DPRINTF("xid keyboard_event %x - %d %d %d\n", keycode, code, button, up);
+    if (button < 0) {
+        return; /* unbound key: report unchanged, so nothing to queue */
+    }
+
+    /* A held key pins its axis at full scale; releasing it re-centres. */
+    int16_t high = up ? 0 : 32767;
+    int16_t low = up ? 0 : -32768;
+    switch (button) {
+    case GAMEPAD_A ... GAMEPAD_RIGHT_TRIGGER:
+        report->bAnalogButtons[button] = up ? 0 : 0xff;
+        break;
+    case GAMEPAD_DPAD_UP ... GAMEPAD_RIGHT_THUMB:
+        if (up) {
+            report->wButtons &= ~(1 << (button - GAMEPAD_DPAD_UP));
+        } else {
+            report->wButtons |= 1 << (button - GAMEPAD_DPAD_UP);
+        }
+        break;
+    case GAMEPAD_LEFT_THUMB_UP:
+    case GAMEPAD_LEFT_THUMB_DOWN:
+        report->sThumbLY = (button == GAMEPAD_LEFT_THUMB_UP) ? high : low;
+        break;
+    case GAMEPAD_LEFT_THUMB_LEFT:
+    case GAMEPAD_LEFT_THUMB_RIGHT:
+        report->sThumbLX = (button == GAMEPAD_LEFT_THUMB_RIGHT) ? high : low;
+        break;
+    case GAMEPAD_RIGHT_THUMB_UP:
+    case GAMEPAD_RIGHT_THUMB_DOWN:
+        report->sThumbRY = (button == GAMEPAD_RIGHT_THUMB_UP) ? high : low;
+        break;
+    case GAMEPAD_RIGHT_THUMB_LEFT:
+    case GAMEPAD_RIGHT_THUMB_RIGHT:
+        report->sThumbRX = (button == GAMEPAD_RIGHT_THUMB_RIGHT) ? high : low;
+        break;
+    default:
+        break;
+    }
+
+    s->in_dirty = true;
+}
+
+/* Reset callback: only emits a debug trace; no device state is touched here. */
+static void usb_xid_handle_reset(USBDevice *dev)
+{
+    DPRINTF("xid reset\n");
+}
+/* Control handler: generic descriptor requests first, then HID/XID ones. */
+static void usb_xid_handle_control(USBDevice *dev, USBPacket *p,
+               int request, int value, int index, int length, uint8_t *data)
+{
+    USBXIDState *s = DO_UPCAST(USBXIDState, dev, dev);
+    XIDGamepadOutputReport rumble;
+    int len;
+
+    DPRINTF("xid handle_control 0x%x 0x%x\n", request, value);
+    int ret = usb_desc_handle_control(dev, p, request, value, index, length, data);
+    if (ret >= 0) {
+        DPRINTF("xid handled by usb_desc_handle_control: %d\n", ret);
+        return;
+    }
+
+    /*
+     * request/value/length are guest-controlled: malformed or unsupported
+     * requests get a STALL, since an assert would abort the whole emulator.
+     */
+    switch (request) {
+    /* HID requests */
+    case ClassInterfaceRequest | HID_GET_REPORT:
+        DPRINTF("xid GET_REPORT 0x%x\n", value);
+        if (value != 0x100) { /* only the input report is implemented */
+            p->status = USB_RET_STALL;
+            break;
+        }
+        /* FIXME: I'm not sure if bReportId is just a counter */
+        /* Hand back at most as many bytes as the guest asked for. */
+        len = s->in_state.bLength < length ? s->in_state.bLength : length;
+        memcpy(data, &s->in_state, len);
+        p->actual_length = len;
+        break;
+    case ClassInterfaceOutRequest | HID_SET_REPORT:
+        DPRINTF("xid SET_REPORT 0x%x\n", value);
+        if (value != 0x200 || length < (int)sizeof(rumble)) { /* output only */
+            p->status = USB_RET_STALL;
+            break;
+        }
+        /* Check the report's own length field before committing it. */
+        memcpy(&rumble, data, sizeof(rumble));
+        if (rumble.length != sizeof(rumble)) {
+            p->status = USB_RET_STALL;
+            break;
+        }
+        s->out_state = rumble;
+        //FIXME: Check actuator endianness
+        DPRINTF("Set rumble power to 0x%x, 0x%x\n",
+                s->out_state.left_actuator_strength,
+                s->out_state.right_actuator_strength);
+        p->actual_length = s->out_state.length;
+        break;
+    /* XID requests */
+    case VendorInterfaceRequest | USB_REQ_GET_DESCRIPTOR:
+        DPRINTF("xid GET_DESCRIPTOR 0x%x\n", value);
+        if (value != 0x4200) { /* only the XID descriptor is served */
+            p->status = USB_RET_STALL;
+            break;
+        }
+        len = s->xid_desc->bLength < length ? s->xid_desc->bLength : length;
+        memcpy(data, s->xid_desc, len);
+        p->actual_length = len;
+        break;
+    case VendorInterfaceRequest | XID_GET_CAPABILITIES:
+        DPRINTF("xid XID_GET_CAPABILITIES 0x%x\n", value);
+        /* FIXME: ! */
+        p->status = USB_RET_STALL;
+        break;
+    case ((USB_DIR_IN|USB_TYPE_CLASS|USB_RECIP_DEVICE)<<8)
+             | USB_REQ_GET_DESCRIPTOR:
+        //FIXME: Intended for the hub: usbd_get_hub_descriptor, UT_READ_CLASS?!
+    case ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)
+             | USB_REQ_CLEAR_FEATURE:
+        /* FIXME: ! */
+        DPRINTF("xid unknown xpad request 0x%x: value = 0x%x\n",
+                request, value);
+        memset(data, 0x00, length);
+        p->status = USB_RET_STALL;
+        break;
+    default:
+        DPRINTF("xid USB stalled on request 0x%x value 0x%x\n", request, value);
+        p->status = USB_RET_STALL;
+        break;
+    }
+}
+/* Data handler: feeds the pending input report to interrupt IN endpoint 2. */
+static void usb_xid_handle_data(USBDevice *dev, USBPacket *p)
+{
+    USBXIDState *s = DO_UPCAST(USBXIDState, dev, dev);
+    size_t len = s->in_state.bLength;
+
+    DPRINTF("xid handle_data 0x%x %d 0x%zx\n", p->pid, p->ep->nr, p->iov.size);
+    /*
+     * Token and endpoint are picked by the guest, so anything other than
+     * IN on endpoint 2 is stalled rather than asserted on.
+     */
+    if (p->pid != USB_TOKEN_IN || p->ep->nr != 2) {
+        p->status = USB_RET_STALL;
+        return;
+    }
+
+    /* No state change since the last report: NAK so the host polls again. */
+    if (!s->in_dirty) {
+        p->status = USB_RET_NAK;
+        return;
+    }
+
+    /* Clamp to the guest's buffer so the copy cannot run past it. */
+    if (len > p->iov.size) {
+        len = p->iov.size;
+    }
+    usb_packet_copy(p, &s->in_state, len);
+    s->in_dirty = false;
+}
+/* Destroy callback: detach the keyboard hook so it cannot outlive the device. */
+static void usb_xid_handle_destroy(USBDevice *dev)
+{
+    DPRINTF("xid handle_destroy\n");
+    qemu_remove_kbd_event_handler(DO_UPCAST(USBXIDState, dev, dev)->kbd_entry);
+}
+static void usb_xid_class_initfn(ObjectClass *klass, void *data)
+{
+    USBDeviceClass *usb_class = USB_DEVICE_CLASS(klass);
+    /* Callbacks common to XID devices; attach reuses the usb_desc helper. */
+    usb_class->handle_attach  = usb_desc_attach;
+    usb_class->handle_destroy = usb_xid_handle_destroy;
+    usb_class->handle_data    = usb_xid_handle_data;
+    usb_class->handle_control = usb_xid_handle_control;
+    usb_class->handle_reset   = usb_xid_handle_reset;
+}
+/* Per-instance init: descriptors, endpoint, report headers, keyboard hook. */
+static int usb_xbox_gamepad_initfn(USBDevice *dev)
+{
+    USBXIDState *pad = DO_UPCAST(USBXIDState, dev, dev);
+    usb_desc_init(dev);
+    pad->intr = usb_ep_get(dev, USB_TOKEN_IN, 2);
+    pad->xid_desc = &desc_xid_xbox_gamepad;
+    /* Both reports carry their own size in their length byte. */
+    pad->out_state.length = sizeof(XIDGamepadOutputReport);
+    pad->in_state.bLength = sizeof(XIDGamepadReport);
+    pad->kbd_entry = qemu_add_kbd_event_handler(xbox_gamepad_keyboard_event,
+                                                pad);
+    return 0;
+}
+/* Class init for "usb-xbox-gamepad": shared XID callbacks plus pad specifics. */
+static void usb_xbox_gamepad_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+    USBDeviceClass *usb_class = USB_DEVICE_CLASS(klass);
+    /* TODO: no vmsd is set yet (original note: dc->vmsd = &vmstate_usb_kbd). */
+    usb_xid_class_initfn(klass, data);
+
+    set_bit(DEVICE_CATEGORY_INPUT, device_class->categories);
+    usb_class->usb_desc     = &desc_xbox_gamepad;
+    usb_class->product_desc = "Microsoft Xbox Controller";
+    usb_class->init         = usb_xbox_gamepad_initfn;
+}
+/* Type record for "usb-xbox-gamepad"; instances are USBXIDState-sized. */
+static const TypeInfo usb_xbox_gamepad_info = {
+    .name          = "usb-xbox-gamepad",
+    .parent        = TYPE_USB_DEVICE,
+    .instance_size = sizeof(USBXIDState),
+    .class_init    = usb_xbox_gamepad_class_initfn,
+};
+/* Registers the gamepad type; passed to type_init() below. */
+static void usb_xid_register_types(void)
+{
+    type_register_static(&usb_xbox_gamepad_info);
+}
+
+type_init(usb_xid_register_types)