Fix and refactor NV2A code

This patch does the following: - Fixes up things for Qemu 2.x compat - Factors out the high-level NV2A blocks into separate files - Updates g-lru-cache for latest glib compat (github.com/chergert/glrucache@c10af24) - Changes texture hashing algorithm from FNV to xxH v0.6.5
2018-06-26 14:40:01 -07:00 · 2018-06-26 14:40:01 -07:00 · 584dbda1d6
parent cff0d97e35
commit 584dbda1d6
33 changed files with 6247 additions and 4397 deletions
--- a/hw/xbox/g-lru-cache.c
+++ b/hw/xbox/g-lru-cache.c
@ -1,338 +0,0 @@
-/* g-lru-cache.c
- *
- * Copyright (C) 2009 - Christian Hergert
- *
- * This is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- * 
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- * 
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/* 
- * Ideally, you want to use fast_get. This is because we are using a
- * GStaticRWLock which is indeed slower than a mutex if you have lots of writer
- * acquisitions. This doesn't make it a true LRU, though, as the oldest
- * retrieval from strorage is the first item evicted.
- */
-
-#include "g-lru-cache.h"
-
-// #define DEBUG
-
-#define LRU_CACHE_PRIVATE(object)          \
-    (G_TYPE_INSTANCE_GET_PRIVATE((object), \
-    G_TYPE_LRU_CACHE,                      \
-    GLruCachePrivate))
-
-struct _GLruCachePrivate
-{
-    GStaticRWLock   rw_lock;
-    guint           max_size;
-    gboolean        fast_get;
-    
-    GHashTable     *hash_table;
-    GEqualFunc      key_equal_func;
-    GCopyFunc       key_copy_func;
-    GList          *newest;
-    GList          *oldest;
-    
-    GLookupFunc     retrieve_func;
-    
-    gpointer        user_data;
-    GDestroyNotify  user_destroy_func;
-};
-
-G_DEFINE_TYPE (GLruCache, g_lru_cache, G_TYPE_OBJECT);
-
-static void
-g_lru_cache_finalize (GObject *object)
-{
-    GLruCachePrivate *priv = LRU_CACHE_PRIVATE (object);
-    
-    if (priv->user_data && priv->user_destroy_func)
-        priv->user_destroy_func (priv->user_data);
-    
-    priv->user_data = NULL;
-    priv->user_destroy_func = NULL;
-    
-    g_hash_table_destroy (priv->hash_table);
-    priv->hash_table = NULL;
-    
-    g_list_free (priv->newest);
-    priv->newest = NULL;
-    priv->oldest = NULL;
-    
-    G_OBJECT_CLASS (g_lru_cache_parent_class)->finalize (object);
-}
-
-static void
-g_lru_cache_class_init (GLruCacheClass *klass)
-{
-    GObjectClass *object_class = G_OBJECT_CLASS (klass);
-    
-    object_class->finalize = g_lru_cache_finalize;
-
-    g_type_class_add_private (object_class, sizeof (GLruCachePrivate));
-}
-
-static void
-g_lru_cache_init (GLruCache *self)
-{
-    self->priv = LRU_CACHE_PRIVATE (self);
-    
-    self->priv->max_size = 1024;
-    self->priv->fast_get = FALSE;
-    g_static_rw_lock_init (&self->priv->rw_lock);
-}
-
-static void
-g_lru_cache_evict_n_oldest_locked (GLruCache *self, gint n)
-{
-    GList *victim;
-    gint   i;
-    
-    for (i = 0; i < n; i++)
-    {
-        victim = self->priv->oldest;
-        
-        if (victim == NULL)
-            return;
-        
-        if (victim->prev)
-            victim->prev->next = NULL;
-        
-        self->priv->oldest = victim->prev;
-        g_hash_table_remove (self->priv->hash_table, victim->data);
-        
-        if (self->priv->newest == victim)
-            self->priv->newest = NULL;
-        
-        g_list_free1 (victim); /* victim->data is owned by hashtable */
-    }
-    
-#ifdef DEBUG
-    g_assert (g_hash_table_size (self->priv->hash_table) == g_list_length (self->priv->newest));
-#endif
-}
-
-GLruCache*
-g_lru_cache_new (GHashFunc      hash_func,
-                 GEqualFunc     key_equal_func,
-                 GCopyFunc      key_copy_func,
-                 GLookupFunc    retrieve_func,
-                 GDestroyNotify key_destroy_func,
-                 GDestroyNotify value_destroy_func,
-                 gpointer       user_data,
-                 GDestroyNotify user_destroy_func)
-{
-    GLruCache *self = g_object_new (G_TYPE_LRU_CACHE, NULL);
-    
-    self->priv->hash_table = g_hash_table_new_full (hash_func,
-                                                    key_equal_func,
-                                                    key_destroy_func,
-                                                    value_destroy_func);
-    
-    self->priv->key_equal_func = key_equal_func;
-    self->priv->key_copy_func = key_copy_func;
-    self->priv->retrieve_func = retrieve_func;
-    self->priv->user_data = user_data;
-    self->priv->user_destroy_func = user_destroy_func;
-    
-    return self;
-}
-
-void
-g_lru_cache_set_max_size (GLruCache *self, guint max_size)
-{
-    g_return_if_fail (G_IS_LRU_CACHE (self));
-    
-    guint old_max_size = self->priv->max_size;
-    
-    g_static_rw_lock_writer_lock (&(self->priv->rw_lock));
-    
-    self->priv->max_size = max_size;
-    
-    if (old_max_size > max_size)
-        g_lru_cache_evict_n_oldest_locked (self, old_max_size - max_size);
-    
-    g_static_rw_lock_writer_unlock (&(self->priv->rw_lock));
-}
-
-guint
-g_lru_cache_get_max_size (GLruCache *self)
-{
-    g_return_val_if_fail (G_IS_LRU_CACHE (self), -1);
-    return self->priv->max_size;
-}
-
-guint
-g_lru_cache_get_size (GLruCache *self)
-{
-    g_return_val_if_fail (G_IS_LRU_CACHE (self), -1);
-    return g_hash_table_size (self->priv->hash_table);
-}
-
-gpointer
-g_lru_cache_get (GLruCache *self, gpointer key)
-{
-    g_return_val_if_fail (G_IS_LRU_CACHE (self), NULL);
-    
-    gpointer value;
-    
-    g_static_rw_lock_reader_lock (&(self->priv->rw_lock));
-    
-    value = g_hash_table_lookup (self->priv->hash_table, key);
-    
-#ifdef DEBUG
-    if (value)
-        g_debug ("Cache Hit!");
-    else
-        g_debug ("Cache miss");
-#endif
-    
-    g_static_rw_lock_reader_unlock (&(self->priv->rw_lock));
-    
-    if (!value)
-    {
-        g_static_rw_lock_writer_lock (&(self->priv->rw_lock));
-        
-        if (!g_hash_table_lookup (self->priv->hash_table, key))
-        {
-            if (g_hash_table_size (self->priv->hash_table) >= self->priv->max_size)
-#ifdef DEBUG
-            {
-                g_debug ("We are at capacity, must evict oldest");
-#endif
-                g_lru_cache_evict_n_oldest_locked (self, 1);
-#ifdef DEBUG
-            }
-            
-            g_debug ("Retrieving value from external resource");
-#endif
-
-            value = self->priv->retrieve_func (key, self->priv->user_data);
-            
-            if (self->priv->key_copy_func)
-                g_hash_table_insert (self->priv->hash_table,
-                    self->priv->key_copy_func (key, self->priv->user_data),
-                    value);
-            else
-                g_hash_table_insert (self->priv->hash_table, key, value);
-            
-            self->priv->newest = g_list_prepend (self->priv->newest, key);
-            
-            if (self->priv->oldest == NULL)
-                self->priv->oldest = self->priv->newest;
-        }
-#ifdef DEBUG
-        else g_debug ("Lost storage race with another thread");
-#endif
-        
-        g_static_rw_lock_writer_unlock (&(self->priv->rw_lock));
-    }
-
-    /* fast_get means that we do not reposition the item to the head
-     * of the list. it essentially makes the lru, a lru from storage,
-     * not lru to user.
-     */
-
-    else if (!self->priv->fast_get &&
-             !self->priv->key_equal_func (key, self->priv->newest->data))
-    {
-#ifdef DEBUG
-        g_debug ("Making item most recent");
-#endif
-
-        g_static_rw_lock_writer_lock (&(self->priv->rw_lock));
-
-        GList *list = self->priv->newest;
-        GList *tmp;
-        GEqualFunc equal = self->priv->key_equal_func;
-
-        for (tmp = list; tmp; tmp = tmp->next)
-        {
-            if (equal (key, tmp->data))
-            {
-                GList *tmp1 = g_list_remove_link (list, tmp);
-                self->priv->newest = g_list_prepend (tmp1, tmp);
-                break;
-            }
-        }
-
-        g_static_rw_lock_writer_unlock (&(self->priv->rw_lock));
-    }
-    
-    return value;
-}
-
-void
-g_lru_cache_evict (GLruCache *self, gpointer key)
-{
-    g_return_if_fail (G_IS_LRU_CACHE (self));
-    
-    GEqualFunc  equal = self->priv->key_equal_func;
-    GList      *list  = NULL;
-    
-    g_static_rw_lock_writer_lock (&(self->priv->rw_lock));
-    
-    if (equal (key, self->priv->oldest))
-    {
-        g_lru_cache_evict_n_oldest_locked (self, 1);
-    }
-    else
-    {        
-        for (list = self->priv->newest; list; list = list->next)
-        {
-            /* key, list->data is owned by hashtable */
-            if (equal (key, list->data))
-            {
-                self->priv->newest = g_list_remove_link (self->priv->newest, list);
-                g_list_free (list);
-                break;
-            }
-        }
-        g_hash_table_remove (self->priv->hash_table, key);
-    }
-    
-    g_static_rw_lock_writer_unlock (&(self->priv->rw_lock));
-}
-
-void
-g_lru_cache_clear (GLruCache *self)
-{
-    g_return_if_fail (G_IS_LRU_CACHE (self));
-    
-    g_static_rw_lock_writer_lock (&(self->priv->rw_lock));
-    
-    g_hash_table_remove_all (self->priv->hash_table);
-    g_list_free (self->priv->newest);
-    
-    self->priv->oldest = NULL;
-    self->priv->newest = NULL;
-    
-    g_static_rw_lock_writer_unlock (&(self->priv->rw_lock));
-}
-
-void
-g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get)
-{
-    g_return_if_fail (G_IS_LRU_CACHE (self));
-    self->priv->fast_get = fast_get;
-}
-
-gboolean
-g_lru_cache_get_fast_get (GLruCache *self)
-{
-    g_return_val_if_fail (G_IS_LRU_CACHE (self), FALSE);
-    return self->priv->fast_get;
-}
-
--- a/hw/xbox/nv2a/Makefile.objs
+++ b/hw/xbox/nv2a/Makefile.objs
@ -0,0 +1,32 @@
+obj-y += g-lru-cache.o
+obj-y += swizzle.o
+
+obj-y += nv2a.o
+obj-y += nv2a_debug.o
+obj-y += nv2a_shaders.o
+
+###
+# These are just #included into nv2a.c for build time savings
+#
+# obj-y += nv2a_pbus.o
+# obj-y += nv2a_pcrtc.o
+# obj-y += nv2a_pfb.o
+# obj-y += nv2a_pfifo.o
+# obj-y += nv2a_pgraph.o
+# obj-y += nv2a_pmc.o
+# obj-y += nv2a_pramdac.o
+# obj-y += nv2a_prmcio.o
+# obj-y += nv2a_prmvio.o
+# obj-y += nv2a_ptimer.o
+# obj-y += nv2a_pvideo.o
+# obj-y += nv2a_user.o
+# obj-y += nv2a_stubs.o
+###
+
+obj-y += nv2a_psh.o
+obj-y += nv2a_vsh.o
+
+obj-y += gl/
+
+obj-y += xxhash.o
+xxhash.o-cflags := -O3 -DXXH_FORCE_MEMORY_ACCESS=2
--- a/hw/xbox/nv2a/g-lru-cache.c
+++ b/hw/xbox/nv2a/g-lru-cache.c
@ -0,0 +1,372 @@
+/* g-lru-cache.c
+ *
+ * Copyright (C) 2009 - Christian Hergert
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * 
+ * This is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ * 
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* 
+ * Ideally, you want to use fast_get. This is because we are using a
+ * GRWLock, which is indeed slower than a mutex if you have lots of writer
+ * acquisitions. With fast_get the cache is not a true LRU, though, as the
+ * oldest retrieval from storage is the first item evicted.
+ */
+
+#include "g-lru-cache.h"
+
+/*
+ * DEBUG defaults to 0. When it is non-zero, the g_debug() traces and the
+ * list/hash-table consistency assertion guarded by "#if DEBUG" in this file
+ * are compiled in.
+ */
+#ifndef DEBUG
+#define DEBUG 0
+#endif
+
+/* Fetches the GLruCachePrivate data attached to a GLruCache instance. */
+#define LRU_CACHE_PRIVATE(object)          \
+    (G_TYPE_INSTANCE_GET_PRIVATE((object), \
+    G_TYPE_LRU_CACHE,                      \
+    GLruCachePrivate))
+
+/*
+ * Private per-instance state of a GLruCache.
+ *
+ * Values live in hash_table; the recency order of the keys is tracked by a
+ * GList whose head is `newest` and whose tail is `oldest`.
+ */
+struct _GLruCachePrivate
+{
+	/* Taken as reader for lookups and as writer for every list/table change */
+	GRWLock         rw_lock;
+	/* Entry limit checked before inserting on a miss (set to 1024 in init) */
+	guint           max_size;
+	/* When TRUE, a cache hit does not move the key to the head of the list */
+	gboolean        fast_get;
+	
+	/* key -> value storage, created in g_lru_cache_new_full() */
+	GHashTable     *hash_table;
+	GEqualFunc      key_equal_func;
+	/* Optional; when set, the hash table stores a copy of the caller's key */
+	GCopyFunc       key_copy_func;
+	/* Head (most recent) and tail (eviction candidate) of the key list */
+	GList          *newest;
+	GList          *oldest;
+	
+	/* Called on a miss to produce the value for a key */
+	GLookupFunc     retrieve_func;
+	
+	/* Passed to retrieve_func and key_copy_func; destroyed in finalize */
+	gpointer        user_data;
+	GDestroyNotify  user_destroy_func;
+};
+
+/* Defines the GLruCache GObject type (derived from G_TYPE_OBJECT). */
+G_DEFINE_TYPE (GLruCache, g_lru_cache, G_TYPE_OBJECT);
+
+/*
+ * GObject finalize handler.
+ *
+ * Releases everything the cache owns: the user data (through
+ * user_destroy_func, when both are set), the hash table with its keys and
+ * values, the key list, and the reader/writer lock. Finally chains up to the
+ * parent class.
+ */
+static void
+g_lru_cache_finalize (GObject *object)
+{
+	GLruCachePrivate *priv = LRU_CACHE_PRIVATE (object);
+
+	if (priv->user_data && priv->user_destroy_func)
+		priv->user_destroy_func (priv->user_data);
+
+	priv->user_data = NULL;
+	priv->user_destroy_func = NULL;
+
+	/* hash_table is only assigned in g_lru_cache_new_full(); an instance
+	 * created directly through g_object_new() never gets one. */
+	if (priv->hash_table)
+		g_hash_table_destroy (priv->hash_table);
+	priv->hash_table = NULL;
+
+	/* The list nodes only reference keys owned by the hash table. */
+	g_list_free (priv->newest);
+	priv->newest = NULL;
+	priv->oldest = NULL;
+
+	/* Pairs with g_rw_lock_init() in g_lru_cache_init(). */
+	g_rw_lock_clear (&priv->rw_lock);
+
+	G_OBJECT_CLASS (g_lru_cache_parent_class)->finalize (object);
+}
+
+/*
+ * Class initializer: reserves the per-instance GLruCachePrivate storage and
+ * installs g_lru_cache_finalize() as the GObject finalizer.
+ */
+static void
+g_lru_cache_class_init (GLruCacheClass *klass)
+{
+	g_type_class_add_private (klass, sizeof (GLruCachePrivate));
+
+	G_OBJECT_CLASS (klass)->finalize = g_lru_cache_finalize;
+}
+
+/*
+ * Instance initializer: binds the private data and applies the defaults
+ * (limit of 1024 entries, fast_get disabled, lock initialised).
+ */
+static void
+g_lru_cache_init (GLruCache *self)
+{
+	GLruCachePrivate *priv = LRU_CACHE_PRIVATE (self);
+
+	self->priv = priv;
+
+	g_rw_lock_init (&priv->rw_lock);
+	priv->fast_get = FALSE;
+	priv->max_size = 1024;
+}
+
+/*
+ * Drops up to `n` entries from the tail of the key list, removing each one
+ * from the hash table as well. Stops early once the list is empty.
+ *
+ * The caller must already hold the writer lock.
+ */
+static void
+g_lru_cache_evict_n_oldest_locked (GLruCache *self, gint n)
+{
+	GLruCachePrivate *priv = self->priv;
+	gint remaining;
+
+	for (remaining = n; remaining > 0; remaining--)
+	{
+		GList *tail = priv->oldest;
+
+		if (tail == NULL)
+			return;
+
+		/* Detach the tail node; its predecessor becomes the new tail. */
+		priv->oldest = tail->prev;
+		if (tail->prev != NULL)
+			tail->prev->next = NULL;
+
+		g_hash_table_remove (priv->hash_table, tail->data);
+
+		/* The tail was also the head: the list is now empty. */
+		if (priv->newest == tail)
+			priv->newest = NULL;
+
+		/* Only the node is freed here; tail->data is owned by the hashtable */
+		g_list_free1 (tail);
+	}
+
+#if DEBUG
+	g_assert (g_hash_table_size (self->priv->hash_table) == g_list_length (self->priv->newest));
+#endif
+}
+
+/*
+ * Convenience constructor: forwards to g_lru_cache_new_full() with no key or
+ * value types, no copy functions and no destroy notifiers for keys/values.
+ */
+GLruCache*
+g_lru_cache_new (GHashFunc      key_hash_func,
+                 GEqualFunc     key_equal_func,
+                 GLookupFunc    retrieve_func,
+                 gpointer       user_data,
+                 GDestroyNotify user_destroy_func)
+{
+	GLruCache *cache;
+
+	cache = g_lru_cache_new_full (G_TYPE_INVALID,    /* key_type */
+	                              NULL,              /* key_copy_func */
+	                              NULL,              /* key_destroy_func */
+	                              G_TYPE_INVALID,    /* value_type */
+	                              NULL,              /* value_copy_func */
+	                              NULL,              /* value_destroy_func */
+	                              key_hash_func,
+	                              key_equal_func,
+	                              retrieve_func,
+	                              user_data,
+	                              user_destroy_func);
+
+	return cache;
+}
+
+/*
+ * Creates a cache and wires up all callbacks.
+ *
+ * The hash table is built from key_hash_func/key_equal_func and takes
+ * key_destroy_func/value_destroy_func as its destroy notifiers.
+ * key_type, value_type and value_copy_func are accepted but not used by
+ * this implementation.
+ */
+GLruCache*
+g_lru_cache_new_full (GType          key_type,
+                      GCopyFunc      key_copy_func,
+                      GDestroyNotify key_destroy_func,
+                      GType          value_type,
+                      GCopyFunc      value_copy_func,
+                      GDestroyNotify value_destroy_func,
+                      GHashFunc      key_hash_func,
+                      GEqualFunc     key_equal_func,
+                      GLookupFunc    retrieve_func,
+                      gpointer       user_data,
+                      GDestroyNotify user_destroy_func)
+{
+	GLruCache *cache = g_object_new (G_TYPE_LRU_CACHE, NULL);
+	GLruCachePrivate *priv = cache->priv;
+
+	priv->hash_table = g_hash_table_new_full (key_hash_func,
+	                                          key_equal_func,
+	                                          key_destroy_func,
+	                                          value_destroy_func);
+
+	priv->retrieve_func = retrieve_func;
+	priv->key_copy_func = key_copy_func;
+	priv->key_equal_func = key_equal_func;
+
+	priv->user_destroy_func = user_destroy_func;
+	priv->user_data = user_data;
+
+	return cache;
+}
+
+/*
+ * Changes the maximum number of entries the cache may hold.
+ *
+ * If the cache currently holds more than `max_size` entries, the oldest
+ * ones are evicted until it fits. Entries are only evicted when the cache
+ * is actually over the new limit, so shrinking the limit of a sparsely
+ * filled cache no longer throws away entries that still fit.
+ */
+void
+g_lru_cache_set_max_size (GLruCache *self, guint max_size)
+{
+	guint size;
+
+	g_return_if_fail (G_IS_LRU_CACHE (self));
+
+	g_rw_lock_writer_lock (&(self->priv->rw_lock));
+
+	self->priv->max_size = max_size;
+
+	/* Evict exactly the excess, measured under the writer lock. */
+	size = g_hash_table_size (self->priv->hash_table);
+	if (size > max_size)
+		g_lru_cache_evict_n_oldest_locked (self, (gint) (size - max_size));
+
+	g_rw_lock_writer_unlock (&(self->priv->rw_lock));
+}
+
+/*
+ * Returns the configured entry limit, or (guint) -1 when `self` is not a
+ * GLruCache.
+ */
+guint
+g_lru_cache_get_max_size (GLruCache *self)
+{
+	guint limit;
+
+	g_return_val_if_fail (G_IS_LRU_CACHE (self), (guint) -1);
+
+	limit = self->priv->max_size;
+	return limit;
+}
+
+/*
+ * Returns the number of entries currently stored in the hash table, or
+ * (guint) -1 when `self` is not a GLruCache.
+ */
+guint
+g_lru_cache_get_size (GLruCache *self)
+{
+	GHashTable *table;
+
+	g_return_val_if_fail (G_IS_LRU_CACHE (self), (guint) -1);
+
+	table = self->priv->hash_table;
+	return g_hash_table_size (table);
+}
+
+/*
+ * Looks up `key`, retrieving and caching the value on a miss.
+ *
+ * Hit:  the cached value is returned. Unless fast_get is enabled, the key
+ *       is moved to the head of the recency list (no-op if already there).
+ * Miss: if the cache is at capacity the oldest entry is evicted, then
+ *       retrieve_func(key, user_data, &err) is called. On success the value
+ *       is stored under key_copy_func(key) when a copy function is set,
+ *       otherwise under `key` itself, and becomes the newest entry.
+ *
+ * If retrieve_func reports an error it is propagated through `error`,
+ * nothing is cached, and whatever retrieve_func returned (likely NULL) is
+ * handed back unchanged.
+ *
+ * Returns NULL when `self` is not a GLruCache.
+ */
+gpointer
+g_lru_cache_get (GLruCache *self, gpointer key, GError **error)
+{
+	GLruCachePrivate *priv;
+	gpointer value;
+	gboolean promote = FALSE;
+	GError *retrieve_error = NULL;
+
+	g_return_val_if_fail (G_IS_LRU_CACHE (self), NULL);
+
+	priv = self->priv;
+
+	g_rw_lock_reader_lock (&priv->rw_lock);
+
+	value = g_hash_table_lookup (priv->hash_table, key);
+
+	/* Decide while the list is still protected whether the hit needs to be
+	 * repositioned; the head is only inspected under the lock. */
+	if (value != NULL && !priv->fast_get && priv->newest != NULL)
+		promote = !priv->key_equal_func (key, priv->newest->data);
+
+#if DEBUG
+	if (value)
+		g_debug ("Cache Hit!");
+	else
+		g_debug ("Cache miss");
+#endif
+
+	g_rw_lock_reader_unlock (&priv->rw_lock);
+
+	if (value == NULL)
+	{
+		g_rw_lock_writer_lock (&priv->rw_lock);
+
+		/* Another thread may have stored the value between the two locks;
+		 * if so, hand out its value instead of NULL. */
+		value = g_hash_table_lookup (priv->hash_table, key);
+
+		if (value == NULL)
+		{
+			gpointer stored_key;
+
+			if (g_hash_table_size (priv->hash_table) >= priv->max_size)
+			{
+#if DEBUG
+				g_debug ("We are at capacity, must evict oldest");
+#endif
+				g_lru_cache_evict_n_oldest_locked (self, 1);
+			}
+
+#if DEBUG
+			g_debug ("Retrieving value from external resource");
+#endif
+
+			value = priv->retrieve_func (key,
+			                             priv->user_data,
+			                             &retrieve_error);
+
+			if (G_UNLIKELY (retrieve_error != NULL))
+			{
+				/* Release the lock before bailing out, otherwise every
+				 * later access to the cache would block forever. */
+				g_rw_lock_writer_unlock (&priv->rw_lock);
+				g_propagate_error (error, retrieve_error);
+				return value; /* likely 'NULL', but we should be transparent */
+			}
+
+			if (priv->key_copy_func)
+				stored_key = priv->key_copy_func (key, priv->user_data);
+			else
+				stored_key = key;
+
+			g_hash_table_insert (priv->hash_table, stored_key, value);
+
+			/* Track the key the hash table owns, not the caller's pointer,
+			 * so the list stays valid after the caller releases its key. */
+			priv->newest = g_list_prepend (priv->newest, stored_key);
+
+			if (priv->oldest == NULL)
+				priv->oldest = priv->newest;
+		}
+#if DEBUG
+		else g_debug ("Lost storage race with another thread");
+#endif
+
+		g_rw_lock_writer_unlock (&priv->rw_lock);
+	}
+
+	/* fast_get means that we do not reposition the item to the head
+	 * of the list. it essentially makes the lru, a lru from storage,
+	 * not lru to user.
+	 */
+
+	else if (promote)
+	{
+		GList *node;
+
+#if DEBUG
+		g_debug ("Making item most recent");
+#endif
+
+		g_rw_lock_writer_lock (&priv->rw_lock);
+
+		for (node = priv->newest; node != NULL; node = node->next)
+		{
+			if (!priv->key_equal_func (key, node->data))
+				continue;
+
+			/* The list may have changed since the reader lock was dropped;
+			 * only relink when the node is not the head already. */
+			if (node != priv->newest)
+			{
+				/* node has a predecessor here, which becomes the tail if
+				 * node was the tail. */
+				if (priv->oldest == node)
+					priv->oldest = node->prev;
+
+				/* Move the node itself to the front instead of wrapping
+				 * it in a new list element. */
+				priv->newest = g_list_remove_link (priv->newest, node);
+				priv->newest = g_list_concat (node, priv->newest);
+			}
+			break;
+		}
+
+		g_rw_lock_writer_unlock (&priv->rw_lock);
+	}
+
+	return value;
+}
+
+/*
+ * Removes the entry for `key` from the cache, if present.
+ *
+ * The key is unlinked from the recency list and removed from the hash
+ * table, which runs the key/value destroy notifiers given at construction.
+ */
+void
+g_lru_cache_evict (GLruCache *self, gpointer key)
+{
+	GLruCachePrivate *priv;
+	GList *node;
+
+	g_return_if_fail (G_IS_LRU_CACHE (self));
+
+	priv = self->priv;
+
+	g_rw_lock_writer_lock (&priv->rw_lock);
+
+	/* Compare against the key stored in the tail node (not the node itself)
+	 * so that evicting the tail keeps `oldest` consistent. */
+	if (priv->oldest != NULL &&
+	    priv->key_equal_func (key, priv->oldest->data))
+	{
+		g_lru_cache_evict_n_oldest_locked (self, 1);
+	}
+	else
+	{
+		for (node = priv->newest; node != NULL; node = node->next)
+		{
+			if (priv->key_equal_func (key, node->data))
+			{
+				priv->newest = g_list_remove_link (priv->newest, node);
+				g_list_free1 (node);
+				break;
+			}
+		}
+
+		/* Done last: removing the entry may free the stored key that the
+		 * list nodes point at, so it must not be compared afterwards. */
+		g_hash_table_remove (priv->hash_table, key);
+	}
+
+	g_rw_lock_writer_unlock (&priv->rw_lock);
+}
+
+/*
+ * Empties the cache: removes every hash table entry and frees the whole
+ * recency list, all under the writer lock.
+ */
+void
+g_lru_cache_clear (GLruCache *self)
+{
+	GLruCachePrivate *priv;
+
+	g_return_if_fail (G_IS_LRU_CACHE (self));
+
+	priv = self->priv;
+
+	g_rw_lock_writer_lock (&priv->rw_lock);
+
+	g_hash_table_remove_all (priv->hash_table);
+
+	g_list_free (priv->newest);
+	priv->newest = NULL;
+	priv->oldest = NULL;
+
+	g_rw_lock_writer_unlock (&priv->rw_lock);
+}
+
+/*
+ * Enables or disables fast_get. While enabled, cache hits are not moved to
+ * the head of the recency list.
+ */
+void
+g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get)
+{
+	GLruCachePrivate *priv;
+
+	g_return_if_fail (G_IS_LRU_CACHE (self));
+
+	priv = self->priv;
+	priv->fast_get = fast_get;
+}
+
+/*
+ * Reports whether fast_get is enabled; FALSE when `self` is not a GLruCache.
+ */
+gboolean
+g_lru_cache_get_fast_get (GLruCache *self)
+{
+	gboolean enabled;
+
+	g_return_val_if_fail (G_IS_LRU_CACHE (self), FALSE);
+
+	enabled = self->priv->fast_get;
+	return enabled;
+}
+
--- a/hw/xbox/nv2a/g-lru-cache.h
+++ b/hw/xbox/nv2a/g-lru-cache.h
@ -20,46 +20,59 @@
 #ifndef __G_LRU_CACHE_H__
 #define __G_LRU_CACHE_H__

+#ifdef __cplusplus
+extern "C" {
+#endif
+
 #include <glib.h>
 #include <glib-object.h>

 G_BEGIN_DECLS

-#define G_TYPE_LRU_CACHE        (g_lru_cache_get_type ())
-#define G_LRU_CACHE(obj)        (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache))
-#define G_LRU_CACHE_CONST(obj)      (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache const))
-#define G_LRU_CACHE_CLASS(klass)    (G_TYPE_CHECK_CLASS_CAST ((klass), G_TYPE_LRU_CACHE, GLruCacheClass))
-#define G_IS_LRU_CACHE(obj)     (G_TYPE_CHECK_INSTANCE_TYPE ((obj), G_TYPE_LRU_CACHE))
-#define G_IS_LRU_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), G_TYPE_LRU_CACHE))
-#define G_LRU_CACHE_GET_CLASS(obj)  (G_TYPE_INSTANCE_GET_CLASS ((obj), G_TYPE_LRU_CACHE, GLruCacheClass))
+#define G_TYPE_LRU_CACHE		(g_lru_cache_get_type ())
+#define G_LRU_CACHE(obj)		(G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache))
+#define G_LRU_CACHE_CONST(obj)		(G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache const))
+#define G_LRU_CACHE_CLASS(klass)	(G_TYPE_CHECK_CLASS_CAST ((klass), G_TYPE_LRU_CACHE, GLruCacheClass))
+#define G_IS_LRU_CACHE(obj)		(G_TYPE_CHECK_INSTANCE_TYPE ((obj), G_TYPE_LRU_CACHE))
+#define G_IS_LRU_CACHE_CLASS(klass)	(G_TYPE_CHECK_CLASS_TYPE ((klass), G_TYPE_LRU_CACHE))
+#define G_LRU_CACHE_GET_CLASS(obj)	(G_TYPE_INSTANCE_GET_CLASS ((obj), G_TYPE_LRU_CACHE, GLruCacheClass))
 #define G_LOOKUP_FUNC(func)             ((GLookupFunc)func)

-typedef struct _GLruCache       GLruCache;
-typedef struct _GLruCacheClass  GLruCacheClass;
-typedef struct _GLruCachePrivate    GLruCachePrivate;
+typedef struct _GLruCache		GLruCache;
+typedef struct _GLruCacheClass		GLruCacheClass;
+typedef struct _GLruCachePrivate	GLruCachePrivate;

-typedef gpointer (*GLookupFunc) (gpointer key, gpointer user_data);
+typedef gpointer (*GLookupFunc) (gpointer key, gpointer user_data, GError **error);

 struct _GLruCache
 {
-    GObject parent;
-    
-    GLruCachePrivate *priv;
+	GObject parent;
+	
+	GLruCachePrivate *priv;
 };

 struct _GLruCacheClass
 {
-    GObjectClass parent_class;
+	GObjectClass parent_class;
 };

 GType      g_lru_cache_get_type     (void) G_GNUC_CONST;

-GLruCache* g_lru_cache_new          (GHashFunc      hash_func,
+GLruCache* g_lru_cache_new          (GHashFunc      key_hash_func,
                                     GEqualFunc     key_equal_func,
-                                     GCopyFunc      key_copy_func,
                                     GLookupFunc    retrieve_func,
+                                     gpointer       user_data,
+                                     GDestroyNotify user_destroy_func);
+
+GLruCache* g_lru_cache_new_full     (GType          key_type,
+                                     GCopyFunc      key_copy_func,
                                     GDestroyNotify key_destroy_func,
+                                     GType          value_type,
+                                     GCopyFunc      value_copy_func,
                                     GDestroyNotify value_destroy_func,
+                                     GHashFunc      key_hash_func,
+                                     GEqualFunc     key_equal_func,
+                                     GLookupFunc    retrieve_func,
                                     gpointer       user_data,
                                     GDestroyNotify user_destroy_func);

@ -68,7 +81,7 @@ guint      g_lru_cache_get_max_size (GLruCache *self);

 guint      g_lru_cache_get_size     (GLruCache *self);

-gpointer   g_lru_cache_get          (GLruCache *self, gpointer key);
+gpointer   g_lru_cache_get          (GLruCache *self, gpointer key, GError **error);
 void       g_lru_cache_evict        (GLruCache *self, gpointer key);
 void       g_lru_cache_clear        (GLruCache *self);

@ -77,4 +90,8 @@ void       g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get);

 G_END_DECLS

+#ifdef __cplusplus
+}
+#endif
+
 #endif /* __G_LRU_CACHE_H__ */
--- a/hw/xbox/nv2a/nv2a.c
+++ b/hw/xbox/nv2a/nv2a.c
@ -0,0 +1,568 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/error-report.h"
+#include <assert.h>
+#include "nv2a.h"
+#include "hw/display/vga_regs.h"
+
+#ifdef __WINNT__
+/*
+ * HACK: mingw-w64 doesn't provide ffs, so for now a fallback lives here.
+ * TODO: decide on a better location
+ *
+ * Returns the 1-based position of the least significant set bit of `valu`,
+ * or 0 when `valu` is zero.
+ */
+int ffs(register int valu)
+{
+    register int bit = 0;
+
+    if (valu != 0) {
+        bit = 1;
+        while ((valu & 1) == 0) {
+            valu >>= 1;
+            bit++;
+        }
+    }
+
+    return bit;
+}
+#endif
+
+DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address);
+void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len);
+void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram);
+
+/*
+ * Recomputes the PMC pending-interrupt bits from the PFIFO, PCRTC and PGRAPH
+ * blocks and drives the PCI interrupt line accordingly.
+ *
+ * A block contributes its PMC bit when at least one of its pending
+ * interrupts is also enabled in that block.
+ */
+void update_irq(NV2AState *d)
+{
+    /* PFIFO */
+    if ((d->pfifo.pending_interrupts & d->pfifo.enabled_interrupts) == 0) {
+        d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PFIFO;
+    } else {
+        d->pmc.pending_interrupts |= NV_PMC_INTR_0_PFIFO;
+    }
+
+    /* PCRTC */
+    if ((d->pcrtc.pending_interrupts & d->pcrtc.enabled_interrupts) == 0) {
+        d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PCRTC;
+    } else {
+        d->pmc.pending_interrupts |= NV_PMC_INTR_0_PCRTC;
+    }
+
+    /* PGRAPH */
+    if ((d->pgraph.pending_interrupts & d->pgraph.enabled_interrupts) == 0) {
+        d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PGRAPH;
+    } else {
+        d->pmc.pending_interrupts |= NV_PMC_INTR_0_PGRAPH;
+    }
+
+    /*
+     * The PMC check is a logical AND: any pending bit together with any
+     * non-zero enable value raises the line.
+     * NOTE(review): presumably the PMC enable register is a mode value rather
+     * than a per-source mask - confirm before changing this to a bitwise AND.
+     */
+    if (!d->pmc.pending_interrupts || !d->pmc.enabled_interrupts) {
+        pci_irq_deassert(&d->dev);
+        return;
+    }
+
+    NV2A_DPRINTF("raise irq\n");
+    pci_irq_assert(&d->dev);
+}
+
+/*
+ * Decodes the DMA object stored at byte offset `dma_obj_address` in RAMIN.
+ *
+ * Three consecutive little-endian 32-bit words are read: flags, limit and
+ * frame. Class, target and the address adjust come from the flags word; the
+ * base address comes from the frame word.
+ *
+ * NOTE(review): the assertion only bounds the first byte of the object,
+ * while 12 bytes are read.
+ */
+DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address)
+{
+    uint32_t *words;
+    uint32_t flags, limit, frame;
+
+    assert(dma_obj_address < memory_region_size(&d->ramin));
+
+    words = (uint32_t*)(d->ramin_ptr + dma_obj_address);
+    flags = ldl_le_p(&words[0]);
+    limit = ldl_le_p(&words[1]);
+    frame = ldl_le_p(&words[2]);
+
+    DMAObject obj = {
+        .dma_class = GET_MASK(flags, NV_DMA_CLASS),
+        .dma_target = GET_MASK(flags, NV_DMA_TARGET),
+        .address = (frame & NV_DMA_ADDRESS) | GET_MASK(flags, NV_DMA_ADJUST),
+        .limit = limit,
+    };
+
+    return obj;
+}
+
+/*
+ * Resolves a DMA object to a host pointer into VRAM.
+ *
+ * Loads the object at `dma_obj_address`, stores its limit in `*len` and
+ * returns vram_ptr offset by the object's address with only the low
+ * 27 bits kept.
+ */
+void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len)
+{
+    DMAObject dma;
+
+    assert(dma_obj_address < memory_region_size(&d->ramin));
+
+    dma = nv_dma_load(d, dma_obj_address);
+
+    /* TODO: Handle targets and classes properly */
+    NV2A_DPRINTF("dma_map %x, %x, %" HWADDR_PRIx " %" HWADDR_PRIx "\n",
+                 dma.dma_class, dma.dma_target, dma.address, dma.limit);
+
+    dma.address &= 0x07FFFFFF;
+
+    /* Range check against VRAM is currently disabled:
+     * assert(dma.address + dma.limit < memory_region_size(d->vram)); */
+    *len = dma.limit;
+
+    return d->vram_ptr + dma.address;
+}
+
+/*
+ * Set STUB to 1 to build without the real PGRAPH/PFIFO implementation:
+ * nv2a_pgraph.c and nv2a_pfifo.c are then not included and the empty
+ * placeholders in the "#if STUB" sections are compiled instead.
+ */
+#define STUB 0
+
+#if STUB
+/* Empty placeholders used only when STUB is non-zero. */
+void *pfifo_puller_thread(void *opaque) { return NULL; }
+void pgraph_init(NV2AState *d){}
+static void pfifo_run_pusher(NV2AState *d){}
+void pgraph_destroy(PGRAPHState *pg){}
+/* Clamps x to the range 0..255 and returns it as a byte. */
+static uint8_t cliptobyte(int x)
+{
+    if (x < 0) {
+        return 0;
+    }
+    if (x > 255) {
+        return 255;
+    }
+    return (uint8_t)x;
+}
+/*
+ * Converts pixel `ix` of a packed line to RGB.
+ *
+ * The pixel's own byte is at line[ix * 2]. The two chroma bytes are taken
+ * from the odd byte positions around it: for an even pixel they follow at
+ * +1 and +3, for an odd pixel they sit at -1 and +1. Results are clamped
+ * to 0..255.
+ */
+static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix,
+                                uint8_t *r, uint8_t *g, uint8_t* b) {
+    unsigned int first_chroma = (ix % 2) ? (ix * 2 - 1) : (ix * 2 + 1);
+    int c = (int)line[ix * 2] - 16;
+    int d = (int)line[first_chroma] - 128;
+    int e = (int)line[first_chroma + 2] - 128;
+
+    *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
+    *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
+    *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
+}
+#endif
+
+/*
+ * Declares the <prefix>_read / <prefix>_write register handlers of one
+ * block, so that blocktable below can reference them regardless of the
+ * order in which the per-block sources are included.
+ */
+#define DEFINE_PROTO(prefix) \
+    uint64_t prefix ## _read(void *opaque, hwaddr addr, unsigned int size); \
+    void prefix ## _write(void *opaque, hwaddr addr, uint64_t val, unsigned int size);
+
+DEFINE_PROTO(pmc)
+DEFINE_PROTO(pbus)
+DEFINE_PROTO(pfifo)
+DEFINE_PROTO(prma)
+DEFINE_PROTO(pvideo)
+DEFINE_PROTO(ptimer)
+DEFINE_PROTO(pcounter)
+DEFINE_PROTO(pvpe)
+DEFINE_PROTO(ptv)
+DEFINE_PROTO(prmfb)
+DEFINE_PROTO(prmvio)
+DEFINE_PROTO(pfb)
+DEFINE_PROTO(pstraps)
+DEFINE_PROTO(pgraph)
+DEFINE_PROTO(pcrtc)
+DEFINE_PROTO(prmcio)
+DEFINE_PROTO(pramdac)
+DEFINE_PROTO(prmdio)
+DEFINE_PROTO(pramin)
+DEFINE_PROTO(user)
+
+#undef DEFINE_PROTO
+
+#include "nv2a_pbus.c"
+#include "nv2a_pcrtc.c"
+#include "nv2a_pfb.c"
+#if !STUB
+#include "nv2a_pgraph.c"
+#include "nv2a_pfifo.c"
+#endif
+#include "nv2a_pmc.c"
+#include "nv2a_pramdac.c"
+#include "nv2a_prmcio.c"
+#include "nv2a_prmvio.c"
+#include "nv2a_ptimer.c"
+#include "nv2a_pvideo.c"
+#include "nv2a_stubs.c"
+#include "nv2a_user.c"
+
+#if STUB
+/*
+ * Placeholder PGRAPH/PFIFO register handlers for STUB builds: accesses are
+ * only logged, writes are discarded and reads return 0.
+ */
+
+uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PGRAPH, addr, 0);
+    return 0;
+}
+
+void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PGRAPH, addr, val);
+}
+
+uint64_t pfifo_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    reg_log_read(NV_PFIFO, addr, 0);
+    return 0;
+}
+
+void pfifo_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PFIFO, addr, val);
+}
+#endif
+
+/*
+ * Table of register blocks, indexed by the NV_<NAME> block id. Each entry
+ * carries the block's name, its offset and size, and its read/write
+ * handlers. The PRAMIN entry is currently commented out.
+ */
+const struct NV2ABlockInfo blocktable[] = {
+
+/* Builds the designated-initializer entry for block NV_<NAME>. */
+#define ENTRY(NAME, OFFSET, SIZE, RDFUNC, WRFUNC) \
+    [ NV_ ## NAME ]  = { \
+        .name = #NAME, .offset = OFFSET, .size = SIZE, \
+        .ops = { .read = RDFUNC, .write = WRFUNC }, \
+    }, \
+
+    ENTRY(PMC,      0x000000, 0x001000, pmc_read,      pmc_write)
+    ENTRY(PBUS,     0x001000, 0x001000, pbus_read,     pbus_write)
+    ENTRY(PFIFO,    0x002000, 0x002000, pfifo_read,    pfifo_write)
+    ENTRY(PRMA,     0x007000, 0x001000, prma_read,     prma_write)
+    ENTRY(PVIDEO,   0x008000, 0x001000, pvideo_read,   pvideo_write)
+    ENTRY(PTIMER,   0x009000, 0x001000, ptimer_read,   ptimer_write)
+    ENTRY(PCOUNTER, 0x00a000, 0x001000, pcounter_read, pcounter_write)
+    ENTRY(PVPE,     0x00b000, 0x001000, pvpe_read,     pvpe_write)
+    ENTRY(PTV,      0x00d000, 0x001000, ptv_read,      ptv_write)
+    ENTRY(PRMFB,    0x0a0000, 0x020000, prmfb_read,    prmfb_write)
+    ENTRY(PRMVIO,   0x0c0000, 0x001000, prmvio_read,   prmvio_write)
+    ENTRY(PFB,      0x100000, 0x001000, pfb_read,      pfb_write)
+    ENTRY(PSTRAPS,  0x101000, 0x001000, pstraps_read,  pstraps_write)
+    ENTRY(PGRAPH,   0x400000, 0x002000, pgraph_read,   pgraph_write)
+    ENTRY(PCRTC,    0x600000, 0x001000, pcrtc_read,    pcrtc_write)
+    ENTRY(PRMCIO,   0x601000, 0x001000, prmcio_read,   prmcio_write)
+    ENTRY(PRAMDAC,  0x680000, 0x001000, pramdac_read,  pramdac_write)
+    ENTRY(PRMDIO,   0x681000, 0x001000, prmdio_read,   prmdio_write)
+    // ENTRY(PRAMIN,   0x700000, 0x100000, pramin_read,   pramin_write)
+    ENTRY(USER,     0x800000, 0x800000, user_read,     user_write)
+#undef ENTRY
+};
+
+/* Number of slots in blocktable (highest initialized block id + 1). */
+const int blocktable_len = ARRAY_SIZE(blocktable);
+
+// static const char* nv2a_reg_names[] = {};
+
+/*
+ * Log a register read through NV2A_DPRINTF.
+ *
+ * block: NV_<NAME> block id used to look up the block name in blocktable.
+ * addr:  register offset inside the block.
+ * val:   value being returned to the guest.
+ *
+ * An id that is out of range, or that has no blocktable entry, is printed
+ * numerically as "(<id>?)" instead of being used as an array index.
+ *
+ * TODO: print symbolic register names once the (currently disabled)
+ * nv2a_reg_names table exists.
+ */
+void reg_log_read(int block, hwaddr addr, uint64_t val)
+{
+    const char *name = NULL;
+
+    /* Only index blocktable with an id that is inside the table. */
+    if (block >= 0 && block < blocktable_len) {
+        name = blocktable[block].name;
+    }
+
+    if (name) {
+        NV2A_DPRINTF("%s: read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n",
+                     name, addr, val);
+    } else {
+        NV2A_DPRINTF("(%d?): read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n",
+                     block, addr, val);
+    }
+}
+
+/*
+ * Log a register write through NV2A_DPRINTF.
+ *
+ * block: NV_<NAME> block id used to look up the block name in blocktable.
+ * addr:  register offset inside the block.
+ * val:   value written by the guest.
+ *
+ * An id that is out of range, or that has no blocktable entry, is printed
+ * numerically as "(<id>?)" instead of being used as an array index.
+ *
+ * TODO: print symbolic register names once the (currently disabled)
+ * nv2a_reg_names table exists.
+ */
+void reg_log_write(int block, hwaddr addr, uint64_t val)
+{
+    const char *name = NULL;
+
+    /* Only index blocktable with an id that is inside the table. */
+    if (block >= 0 && block < blocktable_len) {
+        name = blocktable[block].name;
+    }
+
+    if (name) {
+        NV2A_DPRINTF("%s: [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n",
+                     name, addr, val);
+    } else {
+        NV2A_DPRINTF("(%d?): [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n",
+                     block, addr, val);
+    }
+}
+
+#if 0
+/* FIXME: Probably totally wrong */
+/* Pack 8-bit r/g/b into one byte: 3 bits red, 3 bits green, 2 bits blue. */
+static inline unsigned int rgb_to_pixel8(unsigned int r, unsigned int g,
+                                         unsigned int b)
+{
+    const unsigned int red   = (r >> 5) << 5;
+    const unsigned int green = (g >> 5) << 2;
+    const unsigned int blue  = b >> 6;
+
+    return red | green | blue;
+}
+
+/* Pack 8-bit r/g/b into 16 bits: 5 bits red, 6 bits green, 5 bits blue. */
+static inline unsigned int rgb_to_pixel16(unsigned int r, unsigned int g,
+                                          unsigned int b)
+{
+    const unsigned int red   = (r >> 3) << 11;
+    const unsigned int green = (g >> 2) << 5;
+    const unsigned int blue  = b >> 3;
+
+    return red | green | blue;
+}
+
+/* Pack 8-bit r/g/b into 0x00RRGGBB. */
+static inline unsigned int rgb_to_pixel32(unsigned int r, unsigned int g,
+                                          unsigned int b)
+{
+    const unsigned int red   = r << 16;
+    const unsigned int green = g << 8;
+
+    return red | green | b;
+}
+
+/*
+ * Draw the PVIDEO overlay onto one scanline of the VGA output.
+ *
+ * vga:  VGA state embedded in the NV2AState.
+ * line: destination scanline in the display surface's pixel format.
+ * y:    index of that scanline.
+ *
+ * This function sits inside an "#if 0" region and the hook that would
+ * install it in nv2a_realize() is commented out, so it is not built.
+ * Only the LE_CR8YB8CB8YA8 input format is handled, and scaling and
+ * colour keys are not implemented.
+ */
+static void nv2a_overlay_draw_line(VGACommonState *vga, uint8_t *line, int y)
+{
+    NV2A_DPRINTF("nv2a_overlay_draw_line\n");
+
+    NV2AState *d = container_of(vga, NV2AState, vga);
+    DisplaySurface *surface = qemu_console_surface(d->vga.con);
+
+    int surf_bpp = surface_bytes_per_pixel(surface);
+    int surf_width = surface_width(surface);
+
+    /* Nothing to draw unless buffer 0 is flagged as in use. */
+    if (!(d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)) return;
+
+    hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE];
+    hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
+    hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET];
+
+    /* Source rectangle, pitch and colour format. */
+    int in_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN],
+                            NV_PVIDEO_SIZE_IN_WIDTH);
+    int in_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN],
+                             NV_PVIDEO_SIZE_IN_HEIGHT);
+    int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_S);
+    int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
+                        NV_PVIDEO_POINT_IN_T);
+    int in_pitch = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT],
+                            NV_PVIDEO_FORMAT_PITCH);
+    int in_color = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT],
+                            NV_PVIDEO_FORMAT_COLOR);
+
+    // TODO: support other color formats
+    assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);
+
+    /* Destination rectangle on screen. */
+    int out_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT],
+                             NV_PVIDEO_SIZE_OUT_WIDTH);
+    int out_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT],
+                             NV_PVIDEO_SIZE_OUT_HEIGHT);
+    int out_x = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT],
+                         NV_PVIDEO_POINT_OUT_X);
+    int out_y = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT],
+                         NV_PVIDEO_POINT_OUT_Y);
+
+
+    /* Skip scanlines outside the destination rectangle. */
+    if (y < out_y || y >= out_y + out_height) return;
+
+    // TODO: scaling, color keys
+
+    /* Source row is taken 1:1 from the destination row (no scaling).
+     * NOTE(review): in_t is read above but never used here - confirm. */
+    int in_y = y - out_y;
+    if (in_y >= in_height) return;
+
+    assert(offset + in_pitch * (in_y + 1) <= limit);
+    uint8_t *in_line = d->vram_ptr + base + offset + in_pitch * in_y;
+
+    int x;
+    for (x=0; x<out_width; x++) {
+        /* Stop at the right edge of the surface or of the source image. */
+        int ox = out_x + x;
+        if (ox >= surf_width) break;
+        int ix = in_s + x;
+        if (ix >= in_width) break;
+
+        uint8_t r,g,b;
+        convert_yuy2_to_rgb(in_line, ix, &r, &g, &b);
+
+        // unsigned int pixel = vga->rgb_to_pixel(r, g, b);
+        /* Store the pixel using the surface's bytes-per-pixel. */
+        switch (surf_bpp) {
+        case 1:
+            ((uint8_t*)line)[ox] = (uint8_t)rgb_to_pixel8(r,g,b);
+            break;
+        case 2:
+            ((uint16_t*)line)[ox] = (uint16_t)rgb_to_pixel16(r,g,b);
+            break;
+        case 4:
+            ((uint32_t*)line)[ox] = (uint32_t)rgb_to_pixel32(r,g,b);
+            break;
+        default:
+            assert(false);
+            break;
+        }
+    }
+}
+#endif
+
+/*
+ * VGA get_bpp hook: derive bits per pixel from the low two bits of CRTC
+ * register 0x28. The values 0, 1, 2 and 3 map to 0, 8, 16 and 32.
+ */
+static int nv2a_get_bpp(VGACommonState *s)
+{
+    const int depth_sel = s->cr[0x28] & 3;
+
+    return (depth_sel == 3) ? 32 : depth_sel * 8;
+}
+
+/*
+ * VGA get_offsets hook.
+ *
+ * pline_offset:  line offset assembled from CRTC 0x13 plus extension bits
+ *                in CRTC 0x19 and 0x25, multiplied by 8.
+ * pstart_addr:   PCRTC start address divided by 4.
+ * pline_compare: line-compare value assembled from the standard VGA
+ *                LINE_COMPARE, OVERFLOW and MAX_SCAN registers.
+ */
+static void nv2a_get_offsets(VGACommonState *s,
+                             uint32_t *pline_offset,
+                             uint32_t *pstart_addr,
+                             uint32_t *pline_compare)
+{
+    NV2AState *d = container_of(s, NV2AState, vga);
+
+    uint32_t pitch = s->cr[0x13]
+                   | ((s->cr[0x19] & 0xe0) << 3)
+                   | ((s->cr[0x25] & 0x20) << 6);
+    pitch <<= 3;
+    *pline_offset = pitch;
+
+    uint32_t first = d->pcrtc.start / 4;
+    *pstart_addr = first;
+
+    uint32_t split = s->cr[VGA_CRTC_LINE_COMPARE]
+                   | ((s->cr[VGA_CRTC_OVERFLOW] & 0x10) << 4)
+                   | ((s->cr[VGA_CRTC_MAX_SCAN] & 0x40) << 3);
+    *pline_compare = split;
+}
+
+/*
+ * Console gfx_update hook: run the VGA core's own gfx_update, then flag a
+ * PCRTC VBLANK interrupt as pending and re-evaluate the IRQ line.
+ */
+static void nv2a_vga_gfx_update(void *opaque)
+{
+    VGACommonState *vga = opaque;
+    NV2AState *d = container_of(vga, NV2AState, vga);
+
+    vga->hw_ops->gfx_update(vga);
+
+    d->pcrtc.pending_interrupts |= NV_PCRTC_INTR_0_VBLANK;
+    update_irq(d);
+}
+
+/*
+ * Attach system RAM to the device as video memory and start the PFIFO
+ * puller thread.
+ *
+ * d:   device state; nv2a_init() calls this with the device returned by
+ *      pci_create_simple(), and d->mmio is initialised in nv2a_realize().
+ * ram: memory region that is used as VRAM.
+ *
+ * The statement order matters: regions are created before their host
+ * pointers are fetched, and pgraph_init() runs before the puller thread
+ * is created.
+ */
+static void nv2a_init_memory(NV2AState *d, MemoryRegion *ram)
+{
+    /* xbox is UMA - vram *is* ram */
+    d->vram = ram;
+
+    /* PCI exposed vram: BAR 1 is a prefetchable alias of the whole region */
+    memory_region_init_alias(&d->vram_pci, OBJECT(d), "nv2a-vram-pci", d->vram,
+                             0, memory_region_size(d->vram));
+    pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, &d->vram_pci);
+
+
+    /* RAMIN - should be in vram somewhere, but not quite sure where atm.
+     * For now it is a separate 0x100000-byte RAM region. */
+    memory_region_init_ram(&d->ramin, OBJECT(d), "nv2a-ramin", 0x100000, &error_fatal);
+    /* memory_region_init_alias(&d->ramin, "nv2a-ramin", &d->vram,
+                         memory_region_size(d->vram) - 0x100000,
+                         0x100000); */
+
+    /* RAMIN is visible at offset 0x700000 of the MMIO container. */
+    memory_region_add_subregion(&d->mmio, 0x700000, &d->ramin);
+
+
+    /* Cache host pointers to both RAM regions. */
+    d->vram_ptr = memory_region_get_ram_ptr(d->vram);
+    d->ramin_ptr = memory_region_get_ram_ptr(&d->ramin);
+
+    /* Enable dirty logging for the NV2A client and mark all of VRAM dirty. */
+    memory_region_set_log(d->vram, true, DIRTY_MEMORY_NV2A);
+    memory_region_set_dirty(d->vram, 0, memory_region_size(d->vram));
+
+    /* hacky. swap out vga's vram for an alias of the shared VRAM region */
+    memory_region_destroy(&d->vga.vram);
+    // memory_region_unref(&d->vga.vram); // FIXME: Is this right?
+    memory_region_init_alias(&d->vga.vram, OBJECT(d), "vga.vram",
+                             d->vram, 0, memory_region_size(d->vram));
+    d->vga.vram_ptr = memory_region_get_ram_ptr(&d->vga.vram);
+    vga_dirty_log_start(&d->vga);
+
+
+    pgraph_init(d);
+
+    /* fire up puller; it is joinable and nv2a_exitfn() joins it */
+    qemu_thread_create(&d->pfifo.puller_thread, "nv2a.puller_thread",
+                       pfifo_puller_thread,
+                       d, QEMU_THREAD_JOINABLE);
+}
+
+/*
+ * PCI realize hook.
+ *
+ * Sets the default PCRTC/PRAMDAC state, initialises the embedded VGA core
+ * and its graphic console, registers the MMIO container as BAR 0 with one
+ * I/O subregion per named blocktable entry, and initialises the PFIFO
+ * cache1 lock, condition variable and queues.
+ *
+ * errp is not used: no failure is reported from this function.
+ */
+static void nv2a_realize(PCIDevice *dev, Error **errp)
+{
+    NV2AState *d = NV2A_DEVICE(dev);
+    VGACommonState *vga = &d->vga;
+    int blk;
+
+    dev->config[PCI_INTERRUPT_PIN] = 0x01;
+
+    d->pcrtc.start = 0;
+
+    d->pramdac.core_clock_coeff = 0x00011c01; /* 189MHz...? */
+    d->pramdac.core_clock_freq = 189000000;
+    d->pramdac.memory_clock_coeff = 0;
+    d->pramdac.video_clock_coeff = 0x0003C20D; /* 25182Khz...? */
+
+    /* legacy VGA core */
+    vga_common_reset(vga);
+
+    vga->vram_size_mb = 64;
+    /* seems to start in color mode */
+    vga->msr = VGA_MIS_COLOR;
+
+    vga_common_init(vga, OBJECT(dev), false); // FIXME: true or false? idk
+    vga->get_bpp = nv2a_get_bpp;
+    vga->get_offsets = nv2a_get_offsets;
+    /* The overlay hook (nv2a_overlay_draw_line) is currently disabled. */
+
+    /* Copy the VGA console ops, overriding gfx_update with our wrapper. */
+    d->hw_ops = *vga->hw_ops;
+    d->hw_ops.gfx_update = nv2a_vga_gfx_update;
+    vga->con = graphic_console_init(DEVICE(dev), 0, &d->hw_ops, vga);
+
+    /* mmio container, exposed as BAR 0 */
+    memory_region_init(&d->mmio, OBJECT(dev), "nv2a-mmio", 0x1000000);
+    pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
+
+    /* One I/O subregion per register block; unnamed slots are holes. */
+    for (blk = 0; blk < ARRAY_SIZE(blocktable); blk++) {
+        const NV2ABlockInfo *info = &blocktable[blk];
+
+        if (!info->name) {
+            continue;
+        }
+        memory_region_init_io(&d->block_mmio[blk], OBJECT(dev),
+                              &info->ops, d,
+                              info->name, info->size);
+        memory_region_add_subregion(&d->mmio, info->offset,
+                                    &d->block_mmio[blk]);
+    }
+
+    /* init fifo cache1 */
+    qemu_mutex_init(&d->pfifo.cache1.cache_lock);
+    qemu_cond_init(&d->pfifo.cache1.cache_cond);
+    QSIMPLEQ_INIT(&d->pfifo.cache1.cache);
+    QSIMPLEQ_INIT(&d->pfifo.cache1.working_cache);
+}
+
+/*
+ * PCI exit hook: flag the device as exiting, wake and join the PFIFO
+ * puller thread, then destroy the cache1 lock/condition and the PGRAPH
+ * state.
+ *
+ * NOTE(review): 'exiting' is set and cache_cond is signalled without
+ * holding cache_lock. Whether the puller can miss this wakeup depends on
+ * pfifo_puller_thread(), which is not visible here - confirm.
+ */
+static void nv2a_exitfn(PCIDevice *dev)
+{
+    NV2AState *d = NV2A_DEVICE(dev);
+    Cache1State *cache1 = &d->pfifo.cache1;
+
+    d->exiting = true;
+    qemu_cond_signal(&cache1->cache_cond);
+    qemu_thread_join(&d->pfifo.puller_thread);
+
+    /* The puller has been joined, so its primitives can go away. */
+    qemu_mutex_destroy(&cache1->cache_lock);
+    qemu_cond_destroy(&cache1->cache_cond);
+
+    pgraph_destroy(&d->pgraph);
+}
+
+/*
+ * QOM class initialiser: fills in the PCI identity (vendor, device,
+ * revision, class), the realize/exit hooks and the device description.
+ */
+static void nv2a_class_init(ObjectClass *klass, void *data)
+{
+    PCIDeviceClass *pci = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    /* PCI identity */
+    pci->vendor_id = PCI_VENDOR_ID_NVIDIA;
+    pci->device_id = PCI_DEVICE_ID_NVIDIA_GEFORCE_NV2A;
+    pci->revision  = 161;
+    pci->class_id  = PCI_CLASS_DISPLAY_3D;
+
+    /* lifecycle hooks */
+    pci->realize   = nv2a_realize;
+    pci->exit      = nv2a_exitfn;
+
+    dev_class->desc = "GeForce NV2A Integrated Graphics";
+}
+
+/* QOM type description for the "nv2a" conventional PCI device. */
+static const TypeInfo nv2a_info = {
+    .name          = "nv2a",
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(NV2AState),
+    .class_init    = nv2a_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+        { },
+    },
+};
+
+/* Register the "nv2a" type with QOM; type_init() arranges for the call. */
+static void nv2a_register(void)
+{
+    type_register_static(&nv2a_info);
+}
+
+type_init(nv2a_register)
+
+/*
+ * Create the "nv2a" PCI device at devfn on bus and attach ram to it as
+ * video memory.
+ */
+void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram)
+{
+    PCIDevice *pci_dev = pci_create_simple(bus, devfn, "nv2a");
+
+    nv2a_init_memory(NV2A_DEVICE(pci_dev), ram);
+}
--- a/hw/xbox/nv2a/nv2a.h
+++ b/hw/xbox/nv2a/nv2a.h
@ -0,0 +1,446 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HW_NV2A_H
+#define HW_NV2A_H
+
+#include "hw/hw.h"
+#include "hw/i386/pc.h"
+#include "ui/console.h"
+#include "hw/pci/pci.h"
+#include "ui/console.h"
+#include "hw/display/vga.h"
+#include "hw/display/vga_int.h"
+#include "qemu/thread.h"
+#include "qapi/qmp/qstring.h"
+#include "cpu.h"
+
+#include "g-lru-cache.h"
+#include "swizzle.h"
+#include "nv2a_shaders.h"
+#include "nv2a_debug.h"
+#include "nv2a_int.h"
+
+#include "gl/gloffscreen.h"
+#include "gl/glextensions.h"
+
+#define USE_TEXTURE_CACHE
+
+/*
+ * Extract the bit field selected by mask from v: the masked bits are
+ * shifted right by the index of the lowest set bit of mask (ffs(mask)-1).
+ * mask is evaluated twice and must be non-zero, because ffs() returns 0
+ * for a zero argument, which would make the shift count -1.
+ */
+#define GET_MASK(v, mask) (((v) & (mask)) >> (ffs(mask)-1))
+
+/*
+ * Replace the bit field selected by mask inside the lvalue v with val:
+ * the field is cleared, then val is shifted up to the lowest set bit of
+ * mask and clipped to the mask. val and mask are each evaluated once and
+ * are held in unsigned int temporaries, so they are used at unsigned int
+ * width. v is evaluated twice. Uses a GNU statement expression.
+ */
+#define SET_MASK(v, mask, val) ({                                    \
+        const unsigned int __val = (val);                             \
+        const unsigned int __mask = (mask);                          \
+        (v) &= ~(__mask);                                            \
+        (v) |= ((__val) << (ffs(__mask)-1)) & (__mask);              \
+    })
+
+/*
+ * Expand to four case labels: v, v+step, v+2*step and v+3*step. The last
+ * label has no trailing colon, so the use site is written "CASE_4(v, s):".
+ */
+#define CASE_4(v, step)                                              \
+    case (v):                                                        \
+    case (v)+(step):                                                 \
+    case (v)+(step)*2:                                               \
+    case (v)+(step)*3
+
+
+/* Checked QOM cast of obj to NV2AState for the "nv2a" type. */
+#define NV2A_DEVICE(obj) \
+    OBJECT_CHECK(NV2AState, (obj), "nv2a")
+
+void reg_log_read(int block, hwaddr addr, uint64_t val);
+void reg_log_write(int block, hwaddr addr, uint64_t val);
+
+/* FIFO submission mode; stored in Cache1State.mode. */
+enum FifoMode {
+    FIFO_PIO = 0,
+    FIFO_DMA = 1,
+};
+
+/* Engine identifiers; used for Cache1State.bound_engines and last_engine. */
+enum FIFOEngine {
+    ENGINE_SOFTWARE = 0,
+    ENGINE_GRAPHICS = 1,
+    ENGINE_DVD = 2,
+};
+
+/*
+ * Description of a DMA object: class, target, address and limit.
+ * (Field meanings are taken from their names; the code that fills this
+ * structure is outside this header.)
+ */
+typedef struct DMAObject {
+    unsigned int dma_class;
+    unsigned int dma_target;
+    hwaddr address;
+    hwaddr limit;
+} DMAObject;
+
+/*
+ * State for one vertex attribute slot. PGRAPHState holds
+ * NV2A_VERTEXSHADER_ATTRIBUTES of these in vertex_attributes[].
+ */
+typedef struct VertexAttribute {
+    bool dma_select;
+    hwaddr offset;
+
+    /* inline arrays are packed in order?
+     * Need to pass the offset to converted attributes */
+    unsigned int inline_array_offset;
+
+    float inline_value[4];
+
+    unsigned int format;
+    unsigned int size; /* size of the data type */
+    unsigned int count; /* number of components */
+    uint32_t stride;
+
+    /* Bookkeeping for a converted copy of the attribute data
+     * (presumably for formats GL cannot consume directly - confirm). */
+    bool needs_conversion;
+    uint8_t *converted_buffer;
+    unsigned int converted_elements;
+    unsigned int converted_size;
+    unsigned int converted_count;
+
+    float *inline_buffer;
+
+    /* OpenGL-side description of the attribute. */
+    GLint gl_count;
+    GLenum gl_type;
+    GLboolean gl_normalize;
+
+    /* OpenGL object names for the converted and inline buffers. */
+    GLuint gl_converted_buffer;
+    GLuint gl_inline_buffer;
+} VertexAttribute;
+
+/*
+ * Per-surface tracking state. PGRAPHState keeps one for the colour
+ * surface and one for the zeta surface.
+ */
+typedef struct Surface {
+    bool draw_dirty;
+    bool buffer_dirty;
+    bool write_enabled_cache;
+    unsigned int pitch;
+
+    hwaddr offset;
+} Surface;
+
+/*
+ * Format and geometry of the render surfaces. PGRAPHState stores both the
+ * current shape and last_surface_shape.
+ */
+typedef struct SurfaceShape {
+    unsigned int z_format;
+    unsigned int color_format;
+    unsigned int zeta_format;
+    unsigned int log_width, log_height;
+    unsigned int clip_x, clip_y;
+    unsigned int clip_width, clip_height;
+    unsigned int anti_aliasing;
+} SurfaceShape;
+
+/* Format and dimensions of a texture; embedded in TextureKey as 'state'. */
+typedef struct TextureShape {
+    bool cubemap;
+    unsigned int dimensionality;
+    unsigned int color_format;
+    unsigned int levels;
+    unsigned int width, height, depth;
+
+    unsigned int min_mipmap_level, max_mipmap_level;
+    unsigned int pitch;
+} TextureShape;
+
+/*
+ * Texture description made of its shape, a 64-bit hash of its data and
+ * pointers to the texture and palette data (presumably the key type of
+ * PGRAPHState.texture_cache - confirm against the cache setup code).
+ */
+typedef struct TextureKey {
+    TextureShape state;
+    uint64_t data_hash;
+    uint8_t* texture_data;
+    uint8_t* palette_data;
+} TextureKey;
+
+/*
+ * An OpenGL texture (target and object name) with a reference count.
+ * PGRAPHState.texture_binding[] holds one pointer per texture slot.
+ */
+typedef struct TextureBinding {
+    GLenum gl_target;
+    GLuint gl_texture;
+    unsigned int refcnt;
+} TextureBinding;
+
+/* Kelvin object state; one of the GraphicsObject.data union members. */
+typedef struct KelvinState {
+    hwaddr dma_notifies;
+    hwaddr dma_state;
+    hwaddr dma_semaphore;
+    unsigned int semaphore_offset;
+} KelvinState;
+
+/*
+ * 2D context-surfaces object state (source and destination image
+ * settings); one of the GraphicsObject.data union members.
+ */
+typedef struct ContextSurfaces2DState {
+    hwaddr dma_image_source;
+    hwaddr dma_image_dest;
+    unsigned int color_format;
+    unsigned int source_pitch, dest_pitch;
+    hwaddr source_offset, dest_offset;
+
+} ContextSurfaces2DState;
+
+/*
+ * Image-blit object state (operation, input/output points and size);
+ * one of the GraphicsObject.data union members.
+ */
+typedef struct ImageBlitState {
+    hwaddr context_surfaces;
+    unsigned int operation;
+    unsigned int in_x, in_y;
+    unsigned int out_x, out_y;
+    unsigned int width, height;
+
+} ImageBlitState;
+
+/*
+ * A graphics object: its class code plus a union of the per-class state
+ * (presumably graphics_class selects the active member - confirm).
+ */
+typedef struct GraphicsObject {
+    uint8_t graphics_class;
+    union {
+        ContextSurfaces2DState context_surfaces_2d;
+
+        ImageBlitState image_blit;
+
+        KelvinState kelvin;
+    } data;
+} GraphicsObject;
+
+/*
+ * Per-subchannel state: the bound object's instance address, its state
+ * and a 5-word object cache. PGRAPHState holds NV2A_NUM_SUBCHANNELS of
+ * these in subchannel_data[].
+ */
+typedef struct GraphicsSubchannel {
+    hwaddr object_instance;
+    GraphicsObject object;
+    uint32_t object_cache[5];
+} GraphicsSubchannel;
+
+/*
+ * Per-channel graphics context. PGRAPHState holds NV2A_NUM_CHANNELS of
+ * these in context[].
+ */
+typedef struct GraphicsContext {
+    bool channel_3d;
+    unsigned int subchannel;
+} GraphicsContext;
+
+
+/*
+ * State of the PGRAPH block. It is embedded in NV2AState as 'pgraph',
+ * set up by pgraph_init() and torn down by pgraph_destroy().
+ */
+typedef struct PGRAPHState {
+    /* NOTE(review): presumably guards the fields below - confirm in the
+     * PGRAPH implementation. */
+    QemuMutex lock;
+
+    uint32_t pending_interrupts;
+    uint32_t enabled_interrupts;
+    QemuCond interrupt_cond;
+
+    hwaddr context_table;
+    hwaddr context_address;
+
+
+    unsigned int trapped_method;
+    unsigned int trapped_subchannel;
+    unsigned int trapped_channel_id;
+    uint32_t trapped_data[2];
+    uint32_t notify_source;
+
+    bool fifo_access;
+    QemuCond fifo_access_cond;
+
+    QemuCond flip_3d;
+
+    unsigned int channel_id;
+    bool channel_valid;
+    GraphicsContext context[NV2A_NUM_CHANNELS];
+
+    /* Colour and zeta (depth) render surfaces. */
+    hwaddr dma_color, dma_zeta;
+    Surface surface_color, surface_zeta;
+    unsigned int surface_type;
+    SurfaceShape surface_shape;
+    SurfaceShape last_surface_shape;
+
+    /* Texture state, one slot per texture unit. */
+    hwaddr dma_a, dma_b;
+    GLruCache *texture_cache;
+    bool texture_dirty[NV2A_MAX_TEXTURES];
+    TextureBinding *texture_binding[NV2A_MAX_TEXTURES];
+
+    GHashTable *shader_cache;
+    ShaderBinding *shader_binding;
+
+    bool texture_matrix_enable[NV2A_MAX_TEXTURES];
+
+    /* FIXME: Move to NV_PGRAPH_BUMPMAT... */
+    float bump_env_matrix[NV2A_MAX_TEXTURES-1][4]; /* 3 allowed stages with 2x2 matrix each */
+
+    /* OpenGL context and framebuffer objects. */
+    GloContext *gl_context;
+    GLuint gl_framebuffer;
+    GLuint gl_color_buffer, gl_zeta_buffer;
+    GraphicsSubchannel subchannel_data[NV2A_NUM_SUBCHANNELS];
+
+    /* Z-pass pixel count reporting. */
+    hwaddr dma_report;
+    hwaddr report_offset;
+    bool zpass_pixel_count_enable;
+    unsigned int zpass_pixel_count_result;
+    unsigned int gl_zpass_pixel_count_query_count;
+    GLuint* gl_zpass_pixel_count_queries;
+
+    hwaddr dma_vertex_a, dma_vertex_b;
+
+    unsigned int primitive_mode;
+
+    bool enable_vertex_program_write;
+
+    uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE];
+
+    /* Vertex shader constants, four words each, with per-entry dirty flags. */
+    uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4];
+    bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS];
+
+    /* lighting constant arrays */
+    uint32_t ltctxa[NV2A_LTCTXA_COUNT][4];
+    bool ltctxa_dirty[NV2A_LTCTXA_COUNT];
+    uint32_t ltctxb[NV2A_LTCTXB_COUNT][4];
+    bool ltctxb_dirty[NV2A_LTCTXB_COUNT];
+    uint32_t ltc1[NV2A_LTC1_COUNT][4];
+    bool ltc1_dirty[NV2A_LTC1_COUNT];
+
+    // should figure out where these are in lighting context
+    float light_infinite_half_vector[NV2A_MAX_LIGHTS][3];
+    float light_infinite_direction[NV2A_MAX_LIGHTS][3];
+    float light_local_position[NV2A_MAX_LIGHTS][3];
+    float light_local_attenuation[NV2A_MAX_LIGHTS][3];
+
+    VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES];
+
+    /* Inline vertex and element data, each with a current length. */
+    unsigned int inline_array_length;
+    uint32_t inline_array[NV2A_MAX_BATCH_LENGTH];
+    GLuint gl_inline_array_buffer;
+
+    unsigned int inline_elements_length;
+    uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH];
+
+    unsigned int inline_buffer_length;
+
+    unsigned int draw_arrays_length;
+    unsigned int draw_arrays_max_count;
+    /* FIXME: Unknown size, possibly endless, 1000 will do for now */
+    GLint gl_draw_arrays_start[1000];
+    GLsizei gl_draw_arrays_count[1000];
+
+    GLuint gl_element_buffer;
+    GLuint gl_memory_buffer;
+    GLuint gl_vertex_array;
+
+    /* Register storage; 0x2000 entries, the same number as the PGRAPH
+     * block size in blocktable. */
+    uint32_t regs[0x2000];
+} PGRAPHState;
+
+
+/*
+ * One queued FIFO command: a 14-bit method, a 3-bit subchannel, the
+ * non-increasing flag and a 32-bit parameter. Entries are linked into the
+ * QSIMPLEQ lists held by Cache1State.
+ */
+typedef struct CacheEntry {
+    QSIMPLEQ_ENTRY(CacheEntry) entry;
+    unsigned int method : 14;
+    unsigned int subchannel : 3;
+    bool nonincreasing;
+    uint32_t parameter;
+} CacheEntry;
+
+/*
+ * PFIFO cache1 state: pusher registers, engine bindings for the puller
+ * and the command queue itself. The lock, condition variable and both
+ * queues are initialised in nv2a_realize().
+ */
+typedef struct Cache1State {
+    unsigned int channel_id;
+    enum FifoMode mode;
+
+    /* Pusher state */
+    bool push_enabled;
+    bool dma_push_enabled;
+    bool dma_push_suspended;
+    hwaddr dma_instance;
+
+    bool method_nonincreasing;
+    unsigned int method : 14;
+    unsigned int subchannel : 3;
+    unsigned int method_count : 24;
+    uint32_t dcount;
+    bool subroutine_active;
+    hwaddr subroutine_return;
+    hwaddr get_jmp_shadow;
+    uint32_t rsvd_shadow;
+    uint32_t data_shadow;
+    uint32_t error;
+
+    /* Puller state */
+    bool pull_enabled;
+    enum FIFOEngine bound_engines[NV2A_NUM_SUBCHANNELS];
+    enum FIFOEngine last_engine;
+
+    /* The actual command queue */
+    QemuMutex cache_lock;
+    QemuCond cache_cond;
+    QSIMPLEQ_HEAD(, CacheEntry) cache;
+    QSIMPLEQ_HEAD(, CacheEntry) working_cache;
+} Cache1State;
+
+/*
+ * Per-channel control values (DMA put/get and a reference value).
+ * NV2AState.user holds NV2A_NUM_CHANNELS of these.
+ */
+typedef struct ChannelControl {
+    hwaddr dma_put;
+    hwaddr dma_get;
+    uint32_t ref;
+} ChannelControl;
+
+/*
+ * Complete state of the "nv2a" PCI device: the PCI device itself, the
+ * embedded VGA core, memory regions and one sub-structure per register
+ * block.
+ */
+typedef struct NV2AState {
+    PCIDevice dev;
+    qemu_irq irq;
+    /* Set to true by nv2a_exitfn() before it joins the puller thread. */
+    bool exiting;
+
+    VGACommonState vga;
+    /* Copy of the VGA console ops with gfx_update overridden. */
+    GraphicHwOps hw_ops;
+    QEMUTimer *vblank_timer;
+
+    /* Video memory (the RAM region passed to nv2a_init) and RAMIN, with
+     * cached host pointers to each. */
+    MemoryRegion *vram;
+    MemoryRegion vram_pci;
+    uint8_t *vram_ptr;
+    MemoryRegion ramin;
+    uint8_t *ramin_ptr;
+
+    /* MMIO container (BAR 0) and one subregion per register block. */
+    MemoryRegion mmio;
+    MemoryRegion block_mmio[NV_NUM_BLOCKS];
+
+    struct {
+        uint32_t pending_interrupts;
+        uint32_t enabled_interrupts;
+    } pmc;
+
+    struct {
+        QemuThread puller_thread;
+        uint32_t pending_interrupts;
+        uint32_t enabled_interrupts;
+        Cache1State cache1;
+        uint32_t regs[0x2000];
+    } pfifo;
+
+    struct {
+        uint32_t regs[0x1000];
+    } pvideo;
+
+    struct {
+        uint32_t pending_interrupts;
+        uint32_t enabled_interrupts;
+        uint32_t numerator;
+        uint32_t denominator;
+        uint32_t alarm_time;
+    } ptimer;
+
+    struct {
+        /* Indexed directly by the register byte offset in
+         * pfb_read()/pfb_write(). */
+        uint32_t regs[0x1000];
+    } pfb;
+
+    struct PGRAPHState pgraph;
+
+    struct {
+        uint32_t pending_interrupts;
+        uint32_t enabled_interrupts;
+        /* Value of NV_PCRTC_START, masked to 27 bits when written. */
+        hwaddr start;
+    } pcrtc;
+
+    struct {
+        uint32_t core_clock_coeff;
+        uint64_t core_clock_freq;
+        uint32_t memory_clock_coeff;
+        uint32_t video_clock_coeff;
+    } pramdac;
+
+    struct {
+        ChannelControl channel_control[NV2A_NUM_CHANNELS];
+    } user;
+
+} NV2AState;
+
+/*
+ * Description of one register block: its name, its offset and size inside
+ * the MMIO container, and the MemoryRegionOps that handle accesses.
+ * blocktable[] is an array of these.
+ */
+typedef struct NV2ABlockInfo {
+    const char* name;
+    hwaddr offset;
+    uint64_t size;
+    MemoryRegionOps ops;
+} NV2ABlockInfo;
+
+extern const struct NV2ABlockInfo blocktable[];
+extern const int blocktable_len;
+
+void pgraph_init(NV2AState *d);
+void *pfifo_puller_thread(void *opaque);
+void pgraph_destroy(PGRAPHState *pg);
+void update_irq(NV2AState *d);
+
+#endif
--- a/hw/xbox/nv2a/nv2a_debug.c
+++ b/hw/xbox/nv2a/nv2a_debug.c
@ -18,14 +18,15 @@
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

-#include "hw/xbox/nv2a_debug.h"
-
 #ifdef DEBUG_NV2A_GL

+#include "qemu/osdep.h"
+
 #include <stdio.h>
 #include <stdarg.h>
 #include <assert.h>

+#include "nv2a_debug.h"
 #include "gl/glextensions.h"

 void gl_debug_message(bool cc, const char *fmt, ...)
--- a/hw/xbox/nv2a/nv2a_debug.h
+++ b/hw/xbox/nv2a/nv2a_debug.h
--- a/hw/xbox/nv2a/nv2a_int.h
+++ b/hw/xbox/nv2a/nv2a_int.h
--- a/hw/xbox/nv2a/nv2a_pbus.c
+++ b/hw/xbox/nv2a/nv2a_pbus.c
@ -0,0 +1,59 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* PBUS - bus control */
+/*
+ * PBUS read handler. The NV_PBUS_PCI_NV_0/1/2 registers mirror the 32-bit
+ * words of PCI config space at PCI_VENDOR_ID, PCI_COMMAND and
+ * PCI_CLASS_REVISION; every other offset reads as 0.
+ */
+uint64_t pbus_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t r = 0;
+
+    if (addr == NV_PBUS_PCI_NV_0) {
+        r = pci_get_long(d->dev.config + PCI_VENDOR_ID);
+    } else if (addr == NV_PBUS_PCI_NV_1) {
+        r = pci_get_long(d->dev.config + PCI_COMMAND);
+    } else if (addr == NV_PBUS_PCI_NV_2) {
+        r = pci_get_long(d->dev.config + PCI_CLASS_REVISION);
+    }
+
+    reg_log_read(NV_PBUS, addr, r);
+    return r;
+}
+
+/*
+ * PBUS write handler. Every write is logged; only NV_PBUS_PCI_NV_1 has an
+ * effect, storing the value into the PCI_COMMAND word of config space.
+ */
+void pbus_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_PBUS, addr, val);
+
+    if (addr == NV_PBUS_PCI_NV_1) {
+        pci_set_long(d->dev.config + PCI_COMMAND, val);
+    }
+}
--- a/hw/xbox/nv2a/nv2a_pcrtc.c
+++ b/hw/xbox/nv2a/nv2a_pcrtc.c
@ -0,0 +1,72 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * PCRTC read handler: returns the pending interrupts, the enabled
+ * interrupts or the start address; every other offset reads as 0.
+ */
+uint64_t pcrtc_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t r = 0;
+
+    if (addr == NV_PCRTC_INTR_0) {
+        r = d->pcrtc.pending_interrupts;
+    } else if (addr == NV_PCRTC_INTR_EN_0) {
+        r = d->pcrtc.enabled_interrupts;
+    } else if (addr == NV_PCRTC_START) {
+        r = d->pcrtc.start;
+    }
+
+    reg_log_read(NV_PCRTC, addr, r);
+    return r;
+}
+
+/*
+ * PCRTC write handler.
+ *
+ * NV_PCRTC_INTR_0:    write-1-to-clear on the pending interrupt bits.
+ * NV_PCRTC_INTR_EN_0: replaces the interrupt enable mask.
+ * NV_PCRTC_START:     stores the start address, masked to 27 bits.
+ *
+ * Both interrupt registers re-evaluate the IRQ line afterwards. Writes to
+ * any other offset are only logged.
+ */
+void pcrtc_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *d = (NV2AState *)opaque;
+
+    reg_log_write(NV_PCRTC, addr, val);
+
+    switch (addr) {
+    case NV_PCRTC_INTR_0:
+        d->pcrtc.pending_interrupts &= ~val;
+        update_irq(d);
+        break;
+    case NV_PCRTC_INTR_EN_0:
+        d->pcrtc.enabled_interrupts = val;
+        update_irq(d);
+        break;
+    case NV_PCRTC_START:
+        val &= 0x07FFFFFF;
+        // assert(val < memory_region_size(d->vram));
+        d->pcrtc.start = val;
+
+        /* The guest controls val, so only peek at VRAM for the debug
+         * print when all four bytes lie inside the region. */
+        if (val + 64 + 3 < memory_region_size(d->vram)) {
+            NV2A_DPRINTF("PCRTC_START - %x %x %x %x\n",
+                    d->vram_ptr[val+64], d->vram_ptr[val+64+1],
+                    d->vram_ptr[val+64+2], d->vram_ptr[val+64+3]);
+        }
+        break;
+    default:
+        break;
+    }
+}
--- a/hw/xbox/nv2a/nv2a_pfb.c
+++ b/hw/xbox/nv2a/nv2a_pfb.c
@ -0,0 +1,58 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * PFB read handler. NV_PFB_CFG0, NV_PFB_CSTATUS and NV_PFB_WBC return
+ * computed values; every other offset returns what is stored in
+ * pfb.regs[] at that offset.
+ */
+uint64_t pfb_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t r;
+
+    if (addr == NV_PFB_CFG0) {
+        /* 3-4 memory partitions. The debug bios checks this. */
+        r = 3;
+    } else if (addr == NV_PFB_CSTATUS) {
+        /* Reports the size of the VRAM region. */
+        r = memory_region_size(d->vram);
+    } else if (addr == NV_PFB_WBC) {
+        /* Flush not pending. */
+        r = 0;
+    } else {
+        r = d->pfb.regs[addr];
+    }
+
+    reg_log_read(NV_PFB, addr, r);
+    return r;
+}
+
+/*
+ * PFB write handler: logs the access and stores the value in pfb.regs[]
+ * at the written offset. No register has special write behaviour.
+ */
+void pfb_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *d = opaque;
+
+    reg_log_write(NV_PFB, addr, val);
+    d->pfb.regs[addr] = val;
+}
--- a/hw/xbox/nv2a/nv2a_pfifo.c
+++ b/hw/xbox/nv2a/nv2a_pfifo.c
@ -0,0 +1,513 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+typedef struct RAMHTEntry { /* decoded RAMHT entry, as produced by ramht_lookup() */
+    uint32_t handle; /* first 32-bit word of the entry */
+    hwaddr instance; /* (context & NV_RAMHT_INSTANCE) << 4 */
+    enum FIFOEngine engine; /* context bits under NV_RAMHT_ENGINE, shifted down by 16 */
+    unsigned int channel_id : 5; /* context bits under NV_RAMHT_CHID, shifted down by 24 */
+    bool valid; /* true when NV_RAMHT_STATUS is set in the context word */
+} RAMHTEntry;
+
+static void pfifo_run_pusher(NV2AState *d);
+void *pfifo_puller_thread(void *opaque); /* not static, unlike the other helpers declared here */
+static uint32_t ramht_hash(NV2AState *d, uint32_t handle);
+static RAMHTEntry ramht_lookup(NV2AState *d, uint32_t handle);
+
+/* PFIFO - MMIO and DMA FIFO submission to PGRAPH and VPE
+ *
+ * Register read handler. opaque is the NV2AState, addr the register
+ * offset; size is not used. Most CACHE1 registers are not stored in
+ * pfifo.regs[] but rebuilt on every read from the fields of
+ * d->pfifo.cache1; DMA_PUT/DMA_GET come from the user channel control
+ * block of the channel currently selected in cache1. Any offset without
+ * a dedicated case returns pfifo.regs[addr]. The value is logged with
+ * reg_log_read() before being returned.
+ */
+uint64_t pfifo_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    int i;
+    NV2AState *d = (NV2AState *)opaque;
+
+    uint64_t r = 0;
+    switch (addr) {
+    case NV_PFIFO_INTR_0:
+        r = d->pfifo.pending_interrupts;
+        break;
+    case NV_PFIFO_INTR_EN_0:
+        r = d->pfifo.enabled_interrupts;
+        break;
+    case NV_PFIFO_RUNOUT_STATUS:
+        r = NV_PFIFO_RUNOUT_STATUS_LOW_MARK; /* low mark empty */
+        break;
+    case NV_PFIFO_CACHE1_PUSH0:
+        r = d->pfifo.cache1.push_enabled;
+        break;
+    case NV_PFIFO_CACHE1_PUSH1:
+        SET_MASK(r, NV_PFIFO_CACHE1_PUSH1_CHID, d->pfifo.cache1.channel_id);
+        SET_MASK(r, NV_PFIFO_CACHE1_PUSH1_MODE, d->pfifo.cache1.mode);
+        break;
+    case NV_PFIFO_CACHE1_STATUS:
+        qemu_mutex_lock(&d->pfifo.cache1.cache_lock); /* queue is inspected under cache_lock */
+        if (QSIMPLEQ_EMPTY(&d->pfifo.cache1.cache)) {
+            r |= NV_PFIFO_CACHE1_STATUS_LOW_MARK; /* low mark empty */
+        }
+        qemu_mutex_unlock(&d->pfifo.cache1.cache_lock);
+        break;
+    case NV_PFIFO_CACHE1_DMA_PUSH:
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS,
+                 d->pfifo.cache1.dma_push_enabled);
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_PUSH_STATUS,
+                 d->pfifo.cache1.dma_push_suspended);
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_PUSH_BUFFER, 1); /* buffer empty */
+        break;
+    case NV_PFIFO_CACHE1_DMA_STATE:
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE,
+                 d->pfifo.cache1.method_nonincreasing);
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_METHOD,
+                 d->pfifo.cache1.method >> 2); /* pfifo_write stores this field shifted left by 2 */
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL,
+                 d->pfifo.cache1.subchannel);
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT,
+                 d->pfifo.cache1.method_count);
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_ERROR,
+                 d->pfifo.cache1.error);
+        break;
+    case NV_PFIFO_CACHE1_DMA_INSTANCE:
+        SET_MASK(r, NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS,
+                 d->pfifo.cache1.dma_instance >> 4); /* pfifo_write stores this field shifted left by 4 */
+        break;
+    case NV_PFIFO_CACHE1_DMA_PUT:
+        r = d->user.channel_control[d->pfifo.cache1.channel_id].dma_put;
+        break;
+    case NV_PFIFO_CACHE1_DMA_GET:
+        r = d->user.channel_control[d->pfifo.cache1.channel_id].dma_get;
+        break;
+    case NV_PFIFO_CACHE1_DMA_SUBROUTINE:
+        r = d->pfifo.cache1.subroutine_return
+            | d->pfifo.cache1.subroutine_active;
+        break;
+    case NV_PFIFO_CACHE1_PULL0:
+        qemu_mutex_lock(&d->pfifo.cache1.cache_lock); /* pull_enabled is read under cache_lock */
+        r = d->pfifo.cache1.pull_enabled;
+        qemu_mutex_unlock(&d->pfifo.cache1.cache_lock);
+        break;
+    case NV_PFIFO_CACHE1_ENGINE:
+        qemu_mutex_lock(&d->pfifo.cache1.cache_lock);
+        for (i=0; i<NV2A_NUM_SUBCHANNELS; i++) {
+            r |= d->pfifo.cache1.bound_engines[i] << (i*2); /* two bits per subchannel */
+        }
+        qemu_mutex_unlock(&d->pfifo.cache1.cache_lock);
+        break;
+    case NV_PFIFO_CACHE1_DMA_DCOUNT:
+        r = d->pfifo.cache1.dcount;
+        break;
+    case NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW:
+        r = d->pfifo.cache1.get_jmp_shadow;
+        break;
+    case NV_PFIFO_CACHE1_DMA_RSVD_SHADOW:
+        r = d->pfifo.cache1.rsvd_shadow;
+        break;
+    case NV_PFIFO_CACHE1_DMA_DATA_SHADOW:
+        r = d->pfifo.cache1.data_shadow;
+        break;
+    default:
+        r = d->pfifo.regs[addr]; /* plain backing-store register */
+        break;
+    }
+
+    reg_log_read(NV_PFIFO, addr, r);
+    return r;
+}
+
+/*
+ * PFIFO register write handler.
+ *
+ * opaque is the NV2AState, addr the register offset, val the value
+ * written; size is not used. Every access is logged with reg_log_write().
+ * Writes to the CACHE1 registers are unpacked into the fields of
+ * d->pfifo.cache1 (DMA_PUT/DMA_GET go to the user channel control block
+ * of the currently selected channel); any other offset is stored in
+ * pfifo.regs[addr].
+ *
+ * Side effects:
+ *  - INTR_0 / INTR_EN_0 re-evaluate the interrupt line via update_irq().
+ *  - Clearing the DMA_PUSH suspended bit while the pusher was suspended
+ *    runs the pusher synchronously.
+ *  - Enabling PULL0 access signals cache_cond to wake the puller thread.
+ */
+void pfifo_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    int i;
+    bool resume_pusher;
+    NV2AState *d = (NV2AState *)opaque;
+
+    reg_log_write(NV_PFIFO, addr, val);
+
+    switch (addr) {
+    case NV_PFIFO_INTR_0:
+        /* writing a 1 clears the corresponding pending bit */
+        d->pfifo.pending_interrupts &= ~val;
+        update_irq(d);
+        break;
+    case NV_PFIFO_INTR_EN_0:
+        d->pfifo.enabled_interrupts = val;
+        update_irq(d);
+        break;
+
+    case NV_PFIFO_CACHE1_PUSH0:
+        d->pfifo.cache1.push_enabled = val & NV_PFIFO_CACHE1_PUSH0_ACCESS;
+        break;
+    case NV_PFIFO_CACHE1_PUSH1:
+        d->pfifo.cache1.channel_id = GET_MASK(val, NV_PFIFO_CACHE1_PUSH1_CHID);
+        d->pfifo.cache1.mode = (enum FifoMode)GET_MASK(val, NV_PFIFO_CACHE1_PUSH1_MODE);
+        assert(d->pfifo.cache1.channel_id < NV2A_NUM_CHANNELS);
+        break;
+    case NV_PFIFO_CACHE1_DMA_PUSH:
+        d->pfifo.cache1.dma_push_enabled =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS);
+
+        /* The pusher is resumed only on a suspended -> running transition. */
+        resume_pusher = d->pfifo.cache1.dma_push_suspended
+                        && !GET_MASK(val, NV_PFIFO_CACHE1_DMA_PUSH_STATUS);
+
+        /* Latch the new state *before* running the pusher: the pusher may
+         * suspend itself again on a command error, and that must not be
+         * overwritten by the value the guest just wrote. */
+        d->pfifo.cache1.dma_push_suspended =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_PUSH_STATUS);
+        if (resume_pusher) {
+            pfifo_run_pusher(d);
+        }
+        break;
+    case NV_PFIFO_CACHE1_DMA_STATE:
+        d->pfifo.cache1.method_nonincreasing =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE);
+        d->pfifo.cache1.method =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_METHOD) << 2;
+        d->pfifo.cache1.subchannel =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL);
+        d->pfifo.cache1.method_count =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT);
+        d->pfifo.cache1.error =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_ERROR);
+        break;
+    case NV_PFIFO_CACHE1_DMA_INSTANCE:
+        d->pfifo.cache1.dma_instance =
+            GET_MASK(val, NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS) << 4;
+        break;
+    case NV_PFIFO_CACHE1_DMA_PUT:
+        d->user.channel_control[d->pfifo.cache1.channel_id].dma_put = val;
+        break;
+    case NV_PFIFO_CACHE1_DMA_GET:
+        d->user.channel_control[d->pfifo.cache1.channel_id].dma_get = val;
+        break;
+    case NV_PFIFO_CACHE1_DMA_SUBROUTINE:
+        d->pfifo.cache1.subroutine_return =
+            (val & NV_PFIFO_CACHE1_DMA_SUBROUTINE_RETURN_OFFSET);
+        d->pfifo.cache1.subroutine_active =
+            (val & NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE);
+        break;
+    case NV_PFIFO_CACHE1_PULL0:
+        /* pull_enabled is shared with the puller thread */
+        qemu_mutex_lock(&d->pfifo.cache1.cache_lock);
+        if ((val & NV_PFIFO_CACHE1_PULL0_ACCESS)
+             && !d->pfifo.cache1.pull_enabled) {
+            d->pfifo.cache1.pull_enabled = true;
+
+            /* the puller thread should wake up */
+            qemu_cond_signal(&d->pfifo.cache1.cache_cond);
+        } else if (!(val & NV_PFIFO_CACHE1_PULL0_ACCESS)
+                     && d->pfifo.cache1.pull_enabled) {
+            d->pfifo.cache1.pull_enabled = false;
+        }
+        qemu_mutex_unlock(&d->pfifo.cache1.cache_lock);
+        break;
+    case NV_PFIFO_CACHE1_ENGINE:
+        /* two bits per subchannel select the bound engine */
+        qemu_mutex_lock(&d->pfifo.cache1.cache_lock);
+        for (i=0; i<NV2A_NUM_SUBCHANNELS; i++) {
+            d->pfifo.cache1.bound_engines[i] = (enum FIFOEngine)((val >> (i*2)) & 3);
+        }
+        qemu_mutex_unlock(&d->pfifo.cache1.cache_lock);
+        break;
+    case NV_PFIFO_CACHE1_DMA_DCOUNT:
+        d->pfifo.cache1.dcount =
+            (val & NV_PFIFO_CACHE1_DMA_DCOUNT_VALUE);
+        break;
+    case NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW:
+        d->pfifo.cache1.get_jmp_shadow =
+            (val & NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW_OFFSET);
+        break;
+    case NV_PFIFO_CACHE1_DMA_RSVD_SHADOW:
+        d->pfifo.cache1.rsvd_shadow = val;
+        break;
+    case NV_PFIFO_CACHE1_DMA_DATA_SHADOW:
+        d->pfifo.cache1.data_shadow = val;
+        break;
+    default:
+        d->pfifo.regs[addr] = val;
+        break;
+    }
+}
+
+
+/* pusher should be fine to run from a MMIO handler
+ * whenever it's convenient
+ *
+ * Consumes command words from the push buffer of the channel currently
+ * selected in cache1. The buffer is the DMA object mapped by
+ * nv_dma_map(state->dma_instance); words are fetched at dma_get until
+ * dma_get reaches dma_put. Control words (jump, call, return, method
+ * headers) update the cache1 state; each data word of a method becomes
+ * one heap-allocated CacheEntry appended to state->cache, after which
+ * cache_cond is signalled.
+ *
+ * Returns without doing anything when push access or DMA push is
+ * disabled, or while DMA push is suspended. When a word cannot be
+ * decoded or violates the call/return rules, state->error is set and
+ * the loop stops. */
+static void pfifo_run_pusher(NV2AState *d) {
+    uint8_t channel_id;
+    ChannelControl *control;
+    Cache1State *state;
+    CacheEntry *command;
+    uint8_t *dma;
+    hwaddr dma_len;
+    uint32_t word;
+
+    /* TODO: How is cache1 selected? */
+    state = &d->pfifo.cache1;
+    channel_id = state->channel_id;
+    control = &d->user.channel_control[channel_id];
+
+    if (!state->push_enabled) return;
+
+
+    /* only handling DMA for now... */
+
+    /* Channel running DMA */
+    uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE];
+    assert(channel_modes & (1 << channel_id));
+    assert(state->mode == FIFO_DMA);
+
+    if (!state->dma_push_enabled) return;
+    if (state->dma_push_suspended) return;
+
+    /* We're running so there should be no pending errors... */
+    assert(state->error == NV_PFIFO_CACHE1_DMA_STATE_ERROR_NONE);
+
+    dma = (uint8_t*)nv_dma_map(d, state->dma_instance, &dma_len);
+
+    NV2A_DPRINTF("DMA pusher: max 0x%" HWADDR_PRIx ", 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx "\n",
+                 dma_len, control->dma_get, control->dma_put);
+
+    /* based on the convenient pseudocode in envytools */
+    while (control->dma_get != control->dma_put) {
+        if (control->dma_get >= dma_len) { /* NOTE(review): whether dma_len is a size or an inclusive limit is decided by nv_dma_map - confirm */
+
+            state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_PROTECTION;
+            break;
+        }
+
+        word = ldl_le_p((uint32_t*)(dma + control->dma_get));
+        control->dma_get += 4;
+
+        if (state->method_count) {
+            /* data word of methods command */
+            state->data_shadow = word;
+
+            command = (CacheEntry*)g_malloc0(sizeof(CacheEntry)); /* freed by the puller thread */
+            command->method = state->method;
+            command->subchannel = state->subchannel;
+            command->nonincreasing = state->method_nonincreasing;
+            command->parameter = word;
+            qemu_mutex_lock(&state->cache_lock);
+            QSIMPLEQ_INSERT_TAIL(&state->cache, command, entry);
+            qemu_cond_signal(&state->cache_cond);
+            qemu_mutex_unlock(&state->cache_lock);
+
+            if (!state->method_nonincreasing) {
+                state->method += 4; /* increasing mode: next data word targets the next method */
+            }
+            state->method_count--;
+            state->dcount++;
+        } else {
+            /* no command active - this is the first word of a new one */
+            state->rsvd_shadow = word;
+            /* match all forms */
+            if ((word & 0xe0000003) == 0x20000000) {
+                /* old jump */
+                state->get_jmp_shadow = control->dma_get;
+                control->dma_get = word & 0x1fffffff;
+                NV2A_DPRINTF("pb OLD_JMP 0x%" HWADDR_PRIx "\n", control->dma_get);
+            } else if ((word & 3) == 1) {
+                /* jump */
+                state->get_jmp_shadow = control->dma_get;
+                control->dma_get = word & 0xfffffffc;
+                NV2A_DPRINTF("pb JMP 0x%" HWADDR_PRIx "\n", control->dma_get);
+            } else if ((word & 3) == 2) {
+                /* call - only one level of subroutine is tracked */
+                if (state->subroutine_active) {
+                    state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL;
+                    break;
+                }
+                state->subroutine_return = control->dma_get;
+                state->subroutine_active = true;
+                control->dma_get = word & 0xfffffffc;
+                NV2A_DPRINTF("pb CALL 0x%" HWADDR_PRIx "\n", control->dma_get);
+            } else if (word == 0x00020000) {
+                /* return */
+                if (!state->subroutine_active) {
+                    state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_RETURN;
+                    break;
+                }
+                control->dma_get = state->subroutine_return;
+                state->subroutine_active = false;
+                NV2A_DPRINTF("pb RET 0x%" HWADDR_PRIx "\n", control->dma_get);
+            } else if ((word & 0xe0030003) == 0) {
+                /* increasing methods */
+                state->method = word & 0x1fff;
+                state->subchannel = (word >> 13) & 7;
+                state->method_count = (word >> 18) & 0x7ff;
+                state->method_nonincreasing = false;
+                state->dcount = 0;
+            } else if ((word & 0xe0030003) == 0x40000000) {
+                /* non-increasing methods */
+                state->method = word & 0x1fff;
+                state->subchannel = (word >> 13) & 7;
+                state->method_count = (word >> 18) & 0x7ff;
+                state->method_nonincreasing = true;
+                state->dcount = 0;
+            } else {
+                NV2A_DPRINTF("pb reserved cmd 0x%" HWADDR_PRIx " - 0x%x\n",
+                             control->dma_get, word);
+                state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_RESERVED_CMD;
+                break;
+            }
+        }
+    }
+
+    NV2A_DPRINTF("DMA pusher done: max 0x%" HWADDR_PRIx ", 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx "\n",
+                 dma_len, control->dma_get, control->dma_put);
+
+    if (state->error) {
+        NV2A_DPRINTF("pb error: %d\n", state->error);
+        assert(false); /* aborts here when assertions are enabled; the lines below only run otherwise */
+
+        state->dma_push_suspended = true;
+
+        d->pfifo.pending_interrupts |= NV_PFIFO_INTR_0_DMA_PUSHER;
+        update_irq(d);
+    }
+}
+
+void *pfifo_puller_thread(void *opaque)
+{   /* Puller thread body; opaque is the NV2AState.
+     *
+     * Makes the PGRAPH GL context current, then loops forever: waits on
+     * cache_cond until the cache queue is non-empty and pulling is
+     * enabled, moves the whole queue to working_cache under cache_lock,
+     * and processes every entry while holding pgraph.lock. Method 0
+     * binds an object (looked up in RAMHT) to the entry's subchannel;
+     * methods >= 0x100 are forwarded to the engine bound to that
+     * subchannel. Each CacheEntry is freed after processing.
+     *
+     * Returns 0 once d->exiting is observed after a wake-up. */
+    NV2AState *d = (NV2AState*)opaque;
+    Cache1State *state = &d->pfifo.cache1;
+
+    glo_set_current(d->pgraph.gl_context);
+
+    while (true) {
+        qemu_mutex_lock(&state->cache_lock);
+        while (QSIMPLEQ_EMPTY(&state->cache) || !state->pull_enabled) {
+            qemu_cond_wait(&state->cache_cond, &state->cache_lock);
+
+            if (d->exiting) { /* only checked after a wake-up */
+                qemu_mutex_unlock(&state->cache_lock);
+                glo_set_current(NULL);
+                return 0;
+            }
+        }
+        QSIMPLEQ_CONCAT(&state->working_cache, &state->cache); /* take the whole queue so cache_lock can be dropped */
+        qemu_mutex_unlock(&state->cache_lock);
+
+        qemu_mutex_lock(&d->pgraph.lock);
+
+        while (!QSIMPLEQ_EMPTY(&state->working_cache)) {
+            CacheEntry * command = QSIMPLEQ_FIRST(&state->working_cache);
+            QSIMPLEQ_REMOVE_HEAD(&state->working_cache, entry);
+
+            if (command->method == 0) { /* method 0: parameter is an object handle to bind */
+                // qemu_mutex_lock_iothread();
+                RAMHTEntry entry = ramht_lookup(d, command->parameter);
+                assert(entry.valid);
+
+                assert(entry.channel_id == state->channel_id);
+                // qemu_mutex_unlock_iothread();
+
+                switch (entry.engine) {
+                case ENGINE_GRAPHICS:
+                    pgraph_context_switch(d, entry.channel_id);
+                    pgraph_wait_fifo_access(d);
+                    pgraph_method(d, command->subchannel, 0, entry.instance);
+                    break;
+                default:
+                    assert(false); /* only the graphics engine is handled */
+                    break;
+                }
+
+                /* the engine is bound to the subchannel */
+                qemu_mutex_lock(&state->cache_lock);
+                state->bound_engines[command->subchannel] = entry.engine;
+                state->last_engine = entry.engine;
+                qemu_mutex_unlock(&state->cache_lock);
+            } else if (command->method >= 0x100) {
+                /* method passed to engine */
+
+                uint32_t parameter = command->parameter;
+
+                /* methods that take objects: the handle is replaced by
+                 * the instance address found in RAMHT.
+                 * TODO: Check this range is correct for the nv2a */
+                if (command->method >= 0x180 && command->method < 0x200) {
+                    //qemu_mutex_lock_iothread();
+                    RAMHTEntry entry = ramht_lookup(d, parameter);
+                    assert(entry.valid);
+                    assert(entry.channel_id == state->channel_id);
+                    parameter = entry.instance;
+                    //qemu_mutex_unlock_iothread();
+                }
+
+                // qemu_mutex_lock(&state->cache_lock);
+                enum FIFOEngine engine = state->bound_engines[command->subchannel]; /* NOTE(review): read without cache_lock, while pfifo_write updates it under the lock */
+                // qemu_mutex_unlock(&state->cache_lock);
+
+                switch (engine) {
+                case ENGINE_GRAPHICS:
+                    pgraph_wait_fifo_access(d);
+                    pgraph_method(d, command->subchannel,
+                                  command->method, parameter);
+                    break;
+                default:
+                    assert(false); /* only the graphics engine is handled */
+                    break;
+                }
+
+                // qemu_mutex_lock(&state->cache_lock);
+                state->last_engine = state->bound_engines[command->subchannel];
+                // qemu_mutex_unlock(&state->cache_lock);
+            } /* methods 1..0xff are dropped without being forwarded */
+
+            g_free(command);
+        }
+
+        qemu_mutex_unlock(&d->pgraph.lock);
+    }
+
+    return 0;
+}
+
+/*
+ * Compute the RAMHT slot index for an object handle.
+ *
+ * The table size in bytes is derived from the SIZE field of the
+ * NV_PFIFO_RAMHT register. The handle is folded by XOR-ing successive
+ * groups of `width` bits together, and the result is then mixed with
+ * the channel id currently selected in cache1.
+ */
+static uint32_t ramht_hash(NV2AState *d, uint32_t handle)
+{
+    unsigned int table_bytes =
+        1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12);
+
+    /* XXX: Think this is different to what nouveau calculates... */
+    unsigned int width = ffs(table_bytes)-2;
+    uint32_t folded = 0;
+
+    /* XOR together every width-bit group of the handle. */
+    for (; handle; handle >>= width) {
+        folded ^= (handle & ((1 << width) - 1));
+    }
+
+    folded ^= d->pfifo.cache1.channel_id << (width - 4);
+    return folded;
+}
+
+/*
+ * Fetch and decode the RAMHT entry that `handle` hashes to.
+ *
+ * The table base and size come from the NV_PFIFO_RAMHT register; the
+ * entry is read from instance memory (ramin_ptr) as two little-endian
+ * 32-bit words: the stored handle followed by a context word that packs
+ * instance address, engine, channel id and the valid bit.
+ *
+ * The stored handle is returned as-is and is not compared with the
+ * requested one.
+ */
+static RAMHTEntry ramht_lookup(NV2AState *d, uint32_t handle)
+{
+    unsigned int table_bytes =
+        1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12);
+    uint32_t slot = ramht_hash(d, handle);
+    uint32_t table_base;
+    uint8_t *raw;
+    uint32_t context;
+    RAMHTEntry found = {0};
+
+    /* Each entry is 8 bytes; the slot has to lie inside the table. */
+    assert(slot * 8 < table_bytes);
+
+    table_base = GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT],
+                          NV_PFIFO_RAMHT_BASE_ADDRESS) << 12;
+    raw = d->ramin_ptr + table_base + slot * 8;
+
+    found.handle = ldl_le_p((uint32_t*)raw);
+    context = ldl_le_p((uint32_t*)(raw + 4));
+
+    found.instance = (context & NV_RAMHT_INSTANCE) << 4;
+    found.engine = (enum FIFOEngine)((context & NV_RAMHT_ENGINE) >> 16);
+    found.channel_id = (context & NV_RAMHT_CHID) >> 24;
+    found.valid = context & NV_RAMHT_STATUS;
+
+    return found;
+}
--- a/hw/xbox/nv2a/nv2a_pgraph.c
+++ b/hw/xbox/nv2a/nv2a_pgraph.c
--- a/hw/xbox/nv2a/nv2a_pmc.c
+++ b/hw/xbox/nv2a/nv2a_pmc.c
@ -0,0 +1,71 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* PMC - card master control
+ *
+ * Register read handler: returns the fixed chip id and the pending and
+ * enabled interrupt masks. Every other offset reads as 0. size is not
+ * used. The result is logged with reg_log_read().
+ */
+uint64_t pmc_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *s = (NV2AState *)opaque;
+    uint64_t value = 0;
+
+    if (addr == NV_PMC_BOOT_0) {
+        /* chipset and stepping: NV2A, A02, Rev 0 */
+        value = 0x02A000A2;
+    } else if (addr == NV_PMC_INTR_0) {
+        /* Shows which functional units have pending IRQ */
+        value = s->pmc.pending_interrupts;
+    } else if (addr == NV_PMC_INTR_EN_0) {
+        /* Selects which functional units can cause IRQs */
+        value = s->pmc.enabled_interrupts;
+    }
+
+    reg_log_read(NV_PMC, addr, value);
+    return value;
+}
+
+/*
+ * PMC register write handler.
+ *
+ * Only the two interrupt registers react to writes; both re-evaluate the
+ * interrupt line through update_irq(). Writes to any other offset are
+ * logged and otherwise ignored. size is not used.
+ */
+void pmc_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *s = (NV2AState *)opaque;
+
+    reg_log_write(NV_PMC, addr, val);
+
+    if (addr == NV_PMC_INTR_0) {
+        /* the bits of the interrupts to clear are written */
+        s->pmc.pending_interrupts &= ~val;
+        update_irq(s);
+    } else if (addr == NV_PMC_INTR_EN_0) {
+        s->pmc.enabled_interrupts = val;
+        update_irq(s);
+    }
+}
+
--- a/hw/xbox/nv2a/nv2a_pramdac.c
+++ b/hw/xbox/nv2a/nv2a_pramdac.c
@ -0,0 +1,87 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * PRAMDAC register read handler.
+ *
+ * opaque is the NV2AState. The register is selected by the 4-byte
+ * aligned part of addr; the low two address bits and size are then used
+ * to shift the 32-bit register value so that narrower or unaligned
+ * accesses get a slice of it. Offsets without a dedicated case read
+ * as 0.
+ */
+uint64_t pramdac_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = (NV2AState *)opaque;
+
+    uint64_t r = 0;
+    switch (addr & ~3) {
+    case NV_PRAMDAC_NVPLL_COEFF:
+        r = d->pramdac.core_clock_coeff;
+        break;
+    case NV_PRAMDAC_MPLL_COEFF:
+        r = d->pramdac.memory_clock_coeff;
+        break;
+    case NV_PRAMDAC_VPLL_COEFF:
+        r = d->pramdac.video_clock_coeff;
+        break;
+    case NV_PRAMDAC_PLL_TEST_COUNTER:
+        /* emulated PLLs locked instantly? */
+        r = NV_PRAMDAC_PLL_TEST_COUNTER_VPLL2_LOCK
+             | NV_PRAMDAC_PLL_TEST_COUNTER_NVPLL_LOCK
+             | NV_PRAMDAC_PLL_TEST_COUNTER_MPLL_LOCK
+             | NV_PRAMDAC_PLL_TEST_COUNTER_VPLL_LOCK;
+        break;
+    default:
+        break;
+    }
+
+    /* Surprisingly, QEMU doesn't handle unaligned access for you properly */
+    /* NOTE(review): the shift count leaves the valid range when
+     * size + (addr & 3) exceeds 4 - confirm such accesses cannot reach
+     * this handler. */
+    r >>= 32 - 8 * size - 8 * (addr & 3);
+
+    /* r is uint64_t and size is unsigned: use matching conversions so the
+     * debug build is warning-free on hosts where uint64_t is not
+     * unsigned long long. */
+    NV2A_DPRINTF("PRAMDAC: read %u [0x%" HWADDR_PRIx "] -> %" PRIx64 "\n",
+                 size, addr, r);
+    return r;
+}
+
+/*
+ * PRAMDAC register write handler.
+ *
+ * Stores the three PLL coefficient registers. Writing the NVPLL
+ * coefficient also recomputes pramdac.core_clock_freq from the M, N and
+ * P dividers packed in the value; a zero M divider yields a frequency
+ * of 0. Other offsets are logged and ignored. size is not used.
+ */
+void pramdac_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *s = (NV2AState *)opaque;
+
+    reg_log_write(NV_PRAMDAC, addr, val);
+
+    if (addr == NV_PRAMDAC_NVPLL_COEFF) {
+        uint32_t mdiv, ndiv, pdiv;
+
+        s->pramdac.core_clock_coeff = val;
+
+        mdiv = val & NV_PRAMDAC_NVPLL_COEFF_MDIV;
+        ndiv = (val & NV_PRAMDAC_NVPLL_COEFF_NDIV) >> 8;
+        pdiv = (val & NV_PRAMDAC_NVPLL_COEFF_PDIV) >> 16;
+
+        /* freq = crystal * N / 2^P / M, guarding the division by M */
+        if (mdiv != 0) {
+            s->pramdac.core_clock_freq = (NV2A_CRYSTAL_FREQ * ndiv)
+                                          / (1 << pdiv) / mdiv;
+        } else {
+            s->pramdac.core_clock_freq = 0;
+        }
+    } else if (addr == NV_PRAMDAC_MPLL_COEFF) {
+        s->pramdac.memory_clock_coeff = val;
+    } else if (addr == NV_PRAMDAC_VPLL_COEFF) {
+        s->pramdac.video_clock_coeff = val;
+    }
+}
--- a/hw/xbox/nv2a/nv2a_prmcio.c
+++ b/hw/xbox/nv2a/nv2a_prmcio.c
@ -0,0 +1,55 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* PRMCIO - aliases VGA CRTC and attribute controller registers
+ *
+ * Read handler: forwards the access to vga_ioport_read() on the embedded
+ * VGA state and logs the result. size is not used.
+ */
+uint64_t prmcio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *s = opaque;
+    uint64_t value;
+
+    value = vga_ioport_read(&s->vga, addr);
+    reg_log_read(NV_PRMCIO, addr, value);
+
+    return value;
+}
+/*
+ * PRMCIO write handler: logs the access and forwards it to
+ * vga_ioport_write() on the embedded VGA state. Writes to VGA_ATT_W made
+ * while the attribute flip-flop is 0 get VGA_AR_ENABLE_DISPLAY OR-ed
+ * into the value first. The logged value is the one the guest wrote.
+ * size is not used.
+ */
+void prmcio_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *s = opaque;
+
+    reg_log_write(NV_PRMCIO, addr, val);
+
+    /* Cromwell sets attrs without enabling VGA_AR_ENABLE_DISPLAY
+     * (which should result in a blank screen).
+     * Either nvidia's hardware is lenient or it is set through
+     * something else. The former seems more likely.
+     */
+    if (addr == VGA_ATT_W && s->vga.ar_flip_flop == 0) {
+        val |= VGA_AR_ENABLE_DISPLAY;
+    }
+
+    vga_ioport_write(&s->vga, addr, val);
+}
--- a/hw/xbox/nv2a/nv2a_prmvio.c
+++ b/hw/xbox/nv2a/nv2a_prmvio.c
@ -2,6 +2,8 @@
 * QEMU Geforce NV2A implementation
 *
 * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
@ -17,9 +19,22 @@
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef HW_NV2A_H
-#define HW_NV2A_H
+/* PRMVIO - aliases VGA sequencer and graphics controller registers
+ *
+ * Read handler: forwards the access to vga_ioport_read() on the embedded
+ * VGA state, logs the value with reg_log_read() and returns it. size is
+ * not used. */
+uint64_t prmvio_read(void *opaque,
+                                  hwaddr addr, unsigned int size)
+{
+    NV2AState *d = opaque;
+    uint64_t r = vga_ioport_read(&d->vga, addr);

-void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram);
+    reg_log_read(NV_PRMVIO, addr, r);
+    return r;
+}
+void prmvio_write(void *opaque, hwaddr addr,
+                               uint64_t val, unsigned int size)
+{   /* PRMVIO write handler: logs the access, then forwards it unchanged
+     * to vga_ioport_write() on the embedded VGA state. size is not used. */
+    NV2AState *d = opaque;

-#endif
+    reg_log_write(NV_PRMVIO, addr, val);
+
+    vga_ioport_write(&d->vga, addr, val);
+}
--- a/hw/xbox/nv2a/nv2a_psh.c
+++ b/hw/xbox/nv2a/nv2a_psh.c
@ -25,6 +25,8 @@
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

+#include "qemu/osdep.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@ -33,8 +35,8 @@

 #include "qapi/qmp/qstring.h"

-#include "hw/xbox/nv2a_shaders_common.h"
-#include "hw/xbox/nv2a_psh.h"
+#include "nv2a_shaders_common.h"
+#include "nv2a_psh.h"

 /*
 * This implements translation of register combiners into glsl
--- a/hw/xbox/nv2a/nv2a_psh.h
+++ b/hw/xbox/nv2a/nv2a_psh.h
--- a/hw/xbox/nv2a/nv2a_ptimer.c
+++ b/hw/xbox/nv2a/nv2a_ptimer.c
@ -0,0 +1,89 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* PTIMER - time measurement and time-based alarms */
+
+/*
+ * Current PTIMER tick count.
+ *
+ * Converts the virtual clock (nanoseconds) to core-clock cycles and then
+ * scales the result by numerator / denominator.
+ *
+ * muldiv64() takes a 32-bit multiplier and a 32-bit divisor, so the
+ * conversion is done in two steps: folding everything into one call
+ * would need NANOSECONDS_PER_SECOND * denominator as the divisor, which
+ * no longer fits in 32 bits for any denominator of 5 or more, and
+ * core_clock_freq * numerator as the multiplier, which can overflow
+ * as well.
+ *
+ * Returns 0 while the denominator register is still 0 instead of
+ * dividing by zero.
+ */
+static uint64_t ptimer_get_clock(NV2AState *d)
+{
+    uint64_t core_ticks;
+
+    if (d->ptimer.denominator == 0) {
+        return 0;
+    }
+
+    core_ticks = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                          d->pramdac.core_clock_freq,
+                          NANOSECONDS_PER_SECOND);
+
+    return muldiv64(core_ticks,
+                    d->ptimer.numerator,
+                    d->ptimer.denominator);
+}
+
+/*
+ * PTIMER register read handler.
+ *
+ * Returns the interrupt masks and the numerator/denominator registers as
+ * stored. TIME_0 carries the low 27 bits of the tick count shifted left
+ * by 5, TIME_1 the next 29 bits. Other offsets read as 0. size is not
+ * used. The result is logged with reg_log_read().
+ */
+uint64_t ptimer_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *s = opaque;
+    uint64_t value = 0;
+
+    if (addr == NV_PTIMER_INTR_0) {
+        value = s->ptimer.pending_interrupts;
+    } else if (addr == NV_PTIMER_INTR_EN_0) {
+        value = s->ptimer.enabled_interrupts;
+    } else if (addr == NV_PTIMER_NUMERATOR) {
+        value = s->ptimer.numerator;
+    } else if (addr == NV_PTIMER_DENOMINATOR) {
+        value = s->ptimer.denominator;
+    } else if (addr == NV_PTIMER_TIME_0) {
+        value = (ptimer_get_clock(s) & 0x7ffffff) << 5;
+    } else if (addr == NV_PTIMER_TIME_1) {
+        value = (ptimer_get_clock(s) >> 27) & 0x1fffffff;
+    }
+
+    reg_log_read(NV_PTIMER, addr, value);
+    return value;
+}
+
+/*
+ * PTIMER register write handler.
+ *
+ * Interrupt register writes re-evaluate the interrupt line through
+ * update_irq(); numerator, denominator and alarm time are simply stored.
+ * Other offsets are logged and ignored. size is not used.
+ */
+void ptimer_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *s = opaque;
+
+    reg_log_write(NV_PTIMER, addr, val);
+
+    if (addr == NV_PTIMER_INTR_0) {
+        /* written 1 bits acknowledge the matching pending interrupts */
+        s->ptimer.pending_interrupts &= ~val;
+        update_irq(s);
+    } else if (addr == NV_PTIMER_INTR_EN_0) {
+        s->ptimer.enabled_interrupts = val;
+        update_irq(s);
+    } else if (addr == NV_PTIMER_DENOMINATOR) {
+        s->ptimer.denominator = val;
+    } else if (addr == NV_PTIMER_NUMERATOR) {
+        s->ptimer.numerator = val;
+    } else if (addr == NV_PTIMER_ALARM_0) {
+        s->ptimer.alarm_time = val;
+    }
+}
--- a/hw/xbox/nv2a/nv2a_pvideo.c
+++ b/hw/xbox/nv2a/nv2a_pvideo.c
@ -0,0 +1,73 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Pass the vertical extent of the PVIDEO output rectangle (POINT_OUT.Y up to
+ * POINT_OUT.Y + SIZE_OUT.HEIGHT) to vga_invalidate_scanlines().
+ */
+static void pvideo_vga_invalidate(NV2AState *d)
+{
+    const uint32_t point_out = d->pvideo.regs[NV_PVIDEO_POINT_OUT];
+    const uint32_t size_out = d->pvideo.regs[NV_PVIDEO_SIZE_OUT];
+
+    int first_line = GET_MASK(point_out, NV_PVIDEO_POINT_OUT_Y);
+    int end_line = first_line + GET_MASK(size_out, NV_PVIDEO_SIZE_OUT_HEIGHT);
+
+    NV2A_DPRINTF("pvideo_vga_invalidate %d %d\n", first_line, end_line);
+    vga_invalidate_scanlines(&d->vga, first_line, end_line);
+}
+
+/*
+ * MMIO read handler for the PVIDEO block.
+ *
+ * NV_PVIDEO_STOP always reads as zero; every other offset returns the value
+ * stored in pvideo.regs[].  The access is reported to reg_log_read().
+ */
+uint64_t pvideo_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *nv2a = opaque;
+    uint64_t value = 0;
+
+    if (addr != NV_PVIDEO_STOP) {
+        value = nv2a->pvideo.regs[addr];
+    }
+
+    reg_log_read(NV_PVIDEO, addr, value);
+    return value;
+}
+
+/*
+ * MMIO write handler for the PVIDEO block.
+ *
+ * A write to NV_PVIDEO_STOP is not stored; it clears NV_PVIDEO_BUFFER
+ * instead.  All other offsets are stored in pvideo.regs[].  Writes to BUFFER
+ * or STOP additionally invalidate the overlay's scanlines.
+ *
+ * Toggling d->vga.enable_overlay on BUFFER (true) / STOP (false) is
+ * currently disabled.
+ */
+void pvideo_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *nv2a = opaque;
+
+    reg_log_write(NV_PVIDEO, addr, val);
+
+    if (addr == NV_PVIDEO_STOP) {
+        nv2a->pvideo.regs[NV_PVIDEO_BUFFER] = 0;
+    } else {
+        nv2a->pvideo.regs[addr] = val;
+    }
+
+    if (addr == NV_PVIDEO_BUFFER || addr == NV_PVIDEO_STOP) {
+        pvideo_vga_invalidate(nv2a);
+    }
+}
--- a/hw/xbox/nv2a/nv2a_shaders.c
+++ b/hw/xbox/nv2a/nv2a_shaders.c
@ -18,10 +18,56 @@
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

+#include "qemu/osdep.h"
 #include "qemu-common.h"
-#include "hw/xbox/nv2a_debug.h"
-#include "hw/xbox/nv2a_shaders_common.h"
-#include "hw/xbox/nv2a_shaders.h"
+#include "nv2a_debug.h"
+#include "nv2a_shaders_common.h"
+#include "nv2a_shaders.h"
+
+/*
+ * Append printf-style formatted text to @qstring.
+ * Thin variadic front end for qstring_append_va().
+ */
+void qstring_append_fmt(QString *qstring, const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    qstring_append_va(qstring, fmt, args);
+    va_end(args);
+}
+
+/*
+ * Create a new QString (via qstring_new()) holding the printf-style
+ * formatted text and return it.
+ */
+QString *qstring_from_fmt(const char *fmt, ...)
+{
+    va_list args;
+    QString *result = qstring_new();
+
+    va_start(args, fmt);
+    qstring_append_va(result, fmt, args);
+    va_end(args);
+
+    return result;
+}
+
+/*
+ * Append vprintf-style formatted text to @qstring.
+ *
+ * The text is first formatted into a small stack buffer; if it does not fit,
+ * a heap buffer of the exact required size is allocated and the formatting
+ * is repeated.  @va is not consumed: each pass works on its own va_copy().
+ *
+ * Nothing is appended when the formatted text is empty or when vsnprintf()
+ * reports an error (negative return value).
+ */
+void qstring_append_va(QString *qstring, const char *fmt, va_list va)
+{
+    char scratch[256];
+    va_list ap;
+
+    va_copy(ap, va);
+    const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap);
+    va_end(ap);
+
+    /*
+     * len < 0 signals a formatting error.  It must be rejected before the
+     * size comparison below: compared against a size_t it would look like a
+     * huge length and be treated as an overflow of the scratch buffer.
+     */
+    if (len <= 0) {
+        return;
+    }
+
+    if ((size_t)len < sizeof(scratch)) {
+        qstring_append(qstring, scratch);
+        return;
+    }
+
+    /* overflowed our scratch buffer, alloc and try again */
+    const size_t needed = (size_t)len + 1;
+    char *buf = g_malloc(needed);
+
+    va_copy(ap, va);
+    vsnprintf(buf, needed, fmt, ap);
+    va_end(ap);
+
+    qstring_append(qstring, buf);
+    g_free(buf);
+}

 static QString* generate_geometry_shader(
                                      enum ShaderPolygonMode polygon_front_mode,
--- a/hw/xbox/nv2a/nv2a_shaders.h
+++ b/hw/xbox/nv2a/nv2a_shaders.h
--- a/hw/xbox/nv2a/nv2a_shaders_common.h
+++ b/hw/xbox/nv2a/nv2a_shaders_common.h
@ -34,4 +34,9 @@
                           "  vec4 T3;\n" \
                           "};\n"

+
+/* printf-style formatting helpers for QString (defined in nv2a_shaders.c) */
+
+/* Append formatted text to an existing QString. */
+void qstring_append_fmt(QString *qstring, const char *fmt, ...);
+/* Create a new QString holding the formatted text. */
+QString *qstring_from_fmt(const char *fmt, ...);
+/* va_list variant of qstring_append_fmt(). */
+void qstring_append_va(QString *qstring, const char *fmt, va_list va);
+
 #endif
--- a/hw/xbox/nv2a/nv2a_stubs.c
+++ b/hw/xbox/nv2a/nv2a_stubs.c
@ -0,0 +1,118 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* PRMA stub: log the read and return zero. */
+uint64_t prma_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    const uint64_t value = 0;
+
+    reg_log_read(NV_PRMA, addr, value);
+    return value;
+}
+/* PRMA stub: log the write; the value is not stored. */
+void prma_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PRMA, addr, val);
+}
+
+/* PCOUNTER stub: log the read and return zero. */
+uint64_t pcounter_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    const uint64_t value = 0;
+
+    reg_log_read(NV_PCOUNTER, addr, value);
+    return value;
+}
+/* PCOUNTER stub: log the write; the value is not stored. */
+void pcounter_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PCOUNTER, addr, val);
+}
+
+/* PVPE stub: log the read and return zero. */
+uint64_t pvpe_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    const uint64_t value = 0;
+
+    reg_log_read(NV_PVPE, addr, value);
+    return value;
+}
+/* PVPE stub: log the write; the value is not stored. */
+void pvpe_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PVPE, addr, val);
+}
+
+/* PTV stub: log the read and return zero. */
+uint64_t ptv_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    const uint64_t value = 0;
+
+    reg_log_read(NV_PTV, addr, value);
+    return value;
+}
+/* PTV stub: log the write; the value is not stored. */
+void ptv_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PTV, addr, val);
+}
+
+/* PRMFB stub: log the read and return zero. */
+uint64_t prmfb_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    const uint64_t value = 0;
+
+    reg_log_read(NV_PRMFB, addr, value);
+    return value;
+}
+/* PRMFB stub: log the write; the value is not stored. */
+void prmfb_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PRMFB, addr, val);
+}
+
+/* PRMDIO stub: log the read and return zero. */
+uint64_t prmdio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    const uint64_t value = 0;
+
+    reg_log_read(NV_PRMDIO, addr, value);
+    return value;
+}
+/* PRMDIO stub: log the write; the value is not stored. */
+void prmdio_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PRMDIO, addr, val);
+}
+
+/* PSTRAPS stub: log the read and return zero. */
+uint64_t pstraps_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    const uint64_t value = 0;
+
+    reg_log_read(NV_PSTRAPS, addr, value);
+    return value;
+}
+/* PSTRAPS stub: log the write; the value is not stored. */
+void pstraps_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    reg_log_write(NV_PSTRAPS, addr, val);
+}
+
+/* PRAMIN - RAMIN access */
+/*
+uint64_t pramin_read(void *opaque,
+                                 hwaddr addr, unsigned int size)
+{
+    NV2A_DPRINTF("nv2a PRAMIN: read [0x%" HWADDR_PRIx "] -> 0x%" HWADDR_PRIx "\n", addr, r);
+    return 0;
+}
+void pramin_write(void *opaque, hwaddr addr,
+                              uint64_t val, unsigned int size)
+{
+    NV2A_DPRINTF("nv2a PRAMIN: [0x%" HWADDR_PRIx "] = 0x%02llx\n", addr, val);
+}*/
--- a/hw/xbox/nv2a/nv2a_user.c
+++ b/hw/xbox/nv2a/nv2a_user.c
@ -0,0 +1,95 @@
+/*
+ * QEMU Geforce NV2A implementation
+ *
+ * Copyright (c) 2012 espes
+ * Copyright (c) 2015 Jannik Vogel
+ * Copyright (c) 2018 Matt Borgerson
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* USER - PFIFO MMIO and DMA submission area */
+/*
+ * MMIO read handler for the USER area.
+ *
+ * Bits 16 and up of @addr select the channel; the low 16 bits select the
+ * register within that channel's control block.  For a channel in DMA mode
+ * the DMA_PUT, DMA_GET and REF registers are readable; any other offset
+ * reads as zero.  PIO mode is not implemented and asserts.
+ */
+uint64_t user_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    NV2AState *d = (NV2AState *)opaque;
+
+    unsigned int channel_id = addr >> 16;
+    assert(channel_id < NV2A_NUM_CHANNELS);
+
+    ChannelControl *control = &d->user.channel_control[channel_id];
+
+    uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE];
+
+    uint64_t r = 0;
+    /*
+     * One mode bit per channel.  Build the mask from an unsigned 1 so that
+     * selecting bit 31 is not a signed-overflowing shift.
+     */
+    if (channel_modes & (1u << channel_id)) {
+        /* DMA Mode */
+        switch (addr & 0xFFFF) {
+        case NV_USER_DMA_PUT:
+            r = control->dma_put;
+            break;
+        case NV_USER_DMA_GET:
+            r = control->dma_get;
+            break;
+        case NV_USER_REF:
+            r = control->ref;
+            break;
+        default:
+            break;
+        }
+    } else {
+        /* PIO Mode */
+        assert(false);
+    }
+
+    reg_log_read(NV_USER, addr, r);
+    return r;
+}
+
+/*
+ * MMIO write handler for the USER area.
+ *
+ * Bits 16 and up of @addr select the channel; the low 16 bits select the
+ * register within that channel's control block.  For a channel in DMA mode,
+ * DMA_PUT, DMA_GET and REF are stored; a DMA_PUT write additionally runs the
+ * PFIFO pusher when CACHE1 push is enabled.  Other offsets are ignored.
+ * PIO mode is not implemented and asserts.
+ */
+void user_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size)
+{
+    NV2AState *d = (NV2AState *)opaque;
+
+    reg_log_write(NV_USER, addr, val);
+
+    unsigned int channel_id = addr >> 16;
+    assert(channel_id < NV2A_NUM_CHANNELS);
+
+    ChannelControl *control = &d->user.channel_control[channel_id];
+
+    uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE];
+    /*
+     * One mode bit per channel.  Build the mask from an unsigned 1 so that
+     * selecting bit 31 is not a signed-overflowing shift.
+     */
+    if (channel_modes & (1u << channel_id)) {
+        /* DMA Mode */
+        switch (addr & 0xFFFF) {
+        case NV_USER_DMA_PUT:
+            control->dma_put = val;
+
+            if (d->pfifo.cache1.push_enabled) {
+                pfifo_run_pusher(d);
+            }
+            break;
+        case NV_USER_DMA_GET:
+            control->dma_get = val;
+            break;
+        case NV_USER_REF:
+            control->ref = val;
+            break;
+        default:
+            break;
+        }
+    } else {
+        /* PIO Mode */
+        assert(false);
+    }
+}
--- a/hw/xbox/nv2a/nv2a_vsh.c
+++ b/hw/xbox/nv2a/nv2a_vsh.c
@ -25,13 +25,15 @@
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

+#include "qemu/osdep.h"
+
 #include <stdio.h>
 #include <string.h>
 #include <stdbool.h>
 #include <assert.h>

-#include "hw/xbox/nv2a_shaders_common.h"
-#include "hw/xbox/nv2a_vsh.h"
+#include "nv2a_shaders_common.h"
+#include "nv2a_vsh.h"

 #define VSH_D3DSCM_CORRECTION 96

--- a/hw/xbox/nv2a/nv2a_vsh.h
+++ b/hw/xbox/nv2a/nv2a_vsh.h
--- a/hw/xbox/nv2a/swizzle.c
+++ b/hw/xbox/nv2a/swizzle.c
@ -26,7 +26,7 @@
 #include <assert.h>
 #include "qemu/osdep.h"

-#include "hw/xbox/swizzle.h"
+#include "swizzle.h"

 /* This should be pretty straightforward.
 * It creates a bit pattern like ..zyxzyxzyx from ..xxx, ..yyy and ..zzz
--- a/hw/xbox/nv2a/swizzle.h
+++ b/hw/xbox/nv2a/swizzle.h
--- a/hw/xbox/nv2a/xxhash.c
+++ b/hw/xbox/nv2a/xxhash.c
--- a/hw/xbox/nv2a/xxhash.h
+++ b/hw/xbox/nv2a/xxhash.h
@ -0,0 +1,328 @@
+/*
+   xxHash - Extremely Fast Hash algorithm
+   Header File
+   Copyright (C) 2012-2016, Yann Collet.
+
+   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name            Speed       Q.Score   Author
+xxHash          5.4 GB/s     10
+CrapWow         3.2 GB/s      2       Andrew
+MurmurHash 3a   2.7 GB/s     10       Austin Appleby
+SpookyHash      2.0 GB/s     10       Bob Jenkins
+SBox            1.4 GB/s      9       Bret Mulvey
+Lookup3         1.2 GB/s      9       Bob Jenkins
+SuperFastHash   1.2 GB/s      1       Paul Hsieh
+CityHash64      1.05 GB/s    10       Pike & Alakuijala
+FNV             0.55 GB/s     5       Fowler, Noll, Vo
+CRC32           0.43 GB/s     9
+MD5-32          0.33 GB/s    10       Ronald L. Rivest
+SHA1-32         0.28 GB/s    10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name     Speed on 64 bits    Speed on 32 bits
+XXH64       13.8 GB/s            1.9 GB/s
+XXH32        6.8 GB/s            6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+*  Definitions
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+ *  API modifier
+ ******************************/
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ *  This is useful to include xxhash functions in `static` mode
+ *  in order to inline them, and remove their symbol from the public list.
+ *  Inlining can offer dramatic performance improvement on small keys.
+ *  Methodology :
+ *     #define XXH_INLINE_ALL
+ *     #include "xxhash.h"
+ * `xxhash.c` is automatically included.
+ *  It's not useful to compile and link it as a separate module.
+ */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  ifndef XXH_STATIC_LINKING_ONLY
+#    define XXH_STATIC_LINKING_ONLY
+#  endif
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __inline __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline
+#  else
+     /* this version may generate warnings for unused static functions */
+#    define XXH_PUBLIC_API static
+#  endif
+#else
+#  define XXH_PUBLIC_API   /* do nothing */
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
+#ifdef XXH_NAMESPACE
+#  define XXH_CAT(A,B) A##B
+#  define XXH_NAME2(A,B) XXH_CAT(A,B)
+#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+*  Version
+***************************************/
+#define XXH_VERSION_MAJOR    0
+#define XXH_VERSION_MINOR    6
+#define XXH_VERSION_RELEASE  5
+#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/*-**********************************************************************
+*  32-bit hash
+************************************************************************/
+typedef unsigned int XXH32_hash_t;
+
+/*! XXH32() :
+    Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
+    The memory between input & input+length must be valid (allocated and read-accessible).
+    "seed" can be used to alter the result predictably.
+    Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+
+/*======   Streaming   ======*/
+typedef struct XXH32_state_s XXH32_state_t;   /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+
+/*
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
+ * Note that, for small input, they are slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * XXH state must first be allocated, using XXH*_createState() .
+ *
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a digest,
+ * and generate some new hashes later on, by calling again XXH*_digest().
+ *
+ * When done, free XXH state space if it was allocated dynamically.
+ */
+
+/*======   Canonical representation   ======*/
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+ * These functions allow transformation of hash result into and from its canonical format.
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ */
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+*  64-bit hash
+************************************************************************/
+typedef unsigned long long XXH64_hash_t;
+
+/*! XXH64() :
+    Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
+    "seed" can be used to alter the result predictably.
+    This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
+*/
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*======   Streaming   ======*/
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
+
+/*======   Canonical representation   ======*/
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+#endif  /* XXH_NO_LONG_LONG */
+
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+   This section contains declarations which are not guaranteed to remain stable.
+   They may change in future versions, becoming incompatible with a different version of the library.
+   These declarations should only be used with static linking.
+   Never use them in association with dynamic linking !
+=================================================================================================== */
+
+/* These definitions are only present to allow
+ * static allocation of XXH state, on stack or in a struct for example.
+ * Never **ever** use members directly. */
+
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+
+/* Layout behind the opaque XXH32_state_t, using <stdint.h> fixed-width
+ * types (C99 / C++ builds, non-VMS).  Only visible under
+ * XXH_STATIC_LINKING_ONLY so that callers can allocate a state statically;
+ * members must not be accessed directly. */
+struct XXH32_state_s {
+   uint32_t total_len_32;
+   uint32_t large_len;
+   uint32_t v1;
+   uint32_t v2;
+   uint32_t v3;
+   uint32_t v4;
+   uint32_t mem32[4];
+   uint32_t memsize;
+   uint32_t reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+/* Layout behind the opaque XXH64_state_t, using <stdint.h> fixed-width
+ * types (C99 / C++ builds, non-VMS).  Only visible under
+ * XXH_STATIC_LINKING_ONLY so that callers can allocate a state statically;
+ * members must not be accessed directly. */
+struct XXH64_state_s {
+   uint64_t total_len;
+   uint64_t v1;
+   uint64_t v2;
+   uint64_t v3;
+   uint64_t v4;
+   uint64_t mem64[4];
+   uint32_t memsize;
+   uint32_t reserved[2];          /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+
+# else
+
+/* Fallback layout of XXH32_state_t for builds where the <stdint.h> branch
+ * above is not taken (VMS, or neither C++ nor C99); uses plain `unsigned`
+ * in place of uint32_t.  Members must not be accessed directly. */
+struct XXH32_state_s {
+   unsigned total_len_32;
+   unsigned large_len;
+   unsigned v1;
+   unsigned v2;
+   unsigned v3;
+   unsigned v4;
+   unsigned mem32[4];
+   unsigned memsize;
+   unsigned reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+#   ifndef XXH_NO_LONG_LONG  /* remove 64-bit support */
+/* Fallback layout of XXH64_state_t for builds where the <stdint.h> branch
+ * above is not taken; uses `unsigned long long` in place of uint64_t and is
+ * compiled only when XXH_NO_LONG_LONG is not defined.  Members must not be
+ * accessed directly. */
+struct XXH64_state_s {
+   unsigned long long total_len;
+   unsigned long long v1;
+   unsigned long long v2;
+   unsigned long long v3;
+   unsigned long long v4;
+   unsigned long long mem64[4];
+   unsigned memsize;
+   unsigned reserved[2];     /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+#    endif
+
+# endif
+
+
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
+#endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */