From 584dbda1d63833dabf6d53e0bfc23a685f45d2b0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Tue, 26 Jun 2018 14:40:01 -0700 Subject: [PATCH] Fix and refactor NV2A code This patch does the following: - Fixes up things for Qemu 2.x compat - Factors out the high-level NV2A blocks into separate files - Updates g-lru-cache for latest glib compat (github.com/chergert/glrucache@c10af24) - Changes texture hashing algorithm from FNV to xxH v0.6.5 --- hw/xbox/g-lru-cache.c | 338 -- hw/xbox/nv2a/Makefile.objs | 32 + hw/xbox/nv2a/g-lru-cache.c | 372 ++ hw/xbox/{ => nv2a}/g-lru-cache.h | 53 +- hw/xbox/nv2a/nv2a.c | 568 ++ hw/xbox/nv2a/nv2a.h | 446 ++ hw/xbox/{ => nv2a}/nv2a_debug.c | 5 +- hw/xbox/{ => nv2a}/nv2a_debug.h | 0 hw/xbox/{ => nv2a}/nv2a_int.h | 0 hw/xbox/nv2a/nv2a_pbus.c | 59 + hw/xbox/nv2a/nv2a_pcrtc.c | 72 + hw/xbox/nv2a/nv2a_pfb.c | 58 + hw/xbox/nv2a/nv2a_pfifo.c | 513 ++ hw/xbox/{nv2a.c => nv2a/nv2a_pgraph.c} | 6089 ++++++++-------------- hw/xbox/nv2a/nv2a_pmc.c | 71 + hw/xbox/nv2a/nv2a_pramdac.c | 87 + hw/xbox/nv2a/nv2a_prmcio.c | 55 + hw/xbox/{nv2a.h => nv2a/nv2a_prmvio.c} | 23 +- hw/xbox/{ => nv2a}/nv2a_psh.c | 6 +- hw/xbox/{ => nv2a}/nv2a_psh.h | 0 hw/xbox/nv2a/nv2a_ptimer.c | 89 + hw/xbox/nv2a/nv2a_pvideo.c | 73 + hw/xbox/{ => nv2a}/nv2a_shaders.c | 52 +- hw/xbox/{ => nv2a}/nv2a_shaders.h | 0 hw/xbox/{ => nv2a}/nv2a_shaders_common.h | 5 + hw/xbox/nv2a/nv2a_stubs.c | 118 + hw/xbox/nv2a/nv2a_user.c | 95 + hw/xbox/{ => nv2a}/nv2a_vsh.c | 6 +- hw/xbox/{ => nv2a}/nv2a_vsh.h | 0 hw/xbox/{ => nv2a}/swizzle.c | 2 +- hw/xbox/{ => nv2a}/swizzle.h | 0 hw/xbox/nv2a/xxhash.c | 1029 ++++ hw/xbox/nv2a/xxhash.h | 328 ++ 33 files changed, 6247 insertions(+), 4397 deletions(-) delete mode 100644 hw/xbox/g-lru-cache.c create mode 100644 hw/xbox/nv2a/Makefile.objs create mode 100644 hw/xbox/nv2a/g-lru-cache.c rename hw/xbox/{ => nv2a}/g-lru-cache.h (59%) create mode 100644 hw/xbox/nv2a/nv2a.c create mode 100644 hw/xbox/nv2a/nv2a.h rename hw/xbox/{ => nv2a}/nv2a_debug.c (97%) rename hw/xbox/{ => nv2a}/nv2a_debug.h (100%) rename hw/xbox/{ => nv2a}/nv2a_int.h (100%) create mode 100644 hw/xbox/nv2a/nv2a_pbus.c create mode 100644 hw/xbox/nv2a/nv2a_pcrtc.c create mode 100644 hw/xbox/nv2a/nv2a_pfb.c create mode 100644 hw/xbox/nv2a/nv2a_pfifo.c rename hw/xbox/{nv2a.c => nv2a/nv2a_pgraph.c} (69%) create mode 100644 hw/xbox/nv2a/nv2a_pmc.c create mode 100644 hw/xbox/nv2a/nv2a_pramdac.c create mode 100644 hw/xbox/nv2a/nv2a_prmcio.c rename hw/xbox/{nv2a.h => nv2a/nv2a_prmvio.c} (54%) rename hw/xbox/{ => nv2a}/nv2a_psh.c (99%) rename hw/xbox/{ => nv2a}/nv2a_psh.h (100%) create mode 100644 hw/xbox/nv2a/nv2a_ptimer.c create mode 100644 hw/xbox/nv2a/nv2a_pvideo.c rename hw/xbox/{ => nv2a}/nv2a_shaders.c (97%) rename hw/xbox/{ => nv2a}/nv2a_shaders.h (100%) rename hw/xbox/{ => nv2a}/nv2a_shaders_common.h (87%) create mode 100644 hw/xbox/nv2a/nv2a_stubs.c create mode 100644 hw/xbox/nv2a/nv2a_user.c rename hw/xbox/{ => nv2a}/nv2a_vsh.c (99%) rename hw/xbox/{ => nv2a}/nv2a_vsh.h (100%) rename hw/xbox/{ => nv2a}/swizzle.c (99%) rename hw/xbox/{ => nv2a}/swizzle.h (100%) create mode 100644 hw/xbox/nv2a/xxhash.c create mode 100644 hw/xbox/nv2a/xxhash.h diff --git a/hw/xbox/g-lru-cache.c b/hw/xbox/g-lru-cache.c deleted file mode 100644 index 95b87b4356..0000000000 --- a/hw/xbox/g-lru-cache.c +++ /dev/null @@ -1,338 +0,0 @@ -/* g-lru-cache.c - * - * Copyright (C) 2009 - Christian Hergert - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * 
License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * Ideally, you want to use fast_get. This is because we are using a - * GStaticRWLock which is indeed slower than a mutex if you have lots of writer - * acquisitions. This doesn't make it a true LRU, though, as the oldest - * retrieval from strorage is the first item evicted. - */ - -#include "g-lru-cache.h" - -// #define DEBUG - -#define LRU_CACHE_PRIVATE(object) \ - (G_TYPE_INSTANCE_GET_PRIVATE((object), \ - G_TYPE_LRU_CACHE, \ - GLruCachePrivate)) - -struct _GLruCachePrivate -{ - GStaticRWLock rw_lock; - guint max_size; - gboolean fast_get; - - GHashTable *hash_table; - GEqualFunc key_equal_func; - GCopyFunc key_copy_func; - GList *newest; - GList *oldest; - - GLookupFunc retrieve_func; - - gpointer user_data; - GDestroyNotify user_destroy_func; -}; - -G_DEFINE_TYPE (GLruCache, g_lru_cache, G_TYPE_OBJECT); - -static void -g_lru_cache_finalize (GObject *object) -{ - GLruCachePrivate *priv = LRU_CACHE_PRIVATE (object); - - if (priv->user_data && priv->user_destroy_func) - priv->user_destroy_func (priv->user_data); - - priv->user_data = NULL; - priv->user_destroy_func = NULL; - - g_hash_table_destroy (priv->hash_table); - priv->hash_table = NULL; - - g_list_free (priv->newest); - priv->newest = NULL; - priv->oldest = NULL; - - G_OBJECT_CLASS (g_lru_cache_parent_class)->finalize (object); -} - -static void -g_lru_cache_class_init (GLruCacheClass *klass) -{ - GObjectClass *object_class = G_OBJECT_CLASS (klass); - - object_class->finalize = g_lru_cache_finalize; - - g_type_class_add_private (object_class, sizeof (GLruCachePrivate)); -} - -static void -g_lru_cache_init (GLruCache *self) -{ - self->priv = LRU_CACHE_PRIVATE (self); - - self->priv->max_size = 1024; - self->priv->fast_get = FALSE; - g_static_rw_lock_init (&self->priv->rw_lock); -} - -static void -g_lru_cache_evict_n_oldest_locked (GLruCache *self, gint n) -{ - GList *victim; - gint i; - - for (i = 0; i < n; i++) - { - victim = self->priv->oldest; - - if (victim == NULL) - return; - - if (victim->prev) - victim->prev->next = NULL; - - self->priv->oldest = victim->prev; - g_hash_table_remove (self->priv->hash_table, victim->data); - - if (self->priv->newest == victim) - self->priv->newest = NULL; - - g_list_free1 (victim); /* victim->data is owned by hashtable */ - } - -#ifdef DEBUG - g_assert (g_hash_table_size (self->priv->hash_table) == g_list_length (self->priv->newest)); -#endif -} - -GLruCache* -g_lru_cache_new (GHashFunc hash_func, - GEqualFunc key_equal_func, - GCopyFunc key_copy_func, - GLookupFunc retrieve_func, - GDestroyNotify key_destroy_func, - GDestroyNotify value_destroy_func, - gpointer user_data, - GDestroyNotify user_destroy_func) -{ - GLruCache *self = g_object_new (G_TYPE_LRU_CACHE, NULL); - - self->priv->hash_table = g_hash_table_new_full (hash_func, - key_equal_func, - key_destroy_func, - value_destroy_func); - - self->priv->key_equal_func = key_equal_func; - self->priv->key_copy_func = key_copy_func; - 
self->priv->retrieve_func = retrieve_func; - self->priv->user_data = user_data; - self->priv->user_destroy_func = user_destroy_func; - - return self; -} - -void -g_lru_cache_set_max_size (GLruCache *self, guint max_size) -{ - g_return_if_fail (G_IS_LRU_CACHE (self)); - - guint old_max_size = self->priv->max_size; - - g_static_rw_lock_writer_lock (&(self->priv->rw_lock)); - - self->priv->max_size = max_size; - - if (old_max_size > max_size) - g_lru_cache_evict_n_oldest_locked (self, old_max_size - max_size); - - g_static_rw_lock_writer_unlock (&(self->priv->rw_lock)); -} - -guint -g_lru_cache_get_max_size (GLruCache *self) -{ - g_return_val_if_fail (G_IS_LRU_CACHE (self), -1); - return self->priv->max_size; -} - -guint -g_lru_cache_get_size (GLruCache *self) -{ - g_return_val_if_fail (G_IS_LRU_CACHE (self), -1); - return g_hash_table_size (self->priv->hash_table); -} - -gpointer -g_lru_cache_get (GLruCache *self, gpointer key) -{ - g_return_val_if_fail (G_IS_LRU_CACHE (self), NULL); - - gpointer value; - - g_static_rw_lock_reader_lock (&(self->priv->rw_lock)); - - value = g_hash_table_lookup (self->priv->hash_table, key); - -#ifdef DEBUG - if (value) - g_debug ("Cache Hit!"); - else - g_debug ("Cache miss"); -#endif - - g_static_rw_lock_reader_unlock (&(self->priv->rw_lock)); - - if (!value) - { - g_static_rw_lock_writer_lock (&(self->priv->rw_lock)); - - if (!g_hash_table_lookup (self->priv->hash_table, key)) - { - if (g_hash_table_size (self->priv->hash_table) >= self->priv->max_size) -#ifdef DEBUG - { - g_debug ("We are at capacity, must evict oldest"); -#endif - g_lru_cache_evict_n_oldest_locked (self, 1); -#ifdef DEBUG - } - - g_debug ("Retrieving value from external resource"); -#endif - - value = self->priv->retrieve_func (key, self->priv->user_data); - - if (self->priv->key_copy_func) - g_hash_table_insert (self->priv->hash_table, - self->priv->key_copy_func (key, self->priv->user_data), - value); - else - g_hash_table_insert (self->priv->hash_table, key, value); - - self->priv->newest = g_list_prepend (self->priv->newest, key); - - if (self->priv->oldest == NULL) - self->priv->oldest = self->priv->newest; - } -#ifdef DEBUG - else g_debug ("Lost storage race with another thread"); -#endif - - g_static_rw_lock_writer_unlock (&(self->priv->rw_lock)); - } - - /* fast_get means that we do not reposition the item to the head - * of the list. it essentially makes the lru, a lru from storage, - * not lru to user. 
- */ - - else if (!self->priv->fast_get && - !self->priv->key_equal_func (key, self->priv->newest->data)) - { -#ifdef DEBUG - g_debug ("Making item most recent"); -#endif - - g_static_rw_lock_writer_lock (&(self->priv->rw_lock)); - - GList *list = self->priv->newest; - GList *tmp; - GEqualFunc equal = self->priv->key_equal_func; - - for (tmp = list; tmp; tmp = tmp->next) - { - if (equal (key, tmp->data)) - { - GList *tmp1 = g_list_remove_link (list, tmp); - self->priv->newest = g_list_prepend (tmp1, tmp); - break; - } - } - - g_static_rw_lock_writer_unlock (&(self->priv->rw_lock)); - } - - return value; -} - -void -g_lru_cache_evict (GLruCache *self, gpointer key) -{ - g_return_if_fail (G_IS_LRU_CACHE (self)); - - GEqualFunc equal = self->priv->key_equal_func; - GList *list = NULL; - - g_static_rw_lock_writer_lock (&(self->priv->rw_lock)); - - if (equal (key, self->priv->oldest)) - { - g_lru_cache_evict_n_oldest_locked (self, 1); - } - else - { - for (list = self->priv->newest; list; list = list->next) - { - /* key, list->data is owned by hashtable */ - if (equal (key, list->data)) - { - self->priv->newest = g_list_remove_link (self->priv->newest, list); - g_list_free (list); - break; - } - } - g_hash_table_remove (self->priv->hash_table, key); - } - - g_static_rw_lock_writer_unlock (&(self->priv->rw_lock)); -} - -void -g_lru_cache_clear (GLruCache *self) -{ - g_return_if_fail (G_IS_LRU_CACHE (self)); - - g_static_rw_lock_writer_lock (&(self->priv->rw_lock)); - - g_hash_table_remove_all (self->priv->hash_table); - g_list_free (self->priv->newest); - - self->priv->oldest = NULL; - self->priv->newest = NULL; - - g_static_rw_lock_writer_unlock (&(self->priv->rw_lock)); -} - -void -g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get) -{ - g_return_if_fail (G_IS_LRU_CACHE (self)); - self->priv->fast_get = fast_get; -} - -gboolean -g_lru_cache_get_fast_get (GLruCache *self) -{ - g_return_val_if_fail (G_IS_LRU_CACHE (self), FALSE); - return self->priv->fast_get; -} - diff --git a/hw/xbox/nv2a/Makefile.objs b/hw/xbox/nv2a/Makefile.objs new file mode 100644 index 0000000000..1a42f60b47 --- /dev/null +++ b/hw/xbox/nv2a/Makefile.objs @@ -0,0 +1,32 @@ +obj-y += g-lru-cache.o +obj-y += swizzle.o + +obj-y += nv2a.o +obj-y += nv2a_debug.o +obj-y += nv2a_shaders.o + +### +# These are just #included into nv2a.c for build time savings +# +# obj-y += nv2a_pbus.o +# obj-y += nv2a_pcrtc.o +# obj-y += nv2a_pfb.o +# obj-y += nv2a_pfifo.o +# obj-y += nv2a_pgraph.o +# obj-y += nv2a_pmc.o +# obj-y += nv2a_pramdac.o +# obj-y += nv2a_prmcio.o +# obj-y += nv2a_prmvio.o +# obj-y += nv2a_ptimer.o +# obj-y += nv2a_pvideo.o +# obj-y += nv2a_user.o +# obj-y += nv2a_stubs.o +### + +obj-y += nv2a_psh.o +obj-y += nv2a_vsh.o + +obj-y += gl/ + +obj-y += xxhash.o +xxhash.o-cflags := -O3 -DXXH_FORCE_MEMORY_ACCESS=2 diff --git a/hw/xbox/nv2a/g-lru-cache.c b/hw/xbox/nv2a/g-lru-cache.c new file mode 100644 index 0000000000..b5075f3b4f --- /dev/null +++ b/hw/xbox/nv2a/g-lru-cache.c @@ -0,0 +1,372 @@ +/* g-lru-cache.c + * + * Copyright (C) 2009 - Christian Hergert + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Ideally, you want to use fast_get. This is because we are using a + * GStaticRWLock which is indeed slower than a mutex if you have lots of writer + * acquisitions. This doesn't make it a true LRU, though, as the oldest + * retrieval from strorage is the first item evicted. + */ + +#include "g-lru-cache.h" + +#ifndef DEBUG +#define DEBUG 0 +#endif + +#define LRU_CACHE_PRIVATE(object) \ + (G_TYPE_INSTANCE_GET_PRIVATE((object), \ + G_TYPE_LRU_CACHE, \ + GLruCachePrivate)) + +struct _GLruCachePrivate +{ + GRWLock rw_lock; + guint max_size; + gboolean fast_get; + + GHashTable *hash_table; + GEqualFunc key_equal_func; + GCopyFunc key_copy_func; + GList *newest; + GList *oldest; + + GLookupFunc retrieve_func; + + gpointer user_data; + GDestroyNotify user_destroy_func; +}; + +G_DEFINE_TYPE (GLruCache, g_lru_cache, G_TYPE_OBJECT); + +static void +g_lru_cache_finalize (GObject *object) +{ + GLruCachePrivate *priv = LRU_CACHE_PRIVATE (object); + + if (priv->user_data && priv->user_destroy_func) + priv->user_destroy_func (priv->user_data); + + priv->user_data = NULL; + priv->user_destroy_func = NULL; + + g_hash_table_destroy (priv->hash_table); + priv->hash_table = NULL; + + g_list_free (priv->newest); + priv->newest = NULL; + priv->oldest = NULL; + + G_OBJECT_CLASS (g_lru_cache_parent_class)->finalize (object); +} + +static void +g_lru_cache_class_init (GLruCacheClass *klass) +{ + GObjectClass *object_class = G_OBJECT_CLASS (klass); + + object_class->finalize = g_lru_cache_finalize; + + g_type_class_add_private (object_class, sizeof (GLruCachePrivate)); +} + +static void +g_lru_cache_init (GLruCache *self) +{ + self->priv = LRU_CACHE_PRIVATE (self); + + self->priv->max_size = 1024; + self->priv->fast_get = FALSE; + g_rw_lock_init (&self->priv->rw_lock); +} + +static void +g_lru_cache_evict_n_oldest_locked (GLruCache *self, gint n) +{ + GList *victim; + gint i; + + for (i = 0; i < n; i++) + { + victim = self->priv->oldest; + + if (victim == NULL) + return; + + if (victim->prev) + victim->prev->next = NULL; + + self->priv->oldest = victim->prev; + g_hash_table_remove (self->priv->hash_table, victim->data); + + if (self->priv->newest == victim) + self->priv->newest = NULL; + + g_list_free1 (victim); /* victim->data is owned by hashtable */ + } + +#if DEBUG + g_assert (g_hash_table_size (self->priv->hash_table) == g_list_length (self->priv->newest)); +#endif +} + +GLruCache* +g_lru_cache_new (GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GLookupFunc retrieve_func, + gpointer user_data, + GDestroyNotify user_destroy_func) +{ + return g_lru_cache_new_full (0, + NULL, + NULL, + 0, + NULL, + NULL, + key_hash_func, + key_equal_func, + retrieve_func, + user_data, + user_destroy_func); +} + +GLruCache* +g_lru_cache_new_full (GType key_type, + GCopyFunc key_copy_func, + GDestroyNotify key_destroy_func, + GType value_type, + GCopyFunc value_copy_func, + GDestroyNotify value_destroy_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GLookupFunc retrieve_func, + gpointer user_data, + GDestroyNotify user_destroy_func) +{ + GLruCache *self = g_object_new (G_TYPE_LRU_CACHE, NULL); + + self->priv->hash_table = g_hash_table_new_full (key_hash_func, + key_equal_func, + key_destroy_func, + 
value_destroy_func); + + self->priv->key_equal_func = key_equal_func; + self->priv->key_copy_func = key_copy_func; + self->priv->retrieve_func = retrieve_func; + self->priv->user_data = user_data; + self->priv->user_destroy_func = user_destroy_func; + + return self; +} + +void +g_lru_cache_set_max_size (GLruCache *self, guint max_size) +{ + g_return_if_fail (G_IS_LRU_CACHE (self)); + + guint old_max_size = self->priv->max_size; + + g_rw_lock_writer_lock (&(self->priv->rw_lock)); + + self->priv->max_size = max_size; + + if (old_max_size > max_size) + g_lru_cache_evict_n_oldest_locked (self, old_max_size - max_size); + + g_rw_lock_writer_unlock (&(self->priv->rw_lock)); +} + +guint +g_lru_cache_get_max_size (GLruCache *self) +{ + g_return_val_if_fail (G_IS_LRU_CACHE (self), -1); + return self->priv->max_size; +} + +guint +g_lru_cache_get_size (GLruCache *self) +{ + g_return_val_if_fail (G_IS_LRU_CACHE (self), -1); + return g_hash_table_size (self->priv->hash_table); +} + +gpointer +g_lru_cache_get (GLruCache *self, gpointer key, GError **error) +{ + g_return_val_if_fail (G_IS_LRU_CACHE (self), NULL); + + gpointer value; + GError *retrieve_error = NULL; + + g_rw_lock_reader_lock (&(self->priv->rw_lock)); + + value = g_hash_table_lookup (self->priv->hash_table, key); + +#if DEBUG + if (value) + g_debug ("Cache Hit!"); + else + g_debug ("Cache miss"); +#endif + + g_rw_lock_reader_unlock (&(self->priv->rw_lock)); + + if (!value) + { + g_rw_lock_writer_lock (&(self->priv->rw_lock)); + + if (!g_hash_table_lookup (self->priv->hash_table, key)) + { + if (g_hash_table_size (self->priv->hash_table) >= self->priv->max_size) +#if DEBUG + { + g_debug ("We are at capacity, must evict oldest"); +#endif + g_lru_cache_evict_n_oldest_locked (self, 1); +#if DEBUG + } + + g_debug ("Retrieving value from external resource"); +#endif + + value = self->priv->retrieve_func (key, + self->priv->user_data, + &retrieve_error); + + if (G_UNLIKELY (retrieve_error != NULL)) + { + g_propagate_error (error, retrieve_error); + return value; /* likely 'NULL', but we should be transparent */ + } + + if (self->priv->key_copy_func) + g_hash_table_insert (self->priv->hash_table, + self->priv->key_copy_func (key, self->priv->user_data), + value); + else + g_hash_table_insert (self->priv->hash_table, key, value); + + self->priv->newest = g_list_prepend (self->priv->newest, key); + + if (self->priv->oldest == NULL) + self->priv->oldest = self->priv->newest; + } +#if DEBUG + else g_debug ("Lost storage race with another thread"); +#endif + + g_rw_lock_writer_unlock (&(self->priv->rw_lock)); + } + + /* fast_get means that we do not reposition the item to the head + * of the list. it essentially makes the lru, a lru from storage, + * not lru to user. 
+ */ + + else if (!self->priv->fast_get && + !self->priv->key_equal_func (key, self->priv->newest->data)) + { +#if DEBUG + g_debug ("Making item most recent"); +#endif + + g_rw_lock_writer_lock (&(self->priv->rw_lock)); + + GList *list = self->priv->newest; + GList *tmp; + GEqualFunc equal = self->priv->key_equal_func; + + for (tmp = list; tmp; tmp = tmp->next) + { + if (equal (key, tmp->data)) + { + GList *tmp1 = g_list_remove_link (list, tmp); + self->priv->newest = g_list_prepend (tmp1, tmp); + break; + } + } + + g_rw_lock_writer_unlock (&(self->priv->rw_lock)); + } + + return value; +} + +void +g_lru_cache_evict (GLruCache *self, gpointer key) +{ + g_return_if_fail (G_IS_LRU_CACHE (self)); + + GEqualFunc equal = self->priv->key_equal_func; + GList *list = NULL; + + g_rw_lock_writer_lock (&(self->priv->rw_lock)); + + if (equal (key, self->priv->oldest)) + { + g_lru_cache_evict_n_oldest_locked (self, 1); + } + else + { + g_hash_table_remove (self->priv->hash_table, key); + + for (list = self->priv->newest; list; list = list->next) + { + if (equal (key, list->data)) + { + self->priv->newest = g_list_remove_link (self->priv->newest, list); + g_list_free (list); + break; + } + } + } + + g_rw_lock_writer_unlock (&(self->priv->rw_lock)); +} + +void +g_lru_cache_clear (GLruCache *self) +{ + g_return_if_fail (G_IS_LRU_CACHE (self)); + + g_rw_lock_writer_lock (&(self->priv->rw_lock)); + + g_hash_table_remove_all (self->priv->hash_table); + g_list_free (self->priv->newest); + + self->priv->oldest = NULL; + self->priv->newest = NULL; + + g_rw_lock_writer_unlock (&(self->priv->rw_lock)); +} + +void +g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get) +{ + g_return_if_fail (G_IS_LRU_CACHE (self)); + self->priv->fast_get = fast_get; +} + +gboolean +g_lru_cache_get_fast_get (GLruCache *self) +{ + g_return_val_if_fail (G_IS_LRU_CACHE (self), FALSE); + return self->priv->fast_get; +} + diff --git a/hw/xbox/g-lru-cache.h b/hw/xbox/nv2a/g-lru-cache.h similarity index 59% rename from hw/xbox/g-lru-cache.h rename to hw/xbox/nv2a/g-lru-cache.h index f55b22ebab..096e631846 100644 --- a/hw/xbox/g-lru-cache.h +++ b/hw/xbox/nv2a/g-lru-cache.h @@ -20,46 +20,59 @@ #ifndef __G_LRU_CACHE_H__ #define __G_LRU_CACHE_H__ +#ifdef __cplusplus +extern "C" { +#endif + #include #include G_BEGIN_DECLS -#define G_TYPE_LRU_CACHE (g_lru_cache_get_type ()) -#define G_LRU_CACHE(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache)) -#define G_LRU_CACHE_CONST(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache const)) -#define G_LRU_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), G_TYPE_LRU_CACHE, GLruCacheClass)) -#define G_IS_LRU_CACHE(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), G_TYPE_LRU_CACHE)) -#define G_IS_LRU_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), G_TYPE_LRU_CACHE)) -#define G_LRU_CACHE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), G_TYPE_LRU_CACHE, GLruCacheClass)) +#define G_TYPE_LRU_CACHE (g_lru_cache_get_type ()) +#define G_LRU_CACHE(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache)) +#define G_LRU_CACHE_CONST(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), G_TYPE_LRU_CACHE, GLruCache const)) +#define G_LRU_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), G_TYPE_LRU_CACHE, GLruCacheClass)) +#define G_IS_LRU_CACHE(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), G_TYPE_LRU_CACHE)) +#define G_IS_LRU_CACHE_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), G_TYPE_LRU_CACHE)) +#define G_LRU_CACHE_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS ((obj), 
G_TYPE_LRU_CACHE, GLruCacheClass)) #define G_LOOKUP_FUNC(func) ((GLookupFunc)func) -typedef struct _GLruCache GLruCache; -typedef struct _GLruCacheClass GLruCacheClass; -typedef struct _GLruCachePrivate GLruCachePrivate; +typedef struct _GLruCache GLruCache; +typedef struct _GLruCacheClass GLruCacheClass; +typedef struct _GLruCachePrivate GLruCachePrivate; -typedef gpointer (*GLookupFunc) (gpointer key, gpointer user_data); +typedef gpointer (*GLookupFunc) (gpointer key, gpointer user_data, GError **error); struct _GLruCache { - GObject parent; - - GLruCachePrivate *priv; + GObject parent; + + GLruCachePrivate *priv; }; struct _GLruCacheClass { - GObjectClass parent_class; + GObjectClass parent_class; }; GType g_lru_cache_get_type (void) G_GNUC_CONST; -GLruCache* g_lru_cache_new (GHashFunc hash_func, +GLruCache* g_lru_cache_new (GHashFunc key_hash_func, GEqualFunc key_equal_func, - GCopyFunc key_copy_func, GLookupFunc retrieve_func, + gpointer user_data, + GDestroyNotify user_destroy_func); + +GLruCache* g_lru_cache_new_full (GType key_type, + GCopyFunc key_copy_func, GDestroyNotify key_destroy_func, + GType value_type, + GCopyFunc value_copy_func, GDestroyNotify value_destroy_func, + GHashFunc key_hash_func, + GEqualFunc key_equal_func, + GLookupFunc retrieve_func, gpointer user_data, GDestroyNotify user_destroy_func); @@ -68,7 +81,7 @@ guint g_lru_cache_get_max_size (GLruCache *self); guint g_lru_cache_get_size (GLruCache *self); -gpointer g_lru_cache_get (GLruCache *self, gpointer key); +gpointer g_lru_cache_get (GLruCache *self, gpointer key, GError **error); void g_lru_cache_evict (GLruCache *self, gpointer key); void g_lru_cache_clear (GLruCache *self); @@ -77,4 +90,8 @@ void g_lru_cache_set_fast_get (GLruCache *self, gboolean fast_get); G_END_DECLS +#ifdef __cplusplus +} +#endif + #endif /* __G_LRU_CACHE_H__ */ diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c new file mode 100644 index 0000000000..6feb587385 --- /dev/null +++ b/hw/xbox/nv2a/nv2a.c @@ -0,0 +1,568 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/error-report.h" +#include +#include "nv2a.h" +#include "hw/display/vga_regs.h" + +#ifdef __WINNT__ +// HACK: mingw-w64 doesn't provide ffs, for now we just shove it here +// TODO: decide on a better location +int ffs(register int valu) +{ + register int bit; + + if (valu == 0) + return 0; + + for (bit = 1; !(valu & 1); bit++) + valu >>= 1; + + return bit; +} +#endif + +DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address); +void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len); +void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram); + +void update_irq(NV2AState *d) +{ + /* PFIFO */ + if (d->pfifo.pending_interrupts & d->pfifo.enabled_interrupts) { + d->pmc.pending_interrupts |= NV_PMC_INTR_0_PFIFO; + } else { + d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PFIFO; + } + + /* PCRTC */ + if (d->pcrtc.pending_interrupts & d->pcrtc.enabled_interrupts) { + d->pmc.pending_interrupts |= NV_PMC_INTR_0_PCRTC; + } else { + d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PCRTC; + } + + /* PGRAPH */ + if (d->pgraph.pending_interrupts & d->pgraph.enabled_interrupts) { + d->pmc.pending_interrupts |= NV_PMC_INTR_0_PGRAPH; + } else { + d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PGRAPH; + } + + if (d->pmc.pending_interrupts && d->pmc.enabled_interrupts) { + NV2A_DPRINTF("raise irq\n"); + pci_irq_assert(&d->dev); + } else { + pci_irq_deassert(&d->dev); + } +} + +DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address) +{ + assert(dma_obj_address < memory_region_size(&d->ramin)); + + uint32_t *dma_obj = (uint32_t*)(d->ramin_ptr + dma_obj_address); + uint32_t flags = ldl_le_p(dma_obj); + uint32_t limit = ldl_le_p(dma_obj + 1); + uint32_t frame = ldl_le_p(dma_obj + 2); + + return (DMAObject){ + .dma_class = GET_MASK(flags, NV_DMA_CLASS), + .dma_target = GET_MASK(flags, NV_DMA_TARGET), + .address = (frame & NV_DMA_ADDRESS) | GET_MASK(flags, NV_DMA_ADJUST), + .limit = limit, + }; +} + +void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len) +{ + assert(dma_obj_address < memory_region_size(&d->ramin)); + + DMAObject dma = nv_dma_load(d, dma_obj_address); + + /* TODO: Handle targets and classes properly */ + NV2A_DPRINTF("dma_map %x, %x, %" HWADDR_PRIx " %" HWADDR_PRIx "\n", + dma.dma_class, dma.dma_target, dma.address, dma.limit); + + dma.address &= 0x07FFFFFF; + + // assert(dma.address + dma.limit < memory_region_size(d->vram)); + *len = dma.limit; + return d->vram_ptr + dma.address; +} + +#define STUB 0 + +#if STUB +void *pfifo_puller_thread(void *opaque) { return NULL; } +void pgraph_init(NV2AState *d){} +static void pfifo_run_pusher(NV2AState *d){} +void pgraph_destroy(PGRAPHState *pg){} +static uint8_t cliptobyte(int x) +{ + return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 
255 : x)); +} +static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 1] - 128; + e = (int)line[ix * 2 + 1] - 128; + } else { + d = (int)line[ix * 2 + 1] - 128; + e = (int)line[ix * 2 + 3] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} +#endif + +#define DEFINE_PROTO(prefix) \ + uint64_t prefix ## _read(void *opaque, hwaddr addr, unsigned int size); \ + void prefix ## _write(void *opaque, hwaddr addr, uint64_t val, unsigned int size); + +DEFINE_PROTO(pmc) +DEFINE_PROTO(pbus) +DEFINE_PROTO(pfifo) +DEFINE_PROTO(prma) +DEFINE_PROTO(pvideo) +DEFINE_PROTO(ptimer) +DEFINE_PROTO(pcounter) +DEFINE_PROTO(pvpe) +DEFINE_PROTO(ptv) +DEFINE_PROTO(prmfb) +DEFINE_PROTO(prmvio) +DEFINE_PROTO(pfb) +DEFINE_PROTO(pstraps) +DEFINE_PROTO(pgraph) +DEFINE_PROTO(pcrtc) +DEFINE_PROTO(prmcio) +DEFINE_PROTO(pramdac) +DEFINE_PROTO(prmdio) +DEFINE_PROTO(pramin) +DEFINE_PROTO(user) + +#undef DEFINE_PROTO + +#include "nv2a_pbus.c" +#include "nv2a_pcrtc.c" +#include "nv2a_pfb.c" +#if !STUB +#include "nv2a_pgraph.c" +#include "nv2a_pfifo.c" +#endif +#include "nv2a_pmc.c" +#include "nv2a_pramdac.c" +#include "nv2a_prmcio.c" +#include "nv2a_prmvio.c" +#include "nv2a_ptimer.c" +#include "nv2a_pvideo.c" +#include "nv2a_stubs.c" +#include "nv2a_user.c" + +#if STUB +void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + reg_log_write(NV_PGRAPH, addr, val); +} + +uint64_t pgraph_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PGRAPH, addr, 0); + return 0; +} + +void pfifo_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + reg_log_write(NV_PFIFO, addr, val); +} + +uint64_t pfifo_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PFIFO, addr, 0); + return 0; +} +#endif + +const struct NV2ABlockInfo blocktable[] = { + +#define ENTRY(NAME, OFFSET, SIZE, RDFUNC, WRFUNC) \ + [ NV_ ## NAME ] = { \ + .name = #NAME, .offset = OFFSET, .size = SIZE, \ + .ops = { .read = RDFUNC, .write = WRFUNC }, \ + }, \ + + ENTRY(PMC, 0x000000, 0x001000, pmc_read, pmc_write) + ENTRY(PBUS, 0x001000, 0x001000, pbus_read, pbus_write) + ENTRY(PFIFO, 0x002000, 0x002000, pfifo_read, pfifo_write) + ENTRY(PRMA, 0x007000, 0x001000, prma_read, prma_write) + ENTRY(PVIDEO, 0x008000, 0x001000, pvideo_read, pvideo_write) + ENTRY(PTIMER, 0x009000, 0x001000, ptimer_read, ptimer_write) + ENTRY(PCOUNTER, 0x00a000, 0x001000, pcounter_read, pcounter_write) + ENTRY(PVPE, 0x00b000, 0x001000, pvpe_read, pvpe_write) + ENTRY(PTV, 0x00d000, 0x001000, ptv_read, ptv_write) + ENTRY(PRMFB, 0x0a0000, 0x020000, prmfb_read, prmfb_write) + ENTRY(PRMVIO, 0x0c0000, 0x001000, prmvio_read, prmvio_write) + ENTRY(PFB, 0x100000, 0x001000, pfb_read, pfb_write) + ENTRY(PSTRAPS, 0x101000, 0x001000, pstraps_read, pstraps_write) + ENTRY(PGRAPH, 0x400000, 0x002000, pgraph_read, pgraph_write) + ENTRY(PCRTC, 0x600000, 0x001000, pcrtc_read, pcrtc_write) + ENTRY(PRMCIO, 0x601000, 0x001000, prmcio_read, prmcio_write) + ENTRY(PRAMDAC, 0x680000, 0x001000, pramdac_read, pramdac_write) + ENTRY(PRMDIO, 0x681000, 0x001000, prmdio_read, prmdio_write) + // ENTRY(PRAMIN, 0x700000, 0x100000, pramin_read, pramin_write) + ENTRY(USER, 0x800000, 0x800000, user_read, user_write) +#undef ENTRY +}; + +const int blocktable_len = ARRAY_SIZE(blocktable); + +// 
static const char* nv2a_reg_names[] = {}; + +void reg_log_read(int block, hwaddr addr, uint64_t val) { + if (blocktable[block].name) { + // hwaddr naddr = blocktable[block].offset + addr; + // if (naddr < ARRAY_SIZE(nv2a_reg_names) && nv2a_reg_names[naddr]) { + // NV2A_DPRINTF("%s: read [%s] -> 0x%" PRIx64 "\n", + // blocktable[block].name, nv2a_reg_names[naddr], val); + // } else { + NV2A_DPRINTF("%s: read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", + blocktable[block].name, addr, val); + // } + } else { + NV2A_DPRINTF("(%d?): read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", + block, addr, val); + } +} + +void reg_log_write(int block, hwaddr addr, uint64_t val) { + if (blocktable[block].name) { + // hwaddr naddr = blocktable[block].offset + addr; + // if (naddr < ARRAY_SIZE(nv2a_reg_names) && nv2a_reg_names[naddr]) { + // NV2A_DPRINTF("%s: [%s] = 0x%" PRIx64 "\n", + // blocktable[block].name, nv2a_reg_names[naddr], val); + // } else { + NV2A_DPRINTF("%s: [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", + blocktable[block].name, addr, val); + // } + } else { + NV2A_DPRINTF("(%d?): [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", + block, addr, val); + } +} + +#if 0 +/* FIXME: Probably totally wrong */ +static inline unsigned int rgb_to_pixel8(unsigned int r, unsigned int g, + unsigned int b) +{ + return ((r >> 5) << 5) | ((g >> 5) << 2) | (b >> 6); +} +static inline unsigned int rgb_to_pixel16(unsigned int r, unsigned int g, + unsigned int b) +{ + return ((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3); +} +static inline unsigned int rgb_to_pixel32(unsigned int r, unsigned int g, + unsigned int b) +{ + return (r << 16) | (g << 8) | b; +} + +static void nv2a_overlay_draw_line(VGACommonState *vga, uint8_t *line, int y) +{ + NV2A_DPRINTF("nv2a_overlay_draw_line\n"); + + NV2AState *d = container_of(vga, NV2AState, vga); + DisplaySurface *surface = qemu_console_surface(d->vga.con); + + int surf_bpp = surface_bytes_per_pixel(surface); + int surf_width = surface_width(surface); + + if (!(d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)) return; + + hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; + hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; + hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; + + int in_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], + NV_PVIDEO_SIZE_IN_WIDTH); + int in_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], + NV_PVIDEO_SIZE_IN_HEIGHT); + int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_S); + int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_T); + int in_pitch = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], + NV_PVIDEO_FORMAT_PITCH); + int in_color = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], + NV_PVIDEO_FORMAT_COLOR); + + // TODO: support other color formats + assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); + + int out_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], + NV_PVIDEO_SIZE_OUT_WIDTH); + int out_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], + NV_PVIDEO_SIZE_OUT_HEIGHT); + int out_x = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], + NV_PVIDEO_POINT_OUT_X); + int out_y = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], + NV_PVIDEO_POINT_OUT_Y); + + + if (y < out_y || y >= out_y + out_height) return; + + // TODO: scaling, color keys + + int in_y = y - out_y; + if (in_y >= in_height) return; + + assert(offset + in_pitch * (in_y + 1) <= limit); + uint8_t *in_line = d->vram_ptr + base + offset + in_pitch * in_y; + + int x; + for (x=0; x= surf_width) break; + int ix = in_s + x; + if (ix >= in_width) break; 
+ + uint8_t r,g,b; + convert_yuy2_to_rgb(in_line, ix, &r, &g, &b); + + // unsigned int pixel = vga->rgb_to_pixel(r, g, b); + switch (surf_bpp) { + case 1: + ((uint8_t*)line)[ox] = (uint8_t)rgb_to_pixel8(r,g,b); + break; + case 2: + ((uint16_t*)line)[ox] = (uint16_t)rgb_to_pixel16(r,g,b); + break; + case 4: + ((uint32_t*)line)[ox] = (uint32_t)rgb_to_pixel32(r,g,b); + break; + default: + assert(false); + break; + } + } +} +#endif + +static int nv2a_get_bpp(VGACommonState *s) +{ + if ((s->cr[0x28] & 3) == 3) { + return 32; + } + return (s->cr[0x28] & 3) * 8; +} + +static void nv2a_get_offsets(VGACommonState *s, + uint32_t *pline_offset, + uint32_t *pstart_addr, + uint32_t *pline_compare) +{ + NV2AState *d = container_of(s, NV2AState, vga); + uint32_t start_addr, line_offset, line_compare; + + line_offset = s->cr[0x13] + | ((s->cr[0x19] & 0xe0) << 3) + | ((s->cr[0x25] & 0x20) << 6); + line_offset <<= 3; + *pline_offset = line_offset; + + start_addr = d->pcrtc.start / 4; + *pstart_addr = start_addr; + + line_compare = s->cr[VGA_CRTC_LINE_COMPARE] | + ((s->cr[VGA_CRTC_OVERFLOW] & 0x10) << 4) | + ((s->cr[VGA_CRTC_MAX_SCAN] & 0x40) << 3); + *pline_compare = line_compare; +} + +static void nv2a_vga_gfx_update(void *opaque) +{ + VGACommonState *vga = opaque; + vga->hw_ops->gfx_update(vga); + + NV2AState *d = container_of(vga, NV2AState, vga); + d->pcrtc.pending_interrupts |= NV_PCRTC_INTR_0_VBLANK; + update_irq(d); +} + +static void nv2a_init_memory(NV2AState *d, MemoryRegion *ram) +{ + /* xbox is UMA - vram *is* ram */ + d->vram = ram; + + /* PCI exposed vram */ + memory_region_init_alias(&d->vram_pci, OBJECT(d), "nv2a-vram-pci", d->vram, + 0, memory_region_size(d->vram)); + pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, &d->vram_pci); + + + /* RAMIN - should be in vram somewhere, but not quite sure where atm */ + memory_region_init_ram(&d->ramin, OBJECT(d), "nv2a-ramin", 0x100000, &error_fatal); + /* memory_region_init_alias(&d->ramin, "nv2a-ramin", &d->vram, + memory_region_size(d->vram) - 0x100000, + 0x100000); */ + + memory_region_add_subregion(&d->mmio, 0x700000, &d->ramin); + + + d->vram_ptr = memory_region_get_ram_ptr(d->vram); + d->ramin_ptr = memory_region_get_ram_ptr(&d->ramin); + + memory_region_set_log(d->vram, true, DIRTY_MEMORY_NV2A); + memory_region_set_dirty(d->vram, 0, memory_region_size(d->vram)); + + /* hacky. swap out vga's vram */ + memory_region_destroy(&d->vga.vram); + // memory_region_unref(&d->vga.vram); // FIXME: Is ths right? + memory_region_init_alias(&d->vga.vram, OBJECT(d), "vga.vram", + d->vram, 0, memory_region_size(d->vram)); + d->vga.vram_ptr = memory_region_get_ram_ptr(&d->vga.vram); + vga_dirty_log_start(&d->vga); + + + pgraph_init(d); + + /* fire up puller */ + qemu_thread_create(&d->pfifo.puller_thread, "nv2a.puller_thread", + pfifo_puller_thread, + d, QEMU_THREAD_JOINABLE); +} + +static void nv2a_realize(PCIDevice *dev, Error **errp) +{ + int i; + NV2AState *d; + + d = NV2A_DEVICE(dev); + + dev->config[PCI_INTERRUPT_PIN] = 0x01; + + d->pcrtc.start = 0; + + d->pramdac.core_clock_coeff = 0x00011c01; /* 189MHz...? */ + d->pramdac.core_clock_freq = 189000000; + d->pramdac.memory_clock_coeff = 0; + d->pramdac.video_clock_coeff = 0x0003C20D; /* 25182Khz...? */ + + /* legacy VGA shit */ + VGACommonState *vga = &d->vga; + vga_common_reset(vga); + + vga->vram_size_mb = 64; + /* seems to start in color mode */ + vga->msr = VGA_MIS_COLOR; + + vga_common_init(vga, OBJECT(dev), false); // FIXME: true or false? 
idk + vga->get_bpp = nv2a_get_bpp; + vga->get_offsets = nv2a_get_offsets; + // vga->overlay_draw_line = nv2a_overlay_draw_line; + + d->hw_ops = *vga->hw_ops; + d->hw_ops.gfx_update = nv2a_vga_gfx_update; + vga->con = graphic_console_init(DEVICE(dev), 0, &d->hw_ops, vga); + + /* mmio */ + memory_region_init(&d->mmio, OBJECT(dev), "nv2a-mmio", 0x1000000); + pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio); + + for (i=0; iblock_mmio[i], OBJECT(dev), + &blocktable[i].ops, d, + blocktable[i].name, blocktable[i].size); + memory_region_add_subregion(&d->mmio, blocktable[i].offset, + &d->block_mmio[i]); + } + + /* init fifo cache1 */ + qemu_mutex_init(&d->pfifo.cache1.cache_lock); + qemu_cond_init(&d->pfifo.cache1.cache_cond); + QSIMPLEQ_INIT(&d->pfifo.cache1.cache); + QSIMPLEQ_INIT(&d->pfifo.cache1.working_cache); +} + +static void nv2a_exitfn(PCIDevice *dev) +{ + NV2AState *d; + d = NV2A_DEVICE(dev); + + d->exiting = true; + qemu_cond_signal(&d->pfifo.cache1.cache_cond); + qemu_thread_join(&d->pfifo.puller_thread); + + qemu_mutex_destroy(&d->pfifo.cache1.cache_lock); + qemu_cond_destroy(&d->pfifo.cache1.cache_cond); + + pgraph_destroy(&d->pgraph); +} + +static void nv2a_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->vendor_id = PCI_VENDOR_ID_NVIDIA; + k->device_id = PCI_DEVICE_ID_NVIDIA_GEFORCE_NV2A; + k->revision = 161; + k->class_id = PCI_CLASS_DISPLAY_3D; + k->realize = nv2a_realize; + k->exit = nv2a_exitfn; + + dc->desc = "GeForce NV2A Integrated Graphics"; +} + +static const TypeInfo nv2a_info = { + .name = "nv2a", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(NV2AState), + .class_init = nv2a_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +static void nv2a_register(void) +{ + type_register_static(&nv2a_info); +} +type_init(nv2a_register); + +void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram) +{ + PCIDevice *dev = pci_create_simple(bus, devfn, "nv2a"); + NV2AState *d = NV2A_DEVICE(dev); + nv2a_init_memory(d, ram); +} diff --git a/hw/xbox/nv2a/nv2a.h b/hw/xbox/nv2a/nv2a.h new file mode 100644 index 0000000000..e015020363 --- /dev/null +++ b/hw/xbox/nv2a/nv2a.h @@ -0,0 +1,446 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef HW_NV2A_H +#define HW_NV2A_H + +#include "hw/hw.h" +#include "hw/i386/pc.h" +#include "ui/console.h" +#include "hw/pci/pci.h" +#include "ui/console.h" +#include "hw/display/vga.h" +#include "hw/display/vga_int.h" +#include "qemu/thread.h" +#include "qapi/qmp/qstring.h" +#include "cpu.h" + +#include "g-lru-cache.h" +#include "swizzle.h" +#include "nv2a_shaders.h" +#include "nv2a_debug.h" +#include "nv2a_int.h" + +#include "gl/gloffscreen.h" +#include "gl/glextensions.h" + +#define USE_TEXTURE_CACHE + +#define GET_MASK(v, mask) (((v) & (mask)) >> (ffs(mask)-1)) + +#define SET_MASK(v, mask, val) ({ \ + const unsigned int __val = (val); \ + const unsigned int __mask = (mask); \ + (v) &= ~(__mask); \ + (v) |= ((__val) << (ffs(__mask)-1)) & (__mask); \ + }) + +#define CASE_4(v, step) \ + case (v): \ + case (v)+(step): \ + case (v)+(step)*2: \ + case (v)+(step)*3 + + +#define NV2A_DEVICE(obj) \ + OBJECT_CHECK(NV2AState, (obj), "nv2a") + +void reg_log_read(int block, hwaddr addr, uint64_t val); +void reg_log_write(int block, hwaddr addr, uint64_t val); + +enum FifoMode { + FIFO_PIO = 0, + FIFO_DMA = 1, +}; + +enum FIFOEngine { + ENGINE_SOFTWARE = 0, + ENGINE_GRAPHICS = 1, + ENGINE_DVD = 2, +}; + +typedef struct DMAObject { + unsigned int dma_class; + unsigned int dma_target; + hwaddr address; + hwaddr limit; +} DMAObject; + +typedef struct VertexAttribute { + bool dma_select; + hwaddr offset; + + /* inline arrays are packed in order? + * Need to pass the offset to converted attributes */ + unsigned int inline_array_offset; + + float inline_value[4]; + + unsigned int format; + unsigned int size; /* size of the data type */ + unsigned int count; /* number of components */ + uint32_t stride; + + bool needs_conversion; + uint8_t *converted_buffer; + unsigned int converted_elements; + unsigned int converted_size; + unsigned int converted_count; + + float *inline_buffer; + + GLint gl_count; + GLenum gl_type; + GLboolean gl_normalize; + + GLuint gl_converted_buffer; + GLuint gl_inline_buffer; +} VertexAttribute; + +typedef struct Surface { + bool draw_dirty; + bool buffer_dirty; + bool write_enabled_cache; + unsigned int pitch; + + hwaddr offset; +} Surface; + +typedef struct SurfaceShape { + unsigned int z_format; + unsigned int color_format; + unsigned int zeta_format; + unsigned int log_width, log_height; + unsigned int clip_x, clip_y; + unsigned int clip_width, clip_height; + unsigned int anti_aliasing; +} SurfaceShape; + +typedef struct TextureShape { + bool cubemap; + unsigned int dimensionality; + unsigned int color_format; + unsigned int levels; + unsigned int width, height, depth; + + unsigned int min_mipmap_level, max_mipmap_level; + unsigned int pitch; +} TextureShape; + +typedef struct TextureKey { + TextureShape state; + uint64_t data_hash; + uint8_t* texture_data; + uint8_t* palette_data; +} TextureKey; + +typedef struct TextureBinding { + GLenum gl_target; + GLuint gl_texture; + unsigned int refcnt; +} TextureBinding; + +typedef struct KelvinState { + hwaddr dma_notifies; + hwaddr dma_state; + hwaddr dma_semaphore; + unsigned int semaphore_offset; +} KelvinState; + +typedef struct ContextSurfaces2DState { + hwaddr dma_image_source; + hwaddr dma_image_dest; + unsigned int color_format; + unsigned int source_pitch, dest_pitch; + hwaddr source_offset, dest_offset; + +} ContextSurfaces2DState; + +typedef struct ImageBlitState { + hwaddr context_surfaces; + unsigned int operation; + unsigned int in_x, in_y; + unsigned int out_x, out_y; + unsigned int width, height; + +} 
ImageBlitState; + +typedef struct GraphicsObject { + uint8_t graphics_class; + union { + ContextSurfaces2DState context_surfaces_2d; + + ImageBlitState image_blit; + + KelvinState kelvin; + } data; +} GraphicsObject; + +typedef struct GraphicsSubchannel { + hwaddr object_instance; + GraphicsObject object; + uint32_t object_cache[5]; +} GraphicsSubchannel; + +typedef struct GraphicsContext { + bool channel_3d; + unsigned int subchannel; +} GraphicsContext; + + +typedef struct PGRAPHState { + QemuMutex lock; + + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + QemuCond interrupt_cond; + + hwaddr context_table; + hwaddr context_address; + + + unsigned int trapped_method; + unsigned int trapped_subchannel; + unsigned int trapped_channel_id; + uint32_t trapped_data[2]; + uint32_t notify_source; + + bool fifo_access; + QemuCond fifo_access_cond; + + QemuCond flip_3d; + + unsigned int channel_id; + bool channel_valid; + GraphicsContext context[NV2A_NUM_CHANNELS]; + + hwaddr dma_color, dma_zeta; + Surface surface_color, surface_zeta; + unsigned int surface_type; + SurfaceShape surface_shape; + SurfaceShape last_surface_shape; + + hwaddr dma_a, dma_b; + GLruCache *texture_cache; + bool texture_dirty[NV2A_MAX_TEXTURES]; + TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; + + GHashTable *shader_cache; + ShaderBinding *shader_binding; + + bool texture_matrix_enable[NV2A_MAX_TEXTURES]; + + /* FIXME: Move to NV_PGRAPH_BUMPMAT... */ + float bump_env_matrix[NV2A_MAX_TEXTURES-1][4]; /* 3 allowed stages with 2x2 matrix each */ + + GloContext *gl_context; + GLuint gl_framebuffer; + GLuint gl_color_buffer, gl_zeta_buffer; + GraphicsSubchannel subchannel_data[NV2A_NUM_SUBCHANNELS]; + + hwaddr dma_report; + hwaddr report_offset; + bool zpass_pixel_count_enable; + unsigned int zpass_pixel_count_result; + unsigned int gl_zpass_pixel_count_query_count; + GLuint* gl_zpass_pixel_count_queries; + + hwaddr dma_vertex_a, dma_vertex_b; + + unsigned int primitive_mode; + + bool enable_vertex_program_write; + + uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; + + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; + + /* lighting constant arrays */ + uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; + bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; + uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; + bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; + uint32_t ltc1[NV2A_LTC1_COUNT][4]; + bool ltc1_dirty[NV2A_LTC1_COUNT]; + + // should figure out where these are in lighting context + float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; + float light_infinite_direction[NV2A_MAX_LIGHTS][3]; + float light_local_position[NV2A_MAX_LIGHTS][3]; + float light_local_attenuation[NV2A_MAX_LIGHTS][3]; + + VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; + + unsigned int inline_array_length; + uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; + GLuint gl_inline_array_buffer; + + unsigned int inline_elements_length; + uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_buffer_length; + + unsigned int draw_arrays_length; + unsigned int draw_arrays_max_count; + /* FIXME: Unknown size, possibly endless, 1000 will do for now */ + GLint gl_draw_arrays_start[1000]; + GLsizei gl_draw_arrays_count[1000]; + + GLuint gl_element_buffer; + GLuint gl_memory_buffer; + GLuint gl_vertex_array; + + uint32_t regs[0x2000]; +} PGRAPHState; + + +typedef struct CacheEntry { + QSIMPLEQ_ENTRY(CacheEntry) entry; + unsigned int method : 14; + unsigned int subchannel : 3; 
+ bool nonincreasing; + uint32_t parameter; +} CacheEntry; + +typedef struct Cache1State { + unsigned int channel_id; + enum FifoMode mode; + + /* Pusher state */ + bool push_enabled; + bool dma_push_enabled; + bool dma_push_suspended; + hwaddr dma_instance; + + bool method_nonincreasing; + unsigned int method : 14; + unsigned int subchannel : 3; + unsigned int method_count : 24; + uint32_t dcount; + bool subroutine_active; + hwaddr subroutine_return; + hwaddr get_jmp_shadow; + uint32_t rsvd_shadow; + uint32_t data_shadow; + uint32_t error; + + bool pull_enabled; + enum FIFOEngine bound_engines[NV2A_NUM_SUBCHANNELS]; + enum FIFOEngine last_engine; + + /* The actual command queue */ + QemuMutex cache_lock; + QemuCond cache_cond; + QSIMPLEQ_HEAD(, CacheEntry) cache; + QSIMPLEQ_HEAD(, CacheEntry) working_cache; +} Cache1State; + +typedef struct ChannelControl { + hwaddr dma_put; + hwaddr dma_get; + uint32_t ref; +} ChannelControl; + +typedef struct NV2AState { + PCIDevice dev; + qemu_irq irq; + bool exiting; + + VGACommonState vga; + GraphicHwOps hw_ops; + QEMUTimer *vblank_timer; + + MemoryRegion *vram; + MemoryRegion vram_pci; + uint8_t *vram_ptr; + MemoryRegion ramin; + uint8_t *ramin_ptr; + + MemoryRegion mmio; + MemoryRegion block_mmio[NV_NUM_BLOCKS]; + + struct { + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + } pmc; + + struct { + QemuThread puller_thread; + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + Cache1State cache1; + uint32_t regs[0x2000]; + } pfifo; + + struct { + uint32_t regs[0x1000]; + } pvideo; + + struct { + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + uint32_t numerator; + uint32_t denominator; + uint32_t alarm_time; + } ptimer; + + struct { + uint32_t regs[0x1000]; + } pfb; + + struct PGRAPHState pgraph; + + struct { + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + hwaddr start; + } pcrtc; + + struct { + uint32_t core_clock_coeff; + uint64_t core_clock_freq; + uint32_t memory_clock_coeff; + uint32_t video_clock_coeff; + } pramdac; + + struct { + ChannelControl channel_control[NV2A_NUM_CHANNELS]; + } user; + +} NV2AState; + +typedef struct NV2ABlockInfo { + const char* name; + hwaddr offset; + uint64_t size; + MemoryRegionOps ops; +} NV2ABlockInfo; + +extern const struct NV2ABlockInfo blocktable[]; +extern const int blocktable_len; + +void pgraph_init(NV2AState *d); +void *pfifo_puller_thread(void *opaque); +void pgraph_destroy(PGRAPHState *pg); +void update_irq(NV2AState *d); + +#endif diff --git a/hw/xbox/nv2a_debug.c b/hw/xbox/nv2a/nv2a_debug.c similarity index 97% rename from hw/xbox/nv2a_debug.c rename to hw/xbox/nv2a/nv2a_debug.c index 7e1b38907e..f16b539776 100644 --- a/hw/xbox/nv2a_debug.c +++ b/hw/xbox/nv2a/nv2a_debug.c @@ -18,14 +18,15 @@ * along with this program; if not, see . */ -#include "hw/xbox/nv2a_debug.h" - #ifdef DEBUG_NV2A_GL +#include "qemu/osdep.h" + #include #include #include +#include "nv2a_debug.h" #include "gl/glextensions.h" void gl_debug_message(bool cc, const char *fmt, ...) 
diff --git a/hw/xbox/nv2a_debug.h b/hw/xbox/nv2a/nv2a_debug.h similarity index 100% rename from hw/xbox/nv2a_debug.h rename to hw/xbox/nv2a/nv2a_debug.h diff --git a/hw/xbox/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h similarity index 100% rename from hw/xbox/nv2a_int.h rename to hw/xbox/nv2a/nv2a_int.h diff --git a/hw/xbox/nv2a/nv2a_pbus.c b/hw/xbox/nv2a/nv2a_pbus.c new file mode 100644 index 0000000000..ea9e085d29 --- /dev/null +++ b/hw/xbox/nv2a/nv2a_pbus.c @@ -0,0 +1,59 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +/* PBUS - bus control */ +uint64_t pbus_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = opaque; + + uint64_t r = 0; + switch (addr) { + case NV_PBUS_PCI_NV_0: + r = pci_get_long(d->dev.config + PCI_VENDOR_ID); + break; + case NV_PBUS_PCI_NV_1: + r = pci_get_long(d->dev.config + PCI_COMMAND); + break; + case NV_PBUS_PCI_NV_2: + r = pci_get_long(d->dev.config + PCI_CLASS_REVISION); + break; + default: + break; + } + + reg_log_read(NV_PBUS, addr, r); + return r; +} + +void pbus_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = opaque; + + reg_log_write(NV_PBUS, addr, val); + + switch (addr) { + case NV_PBUS_PCI_NV_1: + pci_set_long(d->dev.config + PCI_COMMAND, val); + break; + default: + break; + } +} diff --git a/hw/xbox/nv2a/nv2a_pcrtc.c b/hw/xbox/nv2a/nv2a_pcrtc.c new file mode 100644 index 0000000000..b190c8046e --- /dev/null +++ b/hw/xbox/nv2a/nv2a_pcrtc.c @@ -0,0 +1,72 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +uint64_t pcrtc_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + uint64_t r = 0; + switch (addr) { + case NV_PCRTC_INTR_0: + r = d->pcrtc.pending_interrupts; + break; + case NV_PCRTC_INTR_EN_0: + r = d->pcrtc.enabled_interrupts; + break; + case NV_PCRTC_START: + r = d->pcrtc.start; + break; + default: + break; + } + + reg_log_read(NV_PCRTC, addr, r); + return r; +} + +void pcrtc_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + reg_log_write(NV_PCRTC, addr, val); + + switch (addr) { + case NV_PCRTC_INTR_0: + d->pcrtc.pending_interrupts &= ~val; + update_irq(d); + break; + case NV_PCRTC_INTR_EN_0: + d->pcrtc.enabled_interrupts = val; + update_irq(d); + break; + case NV_PCRTC_START: + val &= 0x07FFFFFF; + // assert(val < memory_region_size(d->vram)); + d->pcrtc.start = val; + + NV2A_DPRINTF("PCRTC_START - %x %x %x %x\n", + d->vram_ptr[val+64], d->vram_ptr[val+64+1], + d->vram_ptr[val+64+2], d->vram_ptr[val+64+3]); + break; + default: + break; + } +} diff --git a/hw/xbox/nv2a/nv2a_pfb.c b/hw/xbox/nv2a/nv2a_pfb.c new file mode 100644 index 0000000000..daadc69c70 --- /dev/null +++ b/hw/xbox/nv2a/nv2a_pfb.c @@ -0,0 +1,58 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +uint64_t pfb_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + uint64_t r = 0; + switch (addr) { + case NV_PFB_CFG0: + /* 3-4 memory partitions. The debug bios checks this. */ + r = 3; + break; + case NV_PFB_CSTATUS: + r = memory_region_size(d->vram); + break; + case NV_PFB_WBC: + r = 0; /* Flush not pending. */ + break; + default: + r = d->pfb.regs[addr]; + break; + } + + reg_log_read(NV_PFB, addr, r); + return r; +} + +void pfb_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + reg_log_write(NV_PFB, addr, val); + + switch (addr) { + default: + d->pfb.regs[addr] = val; + break; + } +} diff --git a/hw/xbox/nv2a/nv2a_pfifo.c b/hw/xbox/nv2a/nv2a_pfifo.c new file mode 100644 index 0000000000..748f29bf7a --- /dev/null +++ b/hw/xbox/nv2a/nv2a_pfifo.c @@ -0,0 +1,513 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +typedef struct RAMHTEntry { + uint32_t handle; + hwaddr instance; + enum FIFOEngine engine; + unsigned int channel_id : 5; + bool valid; +} RAMHTEntry; + +static void pfifo_run_pusher(NV2AState *d); +void *pfifo_puller_thread(void *opaque); +static uint32_t ramht_hash(NV2AState *d, uint32_t handle); +static RAMHTEntry ramht_lookup(NV2AState *d, uint32_t handle); + +/* PFIFO - MMIO and DMA FIFO submission to PGRAPH and VPE */ +uint64_t pfifo_read(void *opaque, hwaddr addr, unsigned int size) +{ + int i; + NV2AState *d = (NV2AState *)opaque; + + uint64_t r = 0; + switch (addr) { + case NV_PFIFO_INTR_0: + r = d->pfifo.pending_interrupts; + break; + case NV_PFIFO_INTR_EN_0: + r = d->pfifo.enabled_interrupts; + break; + case NV_PFIFO_RUNOUT_STATUS: + r = NV_PFIFO_RUNOUT_STATUS_LOW_MARK; /* low mark empty */ + break; + case NV_PFIFO_CACHE1_PUSH0: + r = d->pfifo.cache1.push_enabled; + break; + case NV_PFIFO_CACHE1_PUSH1: + SET_MASK(r, NV_PFIFO_CACHE1_PUSH1_CHID, d->pfifo.cache1.channel_id); + SET_MASK(r, NV_PFIFO_CACHE1_PUSH1_MODE, d->pfifo.cache1.mode); + break; + case NV_PFIFO_CACHE1_STATUS: + qemu_mutex_lock(&d->pfifo.cache1.cache_lock); + if (QSIMPLEQ_EMPTY(&d->pfifo.cache1.cache)) { + r |= NV_PFIFO_CACHE1_STATUS_LOW_MARK; /* low mark empty */ + } + qemu_mutex_unlock(&d->pfifo.cache1.cache_lock); + break; + case NV_PFIFO_CACHE1_DMA_PUSH: + SET_MASK(r, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS, + d->pfifo.cache1.dma_push_enabled); + SET_MASK(r, NV_PFIFO_CACHE1_DMA_PUSH_STATUS, + d->pfifo.cache1.dma_push_suspended); + SET_MASK(r, NV_PFIFO_CACHE1_DMA_PUSH_BUFFER, 1); /* buffer emoty */ + break; + case NV_PFIFO_CACHE1_DMA_STATE: + SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE, + d->pfifo.cache1.method_nonincreasing); + SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_METHOD, + d->pfifo.cache1.method >> 2); + SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL, + d->pfifo.cache1.subchannel); + SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT, + d->pfifo.cache1.method_count); + SET_MASK(r, NV_PFIFO_CACHE1_DMA_STATE_ERROR, + d->pfifo.cache1.error); + break; + case NV_PFIFO_CACHE1_DMA_INSTANCE: + SET_MASK(r, NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS, + d->pfifo.cache1.dma_instance >> 4); + break; + case NV_PFIFO_CACHE1_DMA_PUT: + r = d->user.channel_control[d->pfifo.cache1.channel_id].dma_put; + break; + case NV_PFIFO_CACHE1_DMA_GET: + r = d->user.channel_control[d->pfifo.cache1.channel_id].dma_get; + break; + case NV_PFIFO_CACHE1_DMA_SUBROUTINE: + r = d->pfifo.cache1.subroutine_return + | d->pfifo.cache1.subroutine_active; + break; + case NV_PFIFO_CACHE1_PULL0: + qemu_mutex_lock(&d->pfifo.cache1.cache_lock); + r = d->pfifo.cache1.pull_enabled; + qemu_mutex_unlock(&d->pfifo.cache1.cache_lock); + break; + case NV_PFIFO_CACHE1_ENGINE: + qemu_mutex_lock(&d->pfifo.cache1.cache_lock); + for (i=0; ipfifo.cache1.bound_engines[i] << (i*2); + } + qemu_mutex_unlock(&d->pfifo.cache1.cache_lock); + break; + case NV_PFIFO_CACHE1_DMA_DCOUNT: + r = d->pfifo.cache1.dcount; + break; + case NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW: + r = d->pfifo.cache1.get_jmp_shadow; + break; + case NV_PFIFO_CACHE1_DMA_RSVD_SHADOW: + r = d->pfifo.cache1.rsvd_shadow; + break; + case NV_PFIFO_CACHE1_DMA_DATA_SHADOW: + r = d->pfifo.cache1.data_shadow; + break; + default: + r = d->pfifo.regs[addr]; + break; + } + + reg_log_read(NV_PFIFO, addr, r); + return r; +} + +void pfifo_write(void 
*opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + int i; + NV2AState *d = (NV2AState *)opaque; + + reg_log_write(NV_PFIFO, addr, val); + + switch (addr) { + case NV_PFIFO_INTR_0: + d->pfifo.pending_interrupts &= ~val; + update_irq(d); + break; + case NV_PFIFO_INTR_EN_0: + d->pfifo.enabled_interrupts = val; + update_irq(d); + break; + + case NV_PFIFO_CACHE1_PUSH0: + d->pfifo.cache1.push_enabled = val & NV_PFIFO_CACHE1_PUSH0_ACCESS; + break; + case NV_PFIFO_CACHE1_PUSH1: + d->pfifo.cache1.channel_id = GET_MASK(val, NV_PFIFO_CACHE1_PUSH1_CHID); + d->pfifo.cache1.mode = (enum FifoMode)GET_MASK(val, NV_PFIFO_CACHE1_PUSH1_MODE); + assert(d->pfifo.cache1.channel_id < NV2A_NUM_CHANNELS); + break; + case NV_PFIFO_CACHE1_DMA_PUSH: + d->pfifo.cache1.dma_push_enabled = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS); + if (d->pfifo.cache1.dma_push_suspended + && !GET_MASK(val, NV_PFIFO_CACHE1_DMA_PUSH_STATUS)) { + d->pfifo.cache1.dma_push_suspended = false; + pfifo_run_pusher(d); + } + d->pfifo.cache1.dma_push_suspended = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_PUSH_STATUS); + break; + case NV_PFIFO_CACHE1_DMA_STATE: + d->pfifo.cache1.method_nonincreasing = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE); + d->pfifo.cache1.method = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_METHOD) << 2; + d->pfifo.cache1.subchannel = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL); + d->pfifo.cache1.method_count = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT); + d->pfifo.cache1.error = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_STATE_ERROR); + break; + case NV_PFIFO_CACHE1_DMA_INSTANCE: + d->pfifo.cache1.dma_instance = + GET_MASK(val, NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS) << 4; + break; + case NV_PFIFO_CACHE1_DMA_PUT: + d->user.channel_control[d->pfifo.cache1.channel_id].dma_put = val; + break; + case NV_PFIFO_CACHE1_DMA_GET: + d->user.channel_control[d->pfifo.cache1.channel_id].dma_get = val; + break; + case NV_PFIFO_CACHE1_DMA_SUBROUTINE: + d->pfifo.cache1.subroutine_return = + (val & NV_PFIFO_CACHE1_DMA_SUBROUTINE_RETURN_OFFSET); + d->pfifo.cache1.subroutine_active = + (val & NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE); + break; + case NV_PFIFO_CACHE1_PULL0: + qemu_mutex_lock(&d->pfifo.cache1.cache_lock); + if ((val & NV_PFIFO_CACHE1_PULL0_ACCESS) + && !d->pfifo.cache1.pull_enabled) { + d->pfifo.cache1.pull_enabled = true; + + /* the puller thread should wake up */ + qemu_cond_signal(&d->pfifo.cache1.cache_cond); + } else if (!(val & NV_PFIFO_CACHE1_PULL0_ACCESS) + && d->pfifo.cache1.pull_enabled) { + d->pfifo.cache1.pull_enabled = false; + } + qemu_mutex_unlock(&d->pfifo.cache1.cache_lock); + break; + case NV_PFIFO_CACHE1_ENGINE: + qemu_mutex_lock(&d->pfifo.cache1.cache_lock); + for (i=0; ipfifo.cache1.bound_engines[i] = (enum FIFOEngine)((val >> (i*2)) & 3); + } + qemu_mutex_unlock(&d->pfifo.cache1.cache_lock); + break; + case NV_PFIFO_CACHE1_DMA_DCOUNT: + d->pfifo.cache1.dcount = + (val & NV_PFIFO_CACHE1_DMA_DCOUNT_VALUE); + break; + case NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW: + d->pfifo.cache1.get_jmp_shadow = + (val & NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW_OFFSET); + break; + case NV_PFIFO_CACHE1_DMA_RSVD_SHADOW: + d->pfifo.cache1.rsvd_shadow = val; + break; + case NV_PFIFO_CACHE1_DMA_DATA_SHADOW: + d->pfifo.cache1.data_shadow = val; + break; + default: + d->pfifo.regs[addr] = val; + break; + } +} + + +/* pusher should be fine to run from a mimo handler + * whenever's it's convenient */ +static void pfifo_run_pusher(NV2AState *d) { + uint8_t channel_id; + ChannelControl *control; + Cache1State 
*state; + CacheEntry *command; + uint8_t *dma; + hwaddr dma_len; + uint32_t word; + + /* TODO: How is cache1 selected? */ + state = &d->pfifo.cache1; + channel_id = state->channel_id; + control = &d->user.channel_control[channel_id]; + + if (!state->push_enabled) return; + + + /* only handling DMA for now... */ + + /* Channel running DMA */ + uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE]; + assert(channel_modes & (1 << channel_id)); + assert(state->mode == FIFO_DMA); + + if (!state->dma_push_enabled) return; + if (state->dma_push_suspended) return; + + /* We're running so there should be no pending errors... */ + assert(state->error == NV_PFIFO_CACHE1_DMA_STATE_ERROR_NONE); + + dma = (uint8_t*)nv_dma_map(d, state->dma_instance, &dma_len); + + NV2A_DPRINTF("DMA pusher: max 0x%" HWADDR_PRIx ", 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx "\n", + dma_len, control->dma_get, control->dma_put); + + /* based on the convenient pseudocode in envytools */ + while (control->dma_get != control->dma_put) { + if (control->dma_get >= dma_len) { + + state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_PROTECTION; + break; + } + + word = ldl_le_p((uint32_t*)(dma + control->dma_get)); + control->dma_get += 4; + + if (state->method_count) { + /* data word of methods command */ + state->data_shadow = word; + + command = (CacheEntry*)g_malloc0(sizeof(CacheEntry)); + command->method = state->method; + command->subchannel = state->subchannel; + command->nonincreasing = state->method_nonincreasing; + command->parameter = word; + qemu_mutex_lock(&state->cache_lock); + QSIMPLEQ_INSERT_TAIL(&state->cache, command, entry); + qemu_cond_signal(&state->cache_cond); + qemu_mutex_unlock(&state->cache_lock); + + if (!state->method_nonincreasing) { + state->method += 4; + } + state->method_count--; + state->dcount++; + } else { + /* no command active - this is the first word of a new one */ + state->rsvd_shadow = word; + /* match all forms */ + if ((word & 0xe0000003) == 0x20000000) { + /* old jump */ + state->get_jmp_shadow = control->dma_get; + control->dma_get = word & 0x1fffffff; + NV2A_DPRINTF("pb OLD_JMP 0x%" HWADDR_PRIx "\n", control->dma_get); + } else if ((word & 3) == 1) { + /* jump */ + state->get_jmp_shadow = control->dma_get; + control->dma_get = word & 0xfffffffc; + NV2A_DPRINTF("pb JMP 0x%" HWADDR_PRIx "\n", control->dma_get); + } else if ((word & 3) == 2) { + /* call */ + if (state->subroutine_active) { + state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL; + break; + } + state->subroutine_return = control->dma_get; + state->subroutine_active = true; + control->dma_get = word & 0xfffffffc; + NV2A_DPRINTF("pb CALL 0x%" HWADDR_PRIx "\n", control->dma_get); + } else if (word == 0x00020000) { + /* return */ + if (!state->subroutine_active) { + state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_RETURN; + break; + } + control->dma_get = state->subroutine_return; + state->subroutine_active = false; + NV2A_DPRINTF("pb RET 0x%" HWADDR_PRIx "\n", control->dma_get); + } else if ((word & 0xe0030003) == 0) { + /* increasing methods */ + state->method = word & 0x1fff; + state->subchannel = (word >> 13) & 7; + state->method_count = (word >> 18) & 0x7ff; + state->method_nonincreasing = false; + state->dcount = 0; + } else if ((word & 0xe0030003) == 0x40000000) { + /* non-increasing methods */ + state->method = word & 0x1fff; + state->subchannel = (word >> 13) & 7; + state->method_count = (word >> 18) & 0x7ff; + state->method_nonincreasing = true; + state->dcount = 0; + } else { + NV2A_DPRINTF("pb reserved cmd 0x%" HWADDR_PRIx " - 
0x%x\n", + control->dma_get, word); + state->error = NV_PFIFO_CACHE1_DMA_STATE_ERROR_RESERVED_CMD; + break; + } + } + } + + NV2A_DPRINTF("DMA pusher done: max 0x%" HWADDR_PRIx ", 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx "\n", + dma_len, control->dma_get, control->dma_put); + + if (state->error) { + NV2A_DPRINTF("pb error: %d\n", state->error); + assert(false); + + state->dma_push_suspended = true; + + d->pfifo.pending_interrupts |= NV_PFIFO_INTR_0_DMA_PUSHER; + update_irq(d); + } +} + +void *pfifo_puller_thread(void *opaque) +{ + NV2AState *d = (NV2AState*)opaque; + Cache1State *state = &d->pfifo.cache1; + + glo_set_current(d->pgraph.gl_context); + + while (true) { + qemu_mutex_lock(&state->cache_lock); + while (QSIMPLEQ_EMPTY(&state->cache) || !state->pull_enabled) { + qemu_cond_wait(&state->cache_cond, &state->cache_lock); + + if (d->exiting) { + qemu_mutex_unlock(&state->cache_lock); + glo_set_current(NULL); + return 0; + } + } + QSIMPLEQ_CONCAT(&state->working_cache, &state->cache); + qemu_mutex_unlock(&state->cache_lock); + + qemu_mutex_lock(&d->pgraph.lock); + + while (!QSIMPLEQ_EMPTY(&state->working_cache)) { + CacheEntry * command = QSIMPLEQ_FIRST(&state->working_cache); + QSIMPLEQ_REMOVE_HEAD(&state->working_cache, entry); + + if (command->method == 0) { + // qemu_mutex_lock_iothread(); + RAMHTEntry entry = ramht_lookup(d, command->parameter); + assert(entry.valid); + + assert(entry.channel_id == state->channel_id); + // qemu_mutex_unlock_iothread(); + + switch (entry.engine) { + case ENGINE_GRAPHICS: + pgraph_context_switch(d, entry.channel_id); + pgraph_wait_fifo_access(d); + pgraph_method(d, command->subchannel, 0, entry.instance); + break; + default: + assert(false); + break; + } + + /* the engine is bound to the subchannel */ + qemu_mutex_lock(&state->cache_lock); + state->bound_engines[command->subchannel] = entry.engine; + state->last_engine = entry.engine; + qemu_mutex_unlock(&state->cache_lock); + } else if (command->method >= 0x100) { + /* method passed to engine */ + + uint32_t parameter = command->parameter; + + /* methods that take objects. + * TODO: Check this range is correct for the nv2a */ + if (command->method >= 0x180 && command->method < 0x200) { + //qemu_mutex_lock_iothread(); + RAMHTEntry entry = ramht_lookup(d, parameter); + assert(entry.valid); + assert(entry.channel_id == state->channel_id); + parameter = entry.instance; + //qemu_mutex_unlock_iothread(); + } + + // qemu_mutex_lock(&state->cache_lock); + enum FIFOEngine engine = state->bound_engines[command->subchannel]; + // qemu_mutex_unlock(&state->cache_lock); + + switch (engine) { + case ENGINE_GRAPHICS: + pgraph_wait_fifo_access(d); + pgraph_method(d, command->subchannel, + command->method, parameter); + break; + default: + assert(false); + break; + } + + // qemu_mutex_lock(&state->cache_lock); + state->last_engine = state->bound_engines[command->subchannel]; + // qemu_mutex_unlock(&state->cache_lock); + } + + g_free(command); + } + + qemu_mutex_unlock(&d->pgraph.lock); + } + + return 0; +} + +static uint32_t ramht_hash(NV2AState *d, uint32_t handle) +{ + unsigned int ramht_size = + 1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12); + + /* XXX: Think this is different to what nouveau calculates... 
*/ + unsigned int bits = ffs(ramht_size)-2; + + uint32_t hash = 0; + while (handle) { + hash ^= (handle & ((1 << bits) - 1)); + handle >>= bits; + } + hash ^= d->pfifo.cache1.channel_id << (bits - 4); + + return hash; +} + +static RAMHTEntry ramht_lookup(NV2AState *d, uint32_t handle) +{ + unsigned int ramht_size = + 1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12); + + uint32_t hash = ramht_hash(d, handle); + assert(hash * 8 < ramht_size); + + uint32_t ramht_address = + GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], + NV_PFIFO_RAMHT_BASE_ADDRESS) << 12; + + uint8_t *entry_ptr = d->ramin_ptr + ramht_address + hash * 8; + + uint32_t entry_handle = ldl_le_p((uint32_t*)entry_ptr); + uint32_t entry_context = ldl_le_p((uint32_t*)(entry_ptr + 4)); + + return (RAMHTEntry){ + .handle = entry_handle, + .instance = (entry_context & NV_RAMHT_INSTANCE) << 4, + .engine = (enum FIFOEngine)((entry_context & NV_RAMHT_ENGINE) >> 16), + .channel_id = (entry_context & NV_RAMHT_CHID) >> 24, + .valid = entry_context & NV_RAMHT_STATUS, + }; +} diff --git a/hw/xbox/nv2a.c b/hw/xbox/nv2a/nv2a_pgraph.c similarity index 69% rename from hw/xbox/nv2a.c rename to hw/xbox/nv2a/nv2a_pgraph.c index b444931bbb..c2cefa9bfb 100644 --- a/hw/xbox/nv2a.c +++ b/hw/xbox/nv2a/nv2a_pgraph.c @@ -3,6 +3,7 @@ * * Copyright (c) 2012 espes * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -17,28 +18,8 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ -#include "hw/hw.h" -#include "hw/i386/pc.h" -#include "ui/console.h" -#include "hw/pci/pci.h" -#include "ui/console.h" -#include "hw/display/vga.h" -#include "hw/display/vga_int.h" -#include "qemu/queue.h" -#include "qemu/thread.h" -#include "qapi/qmp/qstring.h" -#include "gl/gloffscreen.h" -#include "gl/glextensions.h" -#include "hw/xbox/g-lru-cache.h" -#include "hw/xbox/swizzle.h" -#include "hw/xbox/nv2a_shaders.h" -#include "hw/xbox/nv2a_debug.h" - -#include "hw/xbox/nv2a.h" -#include "hw/xbox/nv2a_int.h" - -#define USE_TEXTURE_CACHE +#include "xxhash.h" static const GLenum pgraph_texture_min_filter_map[] = { 0, @@ -280,2316 +261,183 @@ static const SurfaceColorFormatInfo kelvin_surface_color_format_map[] = { {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, }; -#define GET_MASK(v, mask) (((v) & (mask)) >> (ffs(mask)-1)) - -#define SET_MASK(v, mask, val) ({ \ - const unsigned int __val = (val); \ - const unsigned int __mask = (mask); \ - (v) &= ~(__mask); \ - (v) |= ((__val) << (ffs(__mask)-1)) & (__mask); \ - }) - -#define CASE_4(v, step) \ - case (v): \ - case (v)+(step): \ - case (v)+(step)*2: \ - case (v)+(step)*3 - - -enum FIFOEngine { - ENGINE_SOFTWARE = 0, - ENGINE_GRAPHICS = 1, - ENGINE_DVD = 2, -}; - -typedef struct RAMHTEntry { - uint32_t handle; - hwaddr instance; - enum FIFOEngine engine; - unsigned int channel_id : 5; - bool valid; -} RAMHTEntry; - -typedef struct DMAObject { - unsigned int dma_class; - unsigned int dma_target; - hwaddr address; - hwaddr limit; -} DMAObject; - -typedef struct VertexAttribute { - bool dma_select; - hwaddr offset; - - /* inline arrays are packed in order? 
- * Need to pass the offset to converted attributes */ - unsigned int inline_array_offset; - - float inline_value[4]; - - unsigned int format; - unsigned int size; /* size of the data type */ - unsigned int count; /* number of components */ - uint32_t stride; - - bool needs_conversion; - uint8_t *converted_buffer; - unsigned int converted_elements; - unsigned int converted_size; - unsigned int converted_count; - - float *inline_buffer; - - GLint gl_count; - GLenum gl_type; - GLboolean gl_normalize; - - GLuint gl_converted_buffer; - GLuint gl_inline_buffer; -} VertexAttribute; - -typedef struct Surface { - bool draw_dirty; - bool buffer_dirty; - bool write_enabled_cache; - unsigned int pitch; - - hwaddr offset; -} Surface; - -typedef struct SurfaceShape { - unsigned int z_format; - unsigned int color_format; - unsigned int zeta_format; - unsigned int log_width, log_height; - unsigned int clip_x, clip_y; - unsigned int clip_width, clip_height; - unsigned int anti_aliasing; -} SurfaceShape; - -typedef struct TextureShape { - bool cubemap; - unsigned int dimensionality; - unsigned int color_format; - unsigned int levels; - unsigned int width, height, depth; - - unsigned int min_mipmap_level, max_mipmap_level; - unsigned int pitch; -} TextureShape; - -typedef struct TextureKey { - TextureShape state; - uint64_t data_hash; - uint8_t* texture_data; - uint8_t* palette_data; -} TextureKey; - -typedef struct TextureBinding { - GLenum gl_target; - GLuint gl_texture; - unsigned int refcnt; -} TextureBinding; - - -typedef struct KelvinState { - hwaddr object_instance; -} KelvinState; - -typedef struct ContextSurfaces2DState { - hwaddr object_instance; - hwaddr dma_image_source; - hwaddr dma_image_dest; - unsigned int color_format; - unsigned int source_pitch, dest_pitch; - hwaddr source_offset, dest_offset; -} ContextSurfaces2DState; - -typedef struct ImageBlitState { - hwaddr object_instance; - hwaddr context_surfaces; - unsigned int operation; - unsigned int in_x, in_y; - unsigned int out_x, out_y; - unsigned int width, height; -} ImageBlitState; - - -typedef struct PGRAPHState { - QemuMutex lock; - - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - QemuCond interrupt_cond; - - /* subchannels state we're not sure the location of... */ - ContextSurfaces2DState context_surfaces_2d; - ImageBlitState image_blit; - KelvinState kelvin; - - QemuCond fifo_access_cond; - QemuCond flip_3d; - - hwaddr dma_color, dma_zeta; - Surface surface_color, surface_zeta; - unsigned int surface_type; - SurfaceShape surface_shape; - SurfaceShape last_surface_shape; - - hwaddr dma_a, dma_b; - GLruCache *texture_cache; - bool texture_dirty[NV2A_MAX_TEXTURES]; - TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; - - GHashTable *shader_cache; - ShaderBinding *shader_binding; - - bool texture_matrix_enable[NV2A_MAX_TEXTURES]; - - /* FIXME: Move to NV_PGRAPH_BUMPMAT... 
*/ - float bump_env_matrix[NV2A_MAX_TEXTURES-1][4]; /* 3 allowed stages with 2x2 matrix each */ - - GloContext *gl_context; - GLuint gl_framebuffer; - GLuint gl_color_buffer, gl_zeta_buffer; - - hwaddr dma_state; - hwaddr dma_notifies; - hwaddr dma_semaphore; - - hwaddr dma_report; - hwaddr report_offset; - bool zpass_pixel_count_enable; - unsigned int zpass_pixel_count_result; - unsigned int gl_zpass_pixel_count_query_count; - GLuint* gl_zpass_pixel_count_queries; - - hwaddr dma_vertex_a, dma_vertex_b; - - unsigned int primitive_mode; - - bool enable_vertex_program_write; - - uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; - - uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; - - /* lighting constant arrays */ - uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; - bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; - uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; - bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; - uint32_t ltc1[NV2A_LTC1_COUNT][4]; - bool ltc1_dirty[NV2A_LTC1_COUNT]; - - // should figure out where these are in lighting context - float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; - float light_infinite_direction[NV2A_MAX_LIGHTS][3]; - float light_local_position[NV2A_MAX_LIGHTS][3]; - float light_local_attenuation[NV2A_MAX_LIGHTS][3]; - - VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; - - unsigned int inline_array_length; - uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; - GLuint gl_inline_array_buffer; - - unsigned int inline_elements_length; - uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; - - unsigned int inline_buffer_length; - - unsigned int draw_arrays_length; - unsigned int draw_arrays_max_count; - /* FIXME: Unknown size, possibly endless, 1000 will do for now */ - GLint gl_draw_arrays_start[1000]; - GLsizei gl_draw_arrays_count[1000]; - - GLuint gl_element_buffer; - GLuint gl_memory_buffer; - - GLuint gl_vertex_array; - - uint32_t regs[0x2000]; -} PGRAPHState; - - -typedef struct NV2AState { - PCIDevice dev; - qemu_irq irq; - - bool exiting; - - VGACommonState vga; - GraphicHwOps hw_ops; - - QEMUTimer *vblank_timer; - - MemoryRegion *vram; - MemoryRegion vram_pci; - uint8_t *vram_ptr; - MemoryRegion ramin; - uint8_t *ramin_ptr; - - MemoryRegion mmio; - - MemoryRegion block_mmio[NV_NUM_BLOCKS]; - - struct { - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - } pmc; - - struct { - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - - QemuMutex lock; - QemuThread puller_thread; - QemuCond puller_cond; - QemuThread pusher_thread; - QemuCond pusher_cond; - - uint32_t regs[0x2000]; - } pfifo; - - struct { - uint32_t regs[0x1000]; - } pvideo; - - struct { - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - - uint32_t numerator; - uint32_t denominator; - - uint32_t alarm_time; - } ptimer; - - struct { - uint32_t regs[0x1000]; - } pfb; - - struct PGRAPHState pgraph; - - struct { - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - - hwaddr start; - } pcrtc; - - struct { - uint32_t core_clock_coeff; - uint64_t core_clock_freq; - uint32_t memory_clock_coeff; - uint32_t video_clock_coeff; - } pramdac; - -} NV2AState; - - -#define NV2A_DEVICE(obj) \ - OBJECT_CHECK(NV2AState, (obj), "nv2a") - -static void reg_log_read(int block, hwaddr addr, uint64_t val); -static void reg_log_write(int block, hwaddr addr, uint64_t val); -static void pgraph_method_log(unsigned int subchannel, - unsigned int graphics_class, - unsigned int method, uint32_t parameter); - -static uint64_t 
fnv_hash(const uint8_t *data, size_t len) +uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size); +void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size); + +static void pgraph_context_switch(NV2AState *d, unsigned int channel_id); +static void pgraph_set_context_user(NV2AState *d, uint32_t val); +static void pgraph_wait_fifo_access(NV2AState *d); +static void pgraph_method_log(unsigned int subchannel, unsigned int graphics_class, unsigned int method, uint32_t parameter); +static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr); +static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); +static void pgraph_shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function); +static void pgraph_bind_shaders(PGRAPHState *pg); +static bool pgraph_framebuffer_dirty(PGRAPHState *pg); +static bool pgraph_color_write_enabled(PGRAPHState *pg); +static bool pgraph_zeta_write_enabled(PGRAPHState *pg); +static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); +static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color); +static void pgraph_update_surface(NV2AState *d, bool upload, bool color_write, bool zeta_write); +static void pgraph_bind_textures(NV2AState *d); +static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height); +static void pgraph_get_surface_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height); +static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, bool f); +static void pgraph_bind_vertex_attributes(NV2AState *d, unsigned int num_elements, bool inline_data, unsigned int inline_stride); +static unsigned int pgraph_bind_inline_array(NV2AState *d); +static void load_graphics_object(NV2AState *d, hwaddr instance_address, GraphicsObject *obj); +static GraphicsObject* lookup_graphics_object(PGRAPHState *s, hwaddr instance_address); +static float convert_f16_to_float(uint16_t f16); +static float convert_f24_to_float(uint32_t f24); +static uint8_t cliptobyte(int x); +static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b); +static uint8_t* convert_texture_data(const TextureShape s, const uint8_t *data, const uint8_t *palette_data, unsigned int width, unsigned int height, unsigned int depth, unsigned int row_pitch, unsigned int slice_pitch); +static void upload_gl_texture(GLenum gl_target, const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); +static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); +static guint texture_key_hash(gconstpointer key); +static gboolean texture_key_equal(gconstpointer a, gconstpointer b); +static gpointer texture_key_retrieve(gpointer key, gpointer user_data, GError **error); +static void texture_key_destroy(gpointer data); +static void texture_binding_destroy(gpointer data); +static guint shader_hash(gconstpointer key); +static gboolean shader_equal(gconstpointer a, gconstpointer b); +static unsigned int kelvin_map_stencil_op(uint32_t parameter); +static unsigned int kelvin_map_polygon_mode(uint32_t parameter); +static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel); +static uint64_t fnv_hash(const uint8_t *data, size_t len); +static uint64_t fast_hash(const uint8_t *data, size_t len, unsigned int samples); + +/* PGRAPH - accelerated 2d/3d 
drawing engine */ +uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size) { - /* 64 bit Fowler/Noll/Vo FNV-1a hash code */ - uint64_t hval = 0xcbf29ce484222325ULL; - const uint8_t *dp = data; - const uint8_t *de = data + len; - while (dp < de) { - hval ^= (uint64_t) *dp++; - hval += (hval << 1) + (hval << 4) + (hval << 5) + - (hval << 7) + (hval << 8) + (hval << 40); - } + NV2AState *d = (NV2AState *)opaque; - return (guint)hval; -} + qemu_mutex_lock(&d->pgraph.lock); -static uint64_t fast_hash(const uint8_t *data, size_t len, unsigned int samples) -{ -#ifdef __SSE4_2__ - uint64_t h[4] = {len, 0, 0, 0}; - assert(samples > 0); - - if (len < 8 || len % 8) { - return fnv_hash(data, len); - } - - assert(len >= 8 && len % 8 == 0); - const uint64_t *dp = (const uint64_t*)data; - const uint64_t *de = dp + (len / 8); - size_t step = len / 8 / samples; - if (step == 0) step = 1; - - while (dp < de - step * 3) { - h[0] = __builtin_ia32_crc32di(h[0], dp[step * 0]); - h[1] = __builtin_ia32_crc32di(h[1], dp[step * 1]); - h[2] = __builtin_ia32_crc32di(h[2], dp[step * 2]); - h[3] = __builtin_ia32_crc32di(h[3], dp[step * 3]); - dp += step * 4; - } - if (dp < de - step * 0) - h[0] = __builtin_ia32_crc32di(h[0], dp[step * 0]); - if (dp < de - step * 1) - h[1] = __builtin_ia32_crc32di(h[1], dp[step * 1]); - if (dp < de - step * 2) - h[2] = __builtin_ia32_crc32di(h[2], dp[step * 2]); - - return h[0] + (h[1] << 10) + (h[2] << 21) + (h[3] << 32); -#else - return fnv_hash(data, len); -#endif -} - -static void update_irq(NV2AState *d) -{ - /* PFIFO */ - if (d->pfifo.pending_interrupts & d->pfifo.enabled_interrupts) { - d->pmc.pending_interrupts |= NV_PMC_INTR_0_PFIFO; - } else { - d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PFIFO; - } - - /* PCRTC */ - if (d->pcrtc.pending_interrupts & d->pcrtc.enabled_interrupts) { - d->pmc.pending_interrupts |= NV_PMC_INTR_0_PCRTC; - } else { - d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PCRTC; - } - - /* PGRAPH */ - if (d->pgraph.pending_interrupts & d->pgraph.enabled_interrupts) { - d->pmc.pending_interrupts |= NV_PMC_INTR_0_PGRAPH; - } else { - d->pmc.pending_interrupts &= ~NV_PMC_INTR_0_PGRAPH; - } - - if (d->pmc.pending_interrupts && d->pmc.enabled_interrupts) { - NV2A_DPRINTF("raise irq\n"); - pci_irq_assert(&d->dev); - } else { - pci_irq_deassert(&d->dev); - } -} - -static uint32_t ramht_hash(NV2AState *d, uint32_t handle) -{ - unsigned int ramht_size = - 1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12); - - /* XXX: Think this is different to what nouveau calculates... 
*/ - unsigned int bits = ffs(ramht_size)-2; - - uint32_t hash = 0; - while (handle) { - hash ^= (handle & ((1 << bits) - 1)); - handle >>= bits; - } - - unsigned int channel_id = GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1], - NV_PFIFO_CACHE1_PUSH1_CHID); - hash ^= channel_id << (bits - 4); - - return hash; -} - - -static RAMHTEntry ramht_lookup(NV2AState *d, uint32_t handle) -{ - hwaddr ramht_size = - 1 << (GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], NV_PFIFO_RAMHT_SIZE)+12); - - uint32_t hash = ramht_hash(d, handle); - assert(hash * 8 < ramht_size); - - hwaddr ramht_address = - GET_MASK(d->pfifo.regs[NV_PFIFO_RAMHT], - NV_PFIFO_RAMHT_BASE_ADDRESS) << 12; - - assert(ramht_address + hash * 8 < memory_region_size(&d->ramin)); - - uint8_t *entry_ptr = d->ramin_ptr + ramht_address + hash * 8; - - uint32_t entry_handle = ldl_le_p((uint32_t*)entry_ptr); - uint32_t entry_context = ldl_le_p((uint32_t*)(entry_ptr + 4)); - - return (RAMHTEntry){ - .handle = entry_handle, - .instance = (entry_context & NV_RAMHT_INSTANCE) << 4, - .engine = (entry_context & NV_RAMHT_ENGINE) >> 16, - .channel_id = (entry_context & NV_RAMHT_CHID) >> 24, - .valid = entry_context & NV_RAMHT_STATUS, - }; -} - -static DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address) -{ - assert(dma_obj_address < memory_region_size(&d->ramin)); - - uint32_t *dma_obj = (uint32_t*)(d->ramin_ptr + dma_obj_address); - uint32_t flags = ldl_le_p(dma_obj); - uint32_t limit = ldl_le_p(dma_obj + 1); - uint32_t frame = ldl_le_p(dma_obj + 2); - - return (DMAObject){ - .dma_class = GET_MASK(flags, NV_DMA_CLASS), - .dma_target = GET_MASK(flags, NV_DMA_TARGET), - .address = (frame & NV_DMA_ADDRESS) | GET_MASK(flags, NV_DMA_ADJUST), - .limit = limit, - }; -} - -static void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len) -{ - DMAObject dma = nv_dma_load(d, dma_obj_address); - - /* TODO: Handle targets and classes properly */ - NV2A_DPRINTF("dma_map %" HWADDR_PRIx " - %x, %x, %" HWADDR_PRIx " %" HWADDR_PRIx "\n", - dma_obj_address, - dma.dma_class, dma.dma_target, dma.address, dma.limit); - - dma.address &= 0x07FFFFFF; - - assert(dma.address < memory_region_size(d->vram)); - // assert(dma.address + dma.limit < memory_region_size(d->vram)); - *len = dma.limit; - return d->vram_ptr + dma.address; -} - -/* 16 bit to [0.0, F16_MAX = 511.9375] */ -static float convert_f16_to_float(uint16_t f16) { - if (f16 == 0x0000) { return 0.0; } - uint32_t i = (f16 << 11) + 0x3C000000; - return *(float*)&i; -} - -/* 24 bit to [0.0, F24_MAX] */ -static float convert_f24_to_float(uint32_t f24) { - assert(!(f24 >> 24)); - f24 &= 0xFFFFFF; - if (f24 == 0x000000) { return 0.0; } - uint32_t i = f24 << 7; - return *(float*)&i; -} - -static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, - bool f) -{ - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); - - hwaddr end = TARGET_PAGE_ALIGN(addr + size); - addr &= TARGET_PAGE_MASK; - assert(end < memory_region_size(d->vram)); - if (f || memory_region_test_and_clear_dirty(d->vram, - addr, - end - addr, - DIRTY_MEMORY_NV2A)) { - glBufferSubData(GL_ARRAY_BUFFER, addr, end - addr, d->vram_ptr + addr); - } -} - -static void pgraph_bind_vertex_attributes(NV2AState *d, - unsigned int num_elements, - bool inline_data, - unsigned int inline_stride) -{ - int i, j; - PGRAPHState *pg = &d->pgraph; - - if (inline_data) { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", - __func__, num_elements, inline_stride); - } else { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, 
num_elements); - } - - for (i=0; ivertex_attributes[i]; - if (attribute->count) { - uint8_t *data; - unsigned int in_stride; - if (inline_data && attribute->needs_conversion) { - data = (uint8_t*)pg->inline_array - + attribute->inline_array_offset; - in_stride = inline_stride; - } else { - hwaddr dma_len; - if (attribute->dma_select) { - data = nv_dma_map(d, pg->dma_vertex_b, &dma_len); - } else { - data = nv_dma_map(d, pg->dma_vertex_a, &dma_len); - } - - assert(attribute->offset < dma_len); - data += attribute->offset; - - in_stride = attribute->stride; - } - - if (attribute->needs_conversion) { - NV2A_DPRINTF("converted %d\n", i); - - unsigned int out_stride = attribute->converted_size - * attribute->converted_count; - - if (num_elements > attribute->converted_elements) { - attribute->converted_buffer = g_realloc( - attribute->converted_buffer, - num_elements * out_stride); - } - - for (j=attribute->converted_elements; jconverted_buffer + j * out_stride; - - switch (attribute->format) { - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { - uint32_t p = ldl_le_p((uint32_t*)in); - float *xyz = (float*)out; - xyz[0] = ((int32_t)(((p >> 0) & 0x7FF) << 21) >> 21) - / 1023.0f; - xyz[1] = ((int32_t)(((p >> 11) & 0x7FF) << 21) >> 21) - / 1023.0f; - xyz[2] = ((int32_t)(((p >> 22) & 0x3FF) << 22) >> 22) - / 511.0f; - break; - } - default: - assert(false); - break; - } - } - - - glBindBuffer(GL_ARRAY_BUFFER, attribute->gl_converted_buffer); - if (num_elements != attribute->converted_elements) { - glBufferData(GL_ARRAY_BUFFER, - num_elements * out_stride, - attribute->converted_buffer, - GL_DYNAMIC_DRAW); - attribute->converted_elements = num_elements; - } - - - glVertexAttribPointer(i, - attribute->converted_count, - attribute->gl_type, - attribute->gl_normalize, - out_stride, - 0); - } else if (inline_data) { - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); - glVertexAttribPointer(i, - attribute->gl_count, - attribute->gl_type, - attribute->gl_normalize, - inline_stride, - (void*)(uintptr_t)attribute->inline_array_offset); - } else { - hwaddr addr = data - d->vram_ptr; - pgraph_update_memory_buffer(d, addr, - num_elements * attribute->stride, - false); - glVertexAttribPointer(i, - attribute->gl_count, - attribute->gl_type, - attribute->gl_normalize, - attribute->stride, - (void*)addr); - } - glEnableVertexAttribArray(i); - } else { - glDisableVertexAttribArray(i); - - glVertexAttrib4fv(i, attribute->inline_value); - } - } - NV2A_GL_DGROUP_END(); -} - -static unsigned int pgraph_bind_inline_array(NV2AState *d) -{ - int i; - - PGRAPHState *pg = &d->pgraph; - - unsigned int offset = 0; - for (i=0; ivertex_attributes[i]; - if (attribute->count) { - attribute->inline_array_offset = offset; - - NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", - i, attribute->size, attribute->count); - offset += attribute->size * attribute->count; - assert(offset % 4 == 0); - } - } - - unsigned int vertex_size = offset; - - - unsigned int index_count = pg->inline_array_length*4 / vertex_size; - - NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); - - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); - glBufferData(GL_ARRAY_BUFFER, pg->inline_array_length*4, pg->inline_array, - GL_DYNAMIC_DRAW); - - pgraph_bind_vertex_attributes(d, index_count, true, vertex_size); - - return index_count; -} - -static uint8_t cliptobyte(int x) -{ - return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 
255 : x)); -} - -static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, - uint8_t *r, uint8_t *g, uint8_t* b) { - int c, d, e; - c = (int)line[ix * 2] - 16; - if (ix % 2) { - d = (int)line[ix * 2 - 1] - 128; - e = (int)line[ix * 2 + 1] - 128; - } else { - d = (int)line[ix * 2 + 1] - 128; - e = (int)line[ix * 2 + 3] - 128; - } - *r = cliptobyte((298 * c + 409 * e + 128) >> 8); - *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); - *b = cliptobyte((298 * c + 516 * d + 128) >> 8); -} - -static uint8_t* convert_texture_data(const TextureShape s, - const uint8_t *data, - const uint8_t *palette_data, - unsigned int width, - unsigned int height, - unsigned int depth, - unsigned int row_pitch, - unsigned int slice_pitch) -{ - if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { - assert(depth == 1); /* FIXME */ - uint8_t* converted_data = g_malloc(width * height * 4); - int x, y; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - uint8_t index = data[y * row_pitch + x]; - uint32_t color = *(uint32_t*)(palette_data + index * 4); - *(uint32_t*)(converted_data + y * width * 4 + x * 4) = color; - } - } - return converted_data; - } else if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { - assert(depth == 1); /* FIXME */ - uint8_t* converted_data = g_malloc(width * height * 4); - int x, y; - for (y = 0; y < height; y++) { - const uint8_t* line = &data[y * s.width * 2]; - for (x = 0; x < width; x++) { - uint8_t* pixel = &converted_data[(y * s.width + x) * 4]; - /* FIXME: Actually needs uyvy? */ - convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - pixel[3] = 255; - } - } - return converted_data; - } else if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { - assert(depth == 1); /* FIXME */ - uint8_t *converted_data = g_malloc(width * height * 3); - int x, y; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - uint16_t rgb655 = *(uint16_t*)(data + y * row_pitch + x * 2); - int8_t *pixel = (int8_t*)&converted_data[(y * width + x) * 3]; - /* Maps 5 bit G and B signed value range to 8 bit - * signed values. R is probably unsigned. - */ - rgb655 ^= (1 << 9) | (1 << 4); - pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; - pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; - pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; - } - } - return converted_data; - } else { - return NULL; - } -} - -static void upload_gl_texture(GLenum gl_target, - const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - - switch(gl_target) { - case GL_TEXTURE_1D: - assert(false); + uint64_t r = 0; + switch (addr) { + case NV_PGRAPH_INTR: + r = d->pgraph.pending_interrupts; break; - case GL_TEXTURE_RECTANGLE: { - /* Can't handle strides unaligned to pixels */ - assert(s.pitch % f.bytes_per_pixel == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, - s.pitch / f.bytes_per_pixel); - - uint8_t *converted = convert_texture_data(s, texture_data, - palette_data, - s.width, s.height, 1, - s.pitch, 0); - - glTexImage2D(gl_target, 0, f.gl_internal_format, - s.width, s.height, 0, - f.gl_format, f.gl_type, - converted ? 
converted : texture_data); - - if (converted) { - g_free(converted); - } - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + case NV_PGRAPH_INTR_EN: + r = d->pgraph.enabled_interrupts; break; - } - case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { - - unsigned int width = s.width, height = s.height; - - int level; - for (level = 0; level < s.levels; level++) { - if (f.gl_format == 0) { /* compressed */ - - width = MAX(width, 4); height = MAX(height, 4); - - unsigned int block_size; - if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - glCompressedTexImage2D(gl_target, level, f.gl_internal_format, - width, height, 0, - width/4 * height/4 * block_size, - texture_data); - - texture_data += width/4 * height/4 * block_size; - } else { - - width = MAX(width, 1); height = MAX(height, 1); - - unsigned int pitch = width * f.bytes_per_pixel; - uint8_t *unswizzled = g_malloc(height * pitch); - unswizzle_rect(texture_data, width, height, - unswizzled, pitch, f.bytes_per_pixel); - - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, 1, - pitch, 0); - - glTexImage2D(gl_target, level, f.gl_internal_format, - width, height, 0, - f.gl_format, f.gl_type, - converted ? converted : unswizzled); - - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * f.bytes_per_pixel; - } - - width /= 2; - height /= 2; - } - + case NV_PGRAPH_NSOURCE: + r = d->pgraph.notify_source; break; - } - case GL_TEXTURE_3D: { - - unsigned int width = s.width, height = s.height, depth = s.depth; - - assert(f.gl_format != 0); /* FIXME: compressed not supported yet */ - assert(f.linear == false); - - int level; - for (level = 0; level < s.levels; level++) { - - unsigned int row_pitch = width * f.bytes_per_pixel; - unsigned int slice_pitch = row_pitch * height; - uint8_t *unswizzled = g_malloc(slice_pitch * depth); - unswizzle_box(texture_data, width, height, depth, unswizzled, - row_pitch, slice_pitch, f.bytes_per_pixel); - - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, depth, - row_pitch, slice_pitch); - - glTexImage3D(gl_target, level, f.gl_internal_format, - width, height, depth, 0, - f.gl_format, f.gl_type, - converted ? 
converted : unswizzled); - - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * depth * f.bytes_per_pixel; - - width /= 2; - height /= 2; - depth /= 2; - } + case NV_PGRAPH_CTX_USER: + SET_MASK(r, NV_PGRAPH_CTX_USER_CHANNEL_3D, + d->pgraph.context[d->pgraph.channel_id].channel_3d); + SET_MASK(r, NV_PGRAPH_CTX_USER_CHANNEL_3D_VALID, 1); + SET_MASK(r, NV_PGRAPH_CTX_USER_SUBCH, + d->pgraph.context[d->pgraph.channel_id].subchannel << 13); + SET_MASK(r, NV_PGRAPH_CTX_USER_CHID, d->pgraph.channel_id); break; - } - default: - assert(false); + case NV_PGRAPH_TRAPPED_ADDR: + SET_MASK(r, NV_PGRAPH_TRAPPED_ADDR_CHID, d->pgraph.trapped_channel_id); + SET_MASK(r, NV_PGRAPH_TRAPPED_ADDR_SUBCH, d->pgraph.trapped_subchannel); + SET_MASK(r, NV_PGRAPH_TRAPPED_ADDR_MTHD, d->pgraph.trapped_method); break; - } -} - -static TextureBinding* generate_texture(const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - - /* Create a new opengl texture */ - GLuint gl_texture; - glGenTextures(1, &gl_texture); - - GLenum gl_target; - if (s.cubemap) { - assert(f.linear == false); - assert(s.dimensionality == 2); - gl_target = GL_TEXTURE_CUBE_MAP; - } else { - if (f.linear) { - /* linear textures use unnormalised texcoords. - * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but - * does not allow repeat and mirror wrap modes. - * (or mipmapping, but xbox d3d says 'Non swizzled and non - * compressed textures cannot be mip mapped.') - * Not sure if that'll be an issue. */ - - /* FIXME: GLSL 330 provides us with textureSize()! Use that? */ - gl_target = GL_TEXTURE_RECTANGLE; - assert(s.dimensionality == 2); - } else { - switch(s.dimensionality) { - case 1: gl_target = GL_TEXTURE_1D; break; - case 2: gl_target = GL_TEXTURE_2D; break; - case 3: gl_target = GL_TEXTURE_3D; break; - default: - assert(false); - break; - } - } - } - - glBindTexture(gl_target, gl_texture); - - NV2A_GL_DLABEL(GL_TEXTURE, gl_texture, - "format: 0x%02X%s, %d dimensions%s, width: %d, height: %d, depth: %d", - s.color_format, f.linear ? "" : " (SZ)", - s.dimensionality, s.cubemap ? " (Cubemap)" : "", - s.width, s.height, s.depth); - - if (gl_target == GL_TEXTURE_CUBE_MAP) { - - size_t length = 0; - unsigned int w = s.width, h = s.height; - int level; - for (level = 0; level < s.levels; level++) { - /* FIXME: This is wrong for compressed textures and textures with 1x? 
non-square mipmaps */ - length += w * h * f.bytes_per_pixel; - w /= 2; - h /= 2; - } - - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, - s, texture_data + 0 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, - s, texture_data + 1 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, - s, texture_data + 2 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, - s, texture_data + 3 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, - s, texture_data + 4 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, - s, texture_data + 5 * length, palette_data); - } else { - upload_gl_texture(gl_target, s, texture_data, palette_data); - } - - /* Linear textures don't support mipmapping */ - if (!f.linear) { - glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, - s.min_mipmap_level); - glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, - s.levels - 1); - } - - if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 - || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { - glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, - (const GLint *)f.gl_swizzle_mask); - } - - TextureBinding* ret = g_malloc(sizeof(TextureBinding)); - ret->gl_target = gl_target; - ret->gl_texture = gl_texture; - ret->refcnt = 1; - return ret; -} - -/* functions for texture LRU cache */ -static guint texture_key_hash(gconstpointer key) -{ - const TextureKey *k = key; - uint64_t state_hash = fnv_hash( - (const uint8_t*)&k->state, sizeof(TextureShape)); - return state_hash ^ k->data_hash; -} -static gboolean texture_key_equal(gconstpointer a, gconstpointer b) -{ - const TextureKey *ak = a, *bk = b; - return memcmp(&ak->state, &bk->state, sizeof(TextureShape)) == 0 - && ak->data_hash == bk->data_hash; -} -static gpointer texture_key_retrieve(gpointer key, gpointer user_data) -{ - const TextureKey *k = key; - TextureBinding *v = generate_texture(k->state, - k->texture_data, - k->palette_data); - return v; -} -static void texture_key_destroy(gpointer data) -{ - g_free(data); -} -static void texture_binding_destroy(gpointer data) -{ - TextureBinding *binding = data; - assert(binding->refcnt > 0); - binding->refcnt--; - if (binding->refcnt == 0) { - glDeleteTextures(1, &binding->gl_texture); - g_free(binding); - } -} - -static void pgraph_bind_textures(NV2AState *d) -{ - int i; - PGRAPHState *pg = &d->pgraph; - - NV2A_GL_DGROUP_BEGIN("%s", __func__); - - for (i=0; iregs[NV_PGRAPH_TEXCTL0_0 + i*4]; - uint32_t ctl_1 = pg->regs[NV_PGRAPH_TEXCTL1_0 + i*4]; - uint32_t fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; - uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; - uint32_t address = pg->regs[NV_PGRAPH_TEXADDRESS0 + i*4]; - uint32_t palette = pg->regs[NV_PGRAPH_TEXPALETTE0 + i*4]; - - bool enabled = GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); - unsigned int min_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); - unsigned int max_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); - - unsigned int pitch = - GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); - - unsigned int dma_select = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); - bool cubemap = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); - unsigned int dimensionality = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); - unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); - unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); - unsigned int log_width = GET_MASK(fmt, 
NV_PGRAPH_TEXFMT0_BASE_SIZE_U); - unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); - unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); - - unsigned int rect_width = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_WIDTH); - unsigned int rect_height = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_HEIGHT); - - unsigned int lod_bias = - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); - unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); - unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); - - unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU); - unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV); - unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP); - - unsigned int border_source = GET_MASK(fmt, - NV_PGRAPH_TEXFMT0_BORDER_SOURCE); - uint32_t border_color = pg->regs[NV_PGRAPH_BORDERCOLOR0 + i*4]; - - unsigned int offset = pg->regs[NV_PGRAPH_TEXOFFSET0 + i*4]; - - bool palette_dma_select = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); - unsigned int palette_length_index = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); - unsigned int palette_offset = - palette & NV_PGRAPH_TEXPALETTE0_OFFSET; - - unsigned int palette_length = 0; - switch (palette_length_index) { - case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; - default: assert(false); break; - } - - /* Check for unsupported features */ - assert(!(filter & NV_PGRAPH_TEXFILTER0_ASIGNED)); - assert(!(filter & NV_PGRAPH_TEXFILTER0_RSIGNED)); - assert(!(filter & NV_PGRAPH_TEXFILTER0_GSIGNED)); - assert(!(filter & NV_PGRAPH_TEXFILTER0_BSIGNED)); - - glActiveTexture(GL_TEXTURE0 + i); - if (!enabled) { - glBindTexture(GL_TEXTURE_CUBE_MAP, 0); - glBindTexture(GL_TEXTURE_RECTANGLE, 0); - glBindTexture(GL_TEXTURE_1D, 0); - glBindTexture(GL_TEXTURE_2D, 0); - glBindTexture(GL_TEXTURE_3D, 0); - continue; - } - - if (!pg->texture_dirty[i] && pg->texture_binding[i]) { - glBindTexture(pg->texture_binding[i]->gl_target, - pg->texture_binding[i]->gl_texture); - continue; - } - - NV2A_DPRINTF(" texture %d is format 0x%x, off 0x%x (r %d, %d or %d, %d, %d; %d%s)," - " filter %x %x, levels %d-%d %d bias %d\n", - i, color_format, offset, - rect_width, rect_height, - 1 << log_width, 1 << log_height, 1 << log_depth, - pitch, - cubemap ? "; cubemap" : "", - min_filter, mag_filter, - min_mipmap_level, max_mipmap_level, levels, - lod_bias); - - assert(color_format < ARRAY_SIZE(kelvin_color_format_map)); - ColorFormatInfo f = kelvin_color_format_map[color_format]; - if (f.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", - color_format); - abort(); - } - - unsigned int width, height, depth; - if (f.linear) { - assert(dimensionality == 2); - width = rect_width; - height = rect_height; - depth = 1; - } else { - width = 1 << log_width; - height = 1 << log_height; - depth = 1 << log_depth; - - /* FIXME: What about 3D mipmaps? */ - levels = MIN(levels, max_mipmap_level + 1); - if (f.gl_format != 0) { - /* Discard mipmap levels that would be smaller than 1x1. - * FIXME: Is this actually needed? 
- * - * >> Level 0: 32 x 4 - * Level 1: 16 x 2 - * Level 2: 8 x 1 - * Level 3: 4 x 1 - * Level 4: 2 x 1 - * Level 5: 1 x 1 - */ - levels = MIN(levels, MAX(log_width, log_height) + 1); - } else { - /* OpenGL requires DXT textures to always have a width and - * height a multiple of 4. The Xbox and DirectX handles DXT - * textures smaller than 4 by padding the reset of the block. - * - * See: - * https://msdn.microsoft.com/en-us/library/windows/desktop/bb204843(v=vs.85).aspx - * https://msdn.microsoft.com/en-us/library/windows/desktop/bb694531%28v=vs.85%29.aspx#Virtual_Size - * - * Work around this for now by discarding mipmap levels that - * would result in too-small textures. A correct solution - * will be to decompress these levels manually, or add texture - * sampling logic. - * - * >> Level 0: 64 x 8 - * Level 1: 32 x 4 - * Level 2: 16 x 2 << Ignored - * >> Level 0: 16 x 16 - * Level 1: 8 x 8 - * Level 2: 4 x 4 << OK! - */ - if (log_width < 2 || log_height < 2) { - /* Base level is smaller than 4x4... */ - levels = 1; - } else { - levels = MIN(levels, MIN(log_width, log_height) - 1); - } - } - assert(levels > 0); - } - - hwaddr dma_len; - uint8_t *texture_data; - if (dma_select) { - texture_data = nv_dma_map(d, pg->dma_b, &dma_len); - } else { - texture_data = nv_dma_map(d, pg->dma_a, &dma_len); - } - assert(offset < dma_len); - texture_data += offset; - - hwaddr palette_dma_len; - uint8_t *palette_data; - if (palette_dma_select) { - palette_data = nv_dma_map(d, pg->dma_b, &palette_dma_len); - } else { - palette_data = nv_dma_map(d, pg->dma_a, &palette_dma_len); - } - assert(palette_offset < palette_dma_len); - palette_data += palette_offset; - - NV2A_DPRINTF(" - 0x%tx\n", texture_data - d->vram_ptr); - - size_t length = 0; - if (f.linear) { - assert(cubemap == false); - assert(dimensionality == 2); - length = height * pitch; - } else { - if (dimensionality >= 2) { - unsigned int w = width, h = height; - int level; - if (f.gl_format != 0) { - for (level = 0; level < levels; level++) { - w = MAX(w, 1); h = MAX(h, 1); - length += w * h * f.bytes_per_pixel; - w /= 2; - h /= 2; - } - } else { - /* Compressed textures are a bit different */ - unsigned int block_size; - if (f.gl_internal_format == - GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - for (level = 0; level < levels; level++) { - w = MAX(w, 4); h = MAX(h, 4); - length += w/4 * h/4 * block_size; - w /= 2; h /= 2; - } - } - if (cubemap) { - assert(dimensionality == 2); - length *= 6; - } - if (dimensionality >= 3) { - length *= depth; - } - } - } - - TextureShape state = { - .cubemap = cubemap, - .dimensionality = dimensionality, - .color_format = color_format, - .levels = levels, - .width = width, - .height = height, - .depth = depth, - .min_mipmap_level = min_mipmap_level, - .max_mipmap_level = max_mipmap_level, - .pitch = pitch, - }; - -#ifdef USE_TEXTURE_CACHE - TextureKey key = { - .state = state, - .data_hash = fast_hash(texture_data, length, 5003) - ^ fnv_hash(palette_data, palette_length), - .texture_data = texture_data, - .palette_data = palette_data, - }; - - gpointer cache_key = g_malloc(sizeof(TextureKey)); - memcpy(cache_key, &key, sizeof(TextureKey)); - - TextureBinding *binding = g_lru_cache_get(pg->texture_cache, cache_key); - assert(binding); - binding->refcnt++; -#else - TextureBinding *binding = generate_texture(state, - texture_data, palette_data); -#endif - - glBindTexture(binding->gl_target, binding->gl_texture); - - - if (f.linear) { - /* somtimes games try to set 
mipmap min filters on linear textures. - * this could indicate a bug... */ - switch (min_filter) { - case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0; - break; - case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0; - break; - } - } - - glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER, - pgraph_texture_min_filter_map[min_filter]); - glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER, - pgraph_texture_mag_filter_map[mag_filter]); - - /* Texture wrapping */ - assert(addru < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S, - pgraph_texture_addr_map[addru]); - if (dimensionality > 1) { - assert(addrv < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T, - pgraph_texture_addr_map[addrv]); - } - if (dimensionality > 2) { - assert(addrp < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R, - pgraph_texture_addr_map[addrp]); - } - - /* FIXME: Only upload if necessary? [s, t or r = GL_CLAMP_TO_BORDER] */ - if (border_source == NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { - GLfloat gl_border_color[] = { - /* FIXME: Color channels might be wrong order */ - ((border_color >> 16) & 0xFF) / 255.0f, /* red */ - ((border_color >> 8) & 0xFF) / 255.0f, /* green */ - (border_color & 0xFF) / 255.0f, /* blue */ - ((border_color >> 24) & 0xFF) / 255.0f /* alpha */ - }; - glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, - gl_border_color); - } - - if (pg->texture_binding[i]) { - texture_binding_destroy(pg->texture_binding[i]); - } - pg->texture_binding[i] = binding; - pg->texture_dirty[i] = false; - } - NV2A_GL_DGROUP_END(); -} - -static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - switch (pg->surface_shape.anti_aliasing) { - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: + case NV_PGRAPH_TRAPPED_DATA_LOW: + r = d->pgraph.trapped_data[0]; break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: - if (width) { *width *= 2; } + case NV_PGRAPH_FIFO: + SET_MASK(r, NV_PGRAPH_FIFO_ACCESS, d->pgraph.fifo_access); break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: - if (width) { *width *= 2; } - if (height) { *height *= 2; } + case NV_PGRAPH_CHANNEL_CTX_TABLE: + r = d->pgraph.context_table >> 4; + break; + case NV_PGRAPH_CHANNEL_CTX_POINTER: + r = d->pgraph.context_address >> 4; break; default: - assert(false); + r = d->pgraph.regs[addr]; break; } -} -static void pgraph_get_surface_dimensions(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) + qemu_mutex_unlock(&d->pgraph.lock); + + reg_log_read(NV_PGRAPH, addr, r); + return r; +} +static void pgraph_set_context_user(NV2AState *d, uint32_t val) { - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - if (swizzle) { - *width = 1 << pg->surface_shape.log_width; - *height = 1 << pg->surface_shape.log_height; - } else { - *width = pg->surface_shape.clip_width; - *height = pg->surface_shape.clip_height; - } -} + d->pgraph.channel_id = (val & NV_PGRAPH_CTX_USER_CHID) >> 24; -/* hash and equality for shader cache hash table */ -static guint shader_hash(gconstpointer key) + d->pgraph.context[d->pgraph.channel_id].channel_3d = + GET_MASK(val, NV_PGRAPH_CTX_USER_CHANNEL_3D); + 
d->pgraph.context[d->pgraph.channel_id].subchannel = + GET_MASK(val, NV_PGRAPH_CTX_USER_SUBCH); +} +void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) { - return fnv_hash(key, sizeof(ShaderState)); -} -static gboolean shader_equal(gconstpointer a, gconstpointer b) -{ - const ShaderState *as = a, *bs = b; - return memcmp(as, bs, sizeof(ShaderState)) == 0; -} + NV2AState *d = (NV2AState *)opaque; -static void pgraph_shader_update_constants(PGRAPHState *pg, - ShaderBinding *binding, - bool binding_changed, - bool vertex_program, - bool fixed_function) -{ - int i, j; + reg_log_write(NV_PGRAPH, addr, val); - /* update combiner constants */ - for (i = 0; i<= 8; i++) { - uint32_t constant[2]; - if (i == 8) { - /* final combiner */ - constant[0] = pg->regs[NV_PGRAPH_SPECFOGFACTOR0]; - constant[1] = pg->regs[NV_PGRAPH_SPECFOGFACTOR1]; - } else { - constant[0] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - constant[1] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; + qemu_mutex_lock(&d->pgraph.lock); + + switch (addr) { + case NV_PGRAPH_INTR: + d->pgraph.pending_interrupts &= ~val; + qemu_cond_broadcast(&d->pgraph.interrupt_cond); + break; + case NV_PGRAPH_INTR_EN: + d->pgraph.enabled_interrupts = val; + break; + case NV_PGRAPH_CTX_CONTROL: + d->pgraph.channel_valid = (val & NV_PGRAPH_CTX_CONTROL_CHID); + break; + case NV_PGRAPH_CTX_USER: + pgraph_set_context_user(d, val); + break; + case NV_PGRAPH_INCREMENT: + if (val & NV_PGRAPH_INCREMENT_READ_3D) { + SET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], + NV_PGRAPH_SURFACE_READ_3D, + (GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], + NV_PGRAPH_SURFACE_READ_3D)+1) + % GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], + NV_PGRAPH_SURFACE_MODULO_3D) ); + qemu_cond_broadcast(&d->pgraph.flip_3d); + } + break; + case NV_PGRAPH_FIFO: + d->pgraph.fifo_access = GET_MASK(val, NV_PGRAPH_FIFO_ACCESS); + qemu_cond_broadcast(&d->pgraph.fifo_access_cond); + break; + case NV_PGRAPH_CHANNEL_CTX_TABLE: + d->pgraph.context_table = + (val & NV_PGRAPH_CHANNEL_CTX_TABLE_INST) << 4; + break; + case NV_PGRAPH_CHANNEL_CTX_POINTER: + d->pgraph.context_address = + (val & NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; + break; + case NV_PGRAPH_CHANNEL_CTX_TRIGGER: + + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { + NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", + d->pgraph.channel_id, d->pgraph.context_address); + + uint8_t *context_ptr = d->ramin_ptr + d->pgraph.context_address; + uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); + + NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); + + + pgraph_set_context_user(d, context_user); + } + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { + /* do stuff ... 
*/ } - for (j = 0; j < 2; j++) { - GLint loc = binding->psh_constant_loc[i][j]; - if (loc != -1) { - float value[4]; - value[0] = (float) ((constant[j] >> 16) & 0xFF) / 255.0f; - value[1] = (float) ((constant[j] >> 8) & 0xFF) / 255.0f; - value[2] = (float) (constant[j] & 0xFF) / 255.0f; - value[3] = (float) ((constant[j] >> 24) & 0xFF) / 255.0f; - - glUniform4fv(loc, 1, value); - } - } - } - if (binding->alpha_ref_loc != -1) { - float alpha_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; - glUniform1f(binding->alpha_ref_loc, alpha_ref); - } - - - /* For each texture stage */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - // char name[32]; - GLint loc; - - /* Bump luminance only during stages 1 - 3 */ - if (i > 0) { - loc = binding->bump_mat_loc[i]; - if (loc != -1) { - glUniformMatrix2fv(loc, 1, GL_FALSE, pg->bump_env_matrix[i - 1]); - } - loc = binding->bump_scale_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4]); - } - loc = binding->bump_offset_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4]); - } - } - - } - - if (binding->fog_color_loc != -1) { - uint32_t fog_color = pg->regs[NV_PGRAPH_FOGCOLOR]; - glUniform4f(binding->fog_color_loc, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); - } - if (binding->fog_param_loc[0] != -1) { - glUniform1f(binding->fog_param_loc[0], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM0]); - } - if (binding->fog_param_loc[1] != -1) { - glUniform1f(binding->fog_param_loc[1], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM1]); - } - - - float zclip_max = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMAX]; - float zclip_min = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMIN]; - - if (fixed_function) { - /* update lighting constants */ - struct { - uint32_t* v; - bool* dirty; - GLint* locs; - size_t len; - } lighting_arrays[] = { - {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, - {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, - {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, - }; - - for (i=0; ilight_infinite_half_vector_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); - } - loc = binding->light_infinite_direction_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_direction[i]); - } - - loc = binding->light_local_position_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_position[i]); - } - loc = binding->light_local_attenuation_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_attenuation[i]); - } - } - - /* estimate the viewport by assuming it matches the surface ... */ - //FIXME: Get surface dimensions? 
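(For reference, the invViewport block that follows undoes the fixed-function viewport mapping so pre-transformed screen-space vertices can be pushed back through clip space. An illustrative sketch of the forward mapping it inverts; the helper name and layout are assumed for illustration and are not part of this patch:)

static void viewport_transform(float clip_width, float clip_height,
                               float zclip_min, float zclip_max,
                               const float ndc[3], float window[3])
{
    float m11 = 0.5f * clip_width;        /* x scale                          */
    float m22 = -0.5f * clip_height;      /* y scale, flips y                 */
    float m33 = zclip_max - zclip_min;    /* z scale                          */
    if (m33 == 0.0f) {
        m33 = 1.0f;                       /* keep the matrix invertible       */
    }
    window[0] = m11 * ndc[0] + m11;       /* [-1,1] -> [0, clip_width]        */
    window[1] = m22 * ndc[1] - m22;       /* [-1,1] -> [clip_height, 0]       */
    window[2] = m33 * ndc[2] + zclip_min; /* [0,1]  -> [zclip_min, zclip_max] */
}

Inverting this affine map gives the 1/m11, 1/m22, 1/m33 diagonal and the (-1, 1, -m43/m33) translation stored column-major in invViewport below.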
- float m11 = 0.5 * pg->surface_shape.clip_width; - float m22 = -0.5 * pg->surface_shape.clip_height; - float m33 = zclip_max - zclip_min; - //float m41 = m11; - //float m42 = -m22; - float m43 = zclip_min; - //float m44 = 1.0; - - if (m33 == 0.0) { - m33 = 1.0; - } - float invViewport[16] = { - 1.0/m11, 0, 0, 0, - 0, 1.0/m22, 0, 0, - 0, 0, 1.0/m33, 0, - -1.0, 1.0, -m43/m33, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - glUniformMatrix4fv(binding->inv_viewport_loc, - 1, GL_FALSE, &invViewport[0]); - } - - } - - /* update vertex program constants */ - for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; - - GLint loc = binding->vsh_constant_loc[i]; - //assert(loc != -1); - if (loc != -1) { - glUniform4fv(loc, 1, (const GLfloat*)pg->vsh_constants[i]); - } - pg->vsh_constants_dirty[i] = false; - } - - if (binding->surface_size_loc != -1) { - glUniform2f(binding->surface_size_loc, pg->surface_shape.clip_width, - pg->surface_shape.clip_height); - } - - if (binding->clip_range_loc != -1) { - glUniform2f(binding->clip_range_loc, zclip_min, zclip_max); - } - -} - -static void pgraph_bind_shaders(PGRAPHState *pg) -{ - int i, j; - - bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 2; - - bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 0; - - int program_start = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); - - NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, - vertex_program ? "yes" : "no", - fixed_function ? "yes" : "no"); - - ShaderBinding* old_binding = pg->shader_binding; - - ShaderState state = { - .psh = (PshState){ - /* register combier stuff */ - .combiner_control = pg->regs[NV_PGRAPH_COMBINECTL], - .shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG], - .other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL], - .final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0], - .final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1], - - .alpha_test = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, - .alpha_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAFUNC), - }, - - /* fixed function stuff */ - .skinning = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_SKIN), - .lighting = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_LIGHTING), - .normalization = pg->regs[NV_PGRAPH_CSV0_C] - & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, - - .fixed_function = fixed_function, - - /* vertex program stuff */ - .vertex_program = vertex_program, - .z_perspective = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, - - /* geometry shader stuff */ - .primitive_mode = pg->primitive_mode, - .polygon_front_mode = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACEMODE), - .polygon_back_mode = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_BACKFACEMODE), - }; - - state.program_length = 0; - memset(state.program_data, 0, sizeof(state.program_data)); - - if (vertex_program) { - // copy in vertex program tokens - for (i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; i++) { - uint32_t *cur_token = (uint32_t*)&pg->program_data[i]; - memcpy(&state.program_data[state.program_length], - cur_token, - VSH_TOKEN_SIZE * sizeof(uint32_t)); - state.program_length++; - - if (vsh_get_field(cur_token, FLD_FINAL)) { - break; - } - } - } - - /* Texgen */ - for (i = 0; i < 4; i++) { - unsigned int reg = (i < 2) ? 
NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; - for (j = 0; j < 4; j++) { - unsigned int masks[] = { - (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q - }; - state.texgen[i][j] = GET_MASK(pg->regs[reg], masks[j]); - } - } - - /* Fog */ - state.fog_enable = pg->regs[NV_PGRAPH_CONTROL_3] - & NV_PGRAPH_CONTROL_3_FOGENABLE; - if (state.fog_enable) { - /*FIXME: Use CSV0_D? */ - state.fog_mode = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_FOG_MODE); - state.foggen = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_FOGGENMODE); - } else { - /* FIXME: Do we still pass the fogmode? */ - state.fog_mode = 0; - state.foggen = 0; - } - - /* Texture matrices */ - for (i = 0; i < 4; i++) { - state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; - } - - /* Lighting */ - if (state.lighting) { - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - state.light[i] = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); - } - } - - for (i = 0; i < 8; i++) { - state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; - state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4]; - state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4]; - state.psh.alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4]; - //constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - //constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; - } - - for (i = 0; i < 4; i++) { - state.psh.rect_tex[i] = false; - bool enabled = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4] - & NV_PGRAPH_TEXCTL0_0_ENABLE; - unsigned int color_format = - GET_MASK(pg->regs[NV_PGRAPH_TEXFMT0 + i*4], - NV_PGRAPH_TEXFMT0_COLOR); - - if (enabled && kelvin_color_format_map[color_format].linear) { - state.psh.rect_tex[i] = true; - } - - for (j = 0; j < 4; j++) { - state.psh.compare_mode[i][j] = - (pg->regs[NV_PGRAPH_SHADERCLIPMODE] >> (4 * i + j)) & 1; - } - state.psh.alphakill[i] = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4] - & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; - } - - ShaderBinding* cached_shader = g_hash_table_lookup(pg->shader_cache, &state); - if (cached_shader) { - pg->shader_binding = cached_shader; - } else { - pg->shader_binding = generate_shaders(state); - - /* cache it */ - ShaderState *cache_state = g_malloc(sizeof(*cache_state)); - memcpy(cache_state, &state, sizeof(*cache_state)); - g_hash_table_insert(pg->shader_cache, cache_state, - (gpointer)pg->shader_binding); - } - - bool binding_changed = (pg->shader_binding != old_binding); - - glUseProgram(pg->shader_binding->gl_program); - - pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed, - vertex_program, fixed_function); - - NV2A_GL_DGROUP_END(); -} - -static bool pgraph_framebuffer_dirty(PGRAPHState *pg) -{ - bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, - sizeof(SurfaceShape)) != 0; - if (!shape_changed || (!pg->surface_shape.color_format - && !pg->surface_shape.zeta_format)) { - return false; - } - return true; -} - -static bool pgraph_color_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); -} - -static bool pgraph_zeta_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - 
NV_PGRAPH_CONTROL_0_ZWRITEENABLE - | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); -} - -static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) -{ - NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", - color, zeta, - pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); - /* FIXME: Does this apply to CLEARs too? */ - color = color && pgraph_color_write_enabled(pg); - zeta = zeta && pgraph_zeta_write_enabled(pg); - pg->surface_color.draw_dirty |= color; - pg->surface_zeta.draw_dirty |= zeta; -} - -static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) { - PGRAPHState *pg = &d->pgraph; - - unsigned int width, height; - pgraph_get_surface_dimensions(pg, &width, &height); - pgraph_apply_anti_aliasing_factor(pg, &width, &height); - - Surface *surface; - hwaddr dma_address; - GLuint *gl_buffer; - unsigned int bytes_per_pixel; - GLenum gl_internal_format, gl_format, gl_type, gl_attachment; - - if (color) { - surface = &pg->surface_color; - dma_address = pg->dma_color; - gl_buffer = &pg->gl_color_buffer; - - assert(pg->surface_shape.color_format != 0); - assert(pg->surface_shape.color_format - < ARRAY_SIZE(kelvin_surface_color_format_map)); - SurfaceColorFormatInfo f = - kelvin_surface_color_format_map[pg->surface_shape.color_format]; - if (f.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", - pg->surface_shape.color_format); - abort(); - } - - bytes_per_pixel = f.bytes_per_pixel; - gl_internal_format = f.gl_internal_format; - gl_format = f.gl_format; - gl_type = f.gl_type; - gl_attachment = GL_COLOR_ATTACHMENT0; - - } else { - surface = &pg->surface_zeta; - dma_address = pg->dma_zeta; - gl_buffer = &pg->gl_zeta_buffer; - - assert(pg->surface_shape.zeta_format != 0); - switch (pg->surface_shape.zeta_format) { - case NV097_SET_SURFACE_FORMAT_ZETA_Z16: - bytes_per_pixel = 2; - gl_format = GL_DEPTH_COMPONENT; - gl_attachment = GL_DEPTH_ATTACHMENT; - if (pg->surface_shape.z_format) { - gl_type = GL_HALF_FLOAT; - gl_internal_format = GL_DEPTH_COMPONENT32F; - } else { - gl_type = GL_UNSIGNED_SHORT; - gl_internal_format = GL_DEPTH_COMPONENT16; - } - break; - case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: - bytes_per_pixel = 4; - gl_format = GL_DEPTH_STENCIL; - gl_attachment = GL_DEPTH_STENCIL_ATTACHMENT; - if (pg->surface_shape.z_format) { - assert(false); - gl_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; - gl_internal_format = GL_DEPTH32F_STENCIL8; - } else { - gl_type = GL_UNSIGNED_INT_24_8; - gl_internal_format = GL_DEPTH24_STENCIL8; - } - break; - default: - assert(false); - break; - } - } - - - DMAObject dma = nv_dma_load(d, dma_address); - /* There's a bunch of bugs that could cause us to hit this function - * at the wrong time and get a invalid dma object. - * Check that it's sane. 
*/ - assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); - - assert(dma.address + surface->offset != 0); - assert(surface->offset <= dma.limit); - assert(surface->offset + surface->pitch * height <= dma.limit + 1); - - hwaddr data_len; - uint8_t *data = nv_dma_map(d, dma_address, &data_len); - - /* TODO */ - // assert(pg->surface_clip_x == 0 && pg->surface_clip_y == 0); - - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - - uint8_t *buf = data + surface->offset; - if (swizzle) { - buf = g_malloc(height * surface->pitch); - } - - bool dirty = surface->buffer_dirty; - if (color) { - dirty |= memory_region_test_and_clear_dirty(d->vram, - dma.address + surface->offset, - surface->pitch * height, - DIRTY_MEMORY_NV2A); - } - if (upload && dirty) { - /* surface modified (or moved) by the cpu. - * copy it into the opengl renderbuffer */ - assert(!surface->draw_dirty); - - assert(surface->pitch % bytes_per_pixel == 0); - - if (swizzle) { - unswizzle_rect(data + surface->offset, - width, height, - buf, - surface->pitch, - bytes_per_pixel); - } - - if (!color) { - /* need to clear the depth_stencil and depth attachment for zeta */ - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_ATTACHMENT, - GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, - 0, 0); - } - - glFramebufferTexture2D(GL_FRAMEBUFFER, - gl_attachment, - GL_TEXTURE_2D, - 0, 0); - - if (*gl_buffer) { - glDeleteTextures(1, gl_buffer); - *gl_buffer = 0; - } - - glGenTextures(1, gl_buffer); - glBindTexture(GL_TEXTURE_2D, *gl_buffer); - - /* This is VRAM so we can't do this inplace! */ - uint8_t *flipped_buf = g_malloc(width * height * bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < height; irow++) { - memcpy(&flipped_buf[width * (height - irow - 1) - * bytes_per_pixel], - &buf[surface->pitch * irow], - width * bytes_per_pixel); - } - - glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, - width, height, 0, - gl_format, gl_type, - flipped_buf); - - g_free(flipped_buf); - - glFramebufferTexture2D(GL_FRAMEBUFFER, - gl_attachment, - GL_TEXTURE_2D, - *gl_buffer, 0); - - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) - == GL_FRAMEBUFFER_COMPLETE); - - if (color) { - pgraph_update_memory_buffer(d, dma.address + surface->offset, - surface->pitch * height, true); - } - surface->buffer_dirty = false; - - - uint8_t *out = data + surface->offset + 64; - NV2A_DPRINTF("upload_surface %s 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " - "(0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " - "%d %d, %d %d, %d) - %x %x %x %x\n", - color ? 
"color" : "zeta", - dma.address, dma.address + dma.limit, - dma.address + surface->offset, - dma.address + surface->pitch * height, - pg->surface_shape.clip_x, pg->surface_shape.clip_y, - pg->surface_shape.clip_width, - pg->surface_shape.clip_height, - surface->pitch, - out[0], out[1], out[2], out[3]); - - } - - if (!upload && surface->draw_dirty) { - /* read the opengl framebuffer into the surface */ - - glo_readpixels(gl_format, gl_type, - bytes_per_pixel, surface->pitch, - width, height, - buf); - assert(glGetError() == GL_NO_ERROR); - - if (swizzle) { - swizzle_rect(buf, - width, height, - data + surface->offset, - surface->pitch, - bytes_per_pixel); - } - - memory_region_set_client_dirty(d->vram, - dma.address + surface->offset, - surface->pitch * height, - DIRTY_MEMORY_VGA); - - if (color) { - pgraph_update_memory_buffer(d, dma.address + surface->offset, - surface->pitch * height, true); - } - - surface->draw_dirty = false; - surface->write_enabled_cache = false; - - uint8_t *out = data + surface->offset + 64; - NV2A_DPRINTF("read_surface %s 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " - "(0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " - "%d %d, %d %d, %d) - %x %x %x %x\n", - color ? "color" : "zeta", - dma.address, dma.address + dma.limit, - dma.address + surface->offset, - dma.address + surface->pitch * pg->surface_shape.clip_height, - pg->surface_shape.clip_x, pg->surface_shape.clip_y, - pg->surface_shape.clip_width, pg->surface_shape.clip_height, - surface->pitch, - out[0], out[1], out[2], out[3]); - - } - - if (swizzle) { - g_free(buf); - } -} - -static void pgraph_update_surface(NV2AState *d, bool upload, - bool color_write, bool zeta_write) -{ - PGRAPHState *pg = &d->pgraph; - - pg->surface_shape.z_format = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_Z_FORMAT); - - /* FIXME: Does this apply to CLEARs too? 
*/ - color_write = color_write && pgraph_color_write_enabled(pg); - zeta_write = zeta_write && pgraph_zeta_write_enabled(pg); - - if (upload && pgraph_framebuffer_dirty(pg)) { - assert(!pg->surface_color.draw_dirty); - assert(!pg->surface_zeta.draw_dirty); - - pg->surface_color.buffer_dirty = true; - pg->surface_zeta.buffer_dirty = true; - - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, - 0, 0); - - if (pg->gl_color_buffer) { - glDeleteTextures(1, &pg->gl_color_buffer); - pg->gl_color_buffer = 0; - } - - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_ATTACHMENT, - GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, - 0, 0); - - if (pg->gl_zeta_buffer) { - glDeleteTextures(1, &pg->gl_zeta_buffer); - pg->gl_zeta_buffer = 0; - } - - memcpy(&pg->last_surface_shape, &pg->surface_shape, - sizeof(SurfaceShape)); - } - - if ((color_write || (!upload && pg->surface_color.write_enabled_cache)) - && (upload || pg->surface_color.draw_dirty)) { - pgraph_update_surface_part(d, upload, true); - } - - - if ((zeta_write || (!upload && pg->surface_zeta.write_enabled_cache)) - && (upload || pg->surface_zeta.draw_dirty)) { - pgraph_update_surface_part(d, upload, false); - } -} - - -static void pgraph_init(NV2AState *d) -{ - int i; - - PGRAPHState *pg = &d->pgraph; - - qemu_mutex_init(&pg->lock); - qemu_cond_init(&pg->interrupt_cond); - qemu_cond_init(&pg->fifo_access_cond); - qemu_cond_init(&pg->flip_3d); - - /* fire up opengl */ - - pg->gl_context = glo_context_create(); - assert(pg->gl_context); - -#ifdef DEBUG_NV2A_GL - glEnable(GL_DEBUG_OUTPUT); -#endif - - glextensions_init(); - - /* DXT textures */ - assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); - /* Internal RGB565 texture format */ - assert(glo_check_extension("GL_ARB_ES2_compatibility")); - - GLint max_vertex_attributes; - glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); - assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); - - - glGenFramebuffers(1, &pg->gl_framebuffer); - glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer); - - /* need a valid framebuffer to start with */ - glGenTextures(1, &pg->gl_color_buffer); - glBindTexture(GL_TEXTURE_2D, pg->gl_color_buffer); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 640, 480, - 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, pg->gl_color_buffer, 0); - - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) - == GL_FRAMEBUFFER_COMPLETE); - - //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); - - pg->texture_cache = g_lru_cache_new( - texture_key_hash, texture_key_equal, - NULL, texture_key_retrieve, - texture_key_destroy, texture_binding_destroy, - NULL, NULL); - g_lru_cache_set_max_size(pg->texture_cache, 512); - - pg->shader_cache = g_hash_table_new(shader_hash, shader_equal); - - - for (i=0; ivertex_attributes[i].gl_converted_buffer); - glGenBuffers(1, &pg->vertex_attributes[i].gl_inline_buffer); - } - glGenBuffers(1, &pg->gl_inline_array_buffer); - glGenBuffers(1, &pg->gl_element_buffer); - - glGenBuffers(1, &pg->gl_memory_buffer); - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_memory_buffer); - glBufferData(GL_ARRAY_BUFFER, - memory_region_size(d->vram), - NULL, - GL_DYNAMIC_DRAW); - - glGenVertexArrays(1, &pg->gl_vertex_array); - glBindVertexArray(pg->gl_vertex_array); - - assert(glGetError() == GL_NO_ERROR); - - glo_set_current(NULL); -} - -static void pgraph_destroy(PGRAPHState *pg) -{ - 
qemu_mutex_destroy(&pg->lock); - qemu_cond_destroy(&pg->interrupt_cond); - qemu_cond_destroy(&pg->fifo_access_cond); - qemu_cond_destroy(&pg->flip_3d); - - glo_set_current(pg->gl_context); - - if (pg->gl_color_buffer) { - glDeleteTextures(1, &pg->gl_color_buffer); - } - if (pg->gl_zeta_buffer) { - glDeleteTextures(1, &pg->gl_zeta_buffer); - } - glDeleteFramebuffers(1, &pg->gl_framebuffer); - - // TODO: clear out shader cached - // TODO: clear out texture cache - - glo_set_current(NULL); - - glo_context_destroy(pg->gl_context); -} - -static unsigned int kelvin_map_stencil_op(uint32_t parameter) -{ - unsigned int op; - switch (parameter) { - case NV097_SET_STENCIL_OP_V_KEEP: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; - case NV097_SET_STENCIL_OP_V_ZERO: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; - case NV097_SET_STENCIL_OP_V_REPLACE: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; - case NV097_SET_STENCIL_OP_V_INCRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; - case NV097_SET_STENCIL_OP_V_DECRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; - case NV097_SET_STENCIL_OP_V_INVERT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; - case NV097_SET_STENCIL_OP_V_INCR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; - case NV097_SET_STENCIL_OP_V_DECR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; + break; default: - assert(false); + d->pgraph.regs[addr] = val; break; } - return op; -} -static unsigned int kelvin_map_polygon_mode(uint32_t parameter) -{ - unsigned int mode; - switch (parameter) { - case NV097_SET_FRONT_POLYGON_MODE_V_POINT: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; - case NV097_SET_FRONT_POLYGON_MODE_V_LINE: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; - case NV097_SET_FRONT_POLYGON_MODE_V_FILL: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; - default: - assert(false); - break; - } - return mode; -} - -static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) -{ - assert(channel < 4); - unsigned int texgen; - switch (parameter) { - case NV097_SET_TEXGEN_S_DISABLE: - texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; - case NV097_SET_TEXGEN_S_EYE_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; - case NV097_SET_TEXGEN_S_OBJECT_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; - case NV097_SET_TEXGEN_S_SPHERE_MAP: - assert(channel < 2); - texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; - case NV097_SET_TEXGEN_S_REFLECTION_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; - case NV097_SET_TEXGEN_S_NORMAL_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; - default: - assert(false); - break; - } - return texgen; -} - -static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, - unsigned int attr) -{ - int i; - VertexAttribute *attribute = &pg->vertex_attributes[attr]; - - if (attribute->inline_buffer || pg->inline_buffer_length == 0) { - return; - } - - /* Now upload the previous attribute value */ - attribute->inline_buffer = g_malloc(NV2A_MAX_BATCH_LENGTH - * sizeof(float) * 4); - for (i = 0; i < pg->inline_buffer_length; i++) { - memcpy(&attribute->inline_buffer[i * 4], - attribute->inline_value, - sizeof(float) * 4); - } -} - -static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) -{ - int i; - - assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); - - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attribute = 
&pg->vertex_attributes[i]; - if (attribute->inline_buffer) { - memcpy(&attribute->inline_buffer[ - pg->inline_buffer_length * 4], - attribute->inline_value, - sizeof(float) * 4); - } - } - - pg->inline_buffer_length++; + qemu_mutex_unlock(&d->pgraph.lock); } static void pgraph_method(NV2AState *d, @@ -2598,65 +446,39 @@ static void pgraph_method(NV2AState *d, uint32_t parameter) { int i; + GraphicsSubchannel *subchannel_data; + GraphicsObject *object; + unsigned int slot; PGRAPHState *pg = &d->pgraph; - bool channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - assert(channel_valid); + assert(pg->channel_valid); + subchannel_data = &pg->subchannel_data[subchannel]; + object = &subchannel_data->object; - unsigned channel_id = GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); + ContextSurfaces2DState *context_surfaces_2d + = &object->data.context_surfaces_2d; + ImageBlitState *image_blit = &object->data.image_blit; + KelvinState *kelvin = &object->data.kelvin; - ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d; - ImageBlitState *image_blit = &pg->image_blit; - KelvinState *kelvin = &pg->kelvin; - assert(subchannel < 8); + + pgraph_method_log(subchannel, object->graphics_class, method, parameter); if (method == NV_SET_OBJECT) { - assert(parameter < memory_region_size(&d->ramin)); - uint8_t *obj_ptr = d->ramin_ptr + parameter; + subchannel_data->object_instance = parameter; - uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr); - uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4)); - uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8)); - uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12)); - uint32_t ctx_5 = parameter; - - pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4] = ctx_1; - pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4] = ctx_2; - pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4] = ctx_3; - pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4] = ctx_4; - pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4] = ctx_5; - } - - // is this right? 
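(The removed path recovers the bound object's class from the first RAMIN context word via NV_PGRAPH_CTX_SWITCH1_GRCLASS, which is what the 0x97 check further down relies on. A minimal sketch of that decode, assuming the class occupies the low byte of context word 1 as in the nouveau register layout; the helper name is illustrative only:)

static unsigned int ramin_object_class(const uint8_t *ramin_ptr, hwaddr instance)
{
    /* Context word 1 of a graphics object; the low byte holds the class,
     * e.g. 0x97 for NV_KELVIN_PRIMITIVE. */
    uint32_t ctx_1 = ldl_le_p((const uint32_t *)(ramin_ptr + instance));
    return ctx_1 & 0xff;
}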
- pg->regs[NV_PGRAPH_CTX_SWITCH1] = pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH2] = pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH3] = pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH4] = pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH5] = pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4]; - - uint32_t graphics_class = GET_MASK(pg->regs[NV_PGRAPH_CTX_SWITCH1], - NV_PGRAPH_CTX_SWITCH1_GRCLASS); - - // NV2A_DPRINTF("graphics_class %d 0x%x\n", subchannel, graphics_class); - pgraph_method_log(subchannel, graphics_class, method, parameter); - - if (subchannel != 0) { - // catches context switching issues on xbox d3d - assert(graphics_class != 0x97); + //qemu_mutex_lock_iothread(); + load_graphics_object(d, parameter, object); + //qemu_mutex_unlock_iothread(); + return; } /* ugly switch for now */ - switch (graphics_class) { + switch (object->graphics_class) { case NV_CONTEXT_SURFACES_2D: { switch (method) { - case NV062_SET_OBJECT: - context_surfaces_2d->object_instance = parameter; - break; - case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE: context_surfaces_2d->dma_image_source = parameter; break; @@ -2679,10 +501,6 @@ static void pgraph_method(NV2AState *d, } break; } case NV_IMAGE_BLIT: { switch (method) { - case NV09F_SET_OBJECT: - image_blit->object_instance = parameter; - break; - case NV09F_SET_CONTEXT_SURFACES: image_blit->context_surfaces = parameter; break; @@ -2706,9 +524,14 @@ static void pgraph_method(NV2AState *d, NV2A_GL_DPRINTF(true, "NV09F_SET_OPERATION_SRCCOPY"); - ContextSurfaces2DState *context_surfaces = context_surfaces_2d; - assert(context_surfaces->object_instance - == image_blit->context_surfaces); + GraphicsObject *context_surfaces_obj = + lookup_graphics_object(pg, image_blit->context_surfaces); + assert(context_surfaces_obj); + assert(context_surfaces_obj->graphics_class + == NV_CONTEXT_SURFACES_2D); + + ContextSurfaces2DState *context_surfaces = + &context_surfaces_obj->data.context_surfaces_2d; unsigned int bytes_per_pixel; switch (context_surfaces->color_format) { @@ -2730,13 +553,13 @@ static void pgraph_method(NV2AState *d, hwaddr source_dma_len, dest_dma_len; uint8_t *source, *dest; - source = nv_dma_map(d, context_surfaces->dma_image_source, - &source_dma_len); + source = (uint8_t*)nv_dma_map(d, context_surfaces->dma_image_source, + &source_dma_len); assert(context_surfaces->source_offset < source_dma_len); source += context_surfaces->source_offset; - dest = nv_dma_map(d, context_surfaces->dma_image_dest, - &dest_dma_len); + dest = (uint8_t*)nv_dma_map(d, context_surfaces->dma_image_dest, + &dest_dma_len); assert(context_surfaces->dest_offset < dest_dma_len); dest += context_surfaces->dest_offset; @@ -2766,10 +589,6 @@ static void pgraph_method(NV2AState *d, case NV_KELVIN_PRIMITIVE: { switch (method) { - case NV097_SET_OBJECT: - kelvin->object_instance = parameter; - break; - case NV097_NO_OPERATION: /* The bios uses nop as a software method call - * it seems to expect a notify interrupt if the parameter isn't 0. 
@@ -2780,14 +599,12 @@ static void pgraph_method(NV2AState *d, if (parameter != 0) { assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], - NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], - NV_PGRAPH_TRAPPED_ADDR_SUBCH, subchannel); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], - NV_PGRAPH_TRAPPED_ADDR_MTHD, method); - pg->regs[NV_PGRAPH_TRAPPED_DATA_LOW] = parameter; - pg->regs[NV_PGRAPH_NSOURCE] = NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */ + + pg->trapped_channel_id = pg->channel_id; + pg->trapped_subchannel = subchannel; + pg->trapped_method = method; + pg->trapped_data[0] = parameter; + pg->notify_source = NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */ pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; qemu_mutex_unlock(&pg->lock); @@ -2840,6 +657,18 @@ static void pgraph_method(NV2AState *d, break; } case NV097_FLIP_STALL: +#if 0 + // HACK HACK HACK + glBindFramebuffer(GL_READ_FRAMEBUFFER, pg->gl_framebuffer); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + glBlitFramebuffer(0, 0, 640, 480, 0, 0, 640, 480, GL_COLOR_BUFFER_BIT, GL_NEAREST); + SDL_GL_SwapWindow(d->sdl_window); // ugh + assert(glGetError() == GL_NO_ERROR); + glBindFramebuffer(GL_READ_FRAMEBUFFER, pg->gl_framebuffer); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, pg->gl_framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer); + // HACK HACK HACK +#endif pgraph_update_surface(d, false, true, true); while (true) { @@ -2858,9 +687,8 @@ static void pgraph_method(NV2AState *d, NV2A_DPRINTF("flip stall done\n"); break; - // TODO: these should be loading the dma objects from ramin here? case NV097_SET_CONTEXT_DMA_NOTIFIES: - pg->dma_notifies = parameter; + kelvin->dma_notifies = parameter; break; case NV097_SET_CONTEXT_DMA_A: pg->dma_a = parameter; @@ -2869,7 +697,7 @@ static void pgraph_method(NV2AState *d, pg->dma_b = parameter; break; case NV097_SET_CONTEXT_DMA_STATE: - pg->dma_state = parameter; + kelvin->dma_state = parameter; break; case NV097_SET_CONTEXT_DMA_COLOR: /* try to get any straggling draws in before the surface's changed :/ */ @@ -2887,7 +715,7 @@ static void pgraph_method(NV2AState *d, pg->dma_vertex_b = parameter; break; case NV097_SET_CONTEXT_DMA_SEMAPHORE: - pg->dma_semaphore = parameter; + kelvin->dma_semaphore = parameter; break; case NV097_SET_CONTEXT_DMA_REPORT: pg->dma_report = parameter; @@ -3847,8 +1675,8 @@ static void pgraph_method(NV2AState *d, pg->gl_zpass_pixel_count_query_count = 0; hwaddr report_dma_len; - uint8_t *report_data = nv_dma_map(d, pg->dma_report, - &report_dma_len); + uint8_t *report_data = (uint8_t*)nv_dma_map(d, pg->dma_report, + &report_dma_len); assert(offset < report_dma_len); report_data += offset; @@ -4147,7 +1975,7 @@ static void pgraph_method(NV2AState *d, GLuint gl_query; glGenQueries(1, &gl_query); pg->gl_zpass_pixel_count_query_count++; - pg->gl_zpass_pixel_count_queries = g_realloc( + pg->gl_zpass_pixel_count_queries = (GLuint*)g_realloc( pg->gl_zpass_pixel_count_queries, sizeof(GLuint) * pg->gl_zpass_pixel_count_query_count); pg->gl_zpass_pixel_count_queries[ @@ -4398,7 +2226,7 @@ static void pgraph_method(NV2AState *d, } case NV097_SET_SEMAPHORE_OFFSET: - pg->regs[NV_PGRAPH_SEMAPHOREOFFSET] = parameter; + kelvin->semaphore_offset = parameter; break; case NV097_BACK_END_WRITE_SEMAPHORE_RELEASE: { @@ -4407,13 +2235,11 @@ static void pgraph_method(NV2AState *d, //qemu_mutex_unlock(&d->pgraph.lock); //qemu_mutex_lock_iothread(); - uint32_t semaphore_offset = 
pg->regs[NV_PGRAPH_SEMAPHOREOFFSET]; - hwaddr semaphore_dma_len; - uint8_t *semaphore_data = nv_dma_map(d, pg->dma_semaphore, - &semaphore_dma_len); - assert(semaphore_offset < semaphore_dma_len); - semaphore_data += semaphore_offset; + uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, kelvin->dma_semaphore, + &semaphore_dma_len); + assert(kelvin->semaphore_offset < semaphore_dma_len); + semaphore_data += kelvin->semaphore_offset; stl_le_p((uint32_t*)semaphore_data, parameter); @@ -4675,13 +2501,13 @@ static void pgraph_method(NV2AState *d, default: NV2A_GL_DPRINTF(true, " unhandled (0x%02x 0x%08x)", - graphics_class, method); + object->graphics_class, method); break; } break; } default: NV2A_GL_DPRINTF(true, " unhandled (0x%02x 0x%08x)", - graphics_class, method); + object->graphics_class, method); break; } @@ -4690,20 +2516,13 @@ static void pgraph_method(NV2AState *d, static void pgraph_context_switch(NV2AState *d, unsigned int channel_id) { - bool channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - unsigned pgraph_channel_id = GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - bool valid = channel_valid && pgraph_channel_id == channel_id; + bool valid; + valid = d->pgraph.channel_valid && d->pgraph.channel_id == channel_id; if (!valid) { - SET_MASK(d->pgraph.regs[NV_PGRAPH_TRAPPED_ADDR], - NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id); - - NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id); - - /* TODO: hardware context switching */ - assert(!(d->pgraph.regs[NV_PGRAPH_DEBUG_3] - & NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH)); + d->pgraph.trapped_channel_id = channel_id; + } + if (!valid) { + NV2A_DPRINTF("puller needs to switch to ch %d\n", channel_id); qemu_mutex_unlock(&d->pgraph.lock); qemu_mutex_lock_iothread(); @@ -4713,7 +2532,6 @@ static void pgraph_context_switch(NV2AState *d, unsigned int channel_id) qemu_mutex_lock(&d->pgraph.lock); qemu_mutex_unlock_iothread(); - // wait for the interrupt to be serviced while (d->pgraph.pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH) { qemu_cond_wait(&d->pgraph.interrupt_cond, &d->pgraph.lock); } @@ -4721,1409 +2539,13 @@ static void pgraph_context_switch(NV2AState *d, unsigned int channel_id) } static void pgraph_wait_fifo_access(NV2AState *d) { - while (!(d->pgraph.regs[NV_PGRAPH_FIFO] & NV_PGRAPH_FIFO_ACCESS)) { + while (!d->pgraph.fifo_access) { qemu_cond_wait(&d->pgraph.fifo_access_cond, &d->pgraph.lock); } } -static void pfifo_run_puller(NV2AState *d) -{ - uint32_t *pull0 = &d->pfifo.regs[NV_PFIFO_CACHE1_PULL0]; - uint32_t *pull1 = &d->pfifo.regs[NV_PFIFO_CACHE1_PULL1]; - uint32_t *engine_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_ENGINE]; +// static const char* nv2a_method_names[] = {}; - uint32_t *status = &d->pfifo.regs[NV_PFIFO_CACHE1_STATUS]; - uint32_t *get_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_GET]; - uint32_t *put_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_PUT]; - - // TODO - // CacheEntry working_cache[NV2A_CACHE1_SIZE]; - // int working_cache_size = 0; - // pull everything into our own queue - - // TODO think more about locking - - while (true) { - if (!GET_MASK(*pull0, NV_PFIFO_CACHE1_PULL0_ACCESS)) return; - - /* empty cache1 */ - if (*status & NV_PFIFO_CACHE1_STATUS_LOW_MARK) break; - - uint32_t get = *get_reg; - uint32_t put = *put_reg; - - assert(get < 128*4 && (get % 4) == 0); - uint32_t method_entry = d->pfifo.regs[NV_PFIFO_CACHE1_METHOD + get*2]; - uint32_t parameter = d->pfifo.regs[NV_PFIFO_CACHE1_DATA + get*2]; - - uint32_t new_get = (get+4) & 0x1fc; - *get_reg = new_get; 
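(CACHE1 is a small ring of method/data word pairs; GET and PUT are byte offsets that step by one 4-byte entry and wrap after 128 entries, which is what the (get+4) & 0x1fc above implements. A minimal sketch of that arithmetic, with the entry count named only for illustration:)

#define CACHE1_ENTRIES 128

/* Advance a CACHE1 GET/PUT offset by one entry, wrapping at the ring size. */
static uint32_t cache1_advance(uint32_t offset)
{
    return (offset + 4) & ((CACHE1_ENTRIES * 4) - 4); /* == (offset + 4) & 0x1fc */
}

The low/high marks are the empty/full flags for this ring: the puller sets LOW_MARK when its advanced GET catches PUT, and the pusher later sets HIGH_MARK when its advanced PUT catches GET.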
- - if (new_get == put) { - // set low mark - *status |= NV_PFIFO_CACHE1_STATUS_LOW_MARK; - } - if (*status & NV_PFIFO_CACHE1_STATUS_HIGH_MARK) { - // unset high mark - *status &= ~NV_PFIFO_CACHE1_STATUS_HIGH_MARK; - // signal pusher - qemu_cond_signal(&d->pfifo.pusher_cond); - } - - - uint32_t method = method_entry & 0x1FFC; - uint32_t subchannel = GET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_SUBCHANNEL); - - // NV2A_DPRINTF("pull %d 0x%x 0x%x - subch %d\n", get/4, method_entry, parameter, subchannel); - - if (method == 0) { - RAMHTEntry entry = ramht_lookup(d, parameter); - assert(entry.valid); - - // assert(entry.channel_id == state->channel_id); - - assert(entry.engine == ENGINE_GRAPHICS); - - - /* the engine is bound to the subchannel */ - assert(subchannel < 8); - SET_MASK(*engine_reg, 3 << (4*subchannel), entry.engine); - SET_MASK(*pull1, NV_PFIFO_CACHE1_PULL1_ENGINE, entry.engine); - // NV2A_DPRINTF("engine_reg1 %d 0x%x\n", subchannel, *engine_reg); - - - // TODO: this is fucked - qemu_mutex_lock(&d->pgraph.lock); - //make pgraph busy - qemu_mutex_unlock(&d->pfifo.lock); - - pgraph_context_switch(d, entry.channel_id); - pgraph_wait_fifo_access(d); - pgraph_method(d, subchannel, 0, entry.instance); - - // make pgraph not busy - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - - } else if (method >= 0x100) { - // method passed to engine - - /* methods that take objects. - * TODO: Check this range is correct for the nv2a */ - if (method >= 0x180 && method < 0x200) { - //qemu_mutex_lock_iothread(); - RAMHTEntry entry = ramht_lookup(d, parameter); - assert(entry.valid); - // assert(entry.channel_id == state->channel_id); - parameter = entry.instance; - //qemu_mutex_unlock_iothread(); - } - - enum FIFOEngine engine = GET_MASK(*engine_reg, 3 << (4*subchannel)); - // NV2A_DPRINTF("engine_reg2 %d 0x%x\n", subchannel, *engine_reg); - assert(engine == ENGINE_GRAPHICS); - SET_MASK(*pull1, NV_PFIFO_CACHE1_PULL1_ENGINE, engine); - - // TODO: this is fucked - qemu_mutex_lock(&d->pgraph.lock); - //make pgraph busy - qemu_mutex_unlock(&d->pfifo.lock); - - pgraph_wait_fifo_access(d); - pgraph_method(d, subchannel, method, parameter); - - // make pgraph not busy - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - } else { - assert(false); - } - - } -} - -static void* pfifo_puller_thread(void *arg) -{ - NV2AState *d = arg; - - glo_set_current(d->pgraph.gl_context); - - qemu_mutex_lock(&d->pfifo.lock); - while (true) { - pfifo_run_puller(d); - qemu_cond_wait(&d->pfifo.puller_cond, &d->pfifo.lock); - - if (d->exiting) { - break; - } - } - qemu_mutex_unlock(&d->pfifo.lock); - - return NULL; -} - -static void pfifo_run_pusher(NV2AState *d) -{ - uint32_t *push0 = &d->pfifo.regs[NV_PFIFO_CACHE1_PUSH0]; - uint32_t *push1 = &d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1]; - uint32_t *dma_subroutine = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_SUBROUTINE]; - uint32_t *dma_state = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_STATE]; - uint32_t *dma_push = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUSH]; - uint32_t *dma_get = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET]; - uint32_t *dma_put = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT]; - uint32_t *dma_dcount = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_DCOUNT]; - - uint32_t *status = &d->pfifo.regs[NV_PFIFO_CACHE1_STATUS]; - uint32_t *get_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_GET]; - uint32_t *put_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_PUT]; - - if (!GET_MASK(*push0, NV_PFIFO_CACHE1_PUSH0_ACCESS)) return; - if (!GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS)) 
return; - - /* suspended */ - if (GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_STATUS)) return; - - // TODO: should we become busy here?? - // NV_PFIFO_CACHE1_DMA_PUSH_STATE _BUSY - - unsigned int channel_id = GET_MASK(*push1, - NV_PFIFO_CACHE1_PUSH1_CHID); - - - /* Channel running DMA mode */ - uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE]; - assert(channel_modes & (1 << channel_id)); - - assert(GET_MASK(*push1, NV_PFIFO_CACHE1_PUSH1_MODE) - == NV_PFIFO_CACHE1_PUSH1_MODE_DMA); - - /* We're running so there should be no pending errors... */ - assert(GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR) - == NV_PFIFO_CACHE1_DMA_STATE_ERROR_NONE); - - hwaddr dma_instance = - GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_DMA_INSTANCE], - NV_PFIFO_CACHE1_DMA_INSTANCE_ADDRESS) << 4; - - hwaddr dma_len; - uint8_t *dma = nv_dma_map(d, dma_instance, &dma_len); - - while (true) { - uint32_t dma_get_v = *dma_get; - uint32_t dma_put_v = *dma_put; - if (dma_get_v == dma_put_v) break; - if (dma_get_v >= dma_len) { - assert(false); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR, - NV_PFIFO_CACHE1_DMA_STATE_ERROR_PROTECTION); - break; - } - - uint32_t word = ldl_le_p((uint32_t*)(dma + dma_get_v)); - dma_get_v += 4; - - uint32_t method_type = - GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE); - uint32_t method_subchannel = - GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL); - uint32_t method = - GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD) << 2; - uint32_t method_count = - GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT); - - uint32_t subroutine_state = - GET_MASK(*dma_subroutine, NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE); - - if (method_count) { - /* full */ - if (*status & NV_PFIFO_CACHE1_STATUS_HIGH_MARK) return; - - - /* data word of methods command */ - d->pfifo.regs[NV_PFIFO_CACHE1_DMA_DATA_SHADOW] = word; - - uint32_t put = *put_reg; - uint32_t get = *get_reg; - - assert((method & 3) == 0); - uint32_t method_entry = 0; - SET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_ADDRESS, method >> 2); - SET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_TYPE, method_type); - SET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_SUBCHANNEL, method_subchannel); - - // NV2A_DPRINTF("push %d 0x%x 0x%x - subch %d\n", put/4, method_entry, word, method_subchannel); - - assert(put < 128*4 && (put%4) == 0); - d->pfifo.regs[NV_PFIFO_CACHE1_METHOD + put*2] = method_entry; - d->pfifo.regs[NV_PFIFO_CACHE1_DATA + put*2] = word; - - uint32_t new_put = (put+4) & 0x1fc; - *put_reg = new_put; - if (new_put == get) { - // set high mark - *status |= NV_PFIFO_CACHE1_STATUS_HIGH_MARK; - } - if (*status & NV_PFIFO_CACHE1_STATUS_LOW_MARK) { - // unset low mark - *status &= ~NV_PFIFO_CACHE1_STATUS_LOW_MARK; - // signal puller - qemu_cond_signal(&d->pfifo.puller_cond); - } - - if (method_type == NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_INC) { - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD, - (method + 4) >> 2); - } - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT, - method_count - 1); - (*dma_dcount)++; - } else { - /* no command active - this is the first word of a new one */ - d->pfifo.regs[NV_PFIFO_CACHE1_DMA_RSVD_SHADOW] = word; - - /* match all forms */ - if ((word & 0xe0000003) == 0x20000000) { - /* old jump */ - d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW] = - dma_get_v; - dma_get_v = word & 0x1fffffff; - NV2A_DPRINTF("pb OLD_JMP 0x%x\n", dma_get_v); - } else if ((word & 3) == 1) { - /* jump */ - d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET_JMP_SHADOW] = - dma_get_v; - 
dma_get_v = word & 0xfffffffc; - NV2A_DPRINTF("pb JMP 0x%x\n", dma_get_v); - } else if ((word & 3) == 2) { - /* call */ - if (subroutine_state) { - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR, - NV_PFIFO_CACHE1_DMA_STATE_ERROR_CALL); - break; - } else { - *dma_subroutine = dma_get_v; - SET_MASK(*dma_subroutine, - NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE, 1); - dma_get_v = word & 0xfffffffc; - NV2A_DPRINTF("pb CALL 0x%x\n", dma_get_v); - } - } else if (word == 0x00020000) { - /* return */ - if (!subroutine_state) { - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR, - NV_PFIFO_CACHE1_DMA_STATE_ERROR_RETURN); - // break; - } else { - dma_get_v = *dma_subroutine & 0xfffffffc; - SET_MASK(*dma_subroutine, - NV_PFIFO_CACHE1_DMA_SUBROUTINE_STATE, 0); - NV2A_DPRINTF("pb RET 0x%x\n", dma_get_v); - } - } else if ((word & 0xe0030003) == 0) { - /* increasing methods */ - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD, - (word & 0x1fff) >> 2 ); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL, - (word >> 13) & 7); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT, - (word >> 18) & 0x7ff); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE, - NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_INC); - *dma_dcount = 0; - } else if ((word & 0xe0030003) == 0x40000000) { - /* non-increasing methods */ - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD, - (word & 0x1fff) >> 2 ); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_SUBCHANNEL, - (word >> 13) & 7); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_COUNT, - (word >> 18) & 0x7ff); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE, - NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_NON_INC); - *dma_dcount = 0; - } else { - NV2A_DPRINTF("pb reserved cmd 0x%x - 0x%x\n", - dma_get_v, word); - SET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR, - NV_PFIFO_CACHE1_DMA_STATE_ERROR_RESERVED_CMD); - // break; - assert(false); - } - } - - *dma_get = dma_get_v; - - if (GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR)) { - break; - } - } - - // NV2A_DPRINTF("DMA pusher done: max 0x%" HWADDR_PRIx ", 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx "\n", - // dma_len, control->dma_get, control->dma_put); - - uint32_t error = GET_MASK(*dma_state, NV_PFIFO_CACHE1_DMA_STATE_ERROR); - if (error) { - NV2A_DPRINTF("pb error: %d\n", error); - assert(false); - - SET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_STATUS, 1); /* suspended */ - - // d->pfifo.pending_interrupts |= NV_PFIFO_INTR_0_DMA_PUSHER; - // update_irq(d); - } -} - -static void* pfifo_pusher_thread(void *arg) -{ - NV2AState *d = arg; - - qemu_mutex_lock(&d->pfifo.lock); - while (true) { - pfifo_run_pusher(d); - qemu_cond_wait(&d->pfifo.pusher_cond, &d->pfifo.lock); - - if (d->exiting) { - break; - } - } - qemu_mutex_unlock(&d->pfifo.lock); - - return NULL; -} - - - - - -/* PMC - card master control */ -static uint64_t pmc_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - uint64_t r = 0; - switch (addr) { - case NV_PMC_BOOT_0: - /* chipset and stepping: - * NV2A, A02, Rev 0 */ - - r = 0x02A000A2; - break; - case NV_PMC_INTR_0: - /* Shows which functional units have pending IRQ */ - r = d->pmc.pending_interrupts; - break; - case NV_PMC_INTR_EN_0: - /* Selects which functional units can cause IRQs */ - r = d->pmc.enabled_interrupts; - break; - default: - break; - } - - reg_log_read(NV_PMC, addr, r); - return r; -} -static void pmc_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - 
reg_log_write(NV_PMC, addr, val); - - switch (addr) { - case NV_PMC_INTR_0: - /* the bits of the interrupts to clear are wrtten */ - d->pmc.pending_interrupts &= ~val; - update_irq(d); - break; - case NV_PMC_INTR_EN_0: - d->pmc.enabled_interrupts = val; - update_irq(d); - break; - default: - break; - } -} - - -/* PBUS - bus control */ -static uint64_t pbus_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - uint64_t r = 0; - switch (addr) { - case NV_PBUS_PCI_NV_0: - r = pci_get_long(d->dev.config + PCI_VENDOR_ID); - break; - case NV_PBUS_PCI_NV_1: - r = pci_get_long(d->dev.config + PCI_COMMAND); - break; - case NV_PBUS_PCI_NV_2: - r = pci_get_long(d->dev.config + PCI_CLASS_REVISION); - break; - default: - break; - } - - reg_log_read(NV_PBUS, addr, r); - return r; -} -static void pbus_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PBUS, addr, val); - - switch (addr) { - case NV_PBUS_PCI_NV_1: - pci_set_long(d->dev.config + PCI_COMMAND, val); - break; - default: - break; - } -} - - -/* PFIFO - MMIO and DMA FIFO submission to PGRAPH and VPE */ -static uint64_t pfifo_read(void *opaque, - hwaddr addr, unsigned int size) -{ - int i; - NV2AState *d = opaque; - - qemu_mutex_lock(&d->pfifo.lock); - - uint64_t r = 0; - switch (addr) { - case NV_PFIFO_INTR_0: - r = d->pfifo.pending_interrupts; - break; - case NV_PFIFO_INTR_EN_0: - r = d->pfifo.enabled_interrupts; - break; - case NV_PFIFO_RUNOUT_STATUS: - r = NV_PFIFO_RUNOUT_STATUS_LOW_MARK; /* low mark empty */ - break; - default: - r = d->pfifo.regs[addr]; - break; - } - - qemu_mutex_unlock(&d->pfifo.lock); - - reg_log_read(NV_PFIFO, addr, r); - return r; -} -static void pfifo_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - int i; - NV2AState *d = opaque; - - reg_log_write(NV_PFIFO, addr, val); - - qemu_mutex_lock(&d->pfifo.lock); - - switch (addr) { - case NV_PFIFO_INTR_0: - d->pfifo.pending_interrupts &= ~val; - update_irq(d); - break; - case NV_PFIFO_INTR_EN_0: - d->pfifo.enabled_interrupts = val; - update_irq(d); - break; - default: - d->pfifo.regs[addr] = val; - break; - } - - qemu_cond_broadcast(&d->pfifo.pusher_cond); - qemu_cond_broadcast(&d->pfifo.puller_cond); - - qemu_mutex_unlock(&d->pfifo.lock); -} - - -static uint64_t prma_read(void *opaque, - hwaddr addr, unsigned int size) -{ - reg_log_read(NV_PRMA, addr, 0); - return 0; -} -static void prma_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - reg_log_write(NV_PRMA, addr, val); -} - - -static void pvideo_vga_invalidate(NV2AState *d) -{ - int y1 = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], - NV_PVIDEO_POINT_OUT_Y); - int y2 = y1 + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], - NV_PVIDEO_SIZE_OUT_HEIGHT); - NV2A_DPRINTF("pvideo_vga_invalidate %d %d\n", y1, y2); - vga_invalidate_scanlines(&d->vga, y1, y2); -} - -static uint64_t pvideo_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - uint64_t r = 0; - switch (addr) { - case NV_PVIDEO_STOP: - r = 0; - break; - default: - r = d->pvideo.regs[addr]; - break; - } - - reg_log_read(NV_PVIDEO, addr, r); - return r; -} -static void pvideo_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PVIDEO, addr, val); - - switch (addr) { - case NV_PVIDEO_BUFFER: - d->pvideo.regs[addr] = val; - d->vga.enable_overlay = true; - pvideo_vga_invalidate(d); - break; - case NV_PVIDEO_STOP: - 
d->pvideo.regs[NV_PVIDEO_BUFFER] = 0; - d->vga.enable_overlay = false; - pvideo_vga_invalidate(d); - break; - default: - d->pvideo.regs[addr] = val; - break; - } -} - - - - -/* PIMTER - time measurement and time-based alarms */ -static uint64_t ptimer_get_clock(NV2AState *d) -{ - return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), - d->pramdac.core_clock_freq * d->ptimer.numerator, - get_ticks_per_sec() * d->ptimer.denominator); -} -static uint64_t ptimer_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - uint64_t r = 0; - switch (addr) { - case NV_PTIMER_INTR_0: - r = d->ptimer.pending_interrupts; - break; - case NV_PTIMER_INTR_EN_0: - r = d->ptimer.enabled_interrupts; - break; - case NV_PTIMER_NUMERATOR: - r = d->ptimer.numerator; - break; - case NV_PTIMER_DENOMINATOR: - r = d->ptimer.denominator; - break; - case NV_PTIMER_TIME_0: - r = (ptimer_get_clock(d) & 0x7ffffff) << 5; - break; - case NV_PTIMER_TIME_1: - r = (ptimer_get_clock(d) >> 27) & 0x1fffffff; - break; - default: - break; - } - - reg_log_read(NV_PTIMER, addr, r); - return r; -} -static void ptimer_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PTIMER, addr, val); - - switch (addr) { - case NV_PTIMER_INTR_0: - d->ptimer.pending_interrupts &= ~val; - update_irq(d); - break; - case NV_PTIMER_INTR_EN_0: - d->ptimer.enabled_interrupts = val; - update_irq(d); - break; - case NV_PTIMER_DENOMINATOR: - d->ptimer.denominator = val; - break; - case NV_PTIMER_NUMERATOR: - d->ptimer.numerator = val; - break; - case NV_PTIMER_ALARM_0: - d->ptimer.alarm_time = val; - break; - default: - break; - } -} - - -static uint64_t pcounter_read(void *opaque, - hwaddr addr, unsigned int size) -{ - reg_log_read(NV_PCOUNTER, addr, 0); - return 0; -} -static void pcounter_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - reg_log_write(NV_PCOUNTER, addr, val); -} - - -static uint64_t pvpe_read(void *opaque, - hwaddr addr, unsigned int size) -{ - reg_log_read(NV_PVPE, addr, 0); - return 0; -} -static void pvpe_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - reg_log_write(NV_PVPE, addr, val); -} - - -static uint64_t ptv_read(void *opaque, - hwaddr addr, unsigned int size) -{ - reg_log_read(NV_PTV, addr, 0); - return 0; -} -static void ptv_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - reg_log_write(NV_PTV, addr, val); -} - - -static uint64_t prmfb_read(void *opaque, - hwaddr addr, unsigned int size) -{ - reg_log_read(NV_PRMFB, addr, 0); - return 0; -} -static void prmfb_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - reg_log_write(NV_PRMFB, addr, val); -} - - -/* PRMVIO - aliases VGA sequencer and graphics controller registers */ -static uint64_t prmvio_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - uint64_t r = vga_ioport_read(&d->vga, addr); - - reg_log_read(NV_PRMVIO, addr, r); - return r; -} -static void prmvio_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PRMVIO, addr, val); - - vga_ioport_write(&d->vga, addr, val); -} - - -static uint64_t pfb_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - uint64_t r = 0; - switch (addr) { - case NV_PFB_CFG0: - /* 3-4 memory partitions. The debug bios checks this. 
*/ - r = 3; - break; - case NV_PFB_CSTATUS: - r = memory_region_size(d->vram); - break; - case NV_PFB_WBC: - r = 0; /* Flush not pending. */ - break; - default: - r = d->pfb.regs[addr]; - break; - } - - reg_log_read(NV_PFB, addr, r); - return r; -} -static void pfb_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PFB, addr, val); - - switch (addr) { - default: - d->pfb.regs[addr] = val; - break; - } -} - - -static uint64_t pstraps_read(void *opaque, - hwaddr addr, unsigned int size) -{ - reg_log_read(NV_PSTRAPS, addr, 0); - return 0; -} -static void pstraps_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - reg_log_write(NV_PSTRAPS, addr, val); -} - -/* PGRAPH - accelerated 2d/3d drawing engine */ -static uint64_t pgraph_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - qemu_mutex_lock(&d->pgraph.lock); - - uint64_t r = 0; - switch (addr) { - case NV_PGRAPH_INTR: - r = d->pgraph.pending_interrupts; - break; - case NV_PGRAPH_INTR_EN: - r = d->pgraph.enabled_interrupts; - break; - default: - r = d->pgraph.regs[addr]; - break; - } - - qemu_mutex_unlock(&d->pgraph.lock); - - reg_log_read(NV_PGRAPH, addr, r); - return r; -} -static void pgraph_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PGRAPH, addr, val); - - qemu_mutex_lock(&d->pgraph.lock); - - switch (addr) { - case NV_PGRAPH_INTR: - d->pgraph.pending_interrupts &= ~val; - qemu_cond_broadcast(&d->pgraph.interrupt_cond); - break; - case NV_PGRAPH_INTR_EN: - d->pgraph.enabled_interrupts = val; - break; - case NV_PGRAPH_INCREMENT: - if (val & NV_PGRAPH_INCREMENT_READ_3D) { - SET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D, - (GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D)+1) - % GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_MODULO_3D) ); - qemu_cond_broadcast(&d->pgraph.flip_3d); - } - break; - case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { - hwaddr context_address = - GET_MASK(d->pgraph.regs[NV_PGRAPH_CHANNEL_CTX_POINTER], NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; - - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { - unsigned pgraph_channel_id = - GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", - pgraph_channel_id, context_address); - - assert(context_address < memory_region_size(&d->ramin)); - - uint8_t *context_ptr = d->ramin_ptr + context_address; - uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); - - NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); - - d->pgraph.regs[NV_PGRAPH_CTX_USER] = context_user; - // pgraph_set_context_user(d, context_user); - } - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { - /* do stuff ... 
*/ - } - - break; - } - default: - d->pgraph.regs[addr] = val; - break; - } - - // events - switch (addr) { - case NV_PGRAPH_FIFO: - qemu_cond_broadcast(&d->pgraph.fifo_access_cond); - break; - } - - qemu_mutex_unlock(&d->pgraph.lock); -} - - -static uint64_t pcrtc_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - uint64_t r = 0; - switch (addr) { - case NV_PCRTC_INTR_0: - r = d->pcrtc.pending_interrupts; - break; - case NV_PCRTC_INTR_EN_0: - r = d->pcrtc.enabled_interrupts; - break; - case NV_PCRTC_START: - r = d->pcrtc.start; - break; - default: - break; - } - - reg_log_read(NV_PCRTC, addr, r); - return r; -} -static void pcrtc_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PCRTC, addr, val); - - switch (addr) { - case NV_PCRTC_INTR_0: - d->pcrtc.pending_interrupts &= ~val; - update_irq(d); - break; - case NV_PCRTC_INTR_EN_0: - d->pcrtc.enabled_interrupts = val; - update_irq(d); - break; - case NV_PCRTC_START: - val &= 0x07FFFFFF; - assert(val < memory_region_size(d->vram)); - d->pcrtc.start = val; - - NV2A_DPRINTF("PCRTC_START - %x %x %x %x\n", - d->vram_ptr[val+64], d->vram_ptr[val+64+1], - d->vram_ptr[val+64+2], d->vram_ptr[val+64+3]); - break; - default: - break; - } -} - - -/* PRMCIO - aliases VGA CRTC and attribute controller registers */ -static uint64_t prmcio_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - uint64_t r = vga_ioport_read(&d->vga, addr); - - reg_log_read(NV_PRMCIO, addr, r); - return r; -} -static void prmcio_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_PRMCIO, addr, val); - - switch (addr) { - case VGA_ATT_W: - /* Cromwell sets attrs without enabling VGA_AR_ENABLE_DISPLAY - * (which should result in a blank screen). - * Either nvidia's hardware is lenient or it is set through - * something else. The former seems more likely. - */ - if (d->vga.ar_flip_flop == 0) { - val |= VGA_AR_ENABLE_DISPLAY; - } - break; - default: - break; - } - - vga_ioport_write(&d->vga, addr, val); -} - - -static uint64_t pramdac_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - uint64_t r = 0; - switch (addr & ~3) { - case NV_PRAMDAC_NVPLL_COEFF: - r = d->pramdac.core_clock_coeff; - break; - case NV_PRAMDAC_MPLL_COEFF: - r = d->pramdac.memory_clock_coeff; - break; - case NV_PRAMDAC_VPLL_COEFF: - r = d->pramdac.video_clock_coeff; - break; - case NV_PRAMDAC_PLL_TEST_COUNTER: - /* emulated PLLs locked instantly? 
*/ - r = NV_PRAMDAC_PLL_TEST_COUNTER_VPLL2_LOCK - | NV_PRAMDAC_PLL_TEST_COUNTER_NVPLL_LOCK - | NV_PRAMDAC_PLL_TEST_COUNTER_MPLL_LOCK - | NV_PRAMDAC_PLL_TEST_COUNTER_VPLL_LOCK; - break; - default: - break; - } - - /* Surprisingly, QEMU doesn't handle unaligned access for you properly */ - r >>= 32 - 8 * size - 8 * (addr & 3); - - NV2A_DPRINTF("PRAMDAC: read %d [0x%" HWADDR_PRIx "] -> %" HWADDR_PRIx "\n", size, addr, r); - return r; -} -static void pramdac_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - uint32_t m, n, p; - - reg_log_write(NV_PRAMDAC, addr, val); - - switch (addr) { - case NV_PRAMDAC_NVPLL_COEFF: - d->pramdac.core_clock_coeff = val; - - m = val & NV_PRAMDAC_NVPLL_COEFF_MDIV; - n = (val & NV_PRAMDAC_NVPLL_COEFF_NDIV) >> 8; - p = (val & NV_PRAMDAC_NVPLL_COEFF_PDIV) >> 16; - - if (m == 0) { - d->pramdac.core_clock_freq = 0; - } else { - d->pramdac.core_clock_freq = (NV2A_CRYSTAL_FREQ * n) - / (1 << p) / m; - } - - break; - case NV_PRAMDAC_MPLL_COEFF: - d->pramdac.memory_clock_coeff = val; - break; - case NV_PRAMDAC_VPLL_COEFF: - d->pramdac.video_clock_coeff = val; - break; - default: - break; - } -} - - -static uint64_t prmdio_read(void *opaque, - hwaddr addr, unsigned int size) -{ - reg_log_read(NV_PRMDIO, addr, 0); - return 0; -} -static void prmdio_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - reg_log_write(NV_PRMDIO, addr, val); -} - - -/* PRAMIN - RAMIN access */ -/* -static uint64_t pramin_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2A_DPRINTF("nv2a PRAMIN: read [0x%" HWADDR_PRIx "] -> 0x%" HWADDR_PRIx "\n", addr, r); - return 0; -} -static void pramin_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2A_DPRINTF("nv2a PRAMIN: [0x%" HWADDR_PRIx "] = 0x%02llx\n", addr, val); -}*/ - - -/* USER - PFIFO MMIO and DMA submission area */ -static uint64_t user_read(void *opaque, - hwaddr addr, unsigned int size) -{ - NV2AState *d = opaque; - - unsigned int channel_id = addr >> 16; - assert(channel_id < NV2A_NUM_CHANNELS); - - qemu_mutex_lock(&d->pfifo.lock); - - uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE]; - - uint64_t r = 0; - if (channel_modes & (1 << channel_id)) { - /* DMA Mode */ - - unsigned int cur_channel_id = - GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1], - NV_PFIFO_CACHE1_PUSH1_CHID); - - if (channel_id == cur_channel_id) { - switch (addr & 0xFFFF) { - case NV_USER_DMA_PUT: - r = d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT]; - break; - case NV_USER_DMA_GET: - r = d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET]; - break; - case NV_USER_REF: - r = d->pfifo.regs[NV_PFIFO_CACHE1_REF]; - break; - default: - break; - } - } else { - /* ramfc */ - assert(false); - } - } else { - /* PIO Mode */ - assert(false); - } - - qemu_mutex_unlock(&d->pfifo.lock); - - reg_log_read(NV_USER, addr, r); - return r; -} -static void user_write(void *opaque, hwaddr addr, - uint64_t val, unsigned int size) -{ - NV2AState *d = opaque; - - reg_log_write(NV_USER, addr, val); - - unsigned int channel_id = addr >> 16; - assert(channel_id < NV2A_NUM_CHANNELS); - - qemu_mutex_lock(&d->pfifo.lock); - - uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE]; - if (channel_modes & (1 << channel_id)) { - /* DMA Mode */ - unsigned int cur_channel_id = - GET_MASK(d->pfifo.regs[NV_PFIFO_CACHE1_PUSH1], - NV_PFIFO_CACHE1_PUSH1_CHID); - - if (channel_id == cur_channel_id) { - switch (addr & 0xFFFF) { - case NV_USER_DMA_PUT: - d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT] = val; - break; - case NV_USER_DMA_GET: 
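/* Editor's note: the NVPLL write handler above derives the core clock from
 * the M/N/P coefficient fields as crystal * N / (M * 2^P). A hedged,
 * self-contained sketch of the same computation (field widths are an
 * assumption; only the shifts visible in the handler are taken from the
 * patch):
 *
 *     static uint64_t nvpll_to_hz(uint32_t coeff, uint64_t crystal_hz)
 *     {
 *         uint32_t m = coeff & 0xFF;          // NV_PRAMDAC_NVPLL_COEFF_MDIV
 *         uint32_t n = (coeff >> 8) & 0xFF;   // NV_PRAMDAC_NVPLL_COEFF_NDIV
 *         uint32_t p = (coeff >> 16) & 0x7;   // NV_PRAMDAC_NVPLL_COEFF_PDIV
 *         return m ? (crystal_hz * n) / ((uint64_t)m << p) : 0;
 *     }
 *
 * The reset default 0x00011c01 (m = 1, n = 0x1c, p = 1) yields crystal * 14,
 * which matches the 189 MHz default noted later in this patch for a 13.5 MHz
 * crystal. */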
- d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET] = val; - break; - case NV_USER_REF: - d->pfifo.regs[NV_PFIFO_CACHE1_REF] = val; - break; - default: - assert(false); - break; - } - - // kick pfifo - qemu_cond_broadcast(&d->pfifo.pusher_cond); - qemu_cond_broadcast(&d->pfifo.puller_cond); - - } else { - /* ramfc */ - assert(false); - } - } else { - /* PIO Mode */ - assert(false); - } - - qemu_mutex_unlock(&d->pfifo.lock); - -} - - - - -typedef struct NV2ABlockInfo { - const char* name; - hwaddr offset; - uint64_t size; - MemoryRegionOps ops; -} NV2ABlockInfo; - -static const struct NV2ABlockInfo blocktable[] = { - [ NV_PMC ] = { - .name = "PMC", - .offset = 0x000000, - .size = 0x001000, - .ops = { - .read = pmc_read, - .write = pmc_write, - }, - }, - [ NV_PBUS ] = { - .name = "PBUS", - .offset = 0x001000, - .size = 0x001000, - .ops = { - .read = pbus_read, - .write = pbus_write, - }, - }, - [ NV_PFIFO ] = { - .name = "PFIFO", - .offset = 0x002000, - .size = 0x002000, - .ops = { - .read = pfifo_read, - .write = pfifo_write, - }, - }, - [ NV_PRMA ] = { - .name = "PRMA", - .offset = 0x007000, - .size = 0x001000, - .ops = { - .read = prma_read, - .write = prma_write, - }, - }, - [ NV_PVIDEO ] = { - .name = "PVIDEO", - .offset = 0x008000, - .size = 0x001000, - .ops = { - .read = pvideo_read, - .write = pvideo_write, - }, - }, - [ NV_PTIMER ] = { - .name = "PTIMER", - .offset = 0x009000, - .size = 0x001000, - .ops = { - .read = ptimer_read, - .write = ptimer_write, - }, - }, - [ NV_PCOUNTER ] = { - .name = "PCOUNTER", - .offset = 0x00a000, - .size = 0x001000, - .ops = { - .read = pcounter_read, - .write = pcounter_write, - }, - }, - [ NV_PVPE ] = { - .name = "PVPE", - .offset = 0x00b000, - .size = 0x001000, - .ops = { - .read = pvpe_read, - .write = pvpe_write, - }, - }, - [ NV_PTV ] = { - .name = "PTV", - .offset = 0x00d000, - .size = 0x001000, - .ops = { - .read = ptv_read, - .write = ptv_write, - }, - }, - [ NV_PRMFB ] = { - .name = "PRMFB", - .offset = 0x0a0000, - .size = 0x020000, - .ops = { - .read = prmfb_read, - .write = prmfb_write, - }, - }, - [ NV_PRMVIO ] = { - .name = "PRMVIO", - .offset = 0x0c0000, - .size = 0x001000, - .ops = { - .read = prmvio_read, - .write = prmvio_write, - }, - }, - [ NV_PFB ] = { - .name = "PFB", - .offset = 0x100000, - .size = 0x001000, - .ops = { - .read = pfb_read, - .write = pfb_write, - }, - }, - [ NV_PSTRAPS ] = { - .name = "PSTRAPS", - .offset = 0x101000, - .size = 0x001000, - .ops = { - .read = pstraps_read, - .write = pstraps_write, - }, - }, - [ NV_PGRAPH ] = { - .name = "PGRAPH", - .offset = 0x400000, - .size = 0x002000, - .ops = { - .read = pgraph_read, - .write = pgraph_write, - }, - }, - [ NV_PCRTC ] = { - .name = "PCRTC", - .offset = 0x600000, - .size = 0x001000, - .ops = { - .read = pcrtc_read, - .write = pcrtc_write, - }, - }, - [ NV_PRMCIO ] = { - .name = "PRMCIO", - .offset = 0x601000, - .size = 0x001000, - .ops = { - .read = prmcio_read, - .write = prmcio_write, - }, - }, - [ NV_PRAMDAC ] = { - .name = "PRAMDAC", - .offset = 0x680000, - .size = 0x001000, - .ops = { - .read = pramdac_read, - .write = pramdac_write, - }, - }, - [ NV_PRMDIO ] = { - .name = "PRMDIO", - .offset = 0x681000, - .size = 0x001000, - .ops = { - .read = prmdio_read, - .write = prmdio_write, - }, - }, - /*[ NV_PRAMIN ] = { - .name = "PRAMIN", - .offset = 0x700000, - .size = 0x100000, - .ops = { - .read = pramin_read, - .write = pramin_write, - }, - },*/ - [ NV_USER ] = { - .name = "USER", - .offset = 0x800000, - .size = 0x800000, - .ops = { - .read = user_read, - .write = 
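/* Editor's note: the blocktable defined here carves the 16 MB BAR0 MMIO
 * window into the usual NV register blocks (PMC at 0x000000, PFIFO at
 * 0x002000, PGRAPH at 0x400000, USER at 0x800000, and so on); each entry is
 * registered as its own MemoryRegion, so the handlers receive block-relative
 * offsets. A hedged sketch of the equivalent manual decode, for illustration
 * only:
 *
 *     static const NV2ABlockInfo *find_block(hwaddr bar0_offset)
 *     {
 *         for (size_t i = 0; i < ARRAY_SIZE(blocktable); i++) {
 *             const NV2ABlockInfo *b = &blocktable[i];
 *             if (b->name && bar0_offset >= b->offset &&
 *                 bar0_offset < b->offset + b->size) {
 *                 return b;
 *             }
 *         }
 *         return NULL;   // falls in a hole not modelled by this table
 *     }
 */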
user_write, - }, - }, -}; - -static const char* nv2a_reg_names[] = {}; -static const char* nv2a_method_names[] = {}; - -static void reg_log_read(int block, hwaddr addr, uint64_t val) { - if (blocktable[block].name) { - hwaddr naddr = blocktable[block].offset + addr; - if (naddr < ARRAY_SIZE(nv2a_reg_names) && nv2a_reg_names[naddr]) { - NV2A_DPRINTF("%s: read [%s] -> 0x%" PRIx64 "\n", - blocktable[block].name, nv2a_reg_names[naddr], val); - } else { - NV2A_DPRINTF("%s: read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", - blocktable[block].name, addr, val); - } - } else { - NV2A_DPRINTF("(%d?): read [%" HWADDR_PRIx "] -> 0x%" PRIx64 "\n", - block, addr, val); - } -} - -static void reg_log_write(int block, hwaddr addr, uint64_t val) { - if (blocktable[block].name) { - hwaddr naddr = blocktable[block].offset + addr; - if (naddr < ARRAY_SIZE(nv2a_reg_names) && nv2a_reg_names[naddr]) { - NV2A_DPRINTF("%s: [%s] = 0x%" PRIx64 "\n", - blocktable[block].name, nv2a_reg_names[naddr], val); - } else { - NV2A_DPRINTF("%s: [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", - blocktable[block].name, addr, val); - } - } else { - NV2A_DPRINTF("(%d?): [%" HWADDR_PRIx "] = 0x%" PRIx64 "\n", - block, addr, val); - } -} static void pgraph_method_log(unsigned int subchannel, unsigned int graphics_class, unsigned int method, uint32_t parameter) { @@ -6134,28 +2556,28 @@ static void pgraph_method_log(unsigned int subchannel, subchannel, last, count); } if (method != 0x1800) { - const char* method_name = NULL; - unsigned int nmethod = 0; - switch (graphics_class) { - case NV_KELVIN_PRIMITIVE: - nmethod = method | (0x5c << 16); - break; - case NV_CONTEXT_SURFACES_2D: - nmethod = method | (0x6d << 16); - break; - default: - break; - } - if (nmethod != 0 && nmethod < ARRAY_SIZE(nv2a_method_names)) { - method_name = nv2a_method_names[nmethod]; - } - if (method_name) { - NV2A_DPRINTF("pgraph method (%d): %s (0x%x)\n", - subchannel, method_name, parameter); - } else { + // const char* method_name = NULL; + // unsigned int nmethod = 0; + // switch (graphics_class) { + // case NV_KELVIN_PRIMITIVE: + // nmethod = method | (0x5c << 16); + // break; + // case NV_CONTEXT_SURFACES_2D: + // nmethod = method | (0x6d << 16); + // break; + // default: + // break; + // } + // if (nmethod != 0 && nmethod < ARRAY_SIZE(nv2a_method_names)) { + // method_name = nv2a_method_names[nmethod]; + // } + // if (method_name) { + // NV2A_DPRINTF("pgraph method (%d): %s (0x%x)\n", + // subchannel, method_name, parameter); + // } else { NV2A_DPRINTF("pgraph method (%d): 0x%x -> 0x%04x (0x%x)\n", subchannel, graphics_class, method, parameter); - } + // } } if (method == last) { count++; } @@ -6163,277 +2585,1890 @@ static void pgraph_method_log(unsigned int subchannel, last = method; } -static void nv2a_overlay_draw_line(VGACommonState *vga, uint8_t *line, int y) +static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, + unsigned int attr) { - NV2A_DPRINTF("nv2a_overlay_draw_line\n"); + int i; + VertexAttribute *attribute = &pg->vertex_attributes[attr]; - NV2AState *d = container_of(vga, NV2AState, vga); - DisplaySurface *surface = qemu_console_surface(d->vga.con); + if (attribute->inline_buffer || pg->inline_buffer_length == 0) { + return; + } - int surf_bpp = surface_bytes_per_pixel(surface); - int surf_width = surface_width(surface); + /* Now upload the previous attribute value */ + attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH + * sizeof(float) * 4); + for (i = 0; i < pg->inline_buffer_length; i++) { + 
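/* Editor's note: pgraph_allocate_inline_buffer_vertices() runs when an
 * attribute changes part-way through an inline vertex batch; the loop around
 * this note back-fills the freshly allocated buffer so every vertex already
 * emitted carries the attribute's current "sticky" value, and
 * pgraph_finish_inline_buffer_vertex() below then appends one value per
 * attribute for each new vertex. A minimal sketch of the back-fill, assuming
 * 4-component float attributes as above:
 *
 *     for (unsigned v = 0; v < vertices_emitted_so_far; v++) {
 *         memcpy(&buffer[v * 4], current_value, 4 * sizeof(float));
 *     }
 */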
memcpy(&attribute->inline_buffer[i * 4], + attribute->inline_value, + sizeof(float) * 4); + } +} - if (!(d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)) return; +static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) +{ + int i; - hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; - hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; - hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; + assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); - int in_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], - NV_PVIDEO_SIZE_IN_WIDTH); - int in_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], - NV_PVIDEO_SIZE_IN_HEIGHT); - int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_S); - int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_T); - int in_pitch = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], - NV_PVIDEO_FORMAT_PITCH); - int in_color = GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], - NV_PVIDEO_FORMAT_COLOR); + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attribute = &pg->vertex_attributes[i]; + if (attribute->inline_buffer) { + memcpy(&attribute->inline_buffer[ + pg->inline_buffer_length * 4], + attribute->inline_value, + sizeof(float) * 4); + } + } - // TODO: support other color formats - assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); + pg->inline_buffer_length++; +} - int out_width = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], - NV_PVIDEO_SIZE_OUT_WIDTH); - int out_height = GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], - NV_PVIDEO_SIZE_OUT_HEIGHT); - int out_x = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], - NV_PVIDEO_POINT_OUT_X); - int out_y = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], - NV_PVIDEO_POINT_OUT_Y); +void pgraph_init(NV2AState *d) +{ + int i; + + PGRAPHState *pg = &d->pgraph; + + qemu_mutex_init(&pg->lock); + qemu_cond_init(&pg->interrupt_cond); + qemu_cond_init(&pg->fifo_access_cond); + qemu_cond_init(&pg->flip_3d); + + /* fire up opengl */ + + pg->gl_context = glo_context_create(); + assert(pg->gl_context); + +#ifdef DEBUG_NV2A_GL + glEnable(GL_DEBUG_OUTPUT); +#endif + + glextensions_init(); + + /* DXT textures */ + assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); + /* Internal RGB565 texture format */ + assert(glo_check_extension("GL_ARB_ES2_compatibility")); + + GLint max_vertex_attributes; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); + assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); - if (y < out_y || y >= out_y + out_height) return; + glGenFramebuffers(1, &pg->gl_framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer); - // TODO: scaling, color keys + /* need a valid framebuffer to start with */ + glGenTextures(1, &pg->gl_color_buffer); + glBindTexture(GL_TEXTURE_2D, pg->gl_color_buffer); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 640, 480, + 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, pg->gl_color_buffer, 0); - int in_y = y - out_y; - if (in_y >= in_height) return; + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) + == GL_FRAMEBUFFER_COMPLETE); - assert(offset + in_pitch * (in_y + 1) <= limit); - uint8_t *in_line = d->vram_ptr + base + offset + in_pitch * in_y; + //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); - int x; - for (x=0; x= surf_width) break; - int ix = in_s + x; - if (ix >= in_width) break; + pg->texture_cache = g_lru_cache_new_full( + 0, + NULL, + texture_key_destroy, + 0, + NULL, + texture_binding_destroy, + 
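/* Editor's note: the LRU texture cache created here (bounded to 512 entries
 * by g_lru_cache_set_max_size() just below) maps a TextureKey, i.e. the
 * decoded texture state plus a hash of the texel and palette bytes, to an
 * already uploaded GL texture binding, so re-binding an unchanged texture can
 * skip the expensive convert-and-upload path; see the USE_TEXTURE_CACHE block
 * in pgraph_bind_textures() further down. In sketch form, mirroring that
 * lookup:
 *
 *     TextureKey key = { .state = shape, .data_hash = content_hash };
 *     TextureBinding *b = g_lru_cache_get(pg->texture_cache, &key, &err);
 */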
texture_key_hash, + texture_key_equal, + texture_key_retrieve, + NULL, + NULL + ); - uint8_t r,g,b; - convert_yuy2_to_rgb(in_line, ix, &r, &g, &b); + g_lru_cache_set_max_size(pg->texture_cache, 512); - unsigned int pixel = vga->rgb_to_pixel(r, g, b); - switch (surf_bpp) { - case 1: - ((uint8_t*)line)[ox] = pixel; + pg->shader_cache = g_hash_table_new(shader_hash, shader_equal); + + + for (i=0; ivertex_attributes[i].gl_converted_buffer); + glGenBuffers(1, &pg->vertex_attributes[i].gl_inline_buffer); + } + glGenBuffers(1, &pg->gl_inline_array_buffer); + glGenBuffers(1, &pg->gl_element_buffer); + + glGenBuffers(1, &pg->gl_memory_buffer); + glBindBuffer(GL_ARRAY_BUFFER, pg->gl_memory_buffer); + glBufferData(GL_ARRAY_BUFFER, + memory_region_size(d->vram), + NULL, + GL_DYNAMIC_DRAW); + + glGenVertexArrays(1, &pg->gl_vertex_array); + glBindVertexArray(pg->gl_vertex_array); + + assert(glGetError() == GL_NO_ERROR); + + glo_set_current(NULL); +} + +void pgraph_destroy(PGRAPHState *pg) +{ + qemu_mutex_destroy(&pg->lock); + qemu_cond_destroy(&pg->interrupt_cond); + qemu_cond_destroy(&pg->fifo_access_cond); + qemu_cond_destroy(&pg->flip_3d); + + glo_set_current(pg->gl_context); + + if (pg->gl_color_buffer) { + glDeleteTextures(1, &pg->gl_color_buffer); + } + if (pg->gl_zeta_buffer) { + glDeleteTextures(1, &pg->gl_zeta_buffer); + } + glDeleteFramebuffers(1, &pg->gl_framebuffer); + + // TODO: clear out shader cached + // TODO: clear out texture cache + + glo_set_current(NULL); + + glo_context_destroy(pg->gl_context); +} + +static void pgraph_shader_update_constants(PGRAPHState *pg, + ShaderBinding *binding, + bool binding_changed, + bool vertex_program, + bool fixed_function) +{ + int i, j; + + /* update combiner constants */ + for (i = 0; i<= 8; i++) { + uint32_t constant[2]; + if (i == 8) { + /* final combiner */ + constant[0] = pg->regs[NV_PGRAPH_SPECFOGFACTOR0]; + constant[1] = pg->regs[NV_PGRAPH_SPECFOGFACTOR1]; + } else { + constant[0] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; + constant[1] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; + } + + for (j = 0; j < 2; j++) { + GLint loc = binding->psh_constant_loc[i][j]; + if (loc != -1) { + float value[4]; + value[0] = (float) ((constant[j] >> 16) & 0xFF) / 255.0f; + value[1] = (float) ((constant[j] >> 8) & 0xFF) / 255.0f; + value[2] = (float) (constant[j] & 0xFF) / 255.0f; + value[3] = (float) ((constant[j] >> 24) & 0xFF) / 255.0f; + + glUniform4fv(loc, 1, value); + } + } + } + if (binding->alpha_ref_loc != -1) { + float alpha_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], + NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; + glUniform1f(binding->alpha_ref_loc, alpha_ref); + } + + + /* For each texture stage */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + // char name[32]; + GLint loc; + + /* Bump luminance only during stages 1 - 3 */ + if (i > 0) { + loc = binding->bump_mat_loc[i]; + if (loc != -1) { + glUniformMatrix2fv(loc, 1, GL_FALSE, pg->bump_env_matrix[i - 1]); + } + loc = binding->bump_scale_loc[i]; + if (loc != -1) { + glUniform1f(loc, *(float*)&pg->regs[ + NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4]); + } + loc = binding->bump_offset_loc[i]; + if (loc != -1) { + glUniform1f(loc, *(float*)&pg->regs[ + NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4]); + } + } + + } + + if (binding->fog_color_loc != -1) { + uint32_t fog_color = pg->regs[NV_PGRAPH_FOGCOLOR]; + glUniform4f(binding->fog_color_loc, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, + 
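/* Editor's note: each register-combiner constant (and the fog colour handled
 * here) is stored as a packed 8-bit-per-channel word and expanded to
 * normalised floats before the glUniform* call. The unpacking convention used
 * for the combiner factors above is:
 *
 *     out[0] = ((c >> 16) & 0xFF) / 255.0f;   // red
 *     out[1] = ((c >>  8) & 0xFF) / 255.0f;   // green
 *     out[2] = ( c        & 0xFF) / 255.0f;   // blue
 *     out[3] = ((c >> 24) & 0xFF) / 255.0f;   // alpha
 *
 * i.e. an A8R8G8B8 layout with alpha in the top byte. */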
GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); + } + if (binding->fog_param_loc[0] != -1) { + glUniform1f(binding->fog_param_loc[0], + *(float*)&pg->regs[NV_PGRAPH_FOGPARAM0]); + } + if (binding->fog_param_loc[1] != -1) { + glUniform1f(binding->fog_param_loc[1], + *(float*)&pg->regs[NV_PGRAPH_FOGPARAM1]); + } + + + float zclip_max = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMAX]; + float zclip_min = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMIN]; + + if (fixed_function) { + /* update lighting constants */ + struct { + uint32_t* v; + bool* dirty; + GLint* locs; + size_t len; + } lighting_arrays[] = { + {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, + {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, + {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, + }; + + for (i=0; ilight_infinite_half_vector_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); + } + loc = binding->light_infinite_direction_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_direction[i]); + } + + loc = binding->light_local_position_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_position[i]); + } + loc = binding->light_local_attenuation_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_attenuation[i]); + } + } + + /* estimate the viewport by assuming it matches the surface ... */ + //FIXME: Get surface dimensions? + float m11 = 0.5 * pg->surface_shape.clip_width; + float m22 = -0.5 * pg->surface_shape.clip_height; + float m33 = zclip_max - zclip_min; + //float m41 = m11; + //float m42 = -m22; + float m43 = zclip_min; + //float m44 = 1.0; + + if (m33 == 0.0) { + m33 = 1.0; + } + float invViewport[16] = { + 1.0/m11, 0, 0, 0, + 0, 1.0/m22, 0, 0, + 0, 0, 1.0/m33, 0, + -1.0, 1.0, -m43/m33, 1.0 + }; + + if (binding->inv_viewport_loc != -1) { + glUniformMatrix4fv(binding->inv_viewport_loc, + 1, GL_FALSE, &invViewport[0]); + } + + } + + /* update vertex program constants */ + for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; + + GLint loc = binding->vsh_constant_loc[i]; + //assert(loc != -1); + if (loc != -1) { + glUniform4fv(loc, 1, (const GLfloat*)pg->vsh_constants[i]); + } + pg->vsh_constants_dirty[i] = false; + } + + if (binding->surface_size_loc != -1) { + glUniform2f(binding->surface_size_loc, pg->surface_shape.clip_width, + pg->surface_shape.clip_height); + } + + if (binding->clip_range_loc != -1) { + glUniform2f(binding->clip_range_loc, zclip_min, zclip_max); + } + +} + +static void pgraph_bind_shaders(PGRAPHState *pg) +{ + int i, j; + + bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], + NV_PGRAPH_CSV0_D_MODE) == 2; + + bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], + NV_PGRAPH_CSV0_D_MODE) == 0; + + int program_start = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], + NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); + + NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, + vertex_program ? "yes" : "no", + fixed_function ? 
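/* Editor's note: the invViewport matrix assembled above is the inverse of an
 * assumed viewport transform
 *     x_s = m11 * x + m11,   y_s = m22 * y - m22,   z_s = m33 * z + m43,
 * where m11 = clip_width / 2, m22 = -clip_height / 2,
 * m33 = zclip_max - zclip_min and m43 = zclip_min (the surface clip rectangle
 * stands in for the real viewport, as the FIXME notes). Inverting that affine
 * map gives the scales 1/m11, 1/m22, 1/m33 and the translation
 * (-1, 1, -m43/m33) that appear in the column-major array handed to
 * glUniformMatrix4fv, which the generated fixed-function vertex shader uses
 * to map screen-space-style coordinates back towards clip space. */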
"yes" : "no"); + + ShaderBinding* old_binding = pg->shader_binding; + + ShaderState state = { + .psh = (PshState){ + /* register combier stuff */ + .combiner_control = pg->regs[NV_PGRAPH_COMBINECTL], + .shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG], + .other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL], + .final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0], + .final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1], + + .alpha_test = pg->regs[NV_PGRAPH_CONTROL_0] + & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, + .alpha_func = (enum PshAlphaFunc)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], + NV_PGRAPH_CONTROL_0_ALPHAFUNC), + }, + + /* fixed function stuff */ + .skinning = (enum VshSkinning)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], + NV_PGRAPH_CSV0_D_SKIN), + .lighting = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], + NV_PGRAPH_CSV0_C_LIGHTING), + .normalization = pg->regs[NV_PGRAPH_CSV0_C] + & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, + + .fixed_function = fixed_function, + + /* vertex program stuff */ + .vertex_program = vertex_program, + .z_perspective = pg->regs[NV_PGRAPH_CONTROL_0] + & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, + + /* geometry shader stuff */ + .primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode, + .polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], + NV_PGRAPH_SETUPRASTER_FRONTFACEMODE), + .polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], + NV_PGRAPH_SETUPRASTER_BACKFACEMODE), + }; + + state.program_length = 0; + memset(state.program_data, 0, sizeof(state.program_data)); + + if (vertex_program) { + // copy in vertex program tokens + for (i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; i++) { + uint32_t *cur_token = (uint32_t*)&pg->program_data[i]; + memcpy(&state.program_data[state.program_length], + cur_token, + VSH_TOKEN_SIZE * sizeof(uint32_t)); + state.program_length++; + + if (vsh_get_field(cur_token, FLD_FINAL)) { + break; + } + } + } + + /* Texgen */ + for (i = 0; i < 4; i++) { + unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; + for (j = 0; j < 4; j++) { + unsigned int masks[] = { + (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q + }; + state.texgen[i][j] = (enum VshTexgen)GET_MASK(pg->regs[reg], masks[j]); + } + } + + /* Fog */ + state.fog_enable = pg->regs[NV_PGRAPH_CONTROL_3] + & NV_PGRAPH_CONTROL_3_FOGENABLE; + if (state.fog_enable) { + /*FIXME: Use CSV0_D? */ + state.fog_mode = (enum VshFogMode)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], + NV_PGRAPH_CONTROL_3_FOG_MODE); + state.foggen = (enum VshFoggen)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], + NV_PGRAPH_CSV0_D_FOGGENMODE); + } else { + /* FIXME: Do we still pass the fogmode? 
*/ + state.fog_mode = (enum VshFogMode)0; + state.foggen = (enum VshFoggen)0; + } + + /* Texture matrices */ + for (i = 0; i < 4; i++) { + state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; + } + + /* Lighting */ + if (state.lighting) { + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + state.light[i] = (enum VshLight)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], + NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); + } + } + + for (i = 0; i < 8; i++) { + state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; + state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4]; + state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4]; + state.psh.alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4]; + //constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; + //constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; + } + + for (i = 0; i < 4; i++) { + state.psh.rect_tex[i] = false; + bool enabled = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4] + & NV_PGRAPH_TEXCTL0_0_ENABLE; + unsigned int color_format = + GET_MASK(pg->regs[NV_PGRAPH_TEXFMT0 + i*4], + NV_PGRAPH_TEXFMT0_COLOR); + + if (enabled && kelvin_color_format_map[color_format].linear) { + state.psh.rect_tex[i] = true; + } + + for (j = 0; j < 4; j++) { + state.psh.compare_mode[i][j] = + (pg->regs[NV_PGRAPH_SHADERCLIPMODE] >> (4 * i + j)) & 1; + } + state.psh.alphakill[i] = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4] + & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; + } + + ShaderBinding* cached_shader = (ShaderBinding*)g_hash_table_lookup(pg->shader_cache, &state); + if (cached_shader) { + pg->shader_binding = cached_shader; + } else { + pg->shader_binding = generate_shaders(state); + + /* cache it */ + ShaderState *cache_state = (ShaderState *)g_malloc(sizeof(*cache_state)); + memcpy(cache_state, &state, sizeof(*cache_state)); + g_hash_table_insert(pg->shader_cache, cache_state, + (gpointer)pg->shader_binding); + } + + bool binding_changed = (pg->shader_binding != old_binding); + + glUseProgram(pg->shader_binding->gl_program); + + pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed, + vertex_program, fixed_function); + + NV2A_GL_DGROUP_END(); +} + +static bool pgraph_framebuffer_dirty(PGRAPHState *pg) +{ + bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, + sizeof(SurfaceShape)) != 0; + if (!shape_changed || (!pg->surface_shape.color_format + && !pg->surface_shape.zeta_format)) { + return false; + } + return true; +} + +static bool pgraph_color_write_enabled(PGRAPHState *pg) +{ + return pg->regs[NV_PGRAPH_CONTROL_0] & ( + NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); +} + +static bool pgraph_zeta_write_enabled(PGRAPHState *pg) +{ + return pg->regs[NV_PGRAPH_CONTROL_0] & ( + NV_PGRAPH_CONTROL_0_ZWRITEENABLE + | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); +} + +static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) +{ + NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", + color, zeta, + pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); + /* FIXME: Does this apply to CLEARs too? 
*/ + color = color && pgraph_color_write_enabled(pg); + zeta = zeta && pgraph_zeta_write_enabled(pg); + pg->surface_color.draw_dirty |= color; + pg->surface_zeta.draw_dirty |= zeta; +} + +static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) { + PGRAPHState *pg = &d->pgraph; + + unsigned int width, height; + pgraph_get_surface_dimensions(pg, &width, &height); + pgraph_apply_anti_aliasing_factor(pg, &width, &height); + + Surface *surface; + hwaddr dma_address; + GLuint *gl_buffer; + unsigned int bytes_per_pixel; + GLenum gl_internal_format, gl_format, gl_type, gl_attachment; + + if (color) { + surface = &pg->surface_color; + dma_address = pg->dma_color; + gl_buffer = &pg->gl_color_buffer; + + assert(pg->surface_shape.color_format != 0); + assert(pg->surface_shape.color_format + < ARRAY_SIZE(kelvin_surface_color_format_map)); + SurfaceColorFormatInfo f = + kelvin_surface_color_format_map[pg->surface_shape.color_format]; + if (f.bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", + pg->surface_shape.color_format); + abort(); + } + + bytes_per_pixel = f.bytes_per_pixel; + gl_internal_format = f.gl_internal_format; + gl_format = f.gl_format; + gl_type = f.gl_type; + gl_attachment = GL_COLOR_ATTACHMENT0; + + } else { + surface = &pg->surface_zeta; + dma_address = pg->dma_zeta; + gl_buffer = &pg->gl_zeta_buffer; + + assert(pg->surface_shape.zeta_format != 0); + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + bytes_per_pixel = 2; + gl_format = GL_DEPTH_COMPONENT; + gl_attachment = GL_DEPTH_ATTACHMENT; + if (pg->surface_shape.z_format) { + gl_type = GL_HALF_FLOAT; + gl_internal_format = GL_DEPTH_COMPONENT32F; + } else { + gl_type = GL_UNSIGNED_SHORT; + gl_internal_format = GL_DEPTH_COMPONENT16; + } break; - case 2: - ((uint16_t*)line)[ox] = pixel; - break; - case 4: - ((uint32_t*)line)[ox] = pixel; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + bytes_per_pixel = 4; + gl_format = GL_DEPTH_STENCIL; + gl_attachment = GL_DEPTH_STENCIL_ATTACHMENT; + if (pg->surface_shape.z_format) { + assert(false); + gl_type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV; + gl_internal_format = GL_DEPTH32F_STENCIL8; + } else { + gl_type = GL_UNSIGNED_INT_24_8; + gl_internal_format = GL_DEPTH24_STENCIL8; + } break; default: assert(false); break; } } -} -static int nv2a_get_bpp(VGACommonState *s) -{ - if ((s->cr[0x28] & 3) == 3) { - return 32; + + DMAObject dma = nv_dma_load(d, dma_address); + /* There's a bunch of bugs that could cause us to hit this function + * at the wrong time and get a invalid dma object. + * Check that it's sane. */ + assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); + + assert(dma.address + surface->offset != 0); + assert(surface->offset <= dma.limit); + assert(surface->offset + surface->pitch * height <= dma.limit + 1); + + hwaddr data_len; + uint8_t *data = (uint8_t*)nv_dma_map(d, dma_address, &data_len); + + /* TODO */ + // assert(pg->surface_clip_x == 0 && pg->surface_clip_y == 0); + + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + + uint8_t *buf = data + surface->offset; + if (swizzle) { + buf = (uint8_t*)g_malloc(height * surface->pitch); + } + + bool dirty = surface->buffer_dirty; + if (color) { + // dirty |= 1; + dirty |= memory_region_test_and_clear_dirty(d->vram, + dma.address + surface->offset, + surface->pitch * height, + DIRTY_MEMORY_NV2A); + } + if (upload && dirty) { + /* surface modified (or moved) by the cpu. 
+ * copy it into the opengl renderbuffer */ + assert(!surface->draw_dirty); + + assert(surface->pitch % bytes_per_pixel == 0); + + if (swizzle) { + unswizzle_rect(data + surface->offset, + width, height, + buf, + surface->pitch, + bytes_per_pixel); + } + + if (!color) { + /* need to clear the depth_stencil and depth attachment for zeta */ + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_ATTACHMENT, + GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, + 0, 0); + } + + glFramebufferTexture2D(GL_FRAMEBUFFER, + gl_attachment, + GL_TEXTURE_2D, + 0, 0); + + if (*gl_buffer) { + glDeleteTextures(1, gl_buffer); + *gl_buffer = 0; + } + + glGenTextures(1, gl_buffer); + glBindTexture(GL_TEXTURE_2D, *gl_buffer); + + /* This is VRAM so we can't do this inplace! */ + uint8_t *flipped_buf = (uint8_t*)g_malloc(width * height * bytes_per_pixel); + unsigned int irow; + for (irow = 0; irow < height; irow++) { + memcpy(&flipped_buf[width * (height - irow - 1) + * bytes_per_pixel], + &buf[surface->pitch * irow], + width * bytes_per_pixel); + } + + glTexImage2D(GL_TEXTURE_2D, 0, gl_internal_format, + width, height, 0, + gl_format, gl_type, + flipped_buf); + + g_free(flipped_buf); + + glFramebufferTexture2D(GL_FRAMEBUFFER, + gl_attachment, + GL_TEXTURE_2D, + *gl_buffer, 0); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) + == GL_FRAMEBUFFER_COMPLETE); + + if (color) { + pgraph_update_memory_buffer(d, dma.address + surface->offset, + surface->pitch * height, true); + } + surface->buffer_dirty = false; + +#ifdef DEBUG_NV2A + uint8_t *out = data + surface->offset + 64; + NV2A_DPRINTF("upload_surface %s 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " + "(0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " + "%d %d, %d %d, %d) - %x %x %x %x\n", + color ? "color" : "zeta", + dma.address, dma.address + dma.limit, + dma.address + surface->offset, + dma.address + surface->pitch * height, + pg->surface_shape.clip_x, pg->surface_shape.clip_y, + pg->surface_shape.clip_width, + pg->surface_shape.clip_height, + surface->pitch, + out[0], out[1], out[2], out[3]); +#endif + } + + if (!upload && surface->draw_dirty) { + /* read the opengl framebuffer into the surface */ + glo_readpixels(gl_format, gl_type, + bytes_per_pixel, surface->pitch, + width, height, + buf); + assert(glGetError() == GL_NO_ERROR); + + if (swizzle) { + swizzle_rect(buf, + width, height, + data + surface->offset, + surface->pitch, + bytes_per_pixel); + } + + memory_region_set_client_dirty(d->vram, + dma.address + surface->offset, + surface->pitch * height, + DIRTY_MEMORY_VGA); + + if (color) { + pgraph_update_memory_buffer(d, dma.address + surface->offset, + surface->pitch * height, true); + } + + surface->draw_dirty = false; + surface->write_enabled_cache = false; + +#ifdef DEBUG_NV2A + uint8_t *out = data + surface->offset + 64; + NV2A_DPRINTF("read_surface %s 0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " + "(0x%" HWADDR_PRIx " - 0x%" HWADDR_PRIx ", " + "%d %d, %d %d, %d) - %x %x %x %x\n", + color ? 
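/* Editor's note: the upload path above bridges two conventions: the guest
 * stores a surface top row first (optionally swizzled), while the GL texture
 * backing the framebuffer is filled bottom row first. Hence the unswizzle
 * into a temporary buffer followed by the row-reversing copy:
 *
 *     for (unsigned row = 0; row < height; row++) {
 *         memcpy(dst + (height - 1 - row) * width * bytes_per_pixel, // packed
 *                src + row * pitch,                                  // pitched
 *                width * bytes_per_pixel);
 *     }
 *
 * The readback path just above this note is the mirror image: glo_readpixels()
 * pulls the rendered pixels back, they are re-swizzled if needed, and the VRAM
 * range is marked dirty so the VGA console and the vertex memory buffer stay
 * in sync. */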
"color" : "zeta", + dma.address, dma.address + dma.limit, + dma.address + surface->offset, + dma.address + surface->pitch * pg->surface_shape.clip_height, + pg->surface_shape.clip_x, pg->surface_shape.clip_y, + pg->surface_shape.clip_width, pg->surface_shape.clip_height, + surface->pitch, + out[0], out[1], out[2], out[3]); +#endif + } + + if (swizzle) { + g_free(buf); } - return (s->cr[0x28] & 3) * 8; } -static void nv2a_get_offsets(VGACommonState *s, - uint32_t *pline_offset, - uint32_t *pstart_addr, - uint32_t *pline_compare) +static void pgraph_update_surface(NV2AState *d, bool upload, + bool color_write, bool zeta_write) { - NV2AState *d = container_of(s, NV2AState, vga); - uint32_t start_addr, line_offset, line_compare; + PGRAPHState *pg = &d->pgraph; - line_offset = s->cr[0x13] - | ((s->cr[0x19] & 0xe0) << 3) - | ((s->cr[0x25] & 0x20) << 6); - line_offset <<= 3; - *pline_offset = line_offset; + pg->surface_shape.z_format = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], + NV_PGRAPH_SETUPRASTER_Z_FORMAT); - start_addr = d->pcrtc.start / 4; - *pstart_addr = start_addr; + /* FIXME: Does this apply to CLEARs too? */ + color_write = color_write && pgraph_color_write_enabled(pg); + zeta_write = zeta_write && pgraph_zeta_write_enabled(pg); - line_compare = s->cr[VGA_CRTC_LINE_COMPARE] | - ((s->cr[VGA_CRTC_OVERFLOW] & 0x10) << 4) | - ((s->cr[VGA_CRTC_MAX_SCAN] & 0x40) << 3); - *pline_compare = line_compare; + if (upload && pgraph_framebuffer_dirty(pg)) { + assert(!pg->surface_color.draw_dirty); + assert(!pg->surface_zeta.draw_dirty); + + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, + 0, 0); + + if (pg->gl_color_buffer) { + glDeleteTextures(1, &pg->gl_color_buffer); + pg->gl_color_buffer = 0; + } + + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_ATTACHMENT, + GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, + 0, 0); + + if (pg->gl_zeta_buffer) { + glDeleteTextures(1, &pg->gl_zeta_buffer); + pg->gl_zeta_buffer = 0; + } + + memcpy(&pg->last_surface_shape, &pg->surface_shape, + sizeof(SurfaceShape)); + } + + if ((color_write || (!upload && pg->surface_color.write_enabled_cache)) + && (upload || pg->surface_color.draw_dirty)) { + pgraph_update_surface_part(d, upload, true); + } + + + if ((zeta_write || (!upload && pg->surface_zeta.write_enabled_cache)) + && (upload || pg->surface_zeta.draw_dirty)) { + pgraph_update_surface_part(d, upload, false); + } } - -static void nv2a_vga_gfx_update(void *opaque) -{ - VGACommonState *vga = opaque; - vga->hw_ops->gfx_update(vga); - - NV2AState *d = container_of(vga, NV2AState, vga); - d->pcrtc.pending_interrupts |= NV_PCRTC_INTR_0_VBLANK; - update_irq(d); -} - -static void nv2a_init_memory(NV2AState *d, MemoryRegion *ram) -{ - /* xbox is UMA - vram *is* ram */ - d->vram = ram; - - /* PCI exposed vram */ - memory_region_init_alias(&d->vram_pci, OBJECT(d), "nv2a-vram-pci", d->vram, - 0, memory_region_size(d->vram)); - pci_register_bar(&d->dev, 1, PCI_BASE_ADDRESS_MEM_PREFETCH, &d->vram_pci); - - - /* RAMIN - should be in vram somewhere, but not quite sure where atm */ - memory_region_init_ram(&d->ramin, OBJECT(d), "nv2a-ramin", 0x100000); - /* memory_region_init_alias(&d->ramin, "nv2a-ramin", &d->vram, - memory_region_size(&d->vram) - 0x100000, - 0x100000); */ - - memory_region_add_subregion(&d->mmio, 0x700000, &d->ramin); - - - d->vram_ptr = memory_region_get_ram_ptr(d->vram); - 
d->ramin_ptr = memory_region_get_ram_ptr(&d->ramin); - - memory_region_set_log(d->vram, true, DIRTY_MEMORY_NV2A); - memory_region_set_dirty(d->vram, 0, memory_region_size(d->vram)); - - /* hacky. swap out vga's vram */ - memory_region_destroy(&d->vga.vram); - memory_region_init_alias(&d->vga.vram, OBJECT(d), "vga.vram", - d->vram, 0, memory_region_size(d->vram)); - d->vga.vram_ptr = memory_region_get_ram_ptr(&d->vga.vram); - vga_dirty_log_start(&d->vga); - - - pgraph_init(d); - - /* fire up puller */ - qemu_thread_create(&d->pfifo.puller_thread, - pfifo_puller_thread, - d, QEMU_THREAD_JOINABLE); - - /* fire up pusher */ - qemu_thread_create(&d->pfifo.pusher_thread, - pfifo_pusher_thread, - d, QEMU_THREAD_JOINABLE); -} - -static int nv2a_initfn(PCIDevice *dev) +static void pgraph_bind_textures(NV2AState *d) { int i; - NV2AState *d; + PGRAPHState *pg = &d->pgraph; - d = NV2A_DEVICE(dev); + NV2A_GL_DGROUP_BEGIN("%s", __func__); - dev->config[PCI_INTERRUPT_PIN] = 0x01; + for (i=0; ipcrtc.start = 0; + uint32_t ctl_0 = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4]; + uint32_t ctl_1 = pg->regs[NV_PGRAPH_TEXCTL1_0 + i*4]; + uint32_t fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; + uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; + uint32_t address = pg->regs[NV_PGRAPH_TEXADDRESS0 + i*4]; + uint32_t palette = pg->regs[NV_PGRAPH_TEXPALETTE0 + i*4]; - d->pramdac.core_clock_coeff = 0x00011c01; /* 189MHz...? */ - d->pramdac.core_clock_freq = 189000000; - d->pramdac.memory_clock_coeff = 0; - d->pramdac.video_clock_coeff = 0x0003C20D; /* 25182Khz...? */ + bool enabled = GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); + unsigned int min_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); + unsigned int max_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); + + unsigned int pitch = + GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); + + unsigned int dma_select = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); + bool cubemap = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + unsigned int dimensionality = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); + unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); + unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); + unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); + + unsigned int rect_width = + GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], + NV_PGRAPH_TEXIMAGERECT0_WIDTH); + unsigned int rect_height = + GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], + NV_PGRAPH_TEXIMAGERECT0_HEIGHT); +#ifdef DEBUG_NV2A + unsigned int lod_bias = + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); +#endif + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); + + unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU); + unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV); + unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP); + + unsigned int border_source = GET_MASK(fmt, + NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + uint32_t border_color = pg->regs[NV_PGRAPH_BORDERCOLOR0 + i*4]; + + unsigned int offset = pg->regs[NV_PGRAPH_TEXOFFSET0 + i*4]; + + bool palette_dma_select = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); + unsigned int palette_length_index = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); + 
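/* Editor's note: pgraph_bind_textures() recovers everything about a texture
 * from the per-stage registers decoded above. Swizzled textures keep their
 * dimensions log2-encoded in TEXFMT (BASE_SIZE_U/V/P), while linear pitch
 * textures take theirs from TEXIMAGERECT, which is how the code further down
 * chooses:
 *
 *     if (f.linear) {
 *         width  = rect_width;        // NV_PGRAPH_TEXIMAGERECT0_WIDTH
 *         height = rect_height;
 *     } else {
 *         width  = 1u << log_width;   // NV_PGRAPH_TEXFMT0_BASE_SIZE_U
 *         height = 1u << log_height;  // NV_PGRAPH_TEXFMT0_BASE_SIZE_V
 *         depth  = 1u << log_depth;   // NV_PGRAPH_TEXFMT0_BASE_SIZE_P
 *     }
 */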
unsigned int palette_offset = + palette & NV_PGRAPH_TEXPALETTE0_OFFSET; + + unsigned int palette_length = 0; + switch (palette_length_index) { + case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; + default: assert(false); break; + } + + /* Check for unsupported features */ + assert(!(filter & NV_PGRAPH_TEXFILTER0_ASIGNED)); + assert(!(filter & NV_PGRAPH_TEXFILTER0_RSIGNED)); + assert(!(filter & NV_PGRAPH_TEXFILTER0_GSIGNED)); + assert(!(filter & NV_PGRAPH_TEXFILTER0_BSIGNED)); + + glActiveTexture(GL_TEXTURE0 + i); + if (!enabled) { + glBindTexture(GL_TEXTURE_CUBE_MAP, 0); + glBindTexture(GL_TEXTURE_RECTANGLE, 0); + glBindTexture(GL_TEXTURE_1D, 0); + glBindTexture(GL_TEXTURE_2D, 0); + glBindTexture(GL_TEXTURE_3D, 0); + continue; + } + + if (!pg->texture_dirty[i] && pg->texture_binding[i]) { + glBindTexture(pg->texture_binding[i]->gl_target, + pg->texture_binding[i]->gl_texture); + continue; + } + + NV2A_DPRINTF(" texture %d is format 0x%x, (r %d, %d or %d, %d, %d; %d%s)," + " filter %x %x, levels %d-%d %d bias %d\n", + i, color_format, + rect_width, rect_height, + 1 << log_width, 1 << log_height, 1 << log_depth, + pitch, + cubemap ? "; cubemap" : "", + min_filter, mag_filter, + min_mipmap_level, max_mipmap_level, levels, + lod_bias); + + assert(color_format < ARRAY_SIZE(kelvin_color_format_map)); + ColorFormatInfo f = kelvin_color_format_map[color_format]; + if (f.bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", + color_format); + abort(); + } + + unsigned int width, height, depth; + if (f.linear) { + assert(dimensionality == 2); + width = rect_width; + height = rect_height; + depth = 1; + } else { + width = 1 << log_width; + height = 1 << log_height; + depth = 1 << log_depth; + + /* FIXME: What about 3D mipmaps? */ + levels = MIN(levels, max_mipmap_level + 1); + if (f.gl_format != 0) { + /* Discard mipmap levels that would be smaller than 1x1. + * FIXME: Is this actually needed? + * + * >> Level 0: 32 x 4 + * Level 1: 16 x 2 + * Level 2: 8 x 1 + * Level 3: 4 x 1 + * Level 4: 2 x 1 + * Level 5: 1 x 1 + */ + levels = MIN(levels, MAX(log_width, log_height) + 1); + } else { + /* OpenGL requires DXT textures to always have a width and + * height a multiple of 4. The Xbox and DirectX handles DXT + * textures smaller than 4 by padding the reset of the block. + * + * See: + * https://msdn.microsoft.com/en-us/library/windows/desktop/bb204843(v=vs.85).aspx + * https://msdn.microsoft.com/en-us/library/windows/desktop/bb694531%28v=vs.85%29.aspx#Virtual_Size + * + * Work around this for now by discarding mipmap levels that + * would result in too-small textures. A correct solution + * will be to decompress these levels manually, or add texture + * sampling logic. + * + * >> Level 0: 64 x 8 + * Level 1: 32 x 4 + * Level 2: 16 x 2 << Ignored + * >> Level 0: 16 x 16 + * Level 1: 8 x 8 + * Level 2: 4 x 4 << OK! + */ + if (log_width < 2 || log_height < 2) { + /* Base level is smaller than 4x4... 
*/ + levels = 1; + } else { + levels = MIN(levels, MIN(log_width, log_height) - 1); + } + } + assert(levels > 0); + } + + hwaddr dma_len; + uint8_t *texture_data; + if (dma_select) { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len); + } else { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len); + } + assert(offset < dma_len); + texture_data += offset; + + hwaddr palette_dma_len; + uint8_t *palette_data; + if (palette_dma_select) { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len); + } else { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len); + } + assert(palette_offset < palette_dma_len); + palette_data += palette_offset; + + NV2A_DPRINTF(" - 0x%tx\n", texture_data - d->vram_ptr); + + size_t length = 0; + if (f.linear) { + assert(cubemap == false); + assert(dimensionality == 2); + length = height * pitch; + } else { + if (dimensionality >= 2) { + unsigned int w = width, h = height; + int level; + if (f.gl_format != 0) { + for (level = 0; level < levels; level++) { + w = MAX(w, 1); h = MAX(h, 1); + length += w * h * f.bytes_per_pixel; + w /= 2; + h /= 2; + } + } else { + /* Compressed textures are a bit different */ + unsigned int block_size; + if (f.gl_internal_format == + GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + block_size = 8; + } else { + block_size = 16; + } + + for (level = 0; level < levels; level++) { + w = MAX(w, 4); h = MAX(h, 4); + length += w/4 * h/4 * block_size; + w /= 2; h /= 2; + } + } + if (cubemap) { + assert(dimensionality == 2); + length *= 6; + } + if (dimensionality >= 3) { + length *= depth; + } + } + } + + TextureShape state = { + .cubemap = cubemap, + .dimensionality = dimensionality, + .color_format = color_format, + .levels = levels, + .width = width, + .height = height, + .depth = depth, + .min_mipmap_level = min_mipmap_level, + .max_mipmap_level = max_mipmap_level, + .pitch = pitch, + }; + +#ifdef USE_TEXTURE_CACHE + TextureKey key = { + .state = state, + .data_hash = fast_hash(texture_data, length, 5003) + ^ fnv_hash(palette_data, palette_length), + .texture_data = texture_data, + .palette_data = palette_data, + }; + + gpointer cache_key = g_malloc(sizeof(TextureKey)); + memcpy(cache_key, &key, sizeof(TextureKey)); + + GError *err; + TextureBinding *binding = (TextureBinding *)g_lru_cache_get(pg->texture_cache, cache_key, &err); + assert(binding); + binding->refcnt++; +#else + TextureBinding *binding = generate_texture(state, + texture_data, palette_data); +#endif + + glBindTexture(binding->gl_target, binding->gl_texture); + if (f.linear) { + /* somtimes games try to set mipmap min filters on linear textures. + * this could indicate a bug... 
*/ + switch (min_filter) { + case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD: + case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD: + min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0; + break; + case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD: + case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD: + min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0; + break; + } + } - /* legacy VGA shit */ - VGACommonState *vga = &d->vga; - vga->vram_size_mb = 4; - /* seems to start in color mode */ - vga->msr = VGA_MIS_COLOR; + glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER, + pgraph_texture_min_filter_map[min_filter]); + glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER, + pgraph_texture_mag_filter_map[mag_filter]); - vga_common_init(vga, OBJECT(dev)); - vga->get_bpp = nv2a_get_bpp; - vga->get_offsets = nv2a_get_offsets; - vga->overlay_draw_line = nv2a_overlay_draw_line; + /* Texture wrapping */ + assert(addru < ARRAY_SIZE(pgraph_texture_addr_map)); + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S, + pgraph_texture_addr_map[addru]); + if (dimensionality > 1) { + assert(addrv < ARRAY_SIZE(pgraph_texture_addr_map)); + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T, + pgraph_texture_addr_map[addrv]); + } + if (dimensionality > 2) { + assert(addrp < ARRAY_SIZE(pgraph_texture_addr_map)); + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R, + pgraph_texture_addr_map[addrp]); + } - d->hw_ops = *vga->hw_ops; - d->hw_ops.gfx_update = nv2a_vga_gfx_update; - vga->con = graphic_console_init(DEVICE(dev), &d->hw_ops, vga); + /* FIXME: Only upload if necessary? [s, t or r = GL_CLAMP_TO_BORDER] */ + if (border_source == NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { + GLfloat gl_border_color[] = { + /* FIXME: Color channels might be wrong order */ + ((border_color >> 16) & 0xFF) / 255.0f, /* red */ + ((border_color >> 8) & 0xFF) / 255.0f, /* green */ + (border_color & 0xFF) / 255.0f, /* blue */ + ((border_color >> 24) & 0xFF) / 255.0f /* alpha */ + }; + glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, + gl_border_color); + } + if (pg->texture_binding[i]) { + texture_binding_destroy(pg->texture_binding[i]); + } + pg->texture_binding[i] = binding; + pg->texture_dirty[i] = false; + } + NV2A_GL_DGROUP_END(); +} - /* mmio */ - memory_region_init(&d->mmio, OBJECT(dev), "nv2a-mmio", 0x1000000); - pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio); +static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + switch (pg->surface_shape.anti_aliasing) { + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: + if (width) { *width *= 2; } + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: + if (width) { *width *= 2; } + if (height) { *height *= 2; } + break; + default: + assert(false); + break; + } +} - for (i=0; iblock_mmio[i], OBJECT(dev), - &blocktable[i].ops, d, - blocktable[i].name, blocktable[i].size); - memory_region_add_subregion(&d->mmio, blocktable[i].offset, - &d->block_mmio[i]); +static void pgraph_get_surface_dimensions(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + if (swizzle) { + *width = 1 << pg->surface_shape.log_width; + *height = 1 << pg->surface_shape.log_height; + } else { + *width = pg->surface_shape.clip_width; + *height = pg->surface_shape.clip_height; + } +} + +static void pgraph_update_memory_buffer(NV2AState *d, hwaddr 
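/* Editor's note: pgraph_apply_anti_aliasing_factor() above only rescales the
 * storage footprint of a surface: CENTER_CORNER_2 doubles the width and
 * SQUARE_OFFSET_4 doubles both axes, so for example a 640x480 render target
 * is backed by a 1280x480 or 1280x960 buffer respectively, while CENTER_1
 * leaves the dimensions untouched. */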
addr, hwaddr size, + bool f) +{ + glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); + + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + if (f || memory_region_test_and_clear_dirty(d->vram, + addr, + end - addr, + DIRTY_MEMORY_NV2A)) { + glBufferSubData(GL_ARRAY_BUFFER, addr, end - addr, d->vram_ptr + addr); + } +} + +static void pgraph_bind_vertex_attributes(NV2AState *d, + unsigned int num_elements, + bool inline_data, + unsigned int inline_stride) +{ + int i, j; + PGRAPHState *pg = &d->pgraph; + + if (inline_data) { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", + __func__, num_elements, inline_stride); + } else { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); } - qemu_mutex_init(&d->pfifo.lock); - qemu_cond_init(&d->pfifo.puller_cond); - qemu_cond_init(&d->pfifo.pusher_cond); + for (i=0; ivertex_attributes[i]; + if (attribute->count) { + uint8_t *data; + unsigned int in_stride; + if (inline_data && attribute->needs_conversion) { + data = (uint8_t*)pg->inline_array + + attribute->inline_array_offset; + in_stride = inline_stride; + } else { + hwaddr dma_len; + if (attribute->dma_select) { + data = (uint8_t*)nv_dma_map(d, pg->dma_vertex_b, &dma_len); + } else { + data = (uint8_t*)nv_dma_map(d, pg->dma_vertex_a, &dma_len); + } - d->pfifo.regs[NV_PFIFO_CACHE1_STATUS] |= NV_PFIFO_CACHE1_STATUS_LOW_MARK; + assert(attribute->offset < dma_len); + data += attribute->offset; - return 0; + in_stride = attribute->stride; + } + + if (attribute->needs_conversion) { + NV2A_DPRINTF("converted %d\n", i); + + unsigned int out_stride = attribute->converted_size + * attribute->converted_count; + + if (num_elements > attribute->converted_elements) { + attribute->converted_buffer = (uint8_t*)g_realloc( + attribute->converted_buffer, + num_elements * out_stride); + } + + for (j=attribute->converted_elements; jconverted_buffer + j * out_stride; + + switch (attribute->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { + uint32_t p = ldl_le_p((uint32_t*)in); + float *xyz = (float*)out; + xyz[0] = ((int32_t)(((p >> 0) & 0x7FF) << 21) >> 21) + / 1023.0f; + xyz[1] = ((int32_t)(((p >> 11) & 0x7FF) << 21) >> 21) + / 1023.0f; + xyz[2] = ((int32_t)(((p >> 22) & 0x3FF) << 22) >> 22) + / 511.0f; + break; + } + default: + assert(false); + break; + } + } + + + glBindBuffer(GL_ARRAY_BUFFER, attribute->gl_converted_buffer); + if (num_elements != attribute->converted_elements) { + glBufferData(GL_ARRAY_BUFFER, + num_elements * out_stride, + attribute->converted_buffer, + GL_DYNAMIC_DRAW); + attribute->converted_elements = num_elements; + } + + + glVertexAttribPointer(i, + attribute->converted_count, + attribute->gl_type, + attribute->gl_normalize, + out_stride, + 0); + } else if (inline_data) { + glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); + glVertexAttribPointer(i, + attribute->gl_count, + attribute->gl_type, + attribute->gl_normalize, + inline_stride, + (void*)(uintptr_t)attribute->inline_array_offset); + } else { + hwaddr addr = data - d->vram_ptr; + pgraph_update_memory_buffer(d, addr, + num_elements * attribute->stride, + false); + glVertexAttribPointer(i, + attribute->gl_count, + attribute->gl_type, + attribute->gl_normalize, + attribute->stride, + (void*)(uint64_t)addr); + } + glEnableVertexAttribArray(i); + } else { + glDisableVertexAttribArray(i); + + glVertexAttrib4fv(i, attribute->inline_value); + } + } + NV2A_GL_DGROUP_END(); } -static void nv2a_exitfn(PCIDevice 
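/* Editor's note: the NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP conversion
 * above unpacks three signed components from one 32-bit word: bits 10:0 and
 * 21:11 are 11-bit fields divided by 1023, bits 31:22 a 10-bit field divided
 * by 511, roughly normalising each to [-1, 1]. The shift-left-then-
 * arithmetic-shift-right pair is manual sign extension; a hedged equivalent
 * helper:
 *
 *     static float cmp_field(uint32_t word, int lsb, int bits, float scale)
 *     {
 *         int32_t v = (int32_t)(word << (32 - lsb - bits)) >> (32 - bits);
 *         return v / scale;
 *     }
 *     // x = cmp_field(p,  0, 11, 1023.0f);
 *     // y = cmp_field(p, 11, 11, 1023.0f);
 *     // z = cmp_field(p, 22, 10,  511.0f);
 */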
*dev) +static unsigned int pgraph_bind_inline_array(NV2AState *d) { - NV2AState *d; - d = NV2A_DEVICE(dev); + int i; - d->exiting = true; - - qemu_cond_broadcast(&d->pfifo.puller_cond); - qemu_cond_broadcast(&d->pfifo.pusher_cond); - qemu_thread_join(&d->pfifo.puller_thread); - qemu_thread_join(&d->pfifo.pusher_thread); + PGRAPHState *pg = &d->pgraph; - pgraph_destroy(&d->pgraph); + unsigned int offset = 0; + for (i=0; ivertex_attributes[i]; + if (attribute->count) { + attribute->inline_array_offset = offset; + + NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", + i, attribute->size, attribute->count); + offset += attribute->size * attribute->count; + assert(offset % 4 == 0); + } + } + + unsigned int vertex_size = offset; + + + unsigned int index_count = pg->inline_array_length*4 / vertex_size; + + NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); + + glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); + glBufferData(GL_ARRAY_BUFFER, pg->inline_array_length*4, pg->inline_array, + GL_DYNAMIC_DRAW); + + pgraph_bind_vertex_attributes(d, index_count, true, vertex_size); + + return index_count; } -static void nv2a_class_init(ObjectClass *klass, void *data) +static void load_graphics_object(NV2AState *d, hwaddr instance_address, + GraphicsObject *obj) { - DeviceClass *dc = DEVICE_CLASS(klass); - PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + uint8_t *obj_ptr; + uint32_t switch1, switch2, switch3; - k->vendor_id = PCI_VENDOR_ID_NVIDIA; - k->device_id = PCI_DEVICE_ID_NVIDIA_GEFORCE_NV2A; - k->revision = 161; - k->class_id = PCI_CLASS_DISPLAY_3D; - k->init = nv2a_initfn; - k->exit = nv2a_exitfn; + assert(instance_address < memory_region_size(&d->ramin)); - dc->desc = "GeForce NV2A Integrated Graphics"; + obj_ptr = d->ramin_ptr + instance_address; + + switch1 = ldl_le_p((uint32_t*)obj_ptr); + switch2 = ldl_le_p((uint32_t*)(obj_ptr+4)); + switch3 = ldl_le_p((uint32_t*)(obj_ptr+8)); + + obj->graphics_class = switch1 & NV_PGRAPH_CTX_SWITCH1_GRCLASS; + + /* init graphics object */ + switch (obj->graphics_class) { + case NV_KELVIN_PRIMITIVE: + // kelvin->vertex_attributes[NV2A_VERTEX_ATTR_DIFFUSE].inline_value = 0xFFFFFFF; + break; + default: + break; + } } -static const TypeInfo nv2a_info = { - .name = "nv2a", - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(NV2AState), - .class_init = nv2a_class_init, -}; - -static void nv2a_register(void) +static GraphicsObject* lookup_graphics_object(PGRAPHState *s, + hwaddr instance_address) { - type_register_static(&nv2a_info); + int i; + for (i=0; isubchannel_data[i].object_instance == instance_address) { + return &s->subchannel_data[i].object; + } + } + return NULL; } -type_init(nv2a_register); -void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram) -{ - PCIDevice *dev = pci_create_simple(bus, devfn, "nv2a"); - NV2AState *d = NV2A_DEVICE(dev); - nv2a_init_memory(d, ram); +/* 16 bit to [0.0, F16_MAX = 511.9375] */ +static float convert_f16_to_float(uint16_t f16) { + if (f16 == 0x0000) { return 0.0; } + uint32_t i = (f16 << 11) + 0x3C000000; + return *(float*)&i; +} + +/* 24 bit to [0.0, F24_MAX] */ +static float convert_f24_to_float(uint32_t f24) { + assert(!(f24 >> 24)); + f24 &= 0xFFFFFF; + if (f24 == 0x000000) { return 0.0; } + uint32_t i = f24 << 7; + return *(float*)&i; +} + +static uint8_t cliptobyte(int x) +{ + return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 
255 : x)); +} + +static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 1] - 128; + e = (int)line[ix * 2 + 1] - 128; + } else { + d = (int)line[ix * 2 + 1] - 128; + e = (int)line[ix * 2 + 3] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} + +static uint8_t* convert_texture_data(const TextureShape s, + const uint8_t *data, + const uint8_t *palette_data, + unsigned int width, + unsigned int height, + unsigned int depth, + unsigned int row_pitch, + unsigned int slice_pitch) +{ + if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { + assert(depth == 1); /* FIXME */ + uint8_t* converted_data = (uint8_t*)g_malloc(width * height * 4); + int x, y; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + uint8_t index = data[y * row_pitch + x]; + uint32_t color = *(uint32_t*)(palette_data + index * 4); + *(uint32_t*)(converted_data + y * width * 4 + x * 4) = color; + } + } + return converted_data; + } else if (s.color_format + == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { + assert(depth == 1); /* FIXME */ + uint8_t* converted_data = (uint8_t*)g_malloc(width * height * 4); + int x, y; + for (y = 0; y < height; y++) { + const uint8_t* line = &data[y * s.width * 2]; + for (x = 0; x < width; x++) { + uint8_t* pixel = &converted_data[(y * s.width + x) * 4]; + /* FIXME: Actually needs uyvy? */ + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); + pixel[3] = 255; + } + } + return converted_data; + } else if (s.color_format + == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { + assert(depth == 1); /* FIXME */ + uint8_t *converted_data = (uint8_t*)g_malloc(width * height * 3); + int x, y; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + uint16_t rgb655 = *(uint16_t*)(data + y * row_pitch + x * 2); + int8_t *pixel = (int8_t*)&converted_data[(y * width + x) * 3]; + /* Maps 5 bit G and B signed value range to 8 bit + * signed values. R is probably unsigned. + */ + rgb655 ^= (1 << 9) | (1 << 4); + pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; + pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; + pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; + } + } + return converted_data; + } else { + return NULL; + } +} + +static void upload_gl_texture(GLenum gl_target, + const TextureShape s, + const uint8_t *texture_data, + const uint8_t *palette_data) +{ + ColorFormatInfo f = kelvin_color_format_map[s.color_format]; + + switch(gl_target) { + case GL_TEXTURE_1D: + assert(false); + break; + case GL_TEXTURE_RECTANGLE: { + /* Can't handle strides unaligned to pixels */ + assert(s.pitch % f.bytes_per_pixel == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, + s.pitch / f.bytes_per_pixel); + + uint8_t *converted = convert_texture_data(s, texture_data, + palette_data, + s.width, s.height, 1, + s.pitch, 0); + + glTexImage2D(gl_target, 0, f.gl_internal_format, + s.width, s.height, 0, + f.gl_format, f.gl_type, + converted ? 
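+    /* Illustration (not from the original code, values worked by hand): the
+     * integer coefficients in convert_yuy2_to_rgb() above are the usual
+     * fixed-point BT.601 expansion, r = clip((298*(Y-16) + 409*(V-128) + 128) >> 8)
+     * and so on; e.g. Y=235, U=V=128 gives (298*219 + 128) >> 8 = 255 on all
+     * three channels, so nominal white maps to full white.  In the R6G5B5
+     * path above, the XOR with (1 << 9) | (1 << 4) turns the two's-complement
+     * G/B fields into offset-binary before scaling, so a G field of
+     * 0b01111 (+15) expands to 127 and 0b10000 (-16) expands to -128. */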
converted : texture_data); + + if (converted) { + g_free(converted); + } + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + break; + } + case GL_TEXTURE_2D: + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { + + unsigned int width = s.width, height = s.height; + + int level; + for (level = 0; level < s.levels; level++) { + if (f.gl_format == 0) { /* compressed */ + + width = MAX(width, 4); height = MAX(height, 4); + + unsigned int block_size; + if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + block_size = 8; + } else { + block_size = 16; + } + + glCompressedTexImage2D(gl_target, level, f.gl_internal_format, + width, height, 0, + width/4 * height/4 * block_size, + texture_data); + + texture_data += width/4 * height/4 * block_size; + } else { + + width = MAX(width, 1); height = MAX(height, 1); + + unsigned int pitch = width * f.bytes_per_pixel; + uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); + unswizzle_rect(texture_data, width, height, + unswizzled, pitch, f.bytes_per_pixel); + + uint8_t *converted = convert_texture_data(s, unswizzled, + palette_data, + width, height, 1, + pitch, 0); + + glTexImage2D(gl_target, level, f.gl_internal_format, + width, height, 0, + f.gl_format, f.gl_type, + converted ? converted : unswizzled); + + if (converted) { + g_free(converted); + } + g_free(unswizzled); + + texture_data += width * height * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + } + + break; + } + case GL_TEXTURE_3D: { + + unsigned int width = s.width, height = s.height, depth = s.depth; + + assert(f.gl_format != 0); /* FIXME: compressed not supported yet */ + assert(f.linear == false); + + int level; + for (level = 0; level < s.levels; level++) { + + unsigned int row_pitch = width * f.bytes_per_pixel; + unsigned int slice_pitch = row_pitch * height; + uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth); + unswizzle_box(texture_data, width, height, depth, unswizzled, + row_pitch, slice_pitch, f.bytes_per_pixel); + + uint8_t *converted = convert_texture_data(s, unswizzled, + palette_data, + width, height, depth, + row_pitch, slice_pitch); + + glTexImage3D(gl_target, level, f.gl_internal_format, + width, height, depth, 0, + f.gl_format, f.gl_type, + converted ? converted : unswizzled); + + if (converted) { + g_free(converted); + } + g_free(unswizzled); + + texture_data += width * height * depth * f.bytes_per_pixel; + + width /= 2; + height /= 2; + depth /= 2; + } + break; + } + default: + assert(false); + break; + } +} + +static TextureBinding* generate_texture(const TextureShape s, + const uint8_t *texture_data, + const uint8_t *palette_data) +{ + ColorFormatInfo f = kelvin_color_format_map[s.color_format]; + + /* Create a new opengl texture */ + GLuint gl_texture; + glGenTextures(1, &gl_texture); + + GLenum gl_target; + if (s.cubemap) { + assert(f.linear == false); + assert(s.dimensionality == 2); + gl_target = GL_TEXTURE_CUBE_MAP; + } else { + if (f.linear) { + /* linear textures use unnormalised texcoords. + * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but + * does not allow repeat and mirror wrap modes. + * (or mipmapping, but xbox d3d says 'Non swizzled and non + * compressed textures cannot be mip mapped.') + * Not sure if that'll be an issue. */ + + /* FIXME: GLSL 330 provides us with textureSize()! Use that? 
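+             * Illustrative sketch only (ps_code, pT%d and texSamp%d are made-up
+             * names, not from this patch): with GLSL 3.30 the shader generator
+             * could emit the normalisation itself and keep GL_TEXTURE_2D even
+             * for linear textures, e.g.
+             *
+             *     qstring_append_fmt(ps_code,
+             *         "vec2 uv%d = pT%d.xy / vec2(textureSize(texSamp%d, 0));\n",
+             *         i, i, i);
+             *
+             * which would also restore the repeat and mirror wrap modes that
+             * GL_TEXTURE_RECTANGLE cannot provide.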
*/ + gl_target = GL_TEXTURE_RECTANGLE; + assert(s.dimensionality == 2); + } else { + switch(s.dimensionality) { + case 1: gl_target = GL_TEXTURE_1D; break; + case 2: gl_target = GL_TEXTURE_2D; break; + case 3: gl_target = GL_TEXTURE_3D; break; + default: + assert(false); + break; + } + } + } + + glBindTexture(gl_target, gl_texture); + + NV2A_GL_DLABEL(GL_TEXTURE, gl_texture, + "format: 0x%02X%s, %d dimensions%s, width: %d, height: %d, depth: %d", + s.color_format, f.linear ? "" : " (SZ)", + s.dimensionality, s.cubemap ? " (Cubemap)" : "", + s.width, s.height, s.depth); + + if (gl_target == GL_TEXTURE_CUBE_MAP) { + + size_t length = 0; + unsigned int w = s.width, h = s.height; + int level; + for (level = 0; level < s.levels; level++) { + /* FIXME: This is wrong for compressed textures and textures with 1x? non-square mipmaps */ + length += w * h * f.bytes_per_pixel; + w /= 2; + h /= 2; + } + + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, + s, texture_data + 0 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + s, texture_data + 1 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + s, texture_data + 2 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + s, texture_data + 3 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + s, texture_data + 4 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, + s, texture_data + 5 * length, palette_data); + } else { + upload_gl_texture(gl_target, s, texture_data, palette_data); + } + + /* Linear textures don't support mipmapping */ + if (!f.linear) { + glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, + s.min_mipmap_level); + glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, + s.levels - 1); + } + + if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 + || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { + glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, + (const GLint *)f.gl_swizzle_mask); + } + + TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding)); + ret->gl_target = gl_target; + ret->gl_texture = gl_texture; + ret->refcnt = 1; + return ret; +} + +/* functions for texture LRU cache */ +static guint texture_key_hash(gconstpointer key) +{ + const TextureKey *k = (const TextureKey *)key; + uint64_t state_hash = fnv_hash( + (const uint8_t*)&k->state, sizeof(TextureShape)); + return state_hash ^ k->data_hash; +} +static gboolean texture_key_equal(gconstpointer a, gconstpointer b) +{ + const TextureKey *ak = (const TextureKey *)a, *bk = (const TextureKey *)b; + return memcmp(&ak->state, &bk->state, sizeof(TextureShape)) == 0 + && ak->data_hash == bk->data_hash; +} +static gpointer texture_key_retrieve(gpointer key, gpointer user_data, GError **error) +{ + const TextureKey *k = (const TextureKey *)key; + TextureBinding *v = generate_texture(k->state, + k->texture_data, + k->palette_data); + if (error != NULL) { + *error = NULL; + } + return v; +} +static void texture_key_destroy(gpointer data) +{ + g_free(data); +} +static void texture_binding_destroy(gpointer data) +{ + TextureBinding *binding = (TextureBinding *)data; + assert(binding->refcnt > 0); + binding->refcnt--; + if (binding->refcnt == 0) { + glDeleteTextures(1, &binding->gl_texture); + g_free(binding); + } +} + +/* hash and equality for shader cache hash table */ +static guint shader_hash(gconstpointer key) +{ + return fnv_hash((const uint8_t *)key, sizeof(ShaderState)); +} +static gboolean shader_equal(gconstpointer a, 
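+/* Illustration (the real cache setup is not shown in this hunk): shader_hash
+ * and shader_equal are the usual GLib pair for keying a cache on a flat
+ * struct, e.g.
+ *
+ *     GHashTable *shader_cache = g_hash_table_new(shader_hash, shader_equal);
+ *
+ * so two ShaderState values that compare memcmp()-equal resolve to the same
+ * cached program. */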
gconstpointer b) +{ + const ShaderState *as = (const ShaderState *)a, *bs = (const ShaderState *)b; + return memcmp(as, bs, sizeof(ShaderState)) == 0; +} + +static unsigned int kelvin_map_stencil_op(uint32_t parameter) +{ + unsigned int op; + switch (parameter) { + case NV097_SET_STENCIL_OP_V_KEEP: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; + case NV097_SET_STENCIL_OP_V_ZERO: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; + case NV097_SET_STENCIL_OP_V_REPLACE: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; + case NV097_SET_STENCIL_OP_V_INCRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; + case NV097_SET_STENCIL_OP_V_DECRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; + case NV097_SET_STENCIL_OP_V_INVERT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; + case NV097_SET_STENCIL_OP_V_INCR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; + case NV097_SET_STENCIL_OP_V_DECR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; + default: + assert(false); + break; + } + return op; +} + +static unsigned int kelvin_map_polygon_mode(uint32_t parameter) +{ + unsigned int mode; + switch (parameter) { + case NV097_SET_FRONT_POLYGON_MODE_V_POINT: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; + case NV097_SET_FRONT_POLYGON_MODE_V_LINE: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; + case NV097_SET_FRONT_POLYGON_MODE_V_FILL: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; + default: + assert(false); + break; + } + return mode; +} + +static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) +{ + assert(channel < 4); + unsigned int texgen; + switch (parameter) { + case NV097_SET_TEXGEN_S_DISABLE: + texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; + case NV097_SET_TEXGEN_S_EYE_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; + case NV097_SET_TEXGEN_S_OBJECT_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; + case NV097_SET_TEXGEN_S_SPHERE_MAP: + assert(channel < 2); + texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; + case NV097_SET_TEXGEN_S_REFLECTION_MAP: + assert(channel < 3); + texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; + case NV097_SET_TEXGEN_S_NORMAL_MAP: + assert(channel < 3); + texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; + default: + assert(false); + break; + } + return texgen; +} + +static uint64_t fnv_hash(const uint8_t *data, size_t len) +{ + return XXH64(data, len, 0); +} + +static uint64_t fast_hash(const uint8_t *data, size_t len, unsigned int samples) +{ + return XXH64(data, len, 0);; } diff --git a/hw/xbox/nv2a/nv2a_pmc.c b/hw/xbox/nv2a/nv2a_pmc.c new file mode 100644 index 0000000000..269954a306 --- /dev/null +++ b/hw/xbox/nv2a/nv2a_pmc.c @@ -0,0 +1,71 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +/* PMC - card master control */ +uint64_t pmc_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + uint64_t r = 0; + switch (addr) { + case NV_PMC_BOOT_0: + /* chipset and stepping: + * NV2A, A02, Rev 0 */ + + r = 0x02A000A2; + break; + case NV_PMC_INTR_0: + /* Shows which functional units have pending IRQ */ + r = d->pmc.pending_interrupts; + break; + case NV_PMC_INTR_EN_0: + /* Selects which functional units can cause IRQs */ + r = d->pmc.enabled_interrupts; + break; + default: + break; + } + + reg_log_read(NV_PMC, addr, r); + return r; +} + +void pmc_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + reg_log_write(NV_PMC, addr, val); + + switch (addr) { + case NV_PMC_INTR_0: + /* the bits of the interrupts to clear are wrtten */ + d->pmc.pending_interrupts &= ~val; + update_irq(d); + break; + case NV_PMC_INTR_EN_0: + d->pmc.enabled_interrupts = val; + update_irq(d); + break; + default: + break; + } +} + diff --git a/hw/xbox/nv2a/nv2a_pramdac.c b/hw/xbox/nv2a/nv2a_pramdac.c new file mode 100644 index 0000000000..f0c59af099 --- /dev/null +++ b/hw/xbox/nv2a/nv2a_pramdac.c @@ -0,0 +1,87 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +uint64_t pramdac_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + uint64_t r = 0; + switch (addr & ~3) { + case NV_PRAMDAC_NVPLL_COEFF: + r = d->pramdac.core_clock_coeff; + break; + case NV_PRAMDAC_MPLL_COEFF: + r = d->pramdac.memory_clock_coeff; + break; + case NV_PRAMDAC_VPLL_COEFF: + r = d->pramdac.video_clock_coeff; + break; + case NV_PRAMDAC_PLL_TEST_COUNTER: + /* emulated PLLs locked instantly? 
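+     * Illustration (numbers are assumptions, not taken from this patch): the
+     * PLL coefficient registers read back above pack the M/N/P divider fields
+     * that pramdac_write() below turns into a frequency as
+     * crystal * N / (1 << P) / M.  Assuming the usual 16.667 MHz crystal,
+     * N = 224, M = 16, P = 0 would give roughly 233 MHz, the NV2A's nominal
+     * core clock.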
*/ + r = NV_PRAMDAC_PLL_TEST_COUNTER_VPLL2_LOCK + | NV_PRAMDAC_PLL_TEST_COUNTER_NVPLL_LOCK + | NV_PRAMDAC_PLL_TEST_COUNTER_MPLL_LOCK + | NV_PRAMDAC_PLL_TEST_COUNTER_VPLL_LOCK; + break; + default: + break; + } + + /* Surprisingly, QEMU doesn't handle unaligned access for you properly */ + r >>= 32 - 8 * size - 8 * (addr & 3); + + NV2A_DPRINTF("PRAMDAC: read %d [0x%" HWADDR_PRIx "] -> %llx\n", size, addr, r); + return r; +} + +void pramdac_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + uint32_t m, n, p; + + reg_log_write(NV_PRAMDAC, addr, val); + + switch (addr) { + case NV_PRAMDAC_NVPLL_COEFF: + d->pramdac.core_clock_coeff = val; + + m = val & NV_PRAMDAC_NVPLL_COEFF_MDIV; + n = (val & NV_PRAMDAC_NVPLL_COEFF_NDIV) >> 8; + p = (val & NV_PRAMDAC_NVPLL_COEFF_PDIV) >> 16; + + if (m == 0) { + d->pramdac.core_clock_freq = 0; + } else { + d->pramdac.core_clock_freq = (NV2A_CRYSTAL_FREQ * n) + / (1 << p) / m; + } + + break; + case NV_PRAMDAC_MPLL_COEFF: + d->pramdac.memory_clock_coeff = val; + break; + case NV_PRAMDAC_VPLL_COEFF: + d->pramdac.video_clock_coeff = val; + break; + default: + break; + } +} diff --git a/hw/xbox/nv2a/nv2a_prmcio.c b/hw/xbox/nv2a/nv2a_prmcio.c new file mode 100644 index 0000000000..a18d835115 --- /dev/null +++ b/hw/xbox/nv2a/nv2a_prmcio.c @@ -0,0 +1,55 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +/* PRMCIO - aliases VGA CRTC and attribute controller registers */ +uint64_t prmcio_read(void *opaque, + hwaddr addr, unsigned int size) +{ + NV2AState *d = opaque; + uint64_t r = vga_ioport_read(&d->vga, addr); + + reg_log_read(NV_PRMCIO, addr, r); + return r; +} +void prmcio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + NV2AState *d = opaque; + + reg_log_write(NV_PRMCIO, addr, val); + + switch (addr) { + case VGA_ATT_W: + /* Cromwell sets attrs without enabling VGA_AR_ENABLE_DISPLAY + * (which should result in a blank screen). + * Either nvidia's hardware is lenient or it is set through + * something else. The former seems more likely. + */ + if (d->vga.ar_flip_flop == 0) { + val |= VGA_AR_ENABLE_DISPLAY; + } + break; + default: + break; + } + + vga_ioport_write(&d->vga, addr, val); +} diff --git a/hw/xbox/nv2a.h b/hw/xbox/nv2a/nv2a_prmvio.c similarity index 54% rename from hw/xbox/nv2a.h rename to hw/xbox/nv2a/nv2a_prmvio.c index 24b665aad7..3ace37b5c6 100644 --- a/hw/xbox/nv2a.h +++ b/hw/xbox/nv2a/nv2a_prmvio.c @@ -2,6 +2,8 @@ * QEMU Geforce NV2A implementation * * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -17,9 +19,22 @@ * along with this program; if not, see . 
*/ -#ifndef HW_NV2A_H -#define HW_NV2A_H +/* PRMVIO - aliases VGA sequencer and graphics controller registers */ +uint64_t prmvio_read(void *opaque, + hwaddr addr, unsigned int size) +{ + NV2AState *d = opaque; + uint64_t r = vga_ioport_read(&d->vga, addr); -void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram); + reg_log_read(NV_PRMVIO, addr, r); + return r; +} +void prmvio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + NV2AState *d = opaque; -#endif + reg_log_write(NV_PRMVIO, addr, val); + + vga_ioport_write(&d->vga, addr, val); +} diff --git a/hw/xbox/nv2a_psh.c b/hw/xbox/nv2a/nv2a_psh.c similarity index 99% rename from hw/xbox/nv2a_psh.c rename to hw/xbox/nv2a/nv2a_psh.c index 40775b576e..4c39748310 100644 --- a/hw/xbox/nv2a_psh.c +++ b/hw/xbox/nv2a/nv2a_psh.c @@ -25,6 +25,8 @@ * along with this program; if not, see . */ +#include "qemu/osdep.h" + #include #include #include @@ -33,8 +35,8 @@ #include "qapi/qmp/qstring.h" -#include "hw/xbox/nv2a_shaders_common.h" -#include "hw/xbox/nv2a_psh.h" +#include "nv2a_shaders_common.h" +#include "nv2a_psh.h" /* * This implements translation of register combiners into glsl diff --git a/hw/xbox/nv2a_psh.h b/hw/xbox/nv2a/nv2a_psh.h similarity index 100% rename from hw/xbox/nv2a_psh.h rename to hw/xbox/nv2a/nv2a_psh.h diff --git a/hw/xbox/nv2a/nv2a_ptimer.c b/hw/xbox/nv2a/nv2a_ptimer.c new file mode 100644 index 0000000000..564a49085f --- /dev/null +++ b/hw/xbox/nv2a/nv2a_ptimer.c @@ -0,0 +1,89 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +/* PIMTER - time measurement and time-based alarms */ +static uint64_t ptimer_get_clock(NV2AState *d) +{ + return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + d->pramdac.core_clock_freq * d->ptimer.numerator, + NANOSECONDS_PER_SECOND * d->ptimer.denominator); +} + +uint64_t ptimer_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = opaque; + + uint64_t r = 0; + switch (addr) { + case NV_PTIMER_INTR_0: + r = d->ptimer.pending_interrupts; + break; + case NV_PTIMER_INTR_EN_0: + r = d->ptimer.enabled_interrupts; + break; + case NV_PTIMER_NUMERATOR: + r = d->ptimer.numerator; + break; + case NV_PTIMER_DENOMINATOR: + r = d->ptimer.denominator; + break; + case NV_PTIMER_TIME_0: + r = (ptimer_get_clock(d) & 0x7ffffff) << 5; + break; + case NV_PTIMER_TIME_1: + r = (ptimer_get_clock(d) >> 27) & 0x1fffffff; + break; + default: + break; + } + + reg_log_read(NV_PTIMER, addr, r); + return r; +} + +void ptimer_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = opaque; + + reg_log_write(NV_PTIMER, addr, val); + + switch (addr) { + case NV_PTIMER_INTR_0: + d->ptimer.pending_interrupts &= ~val; + update_irq(d); + break; + case NV_PTIMER_INTR_EN_0: + d->ptimer.enabled_interrupts = val; + update_irq(d); + break; + case NV_PTIMER_DENOMINATOR: + d->ptimer.denominator = val; + break; + case NV_PTIMER_NUMERATOR: + d->ptimer.numerator = val; + break; + case NV_PTIMER_ALARM_0: + d->ptimer.alarm_time = val; + break; + default: + break; + } +} diff --git a/hw/xbox/nv2a/nv2a_pvideo.c b/hw/xbox/nv2a/nv2a_pvideo.c new file mode 100644 index 0000000000..a0777f28c9 --- /dev/null +++ b/hw/xbox/nv2a/nv2a_pvideo.c @@ -0,0 +1,73 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +static void pvideo_vga_invalidate(NV2AState *d) +{ + int y1 = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], + NV_PVIDEO_POINT_OUT_Y); + int y2 = y1 + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], + NV_PVIDEO_SIZE_OUT_HEIGHT); + NV2A_DPRINTF("pvideo_vga_invalidate %d %d\n", y1, y2); + vga_invalidate_scanlines(&d->vga, y1, y2); +} + +uint64_t pvideo_read(void *opaque, + hwaddr addr, unsigned int size) +{ + NV2AState *d = opaque; + + uint64_t r = 0; + switch (addr) { + case NV_PVIDEO_STOP: + r = 0; + break; + default: + r = d->pvideo.regs[addr]; + break; + } + + reg_log_read(NV_PVIDEO, addr, r); + return r; +} + +void pvideo_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + NV2AState *d = opaque; + + reg_log_write(NV_PVIDEO, addr, val); + + switch (addr) { + case NV_PVIDEO_BUFFER: + d->pvideo.regs[addr] = val; + // d->vga.enable_overlay = true; + pvideo_vga_invalidate(d); + break; + case NV_PVIDEO_STOP: + d->pvideo.regs[NV_PVIDEO_BUFFER] = 0; + // d->vga.enable_overlay = false; + pvideo_vga_invalidate(d); + break; + default: + d->pvideo.regs[addr] = val; + break; + } +} diff --git a/hw/xbox/nv2a_shaders.c b/hw/xbox/nv2a/nv2a_shaders.c similarity index 97% rename from hw/xbox/nv2a_shaders.c rename to hw/xbox/nv2a/nv2a_shaders.c index 9eb37ec5ba..f568c6e48a 100644 --- a/hw/xbox/nv2a_shaders.c +++ b/hw/xbox/nv2a/nv2a_shaders.c @@ -18,10 +18,56 @@ * along with this program; if not, see . */ +#include "qemu/osdep.h" #include "qemu-common.h" -#include "hw/xbox/nv2a_debug.h" -#include "hw/xbox/nv2a_shaders_common.h" -#include "hw/xbox/nv2a_shaders.h" +#include "nv2a_debug.h" +#include "nv2a_shaders_common.h" +#include "nv2a_shaders.h" + +void qstring_append_fmt(QString *qstring, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + qstring_append_va(qstring, fmt, ap); + va_end(ap); +} + +QString *qstring_from_fmt(const char *fmt, ...) 
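+/* Illustrative usage only (the literals here are made up): these helpers make
+ * it convenient to build GLSL source incrementally, e.g.
+ *
+ *     QString *s = qstring_from_fmt("#version %d\n\n", 330);
+ *     qstring_append_fmt(s, "uniform vec4 c[%d];\n", 192);
+ *
+ * before handing qstring_get_str(s) to glShaderSource(). */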
+{ + QString *ret = qstring_new(); + va_list ap; + va_start(ap, fmt); + qstring_append_va(ret, fmt, ap); + va_end(ap); + + return ret; +} + +void qstring_append_va(QString *qstring, const char *fmt, va_list va) +{ + char scratch[256]; + + va_list ap; + va_copy(ap, va); + const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); + va_end(ap); + + if (len == 0) { + return; + } else if (len < sizeof(scratch)) { + qstring_append(qstring, scratch); + return; + } + + /* overflowed out scratch buffer, alloc and try again */ + char *buf = g_malloc(len + 1); + va_copy(ap, va); + vsnprintf(buf, len + 1, fmt, ap); + va_end(ap); + + qstring_append(qstring, buf); + g_free(buf); +} static QString* generate_geometry_shader( enum ShaderPolygonMode polygon_front_mode, diff --git a/hw/xbox/nv2a_shaders.h b/hw/xbox/nv2a/nv2a_shaders.h similarity index 100% rename from hw/xbox/nv2a_shaders.h rename to hw/xbox/nv2a/nv2a_shaders.h diff --git a/hw/xbox/nv2a_shaders_common.h b/hw/xbox/nv2a/nv2a_shaders_common.h similarity index 87% rename from hw/xbox/nv2a_shaders_common.h rename to hw/xbox/nv2a/nv2a_shaders_common.h index c8e1a962ff..d329a1bba9 100644 --- a/hw/xbox/nv2a_shaders_common.h +++ b/hw/xbox/nv2a/nv2a_shaders_common.h @@ -34,4 +34,9 @@ " vec4 T3;\n" \ "};\n" + +void qstring_append_fmt(QString *qstring, const char *fmt, ...); +QString *qstring_from_fmt(const char *fmt, ...); +void qstring_append_va(QString *qstring, const char *fmt, va_list va); + #endif diff --git a/hw/xbox/nv2a/nv2a_stubs.c b/hw/xbox/nv2a/nv2a_stubs.c new file mode 100644 index 0000000000..c2c5ec46ac --- /dev/null +++ b/hw/xbox/nv2a/nv2a_stubs.c @@ -0,0 +1,118 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +uint64_t prma_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PRMA, addr, 0); + return 0; +} +void prma_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + reg_log_write(NV_PRMA, addr, val); +} + +uint64_t pcounter_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PCOUNTER, addr, 0); + return 0; +} +void pcounter_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + reg_log_write(NV_PCOUNTER, addr, val); +} + +uint64_t pvpe_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PVPE, addr, 0); + return 0; +} +void pvpe_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + reg_log_write(NV_PVPE, addr, val); +} + +uint64_t ptv_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PTV, addr, 0); + return 0; +} +void ptv_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + reg_log_write(NV_PTV, addr, val); +} + +uint64_t prmfb_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PRMFB, addr, 0); + return 0; +} +void prmfb_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + reg_log_write(NV_PRMFB, addr, val); +} + +uint64_t prmdio_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PRMDIO, addr, 0); + return 0; +} +void prmdio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + reg_log_write(NV_PRMDIO, addr, val); +} + +uint64_t pstraps_read(void *opaque, + hwaddr addr, unsigned int size) +{ + reg_log_read(NV_PSTRAPS, addr, 0); + return 0; +} +void pstraps_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + reg_log_write(NV_PSTRAPS, addr, val); +} + +/* PRAMIN - RAMIN access */ +/* +uint64_t pramin_read(void *opaque, + hwaddr addr, unsigned int size) +{ + NV2A_DPRINTF("nv2a PRAMIN: read [0x%" HWADDR_PRIx "] -> 0x%" HWADDR_PRIx "\n", addr, r); + return 0; +} +void pramin_write(void *opaque, hwaddr addr, + uint64_t val, unsigned int size) +{ + NV2A_DPRINTF("nv2a PRAMIN: [0x%" HWADDR_PRIx "] = 0x%02llx\n", addr, val); +}*/ diff --git a/hw/xbox/nv2a/nv2a_user.c b/hw/xbox/nv2a/nv2a_user.c new file mode 100644 index 0000000000..5f931ab495 --- /dev/null +++ b/hw/xbox/nv2a/nv2a_user.c @@ -0,0 +1,95 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018 Matt Borgerson + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +/* USER - PFIFO MMIO and DMA submission area */ +uint64_t user_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + unsigned int channel_id = addr >> 16; + assert(channel_id < NV2A_NUM_CHANNELS); + + ChannelControl *control = &d->user.channel_control[channel_id]; + + uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE]; + + uint64_t r = 0; + if (channel_modes & (1 << channel_id)) { + /* DMA Mode */ + switch (addr & 0xFFFF) { + case NV_USER_DMA_PUT: + r = control->dma_put; + break; + case NV_USER_DMA_GET: + r = control->dma_get; + break; + case NV_USER_REF: + r = control->ref; + break; + default: + break; + } + } else { + /* PIO Mode */ + assert(false); + } + + reg_log_read(NV_USER, addr, r); + return r; +} + +void user_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + + reg_log_write(NV_USER, addr, val); + + unsigned int channel_id = addr >> 16; + assert(channel_id < NV2A_NUM_CHANNELS); + + ChannelControl *control = &d->user.channel_control[channel_id]; + + uint32_t channel_modes = d->pfifo.regs[NV_PFIFO_MODE]; + if (channel_modes & (1 << channel_id)) { + /* DMA Mode */ + switch (addr & 0xFFFF) { + case NV_USER_DMA_PUT: + control->dma_put = val; + + if (d->pfifo.cache1.push_enabled) { + pfifo_run_pusher(d); + } + break; + case NV_USER_DMA_GET: + control->dma_get = val; + break; + case NV_USER_REF: + control->ref = val; + break; + default: + break; + } + } else { + /* PIO Mode */ + assert(false); + } + +} diff --git a/hw/xbox/nv2a_vsh.c b/hw/xbox/nv2a/nv2a_vsh.c similarity index 99% rename from hw/xbox/nv2a_vsh.c rename to hw/xbox/nv2a/nv2a_vsh.c index bf3f5f3bf3..c413fe5de9 100644 --- a/hw/xbox/nv2a_vsh.c +++ b/hw/xbox/nv2a/nv2a_vsh.c @@ -25,13 +25,15 @@ * along with this program; if not, see . */ +#include "qemu/osdep.h" + #include #include #include #include -#include "hw/xbox/nv2a_shaders_common.h" -#include "hw/xbox/nv2a_vsh.h" +#include "nv2a_shaders_common.h" +#include "nv2a_vsh.h" #define VSH_D3DSCM_CORRECTION 96 diff --git a/hw/xbox/nv2a_vsh.h b/hw/xbox/nv2a/nv2a_vsh.h similarity index 100% rename from hw/xbox/nv2a_vsh.h rename to hw/xbox/nv2a/nv2a_vsh.h diff --git a/hw/xbox/swizzle.c b/hw/xbox/nv2a/swizzle.c similarity index 99% rename from hw/xbox/swizzle.c rename to hw/xbox/nv2a/swizzle.c index b3180d95f7..350e82722f 100644 --- a/hw/xbox/swizzle.c +++ b/hw/xbox/nv2a/swizzle.c @@ -26,7 +26,7 @@ #include #include "qemu/osdep.h" -#include "hw/xbox/swizzle.h" +#include "swizzle.h" /* This should be pretty straightforward. * It creates a bit pattern like ..zyxzyxzyx from ..xxx, ..yyy and ..zzz diff --git a/hw/xbox/swizzle.h b/hw/xbox/nv2a/swizzle.h similarity index 100% rename from hw/xbox/swizzle.h rename to hw/xbox/nv2a/swizzle.h diff --git a/hw/xbox/nv2a/xxhash.c b/hw/xbox/nv2a/xxhash.c new file mode 100644 index 0000000000..da06ea72bf --- /dev/null +++ b/hw/xbox/nv2a/xxhash.c @@ -0,0 +1,1029 @@ +/* +* xxHash - Fast Hash algorithm +* Copyright (C) 2012-2016, Yann Collet +* +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. 
+* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define XXH_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7S__) )) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault. + * When this macro is enabled, xxHash actively checks input for null pointer. + * It it is, result for null input pointers is the same as a null-length input. + */ +#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */ +# define XXH_ACCEPT_NULL_INPUT_POINTER 0 +#endif + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. 
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; + * set it to 0 when the input is guaranteed to be aligned, + * or when alignment doesn't matter for performance. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/*! Modify the local functions below should you wish to use some other memory routines +* for malloc(), free() */ +#include +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/*! and for memcpy() */ +#include +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#include /* assert */ + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + + +/* ************************************* +* Compiler Specific Options +***************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# define FORCE_INLINE static __forceinline +#else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif + + +/* ************************************* +* Basic Types +***************************************/ +#ifndef MEM_MODULE +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; +# endif +#endif + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; } __attribute__((packed)) unalign; +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN +static int XXH_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} +# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian() +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +/* mix all bits */ +static U32 XXH32_avalanche(U32 h32) +{ + h32 ^= h32 >> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return(h32); +} + +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +static U32 +XXH32_finalize(U32 h32, const void* ptr, size_t len, + XXH_endianess endian, XXH_alignment align) + +{ + const BYTE* p = (const BYTE*)ptr; +#define PROCESS1 \ + h32 += (*p) * PRIME32_5; \ + p++; \ + h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; + +#define PROCESS4 \ + h32 += XXH_get32bits(p) * PRIME32_3; \ + p+=4; \ + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + + switch(len&15) /* or switch(bEnd - p) */ + { + case 12: PROCESS4; + /* fallthrough */ + case 8: PROCESS4; + /* fallthrough */ + case 4: PROCESS4; + return XXH32_avalanche(h32); + + case 13: PROCESS4; + /* fallthrough */ + case 9: PROCESS4; + /* fallthrough */ + case 5: PROCESS4; + PROCESS1; + return XXH32_avalanche(h32); + + case 14: PROCESS4; + /* fallthrough */ + case 10: PROCESS4; + /* fallthrough */ + case 6: PROCESS4; + PROCESS1; + PROCESS1; + return XXH32_avalanche(h32); + + case 15: PROCESS4; + /* fallthrough */ + case 11: PROCESS4; + /* fallthrough */ + case 7: PROCESS4; + /* fallthrough */ + case 3: PROCESS1; + /* fallthrough */ + case 2: PROCESS1; + /* fallthrough */ + case 1: PROCESS1; + /* fallthrough */ + case 0: return XXH32_avalanche(h32); + } + assert(0); + return h32; /* reaching this point is deemed impossible */ +} + + +FORCE_INLINE U32 +XXH32_endian_align(const void* input, size_t len, U32 seed, + XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 15; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32)len; + + return XXH32_finalize(h32, p, len&15, endian, align); +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, 
seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + + +/*====== Hash streaming ======*/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + + +FORCE_INLINE +XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH32_update 
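+/* Illustration: the streaming entry points below produce the same value as
+ * the one-shot calls used elsewhere in this patch for texture hashing, e.g.
+ *
+ *     XXH32_state_t *st = XXH32_createState();
+ *     XXH32_reset(st, 0);
+ *     XXH32_update(st, data, len);
+ *     uint32_t h = XXH32_digest(st);
+ *     XXH32_freeState(st);
+ *
+ * hashes the same bytes to the same value as XXH32(data, len, 0); XXH64 is
+ * analogous. */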
(XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + +FORCE_INLINE U32 +XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + + XXH_rotl32(state->v2, 7) + + XXH_rotl32(state->v3, 12) + + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned); +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + +/*====== Canonical representation ======*/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, remaining comparable across different systems. +*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ + +/*====== Memory access ======*/ + +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint64_t U64; +# else + /* if compiler doesn't support unsigned long long, replace by another 64-bit type */ + typedef unsigned long long U64; +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; +static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. 
+ * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/*====== xxh64 ======*/ + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +static U64 XXH64_avalanche(U64 h64) +{ + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + return h64; +} + + +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +static U64 +XXH64_finalize(U64 h64, const void* ptr, size_t len, + XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)ptr; + +#define PROCESS1_64 \ + h64 ^= (*p) * PRIME64_5; \ + p++; \ + h64 = XXH_rotl64(h64, 11) * PRIME64_1; + +#define PROCESS4_64 \ + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \ + p+=4; \ + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + +#define PROCESS8_64 { \ + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \ + p+=8; \ + h64 ^= k1; \ + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \ +} + + switch(len&31) { + case 24: PROCESS8_64; + /* fallthrough */ + case 16: PROCESS8_64; + /* fallthrough */ + case 8: PROCESS8_64; + return XXH64_avalanche(h64); + + case 28: PROCESS8_64; + /* fallthrough */ + case 20: PROCESS8_64; + /* fallthrough */ + case 12: PROCESS8_64; + /* fallthrough */ + case 4: PROCESS4_64; + return XXH64_avalanche(h64); + + case 25: PROCESS8_64; + /* fallthrough */ + case 17: PROCESS8_64; + /* fallthrough */ + case 9: PROCESS8_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 29: PROCESS8_64; + /* fallthrough */ + case 21: PROCESS8_64; + /* fallthrough */ + case 13: PROCESS8_64; + /* fallthrough */ + case 5: PROCESS4_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 26: PROCESS8_64; + /* fallthrough */ + case 18: PROCESS8_64; + /* fallthrough */ + case 10: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + 
return XXH64_avalanche(h64); + + case 30: PROCESS8_64; + /* fallthrough */ + case 22: PROCESS8_64; + /* fallthrough */ + case 14: PROCESS8_64; + /* fallthrough */ + case 6: PROCESS4_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 27: PROCESS8_64; + /* fallthrough */ + case 19: PROCESS8_64; + /* fallthrough */ + case 11: PROCESS8_64; + PROCESS1_64; + PROCESS1_64; + PROCESS1_64; + return XXH64_avalanche(h64); + + case 31: PROCESS8_64; + /* fallthrough */ + case 23: PROCESS8_64; + /* fallthrough */ + case 15: PROCESS8_64; + /* fallthrough */ + case 7: PROCESS4_64; + /* fallthrough */ + case 3: PROCESS1_64; + /* fallthrough */ + case 2: PROCESS1_64; + /* fallthrough */ + case 1: PROCESS1_64; + /* fallthrough */ + case 0: return XXH64_avalanche(h64); + } + + /* impossible to reach */ + assert(0); + return 0; /* unreachable, but some compilers complain without it */ +} + +FORCE_INLINE U64 +XXH64_endian_align(const void* input, size_t len, U64 seed, + XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; + +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + return XXH64_finalize(h64, p, len, endian, align); +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, input, len); + return XXH64_digest(&state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/*====== Hash Streaming ======*/ + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + 
XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + /* do not write into reserved, planned to be removed in a future version */ + memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved)); + return XXH_OK; +} + +FORCE_INLINE +XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + + if (input==NULL) +#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) + return XXH_OK; +#else + return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 /*seed*/ + PRIME64_5; + } + + h64 += (U64) state->total_len; + + return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned); +} + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/*====== Canonical representation 
======*/ + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} + +#endif /* XXH_NO_LONG_LONG */ diff --git a/hw/xbox/nv2a/xxhash.h b/hw/xbox/nv2a/xxhash.h new file mode 100644 index 0000000000..d6bad94335 --- /dev/null +++ b/hw/xbox/nv2a/xxhash.h @@ -0,0 +1,328 @@ +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2016, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : https://github.com/Cyan4973/xxHash +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bit version, named XXH64, is available since r35. +It offers much better speed, but for 64-bit applications only. 
+Name            Speed on 64 bits    Speed on 32 bits
+XXH64           13.8 GB/s            1.9 GB/s
+XXH32            6.8 GB/s            6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+*  Definitions
+******************************/
+#include <stddef.h>   /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+ *  API modifier
+ ******************************/
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ *  This is useful to include xxhash functions in `static` mode
+ *  in order to inline them, and remove their symbol from the public list.
+ *  Inlining can offer dramatic performance improvement on small keys.
+ *  Methodology :
+ *     #define XXH_INLINE_ALL
+ *     #include "xxhash.h"
+ *  `xxhash.c` is automatically included.
+ *  It's not useful to compile and link it as a separate module.
+ */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  ifndef XXH_STATIC_LINKING_ONLY
+#    define XXH_STATIC_LINKING_ONLY
+#  endif
+#  if defined(__GNUC__)
+#    define XXH_PUBLIC_API static __inline __attribute__((unused))
+#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#    define XXH_PUBLIC_API static inline
+#  elif defined(_MSC_VER)
+#    define XXH_PUBLIC_API static __inline
+#  else
+     /* this version may generate warnings for unused static functions */
+#    define XXH_PUBLIC_API static
+#  endif
+#else
+#  define XXH_PUBLIC_API   /* do nothing */
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 5 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/*-********************************************************************** +* 32-bit hash +************************************************************************/ +typedef unsigned int XXH32_hash_t; + +/*! XXH32() : + Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); + +/*====== Streaming ======*/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +/* + * Streaming functions generate the xxHash of an input provided in multiple segments. + * Note that, for small input, they are slower than single-call functions, due to state management. + * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized. + * + * XXH state must first be allocated, using XXH*_createState() . + * + * Start a new hash by initializing state with a seed, using XXH*_reset(). + * + * Then, feed the hash state by calling XXH*_update() as many times as necessary. 
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + * + * Finally, a hash value can be produced anytime, by using XXH*_digest(). + * This function returns the nn-bits hash as an int or long long. + * + * It's still possible to continue inserting input into the hash state after a digest, + * and generate some new hashes later on, by calling again XXH*_digest(). + * + * When done, free XXH state space if it was allocated dynamically. + */ + +/*====== Canonical representation ======*/ + +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); + +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. + * The canonical representation uses human-readable write convention, aka big-endian (large digits first). + * These functions allow transformation of hash result into and from its canonical format. + * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. + */ + + +#ifndef XXH_NO_LONG_LONG +/*-********************************************************************** +* 64-bit hash +************************************************************************/ +typedef unsigned long long XXH64_hash_t; + +/*! XXH64() : + Calculate the 64-bit hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark). +*/ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*====== Streaming ======*/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/*====== Canonical representation ======*/ +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); +#endif /* XXH_NO_LONG_LONG */ + + + +#ifdef XXH_STATIC_LINKING_ONLY + +/* ================================================================================================ + This section contains declarations which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + These declarations should only be used with static linking. + Never use them in association with dynamic linking ! +=================================================================================================== */ + +/* These definitions are only present to allow + * static allocation of XXH state, on stack or in a struct for example. + * Never **ever** use members directly. 
*/
+
+#if !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#   include <stdint.h>
+
+struct XXH32_state_s {
+   uint32_t total_len_32;
+   uint32_t large_len;
+   uint32_t v1;
+   uint32_t v2;
+   uint32_t v3;
+   uint32_t v4;
+   uint32_t mem32[4];
+   uint32_t memsize;
+   uint32_t reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+struct XXH64_state_s {
+   uint64_t total_len;
+   uint64_t v1;
+   uint64_t v2;
+   uint64_t v3;
+   uint64_t v4;
+   uint64_t mem64[4];
+   uint32_t memsize;
+   uint32_t reserved[2];   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+
+# else
+
+struct XXH32_state_s {
+   unsigned total_len_32;
+   unsigned large_len;
+   unsigned v1;
+   unsigned v2;
+   unsigned v3;
+   unsigned v4;
+   unsigned mem32[4];
+   unsigned memsize;
+   unsigned reserved;   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH32_state_t */
+
+#   ifndef XXH_NO_LONG_LONG   /* remove 64-bit support */
+struct XXH64_state_s {
+   unsigned long long total_len;
+   unsigned long long v1;
+   unsigned long long v2;
+   unsigned long long v3;
+   unsigned long long v4;
+   unsigned long long mem64[4];
+   unsigned memsize;
+   unsigned reserved[2];   /* never read nor write, might be removed in a future version */
+};   /* typedef'd to XXH64_state_t */
+#   endif
+
+# endif
+
+
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  include "xxhash.c"   /* include xxhash function bodies as `static`, for inlining */
+#endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */
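
The 64-bit API declared in xxhash.h above is consumed in the usual one-shot or create/reset/update/digest pattern. A minimal usage sketch, assuming a caller that hashes one contiguous buffer (the function name and the zero seed are illustrative and not part of the patch; error handling is omitted for brevity):

    #include <assert.h>
    #include "xxhash.h"

    static unsigned long long hash_buffer(const void *data, size_t len)
    {
        /* One-shot form: fastest when the whole buffer is available at once. */
        unsigned long long h = XXH64(data, len, 0);

        /* Equivalent streaming form, for data that arrives in pieces. */
        XXH64_state_t *state = XXH64_createState();
        XXH64_reset(state, 0);
        XXH64_update(state, data, len);
        assert(XXH64_digest(state) == h);   /* same input and seed => same digest */
        XXH64_freeState(state);

        return h;
    }

Both forms produce the same digest for the same input and seed, so callers can pick whichever matches how their data is produced.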