From 9b42f4aaa38aa0e42e083a913a900895de17e1d0 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Wed, 10 Feb 2021 22:14:47 -0700 Subject: [PATCH] nv2a: Add binning to LRU --- hw/xbox/nv2a/Makefile.objs | 1 - hw/xbox/nv2a/lru.c | 201 -------------------------------- hw/xbox/nv2a/lru.h | 230 ++++++++++++++++++++++++++----------- hw/xbox/nv2a/nv2a_int.h | 15 ++- hw/xbox/nv2a/pgraph.c | 112 ++++++++++-------- 5 files changed, 235 insertions(+), 324 deletions(-) delete mode 100644 hw/xbox/nv2a/lru.c diff --git a/hw/xbox/nv2a/Makefile.objs b/hw/xbox/nv2a/Makefile.objs index d9de1b4c8b..3e65589d07 100644 --- a/hw/xbox/nv2a/Makefile.objs +++ b/hw/xbox/nv2a/Makefile.objs @@ -18,6 +18,5 @@ obj-y += user.o obj-y += vsh.o obj-y += gl/ -obj-y += lru.o obj-y += swizzle.o obj-y += s3tc.o diff --git a/hw/xbox/nv2a/lru.c b/hw/xbox/nv2a/lru.c deleted file mode 100644 index f3adabd9a4..0000000000 --- a/hw/xbox/nv2a/lru.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2018 Matt Borgerson - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "lru.h" - -#define LRU_DEBUG 0 - -#if LRU_DEBUG -#define lru_dprintf(...) do { printf(__VA_ARGS__); } while(0) -#else -#define lru_dprintf(...) do {} while(0) -#endif - -/* - * Create the LRU cache - */ -struct lru *lru_init( - struct lru *lru, - lru_obj_init_func obj_init, - lru_obj_deinit_func obj_deinit, - lru_obj_key_compare_func obj_key_compare - ) -{ - assert(lru != NULL); - - lru->active = NULL; - lru->free = NULL; - - lru->obj_init = obj_init; - lru->obj_deinit = obj_deinit; - lru->obj_key_compare = obj_key_compare; - - lru->num_free = 0; - lru->num_collisions = 0; - lru->num_hit = 0; - lru->num_miss = 0; - - return lru; -} - -/* - * Add a node to the free list - */ -struct lru_node *lru_add_free(struct lru *lru, struct lru_node *node) -{ - node->next = lru->free; - lru->free = node; - lru->num_free++; - return node; -} - -/* - * Lookup object in cache: - * - If found, object is promoted to front of RU list and returned - * - If not found, - * - If cache is full, evict LRU, deinit object and add it to free list - * - Allocate object from free list, init, move to front of RU list - */ -struct lru_node *lru_lookup(struct lru *lru, uint64_t hash, void *key) -{ - struct lru_node *prev, *node; - - assert(lru != NULL); - assert((lru->active != NULL) || (lru->free != NULL)); - - /* Walk through the cache in order of recent use */ - prev = NULL; - node = lru->active; - - lru_dprintf("Looking for hash %016lx...\n", hash); - - if (node != NULL) { - do { - lru_dprintf(" %016lx\n", node->hash); - - /* Fast hash compare */ - if (node->hash == hash) { - /* Detailed key comparison */ - if (lru->obj_key_compare(node, key) == 0) { - lru_dprintf("Hit, node=%p!\n", node); - lru->num_hit++; - - if (prev == NULL) { - /* Node is already at the front of the RU list */ - return node; - } - - /* Unlink and promote node */ - lru_dprintf("Promoting node %p\n", node); - prev->next = node->next; - node->next = lru->active; - lru->active = node; - return node; - } - - /* Hash collision! Get a better hashing function... */ - lru_dprintf("Hash collision detected!\n"); - lru->num_collisions++; - } - - if (node->next == NULL) { - /* No more nodes left to look at after this... Stop here as we - * may need to evict this final (last recently used) node. - */ - break; - } - - prev = node; - node = node->next; - } while (1); - } - - lru_dprintf("Miss\n"); - lru->num_miss++; - - /* Reached the end of the active list. - * - * `node` points to: - * - NULL if there are no active objects in the cache, or - * - the last object in the RU list - * - * `prev` points to: - * - NULL if there are <= 1 active objects in the cache, or - * - the second to last object in the RU list - */ - - if (lru->free == NULL) { - /* No free nodes left, must evict a node. `node` is LRU. */ - assert(node != NULL); /* Sanity check: there must be an active object */ - lru_dprintf("Evicting %p\n", node); - - if (prev == NULL) { - /* This was the only node */ - lru->active = NULL; - } else { - /* Unlink node */ - prev->next = node->next; - } - - lru->obj_deinit(node); - lru_add_free(lru, node); - } - - /* Allocate a node from the free list */ - node = lru->free; - assert(node != NULL); /* Sanity check: there must be a free node */ - lru->free = node->next; - lru->num_free--; - - /* Initialize, promote, and return the node */ - lru->obj_init(node, key); - node->hash = hash; - node->next = lru->active; - lru->active = node; - return node; -} - -/* - * Remove all items in the active list - */ -void lru_flush(struct lru *lru) -{ - struct lru_node *node, *next; - - node = lru->active; - next = NULL; - - while (node != NULL) { - next = node->next; - lru->obj_deinit(node); - lru_add_free(lru, node); - node = next; - } - - lru->active = NULL; -} diff --git a/hw/xbox/nv2a/lru.h b/hw/xbox/nv2a/lru.h index 9725cdf3bb..0ef842d045 100644 --- a/hw/xbox/nv2a/lru.h +++ b/hw/xbox/nv2a/lru.h @@ -1,40 +1,7 @@ /* - * Simple LRU Object List - * ====================== - * - Designed for pre-allocated array of objects which are accessed frequently - * - Objects are identified by a hash and an opaque `key` data structure - * - Lookups are first done by hash, then confirmed by callback compare function - * - Two singly linked lists are maintained: a free list and an active list - * - On cache miss, object is created from free list or by evicting the LRU - * - When created, a callback function is called to fully initialize the object + * LRU object list * - * Setup - * ----- - * - Create an object data structure, embed in it `struct lru_node` - * - Create an init, deinit, and compare function - * - Call `lru_init` - * - Allocate a number of these objects - * - For each object, call `lru_add_free` to populate entries in the cache - * - * Runtime - * ------- - * - Initialize custom key data structure (will be used for comparison) - * - Create 64b hash of the object and/or key - * - Call `lru_lookup` with the hash and key - * - The active list is searched, the compare callback will be called if an - * object with matching hash is found - * - If object is found in the cache, it will be moved to the front of the - * active list and returned - * - If object is not found in the cache: - * - If no free items are available, the LRU will be evicted, deinit - * callback will be called - * - An object is popped from the free list and the init callback is called - * on the object - * - The object is added to the front of the active list and returned - * - * --- - * - * Copyright (c) 2018 Matt Borgerson + * Copyright (c) 2021 Matt Borgerson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -58,44 +25,173 @@ #ifndef LRU_H #define LRU_H +#include #include -#include +#include "qemu/queue.h" -struct lru_node; +#define LRU_NUM_BINS (1<<8) -typedef struct lru_node *(*lru_obj_init_func)(struct lru_node *obj, void *key); -typedef struct lru_node *(*lru_obj_deinit_func)(struct lru_node *obj); -typedef int (*lru_obj_key_compare_func)(struct lru_node *obj, void *key); - -struct lru { - struct lru_node *active; /* Singly-linked list tracking recently active */ - struct lru_node *free; /* Singly-linked list tracking available objects */ - - lru_obj_init_func obj_init; - lru_obj_deinit_func obj_deinit; - lru_obj_key_compare_func obj_key_compare; - - size_t num_free; - size_t num_collisions; - size_t num_hit; - size_t num_miss; -}; - -/* This should be embedded in the object structure */ -struct lru_node { +typedef struct LruNode { + QTAILQ_ENTRY(LruNode) next_global; + QTAILQ_ENTRY(LruNode) next_bin; uint64_t hash; - struct lru_node *next; +} LruNode; + +typedef struct Lru Lru; + +struct Lru { + QTAILQ_HEAD(, LruNode) global; + QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS]; + + /* Initialize a node. */ + void (*init_node)(Lru *lru, LruNode *node, void *key); + + /* In case of hash collision. Return `true` if nodes differ. */ + bool (*compare_nodes)(Lru *lru, LruNode *node, void *key); + + /* Optional. Called before eviction. Return `false` to prevent eviction. */ + bool (*pre_node_evict)(Lru *lru, LruNode *node); + + /* Optional. Called after eviction. Reclaim any associated resources. */ + void (*post_node_evict)(Lru *lru, LruNode *node); }; -struct lru *lru_init( - struct lru *lru, - lru_obj_init_func obj_init, - lru_obj_deinit_func obj_deinit, - lru_obj_key_compare_func obj_key_compare - ); +static inline +void lru_init(Lru *lru) +{ + QTAILQ_INIT(&lru->global); + for (unsigned int i = 0; i < LRU_NUM_BINS; i++) { + QTAILQ_INIT(&lru->bins[i]); + } + lru->init_node = NULL; + lru->compare_nodes = NULL; + lru->pre_node_evict = NULL; + lru->post_node_evict = NULL; +} -struct lru_node *lru_add_free(struct lru *lru, struct lru_node *node); -struct lru_node *lru_lookup(struct lru *lru, uint64_t hash, void *key); -void lru_flush(struct lru *lru); +static inline +void lru_add_free(Lru *lru, LruNode *node) +{ + node->next_bin.tqe_circ.tql_prev = NULL; + QTAILQ_INSERT_TAIL(&lru->global, node, next_global); +} + +static inline +unsigned int lru_hash_to_bin(Lru *lru, uint64_t hash) +{ + return hash % LRU_NUM_BINS; +} + +static inline +unsigned int lru_get_node_bin(Lru *lru, LruNode *node) +{ + return lru_hash_to_bin(lru, node->hash); +} + +static inline +bool lru_is_node_in_use(Lru *lru, LruNode *node) +{ + return QTAILQ_IN_USE(node, next_bin); +} + +static inline +void lru_evict_node(Lru *lru, LruNode *node) +{ + if (!lru_is_node_in_use(lru, node)) { + return; + } + + unsigned int bin = lru_get_node_bin(lru, node); + QTAILQ_REMOVE(&lru->bins[bin], node, next_bin); + if (lru->post_node_evict) { + lru->post_node_evict(lru, node); + } +} + +static inline +LruNode *lru_evict_one(Lru *lru) +{ + LruNode *found; + + QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { + bool can_evict = true; + if (lru_is_node_in_use(lru, found) && lru->pre_node_evict) { + can_evict = lru->pre_node_evict(lru, found); + } + if (can_evict) { + break; + } + } + + assert(found != NULL); /* No evictable node! */ + + lru_evict_node(lru, found); + return found; +} + +static inline +LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key) +{ + unsigned int bin = lru_hash_to_bin(lru, hash); + LruNode *iter, *found = NULL; + + QTAILQ_FOREACH(iter, &lru->bins[bin], next_bin) { + if ((iter->hash == hash) && !lru->compare_nodes(lru, iter, key)) { + found = iter; + break; + } + } + + if (found) { + QTAILQ_REMOVE(&lru->bins[bin], found, next_bin); + } else { + found = lru_evict_one(lru); + found->hash = hash; + if (lru->init_node) { + lru->init_node(lru, found, key); + } + assert(found->hash == hash); + } + + QTAILQ_REMOVE(&lru->global, found, next_global); + QTAILQ_INSERT_HEAD(&lru->global, found, next_global); + QTAILQ_INSERT_HEAD(&lru->bins[bin], found, next_bin); + + return found; +} + +static inline +void lru_flush(Lru *lru) +{ + LruNode *iter, *iter_next; + + for (unsigned int bin = 0; bin < LRU_NUM_BINS; bin++) { + QTAILQ_FOREACH_SAFE(iter, &lru->bins[bin], next_bin, iter_next) { + bool can_evict = true; + if (lru->pre_node_evict) { + can_evict = lru->pre_node_evict(lru, iter); + } + if (can_evict) { + lru_evict_node(lru, iter); + QTAILQ_REMOVE(&lru->global, iter, next_global); + QTAILQ_INSERT_TAIL(&lru->global, iter, next_global); + } + } + } +} + +typedef void (*LruNodeVisitorFunc)(Lru *lru, LruNode *node, void *opaque); + +static inline +void lru_visit_active(Lru *lru, LruNodeVisitorFunc visitor_func, void *opaque) +{ + LruNode *iter, *iter_next; + + for (unsigned int bin = 0; bin < LRU_NUM_BINS; bin++) { + QTAILQ_FOREACH_SAFE(iter, &lru->bins[bin], next_bin, iter_next) { + visitor_func(lru, iter, opaque); + } + } +} #endif diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index 4078bcdd66..2a6bcf03bd 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -181,17 +181,20 @@ typedef struct TextureBinding { } TextureBinding; typedef struct TextureKey { - struct lru_node node; TextureShape state; - TextureBinding *binding; - hwaddr texture_vram_offset; hwaddr texture_length; hwaddr palette_vram_offset; hwaddr palette_length; - bool possibly_dirty; } TextureKey; +typedef struct TextureLruNode { + LruNode node; + TextureKey key; + TextureBinding *binding; + bool possibly_dirty; +} TextureLruNode; + typedef struct KelvinState { hwaddr object_instance; } KelvinState; @@ -256,8 +259,8 @@ typedef struct PGRAPHState { bool downloads_pending; hwaddr dma_a, dma_b; - struct lru texture_cache; - struct TextureKey *texture_cache_entries; + Lru texture_cache; + struct TextureLruNode *texture_cache_entries; bool texture_dirty[NV2A_MAX_TEXTURES]; TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c index 26db420b86..d7e1a50860 100644 --- a/hw/xbox/nv2a/pgraph.c +++ b/hw/xbox/nv2a/pgraph.c @@ -384,9 +384,10 @@ static uint8_t* convert_texture_data(const TextureShape s, const uint8_t *data, static void upload_gl_texture(GLenum gl_target, const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); static void texture_binding_destroy(gpointer data); -static struct lru_node *texture_cache_entry_init(struct lru_node *obj, void *key); -static struct lru_node *texture_cache_entry_deinit(struct lru_node *obj); -static int texture_cache_entry_compare(struct lru_node *obj, void *key); +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key); +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node); +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key); + static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size); static guint shader_hash(gconstpointer key); @@ -2945,16 +2946,17 @@ void pgraph_init(NV2AState *d) // Initialize texture cache const size_t texture_cache_size = 512; - lru_init(&pg->texture_cache, - &texture_cache_entry_init, - &texture_cache_entry_deinit, - &texture_cache_entry_compare); - pg->texture_cache_entries = malloc(texture_cache_size * sizeof(struct TextureKey)); + lru_init(&pg->texture_cache); + pg->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode)); assert(pg->texture_cache_entries != NULL); for (i = 0; i < texture_cache_size; i++) { lru_add_free(&pg->texture_cache, &pg->texture_cache_entries[i].node); } + pg->texture_cache.init_node = texture_cache_entry_init; + pg->texture_cache.compare_nodes = texture_cache_entry_compare; + pg->texture_cache.post_node_evict = texture_cache_entry_post_evict; + pg->shader_cache = g_hash_table_new(shader_hash, shader_equal); @@ -4611,6 +4613,34 @@ static void pgraph_update_surface(NV2AState *d, bool upload, pgraph_surface_evict_old(d); } +struct pgraph_texture_possibly_dirty_struct { + hwaddr addr, end; +}; + +static void pgraph_mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) +{ + struct pgraph_texture_possibly_dirty_struct *test = + (struct pgraph_texture_possibly_dirty_struct *)opaque; + + struct TextureLruNode *tnode = container_of(node, TextureLruNode, node); + if (tnode->binding == NULL || tnode->possibly_dirty) { + return; + } + + uintptr_t k_tex_addr = tnode->key.texture_vram_offset; + uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; + bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); + + if (tnode->key.palette_length > 0) { + uintptr_t k_pal_addr = tnode->key.palette_vram_offset; + uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; + overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); + } + + tnode->possibly_dirty |= overlapping; +} + + static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size) { @@ -4618,25 +4648,14 @@ static void pgraph_mark_textures_possibly_dirty(NV2AState *d, addr &= TARGET_PAGE_MASK; assert(end <= memory_region_size(d->vram)); - struct lru_node *node = d->pgraph.texture_cache.active; - for (; node; node = node->next) { - struct TextureKey *k = container_of(node, struct TextureKey, node); - if (k->binding == NULL || k->possibly_dirty) { - continue; - } + struct pgraph_texture_possibly_dirty_struct test = { + .addr = addr, + .end = end, + }; - uintptr_t k_tex_addr = k->texture_vram_offset; - uintptr_t k_tex_end = k_tex_addr + k->texture_length - 1; - bool overlapping = !(addr > k_tex_end || k_tex_addr > end); - - if (k->palette_length > 0) { - uintptr_t k_pal_addr = k->palette_vram_offset; - uintptr_t k_pal_end = k_pal_addr + k->palette_length - 1; - overlapping |= !(addr > k_pal_end || k_pal_addr > end); - } - - k->possibly_dirty |= overlapping; - } + lru_visit_active(&d->pgraph.texture_cache, + pgraph_mark_textures_possibly_dirty_visitor, + &test); } static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) @@ -4950,9 +4969,9 @@ static void pgraph_bind_textures(NV2AState *d) // Search for existing texture binding in cache uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key)); - struct lru_node *found = lru_lookup(&pg->texture_cache, - tex_binding_hash, &key); - TextureKey *key_out = container_of(found, struct TextureKey, node); + LruNode *found = lru_lookup(&pg->texture_cache, + tex_binding_hash, &key); + TextureLruNode *key_out = container_of(found, TextureLruNode, node); bool possibly_dirty = (key_out->binding == NULL) || key_out->possibly_dirty; @@ -5667,34 +5686,29 @@ static void texture_binding_destroy(gpointer data) } /* functions for texture LRU cache */ -static struct lru_node *texture_cache_entry_init(struct lru_node *obj, void *key) +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key) { - struct TextureKey *k_out = container_of(obj, struct TextureKey, node); - struct TextureKey *k_in = (struct TextureKey *)key; - memcpy(k_out, k_in, sizeof(struct TextureKey)); - k_out->binding = NULL; - return obj; + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + memcpy(&tnode->key, key, sizeof(TextureKey)); + + tnode->binding = NULL; + tnode->possibly_dirty = false; } -static struct lru_node *texture_cache_entry_deinit(struct lru_node *obj) +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) { - struct TextureKey *a = container_of(obj, struct TextureKey, node); - if (a->binding) { - texture_binding_destroy(a->binding); + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + if (tnode->binding) { + texture_binding_destroy(tnode->binding); + tnode->binding = NULL; + tnode->possibly_dirty = false; } - return obj; } -static int texture_cache_entry_compare(struct lru_node *obj, void *key) +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) { - struct TextureKey *a = container_of(obj, struct TextureKey, node); - struct TextureKey *b = (struct TextureKey *)key; - return memcmp(&a->state, &b->state, sizeof(a->state)) - || (a->texture_vram_offset != b->texture_vram_offset) - || (a->texture_length != b->texture_length) - || (a->palette_vram_offset != b->palette_vram_offset) - || (a->palette_length != b->palette_length) - ; + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + return memcmp(&tnode->key, key, sizeof(TextureKey)); } /* hash and equality for shader cache hash table */