nv2a: Add binning to LRU

This commit is contained in:
Matt Borgerson 2021-02-10 22:14:47 -07:00 committed by mborgerson
parent d2990d0e47
commit 9b42f4aaa3
5 changed files with 235 additions and 324 deletions

View File

@ -18,6 +18,5 @@ obj-y += user.o
obj-y += vsh.o
obj-y += gl/
obj-y += lru.o
obj-y += swizzle.o
obj-y += s3tc.o

View File

@ -1,201 +0,0 @@
/*
* Copyright (c) 2018 Matt Borgerson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include "lru.h"
#define LRU_DEBUG 0
#if LRU_DEBUG
#define lru_dprintf(...) do { printf(__VA_ARGS__); } while(0)
#else
#define lru_dprintf(...) do {} while(0)
#endif
/*
 * Set up an LRU cache descriptor.
 *
 * The caller provides the storage for `lru`. Both lists start out empty, the
 * three object callbacks are recorded, and every statistics counter is zeroed.
 * Returns the same `lru` pointer that was passed in.
 */
struct lru *lru_init(
    struct lru *lru,
    lru_obj_init_func obj_init,
    lru_obj_deinit_func obj_deinit,
    lru_obj_key_compare_func obj_key_compare
    )
{
    assert(lru != NULL);

    /* Object callbacks */
    lru->obj_key_compare = obj_key_compare;
    lru->obj_deinit = obj_deinit;
    lru->obj_init = obj_init;

    /* Nothing is cached and nothing is available yet */
    lru->free = NULL;
    lru->active = NULL;

    /* Statistics */
    lru->num_miss = 0;
    lru->num_hit = 0;
    lru->num_collisions = 0;
    lru->num_free = 0;

    return lru;
}
/*
 * Push `node` onto the head of the free list and bump the free counter.
 * Returns `node`.
 */
struct lru_node *lru_add_free(struct lru *lru, struct lru_node *node)
{
    struct lru_node *old_head = lru->free;

    node->next = old_head;
    lru->free = node;
    lru->num_free += 1;

    return node;
}
/*
 * Look up an object in the cache.
 *
 * The active list is walked from most to least recently used. A node matches
 * when its stored hash equals `hash` and `obj_key_compare(node, key)` returns
 * 0.
 *
 * - Hit: the node is moved to the front of the active list and returned.
 * - Miss:
 *   - If the free list is empty, the last node of the active list (the least
 *     recently used one) is unlinked, passed to `obj_deinit`, and placed on
 *     the free list.
 *   - A node is popped from the free list, passed to `obj_init` together with
 *     `key`, tagged with `hash`, put at the front of the active list and
 *     returned.
 *
 * At least one node must be on the active or the free list (asserted).
 * The hit, miss and collision counters in `lru` are updated along the way.
 */
struct lru_node *lru_lookup(struct lru *lru, uint64_t hash, void *key)
{
    struct lru_node *prev, *node;

    assert(lru != NULL);
    assert((lru->active != NULL) || (lru->free != NULL));

    /* Walk through the cache in order of recent use */
    prev = NULL;
    node = lru->active;

    /* uint64_t is not `unsigned long` on every target, so widen explicitly
     * for the debug traces instead of using %lx. */
    lru_dprintf("Looking for hash %016llx...\n", (unsigned long long)hash);

    if (node != NULL) {
        do {
            lru_dprintf(" %016llx\n", (unsigned long long)node->hash);

            /* Fast hash compare */
            if (node->hash == hash) {
                /* Detailed key comparison */
                if (lru->obj_key_compare(node, key) == 0) {
                    lru_dprintf("Hit, node=%p!\n", (void *)node);
                    lru->num_hit++;

                    if (prev == NULL) {
                        /* Node is already at the front of the RU list */
                        return node;
                    }

                    /* Unlink and promote node */
                    lru_dprintf("Promoting node %p\n", (void *)node);
                    prev->next = node->next;
                    node->next = lru->active;
                    lru->active = node;
                    return node;
                }

                /* Hash collision! Get a better hashing function... */
                lru_dprintf("Hash collision detected!\n");
                lru->num_collisions++;
            }

            if (node->next == NULL) {
                /* No more nodes left to look at after this... Stop here as we
                 * may need to evict this final (least recently used) node.
                 */
                break;
            }

            prev = node;
            node = node->next;
        } while (1);
    }

    lru_dprintf("Miss\n");
    lru->num_miss++;

    /* Reached the end of the active list.
     *
     * `node` points to:
     *   - NULL if there are no active objects in the cache, or
     *   - the last object in the RU list
     *
     * `prev` points to:
     *   - NULL if there are <= 1 active objects in the cache, or
     *   - the second to last object in the RU list
     */
    if (lru->free == NULL) {
        /* No free nodes left, must evict a node. `node` is LRU. */
        assert(node != NULL); /* Sanity check: there must be an active object */
        lru_dprintf("Evicting %p\n", (void *)node);

        if (prev == NULL) {
            /* This was the only node */
            lru->active = NULL;
        } else {
            /* Unlink node */
            prev->next = node->next;
        }

        lru->obj_deinit(node);
        lru_add_free(lru, node);
    }

    /* Allocate a node from the free list */
    node = lru->free;
    assert(node != NULL); /* Sanity check: there must be a free node */
    lru->free = node->next;
    lru->num_free--;

    /* Initialize, promote, and return the node */
    lru->obj_init(node, key);
    node->hash = hash;
    node->next = lru->active;
    lru->active = node;

    return node;
}
/*
 * Empty the active list: every active node is passed to `obj_deinit` and
 * then handed back to the free list.
 */
void lru_flush(struct lru *lru)
{
    struct lru_node *cur = lru->active;

    while (cur != NULL) {
        /* Save the successor first: lru_add_free() rewrites cur->next */
        struct lru_node *following = cur->next;

        lru->obj_deinit(cur);
        lru_add_free(lru, cur);

        cur = following;
    }

    lru->active = NULL;
}

View File

@ -1,40 +1,7 @@
/*
* Simple LRU Object List
* ======================
* - Designed for pre-allocated array of objects which are accessed frequently
* - Objects are identified by a hash and an opaque `key` data structure
* - Lookups are first done by hash, then confirmed by callback compare function
* - Two singly linked lists are maintained: a free list and an active list
* - On cache miss, object is created from free list or by evicting the LRU
* - When created, a callback function is called to fully initialize the object
* LRU object list
*
* Setup
* -----
* - Create an object data structure, embed in it `struct lru_node`
* - Create an init, deinit, and compare function
* - Call `lru_init`
* - Allocate a number of these objects
* - For each object, call `lru_add_free` to populate entries in the cache
*
* Runtime
* -------
* - Initialize custom key data structure (will be used for comparison)
* - Create 64b hash of the object and/or key
* - Call `lru_lookup` with the hash and key
* - The active list is searched, the compare callback will be called if an
* object with matching hash is found
* - If object is found in the cache, it will be moved to the front of the
* active list and returned
* - If object is not found in the cache:
* - If no free items are available, the LRU will be evicted, deinit
* callback will be called
* - An object is popped from the free list and the init callback is called
* on the object
* - The object is added to the front of the active list and returned
*
* ---
*
* Copyright (c) 2018 Matt Borgerson
* Copyright (c) 2021 Matt Borgerson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@ -58,44 +25,173 @@
#ifndef LRU_H
#define LRU_H
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include "qemu/queue.h"
struct lru_node;
#define LRU_NUM_BINS (1<<8)
/*
 * Callback types supplied to lru_init().
 *
 * - init: called by lru_lookup() on a cache miss with the node taken from the
 *   free list and the key that was looked up. lru_lookup() does not use the
 *   return value.
 * - deinit: called on a node that is evicted by lru_lookup() or released by
 *   lru_flush(). The return value is not used by either caller.
 * - key compare: called by lru_lookup() when a node's hash equals the hash
 *   being looked up. A return value of 0 is treated as a match.
 */
typedef struct lru_node *(*lru_obj_init_func)(struct lru_node *obj, void *key);
typedef struct lru_node *(*lru_obj_deinit_func)(struct lru_node *obj);
typedef int (*lru_obj_key_compare_func)(struct lru_node *obj, void *key);
/*
 * LRU cache descriptor: the two node lists, the object callbacks and a set
 * of statistics counters. Initialized by lru_init().
 */
struct lru {
struct lru_node *active; /* Singly-linked list tracking recently active */
struct lru_node *free; /* Singly-linked list tracking available objects */
/* Object callbacks, stored by lru_init() */
lru_obj_init_func obj_init;
lru_obj_deinit_func obj_deinit;
lru_obj_key_compare_func obj_key_compare;
/* Incremented by lru_add_free(), decremented when lru_lookup() takes a node */
size_t num_free;
/* Lookups steps where the hash matched but the key compare did not */
size_t num_collisions;
/* Number of lru_lookup() calls that found / did not find the key */
size_t num_hit;
size_t num_miss;
};
/* This should be embedded in the object structure */
struct lru_node {
typedef struct LruNode {
QTAILQ_ENTRY(LruNode) next_global;
QTAILQ_ENTRY(LruNode) next_bin;
uint64_t hash;
struct lru_node *next;
} LruNode;
/*
 * LRU cache state: one global list holding every node, plus LRU_NUM_BINS
 * per-hash lists holding the nodes that currently cache an entry.
 * lru_init() clears all four callbacks; the owner assigns them afterwards.
 */
typedef struct Lru Lru;
struct Lru {
/* All nodes added with lru_add_free(). lru_lookup() moves the node it
 * returns to the head; lru_evict_one() searches from the tail. */
QTAILQ_HEAD(, LruNode) global;
/* In-use nodes, in the bin selected by lru_hash_to_bin(node->hash). */
QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS];
/* Initialize a node. Called by lru_lookup() on a miss, after node->hash has
 * been set; must leave node->hash unchanged (asserted). Skipped if NULL. */
void (*init_node)(Lru *lru, LruNode *node, void *key);
/* Called by lru_lookup() for each node in the bin whose hash equals the
 * lookup hash. Return `true` if nodes differ. lru_lookup() calls this
 * without a NULL check, so it must be set before the first lookup. */
bool (*compare_nodes)(Lru *lru, LruNode *node, void *key);
/* Optional. Called before eviction. Return `false` to prevent eviction. */
bool (*pre_node_evict)(Lru *lru, LruNode *node);
/* Optional. Called after eviction. Reclaim any associated resources. */
void (*post_node_evict)(Lru *lru, LruNode *node);
};
struct lru *lru_init(
struct lru *lru,
lru_obj_init_func obj_init,
lru_obj_deinit_func obj_deinit,
lru_obj_key_compare_func obj_key_compare
);
/*
 * Reset `lru` to an empty cache: no nodes on the global list, no nodes in
 * any bin, and every callback cleared. The owner is expected to assign the
 * callbacks it needs after this call.
 */
static inline
void lru_init(Lru *lru)
{
    lru->post_node_evict = NULL;
    lru->pre_node_evict = NULL;
    lru->compare_nodes = NULL;
    lru->init_node = NULL;

    QTAILQ_INIT(&lru->global);

    for (unsigned int bin = 0; bin != LRU_NUM_BINS; ++bin) {
        QTAILQ_INIT(&lru->bins[bin]);
    }
}
struct lru_node *lru_add_free(struct lru *lru, struct lru_node *node);
struct lru_node *lru_lookup(struct lru *lru, uint64_t hash, void *key);
void lru_flush(struct lru *lru);
/*
 * Hand an unused node to the cache. The node is appended to the tail of the
 * global list (the end lru_evict_one() searches first) and its bin link is
 * cleared so that lru_is_node_in_use() reports it as not in use.
 */
static inline
void lru_add_free(Lru *lru, LruNode *node)
{
    QTAILQ_INSERT_TAIL(&lru->global, node, next_global);

    /* The node is not a member of any bin yet */
    node->next_bin.tqe_circ.tql_prev = NULL;
}
/*
 * Map a 64-bit hash to a bin index in the range [0, LRU_NUM_BINS).
 * `lru` is not consulted.
 */
static inline
unsigned int lru_hash_to_bin(Lru *lru, uint64_t hash)
{
    const uint64_t bin = hash % LRU_NUM_BINS;

    return (unsigned int)bin;
}
/*
 * Return the bin index that corresponds to the hash stored in `node`.
 */
static inline
unsigned int lru_get_node_bin(Lru *lru, LruNode *node)
{
    const uint64_t node_hash = node->hash;

    return lru_hash_to_bin(lru, node_hash);
}
/*
 * Report whether `node` is currently linked into a bin, i.e. whether it
 * holds a cached entry. `lru` is not consulted.
 */
static inline
bool lru_is_node_in_use(Lru *lru, LruNode *node)
{
    if (QTAILQ_IN_USE(node, next_bin)) {
        return true;
    }

    return false;
}
/*
 * Drop the entry held by `node`, if any: unlink it from its bin and then
 * invoke the optional `post_node_evict` callback. Nodes that are not in use
 * are left alone. The node's position on the global list is not changed.
 */
static inline
void lru_evict_node(Lru *lru, LruNode *node)
{
    if (lru_is_node_in_use(lru, node)) {
        const unsigned int node_bin = lru_get_node_bin(lru, node);

        QTAILQ_REMOVE(&lru->bins[node_bin], node, next_bin);

        if (lru->post_node_evict != NULL) {
            lru->post_node_evict(lru, node);
        }
    }
}
/*
 * Pick a node for reuse and evict whatever it holds.
 *
 * The global list is searched from its tail towards its head. A node is
 * accepted when it is not in use, when no `pre_node_evict` callback is set,
 * or when that callback returns `true` for it. The accepted node is passed
 * to lru_evict_node() and returned; it stays on the global list.
 * Finding no acceptable node trips an assertion.
 */
static inline
LruNode *lru_evict_one(Lru *lru)
{
    LruNode *victim;

    QTAILQ_FOREACH_REVERSE(victim, &lru->global, next_global) {
        if (!lru_is_node_in_use(lru, victim) || !lru->pre_node_evict) {
            /* Nothing to ask: the node can be taken as is */
            break;
        }
        if (lru->pre_node_evict(lru, victim)) {
            break;
        }
    }

    assert(victim != NULL); /* No evictable node! */

    lru_evict_node(lru, victim);
    return victim;
}
/*
 * Find the node for (`hash`, `key`), creating it on a miss.
 *
 * Only the bin selected by `hash` is searched. A node matches when its hash
 * equals `hash` and `compare_nodes` returns `false` for it.
 *
 * On a miss a node is obtained from lru_evict_one(), tagged with `hash` and
 * passed to the optional `init_node` callback, which must not alter the
 * node's hash (asserted).
 *
 * In both cases the returned node ends up at the head of the global list
 * and at the head of its bin.
 */
static inline
LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key)
{
    const unsigned int bin = lru_hash_to_bin(lru, hash);
    LruNode *node = NULL;
    LruNode *cursor;

    QTAILQ_FOREACH(cursor, &lru->bins[bin], next_bin) {
        if (cursor->hash != hash) {
            continue;
        }
        if (lru->compare_nodes(lru, cursor, key)) {
            /* Same hash, different key */
            continue;
        }
        node = cursor;
        break;
    }

    if (node == NULL) {
        /* Miss: recycle a node for this key */
        node = lru_evict_one(lru);
        node->hash = hash;
        if (lru->init_node) {
            lru->init_node(lru, node, key);
        }
        assert(node->hash == hash);
    } else {
        /* Hit: take it out of the bin so it can go back in at the head */
        QTAILQ_REMOVE(&lru->bins[bin], node, next_bin);
    }

    /* Promote to most recently used */
    QTAILQ_REMOVE(&lru->global, node, next_global);
    QTAILQ_INSERT_HEAD(&lru->global, node, next_global);
    QTAILQ_INSERT_HEAD(&lru->bins[bin], node, next_bin);

    return node;
}
/*
 * Evict every in-use node that may be evicted.
 *
 * Each node in each bin is offered to the optional `pre_node_evict`
 * callback; nodes for which it returns `false` are left untouched. All other
 * nodes are evicted with lru_evict_node() and moved to the tail of the
 * global list.
 */
static inline
void lru_flush(Lru *lru)
{
    for (unsigned int b = 0; b < LRU_NUM_BINS; b++) {
        LruNode *cur, *upcoming;

        /* _SAFE variant: the current node is unlinked inside the loop */
        QTAILQ_FOREACH_SAFE(cur, &lru->bins[b], next_bin, upcoming) {
            if (lru->pre_node_evict && !lru->pre_node_evict(lru, cur)) {
                continue;
            }

            lru_evict_node(lru, cur);
            QTAILQ_REMOVE(&lru->global, cur, next_global);
            QTAILQ_INSERT_TAIL(&lru->global, cur, next_global);
        }
    }
}
typedef void (*LruNodeVisitorFunc)(Lru *lru, LruNode *node, void *opaque);
/*
 * Call `visitor_func(lru, node, opaque)` for every node that is linked into
 * a bin, going through the bins in index order and through each bin from
 * head to tail.
 */
static inline
void lru_visit_active(Lru *lru, LruNodeVisitorFunc visitor_func, void *opaque)
{
    for (unsigned int b = 0; b < LRU_NUM_BINS; b++) {
        LruNode *cur, *upcoming;

        QTAILQ_FOREACH_SAFE(cur, &lru->bins[b], next_bin, upcoming) {
            visitor_func(lru, cur, opaque);
        }
    }
}
#endif

View File

@ -181,17 +181,20 @@ typedef struct TextureBinding {
} TextureBinding;
typedef struct TextureKey {
struct lru_node node;
TextureShape state;
TextureBinding *binding;
hwaddr texture_vram_offset;
hwaddr texture_length;
hwaddr palette_vram_offset;
hwaddr palette_length;
bool possibly_dirty;
} TextureKey;
/*
 * Texture cache entry: an LRU node together with the key it was created for
 * and the per-entry texture state.
 */
typedef struct TextureLruNode {
/* Embedded LRU link; callbacks recover the entry from it via container_of */
LruNode node;
/* Copy of the lookup key, filled in by texture_cache_entry_init() */
TextureKey key;
/* Set to NULL on init; destroyed and reset to NULL after eviction */
TextureBinding *binding;
/* Cleared on init; set when a marked address range overlaps the texture or
 * palette range recorded in `key` */
bool possibly_dirty;
} TextureLruNode;
typedef struct KelvinState {
hwaddr object_instance;
} KelvinState;
@ -256,8 +259,8 @@ typedef struct PGRAPHState {
bool downloads_pending;
hwaddr dma_a, dma_b;
struct lru texture_cache;
struct TextureKey *texture_cache_entries;
Lru texture_cache;
struct TextureLruNode *texture_cache_entries;
bool texture_dirty[NV2A_MAX_TEXTURES];
TextureBinding *texture_binding[NV2A_MAX_TEXTURES];

View File

@ -384,9 +384,10 @@ static uint8_t* convert_texture_data(const TextureShape s, const uint8_t *data,
static void upload_gl_texture(GLenum gl_target, const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data);
static void texture_binding_destroy(gpointer data);
static struct lru_node *texture_cache_entry_init(struct lru_node *obj, void *key);
static struct lru_node *texture_cache_entry_deinit(struct lru_node *obj);
static int texture_cache_entry_compare(struct lru_node *obj, void *key);
static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key);
static void texture_cache_entry_post_evict(Lru *lru, LruNode *node);
static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key);
static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size);
static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size);
static guint shader_hash(gconstpointer key);
@ -2945,16 +2946,17 @@ void pgraph_init(NV2AState *d)
// Initialize texture cache
const size_t texture_cache_size = 512;
lru_init(&pg->texture_cache,
&texture_cache_entry_init,
&texture_cache_entry_deinit,
&texture_cache_entry_compare);
pg->texture_cache_entries = malloc(texture_cache_size * sizeof(struct TextureKey));
lru_init(&pg->texture_cache);
pg->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode));
assert(pg->texture_cache_entries != NULL);
for (i = 0; i < texture_cache_size; i++) {
lru_add_free(&pg->texture_cache, &pg->texture_cache_entries[i].node);
}
pg->texture_cache.init_node = texture_cache_entry_init;
pg->texture_cache.compare_nodes = texture_cache_entry_compare;
pg->texture_cache.post_node_evict = texture_cache_entry_post_evict;
pg->shader_cache = g_hash_table_new(shader_hash, shader_equal);
@ -4611,6 +4613,34 @@ static void pgraph_update_surface(NV2AState *d, bool upload,
pgraph_surface_evict_old(d);
}
/*
 * Address range handed to pgraph_mark_textures_possibly_dirty_visitor()
 * through the `opaque` pointer. The visitor treats both `addr` and `end` as
 * inclusive bounds.
 */
struct pgraph_texture_possibly_dirty_struct {
    hwaddr addr, end;
};

/*
 * Texture cache visitor: flag an entry as possibly dirty when the range in
 * `opaque` overlaps its texture data or, if it has one, its palette data.
 *
 * Entries without a binding and entries that are already flagged are left
 * unchanged. `lru` is unused.
 */
static void pgraph_mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque)
{
    struct pgraph_texture_possibly_dirty_struct *test =
        (struct pgraph_texture_possibly_dirty_struct *)opaque;
    struct TextureLruNode *tnode = container_of(node, TextureLruNode, node);

    if (tnode->binding == NULL || tnode->possibly_dirty) {
        return;
    }

    /* Keep the arithmetic in hwaddr, the type of the key fields and of the
     * test range, so nothing is narrowed on hosts with 32-bit pointers. */
    hwaddr k_tex_addr = tnode->key.texture_vram_offset;
    hwaddr k_tex_end = k_tex_addr + tnode->key.texture_length - 1;
    bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end);

    if (tnode->key.palette_length > 0) {
        hwaddr k_pal_addr = tnode->key.palette_vram_offset;
        hwaddr k_pal_end = k_pal_addr + tnode->key.palette_length - 1;
        overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end);
    }

    tnode->possibly_dirty |= overlapping;
}
static void pgraph_mark_textures_possibly_dirty(NV2AState *d,
hwaddr addr, hwaddr size)
{
@ -4618,25 +4648,14 @@ static void pgraph_mark_textures_possibly_dirty(NV2AState *d,
addr &= TARGET_PAGE_MASK;
assert(end <= memory_region_size(d->vram));
struct lru_node *node = d->pgraph.texture_cache.active;
for (; node; node = node->next) {
struct TextureKey *k = container_of(node, struct TextureKey, node);
if (k->binding == NULL || k->possibly_dirty) {
continue;
}
struct pgraph_texture_possibly_dirty_struct test = {
.addr = addr,
.end = end,
};
uintptr_t k_tex_addr = k->texture_vram_offset;
uintptr_t k_tex_end = k_tex_addr + k->texture_length - 1;
bool overlapping = !(addr > k_tex_end || k_tex_addr > end);
if (k->palette_length > 0) {
uintptr_t k_pal_addr = k->palette_vram_offset;
uintptr_t k_pal_end = k_pal_addr + k->palette_length - 1;
overlapping |= !(addr > k_pal_end || k_pal_addr > end);
}
k->possibly_dirty |= overlapping;
}
lru_visit_active(&d->pgraph.texture_cache,
pgraph_mark_textures_possibly_dirty_visitor,
&test);
}
static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size)
@ -4950,9 +4969,9 @@ static void pgraph_bind_textures(NV2AState *d)
// Search for existing texture binding in cache
uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key));
struct lru_node *found = lru_lookup(&pg->texture_cache,
tex_binding_hash, &key);
TextureKey *key_out = container_of(found, struct TextureKey, node);
LruNode *found = lru_lookup(&pg->texture_cache,
tex_binding_hash, &key);
TextureLruNode *key_out = container_of(found, TextureLruNode, node);
bool possibly_dirty = (key_out->binding == NULL)
|| key_out->possibly_dirty;
@ -5667,34 +5686,29 @@ static void texture_binding_destroy(gpointer data)
}
/* functions for texture LRU cache */
static struct lru_node *texture_cache_entry_init(struct lru_node *obj, void *key)
static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key)
{
struct TextureKey *k_out = container_of(obj, struct TextureKey, node);
struct TextureKey *k_in = (struct TextureKey *)key;
memcpy(k_out, k_in, sizeof(struct TextureKey));
k_out->binding = NULL;
return obj;
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
memcpy(&tnode->key, key, sizeof(TextureKey));
tnode->binding = NULL;
tnode->possibly_dirty = false;
}
static struct lru_node *texture_cache_entry_deinit(struct lru_node *obj)
static void texture_cache_entry_post_evict(Lru *lru, LruNode *node)
{
struct TextureKey *a = container_of(obj, struct TextureKey, node);
if (a->binding) {
texture_binding_destroy(a->binding);
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
if (tnode->binding) {
texture_binding_destroy(tnode->binding);
tnode->binding = NULL;
tnode->possibly_dirty = false;
}
return obj;
}
static int texture_cache_entry_compare(struct lru_node *obj, void *key)
static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key)
{
struct TextureKey *a = container_of(obj, struct TextureKey, node);
struct TextureKey *b = (struct TextureKey *)key;
return memcmp(&a->state, &b->state, sizeof(a->state))
|| (a->texture_vram_offset != b->texture_vram_offset)
|| (a->texture_length != b->texture_length)
|| (a->palette_vram_offset != b->palette_vram_offset)
|| (a->palette_length != b->palette_length)
;
TextureLruNode *tnode = container_of(node, TextureLruNode, node);
return memcmp(&tnode->key, key, sizeof(TextureKey));
}
/* hash and equality for shader cache hash table */