From adc8a82a5a439a91644557f7ba3aad8872ac1cdb Mon Sep 17 00:00:00 2001 From: Anthony Pesch Date: Mon, 26 Dec 2016 12:13:23 -0800 Subject: [PATCH] don't lookup next TA context on each write; use list instead of rb_tree for live contexts --- src/emu/tracer.c | 7 +-- src/hw/pvr/ta.c | 110 +++++++++++++++++------------------------- src/hw/pvr/ta_types.h | 9 ++-- src/hw/pvr/tr.c | 10 ++-- src/hw/pvr/trace.c | 4 +- src/hw/pvr/trace.h | 4 +- 6 files changed, 60 insertions(+), 84 deletions(-) diff --git a/src/emu/tracer.c b/src/emu/tracer.c index f58a9be6..06771cd0 100644 --- a/src/emu/tracer.c +++ b/src/emu/tracer.c @@ -177,8 +177,8 @@ static void tracer_copy_command(const struct trace_cmd *cmd, ctx->bg_tsp = cmd->context.bg_tsp; ctx->bg_tcw = cmd->context.bg_tcw; ctx->bg_depth = cmd->context.bg_depth; - ctx->rb_width = cmd->context.rb_width; - ctx->rb_height = cmd->context.rb_height; + ctx->video_width = cmd->context.video_width; + ctx->video_height = cmd->context.video_height; memcpy(ctx->bg_vertices, cmd->context.bg_vertices, cmd->context.bg_vertices_size); memcpy(ctx->params, cmd->context.params, cmd->context.params_size); @@ -887,9 +887,6 @@ struct tracer *tracer_create(struct window *window) { win_add_listener(tracer->window, &tracer->listener); - /* setup tile context buffers */ - tracer->ctx.params = tracer->params; - /* setup render context buffers */ tracer->rctx.surfs = tracer->surfs; tracer->rctx.surfs_size = array_size(tracer->surfs); diff --git a/src/hw/pvr/ta.c b/src/hw/pvr/ta.c index ccc7a8c7..ffc8ec7f 100644 --- a/src/hw/pvr/ta.c +++ b/src/hw/pvr/ta.c @@ -1,7 +1,6 @@ #include "hw/pvr/ta.h" #include "core/list.h" #include "core/profiler.h" -#include "core/rb_tree.h" #include "core/string.h" #include "hw/holly/holly.h" #include "hw/pvr/pixel_convert.h" @@ -55,7 +54,8 @@ struct ta { are in a tree ordered by the context's guest address */ struct tile_ctx contexts[TA_MAX_CONTEXTS]; struct list free_contexts; - struct rb_tree live_contexts; + struct list 
live_contexts; + struct tile_ctx *next_context; /* the pending context is the last context requested to be rendered by the emulation thread. a mutex is used to synchronize access with the graphics @@ -63,11 +63,6 @@ struct ta { mutex_t pending_mutex; struct tile_ctx *pending_context; - /* buffers used by the tile contexts. allocating here instead of inside each - tile_ctx to avoid blowing the stack when a tile_ctx is needed temporarily - on the stack for searching */ - uint8_t params[TA_MAX_CONTEXTS * TA_MAX_PARAMS]; - /* debug info */ int frame; int frames_skipped; @@ -106,22 +101,7 @@ static int ta_entry_cmp(const struct rb_node *rb_lhs, } } -static int ta_context_cmp(const struct rb_node *rb_lhs, - const struct rb_node *rb_rhs) { - const struct tile_ctx *lhs = rb_entry(rb_lhs, const struct tile_ctx, live_it); - const struct tile_ctx *rhs = rb_entry(rb_rhs, const struct tile_ctx, live_it); - - if (lhs->addr < rhs->addr) { - return -1; - } else if (lhs->addr > rhs->addr) { - return 1; - } else { - return 0; - } -} - static struct rb_callbacks ta_entry_cb = {&ta_entry_cmp, NULL, NULL}; -static struct rb_callbacks ta_context_cb = {&ta_context_cmp, NULL, NULL}; /* See "57.1.1.2 Parameter Combinations" for information on the poly types. 
*/ static int ta_get_poly_type_raw(union pcw pcw) { @@ -322,67 +302,66 @@ static void ta_palette_invalidated(const struct exception *ex, void *data) { } static struct tile_ctx *ta_get_context(struct ta *ta, uint32_t addr) { - struct tile_ctx search; - search.addr = addr; - - return rb_find_entry(&ta->live_contexts, &search, struct tile_ctx, live_it, - &ta_context_cb); + list_for_each_entry(ctx, &ta->live_contexts, struct tile_ctx, it) { + if (ctx->addr == addr) { + return ctx; + } + } + return NULL; } static struct tile_ctx *ta_alloc_context(struct ta *ta, uint32_t addr) { /* remove from free list */ struct tile_ctx *ctx = - list_first_entry(&ta->free_contexts, struct tile_ctx, free_it); + list_first_entry(&ta->free_contexts, struct tile_ctx, it); CHECK_NOTNULL(ctx); - list_remove(&ta->free_contexts, &ctx->free_it); + list_remove(&ta->free_contexts, &ctx->it); /* reset context */ - uint8_t *params = ctx->params; - memset(ctx, 0, sizeof(*ctx)); ctx->addr = addr; - ctx->params = params; + ctx->cursor = 0; + ctx->size = 0; + ctx->list_type = 0; + ctx->vertex_type = 0; /* add to live tree */ - rb_insert(&ta->live_contexts, &ctx->live_it, &ta_context_cb); + list_add(&ta->live_contexts, &ctx->it); return ctx; } static void ta_unlink_context(struct ta *ta, struct tile_ctx *ctx) { - rb_unlink(&ta->live_contexts, &ctx->live_it, &ta_context_cb); + list_remove(&ta->live_contexts, &ctx->it); } static void ta_free_context(struct ta *ta, struct tile_ctx *ctx) { - list_add(&ta->free_contexts, &ctx->free_it); + list_add(&ta->free_contexts, &ctx->it); } -static void ta_cont_context(struct ta *ta, uint32_t addr) { - struct tile_ctx *ctx = ta_get_context(ta, addr); - CHECK_NOTNULL(ctx); - - ctx->list_type = TA_NUM_LISTS; - ctx->vertex_type = TA_NUM_VERTS; -} - -static void ta_init_context(struct ta *ta, uint32_t addr) { +static struct tile_ctx *ta_demand_context(struct ta *ta, uint32_t addr) { struct tile_ctx *ctx = ta_get_context(ta, addr); if (!ctx) { ctx = ta_alloc_context(ta, 
addr); } - ctx->addr = addr; + return ctx; +} + +static void ta_cont_context(struct ta *ta, struct tile_ctx *ctx) { + ctx->list_type = TA_NUM_LISTS; + ctx->vertex_type = TA_NUM_VERTS; +} + +static void ta_init_context(struct ta *ta, struct tile_ctx *ctx) { ctx->cursor = 0; ctx->size = 0; ctx->list_type = TA_NUM_LISTS; ctx->vertex_type = TA_NUM_VERTS; } -static void ta_write_context(struct ta *ta, uint32_t addr, void *ptr, +static void ta_write_context(struct ta *ta, struct tile_ctx *ctx, void *ptr, int size) { - struct tile_ctx *ctx = ta_get_context(ta, addr); - CHECK_NOTNULL(ctx); - CHECK_LT(ctx->size + size, TA_MAX_PARAMS); memcpy(&ctx->params[ctx->size], ptr, size); ctx->size += size; @@ -578,11 +557,11 @@ static void ta_save_register_state(struct ta *ta, struct tile_ctx *ctx) { if (pvr->SPG_CONTROL->interlace || (!pvr->SPG_CONTROL->NTSC && !pvr->SPG_CONTROL->PAL)) { /* interlaced and VGA mode both render at full resolution */ - ctx->rb_width = 640; - ctx->rb_height = 480; + ctx->video_width = 640; + ctx->video_height = 480; } else { - ctx->rb_width = 320; - ctx->rb_height = 240; + ctx->video_width = 320; + ctx->video_height = 240; } /* according to the hardware docs, this is the correct calculation of the @@ -650,10 +629,7 @@ static void ta_render_timer(void *data) { ta_end_render(ta); } -static void ta_start_render(struct ta *ta, uint32_t addr) { - struct tile_ctx *ctx = ta_get_context(ta, addr); - CHECK_NOTNULL(ctx); - +static void ta_start_render(struct ta *ta, struct tile_ctx *ctx) { /* save off required register state that may be modified by the time the context is rendered */ ta_save_register_state(ta, ctx); @@ -811,7 +787,7 @@ static void ta_poly_fifo_write(struct ta *ta, uint32_t dst, void *ptr, uint8_t *src = ptr; uint8_t *end = src + size; while (src < end) { - ta_write_context(ta, ta->pvr->TA_ISP_BASE->base_address, src, 32); + ta_write_context(ta, ta->next_context, src, 32); src += 32; } @@ -861,10 +837,7 @@ static bool ta_init(struct device *dev) { 
for (int i = 0; i < array_size(ta->contexts); i++) { struct tile_ctx *ctx = &ta->contexts[i]; - - ctx->params = ta->params + (TA_MAX_PARAMS * i); - - list_add(&ta->free_contexts, &ctx->free_it); + list_add(&ta->free_contexts, &ctx->it); } return true; @@ -1024,7 +997,9 @@ REG_W32(pvr_cb, STARTRENDER) { return; } - ta_start_render(ta, ta->pvr->PARAM_BASE->base_address); + struct tile_ctx *ctx = ta_get_context(ta, ta->pvr->PARAM_BASE->base_address); + CHECK_NOTNULL(ctx); + ta_start_render(ta, ctx); } REG_W32(pvr_cb, TA_LIST_INIT) { @@ -1034,7 +1009,9 @@ REG_W32(pvr_cb, TA_LIST_INIT) { return; } - ta_init_context(ta, ta->pvr->TA_ISP_BASE->base_address); + struct tile_ctx *ctx = ta_demand_context(ta, ta->pvr->TA_ISP_BASE->base_address); + ta_init_context(ta, ctx); + ta->next_context = ctx; } REG_W32(pvr_cb, TA_LIST_CONT) { @@ -1044,7 +1021,10 @@ REG_W32(pvr_cb, TA_LIST_CONT) { return; } - ta_cont_context(ta, ta->pvr->TA_ISP_BASE->base_address); + struct tile_ctx *ctx = ta_get_context(ta, ta->pvr->TA_ISP_BASE->base_address); + CHECK_NOTNULL(ctx); + ta_cont_context(ta, ctx); + ta->next_context = ctx; } REG_W32(pvr_cb, TA_YUV_TEX_BASE) { diff --git a/src/hw/pvr/ta_types.h b/src/hw/pvr/ta_types.h index 367410e1..3b9d2a89 100644 --- a/src/hw/pvr/ta_types.h +++ b/src/hw/pvr/ta_types.h @@ -445,8 +445,8 @@ struct tile_ctx { bool autosort; int stride; int pal_pxl_format; - int rb_width; - int rb_height; + int video_width; + int video_height; union isp bg_isp; union tsp bg_tsp; union tcw bg_tcw; @@ -454,7 +454,7 @@ struct tile_ctx { uint8_t bg_vertices[BG_VERTEX_SIZE]; /* parameter buffer */ - uint8_t *params; + uint8_t params[TA_MAX_PARAMS]; int cursor; int size; @@ -462,8 +462,7 @@ struct tile_ctx { int list_type; int vertex_type; - struct list_node free_it; - struct rb_node live_it; + struct list_node it; }; #endif diff --git a/src/hw/pvr/tr.c b/src/hw/pvr/tr.c index 9294b1e0..53136292 100644 --- a/src/hw/pvr/tr.c +++ b/src/hw/pvr/tr.c @@ -478,13 +478,13 @@ static void 
tr_parse_bg(struct tr *tr, const struct tile_ctx *ctx, /* override xyz values supplied by ISP_BACKGND_T. while the hardware docs act like they should be correct, they're most definitely not in most cases */ v0->xyz[0] = 0.0f; - v0->xyz[1] = (float)ctx->rb_height; + v0->xyz[1] = (float)ctx->video_height; v0->xyz[2] = ctx->bg_depth; v1->xyz[0] = 0.0f; v1->xyz[1] = 0.0f; v1->xyz[2] = ctx->bg_depth; - v2->xyz[0] = (float)ctx->rb_width; - v2->xyz[1] = (float)ctx->rb_height; + v2->xyz[0] = (float)ctx->video_width; + v2->xyz[1] = (float)ctx->video_height; v2->xyz[2] = ctx->bg_depth; /* 4th vertex isn't supplied, fill it out automatically */ @@ -900,13 +900,13 @@ static void tr_proj_mat(struct tr *tr, const struct tile_ctx *ctx, /* fudge so z isn't mapped to exactly 0.0 and 1.0 */ float zdepth = (znear - zfar) * 1.1f; - rctx->projection[0] = 2.0f / (float)ctx->rb_width; + rctx->projection[0] = 2.0f / (float)ctx->video_width; rctx->projection[4] = 0.0f; rctx->projection[8] = 0.0f; rctx->projection[12] = -1.0f; rctx->projection[1] = 0.0f; - rctx->projection[5] = -2.0f / (float)ctx->rb_height; + rctx->projection[5] = -2.0f / (float)ctx->video_height; rctx->projection[9] = 0.0f; rctx->projection[13] = 1.0f; diff --git a/src/hw/pvr/trace.c b/src/hw/pvr/trace.c index 76307727..0ebf4330 100644 --- a/src/hw/pvr/trace.c +++ b/src/hw/pvr/trace.c @@ -175,8 +175,8 @@ void trace_writer_render_context(struct trace_writer *writer, cmd.context.autosort = ctx->autosort; cmd.context.stride = ctx->stride; cmd.context.pal_pxl_format = ctx->pal_pxl_format; - cmd.context.rb_width = ctx->rb_width; - cmd.context.rb_height = ctx->rb_height; + cmd.context.video_width = ctx->video_width; + cmd.context.video_height = ctx->video_height; cmd.context.bg_isp = ctx->bg_isp; cmd.context.bg_tsp = ctx->bg_tsp; cmd.context.bg_tcw = ctx->bg_tcw; diff --git a/src/hw/pvr/trace.h b/src/hw/pvr/trace.h index 9bce7f5c..c9d36086 100644 --- a/src/hw/pvr/trace.h +++ b/src/hw/pvr/trace.h @@ -36,8 +36,8 @@ struct 
trace_cmd { int8_t autosort; uint32_t stride; uint32_t pal_pxl_format; - uint32_t rb_width; - uint32_t rb_height; + uint32_t video_width; + uint32_t video_height; union isp bg_isp; union tsp bg_tsp; union tcw bg_tcw;