don't look up the next TA context on each write

use list instead of rb_tree for live contexts
This commit is contained in:
Anthony Pesch 2016-12-26 12:13:23 -08:00
parent 6b174f3bac
commit adc8a82a5a
6 changed files with 60 additions and 84 deletions

View File

@ -177,8 +177,8 @@ static void tracer_copy_command(const struct trace_cmd *cmd,
ctx->bg_tsp = cmd->context.bg_tsp;
ctx->bg_tcw = cmd->context.bg_tcw;
ctx->bg_depth = cmd->context.bg_depth;
ctx->rb_width = cmd->context.rb_width;
ctx->rb_height = cmd->context.rb_height;
ctx->video_width = cmd->context.video_width;
ctx->video_height = cmd->context.video_height;
memcpy(ctx->bg_vertices, cmd->context.bg_vertices,
cmd->context.bg_vertices_size);
memcpy(ctx->params, cmd->context.params, cmd->context.params_size);
@ -887,9 +887,6 @@ struct tracer *tracer_create(struct window *window) {
win_add_listener(tracer->window, &tracer->listener);
/* setup tile context buffers */
tracer->ctx.params = tracer->params;
/* setup render context buffers */
tracer->rctx.surfs = tracer->surfs;
tracer->rctx.surfs_size = array_size(tracer->surfs);

View File

@ -1,7 +1,6 @@
#include "hw/pvr/ta.h"
#include "core/list.h"
#include "core/profiler.h"
#include "core/rb_tree.h"
#include "core/string.h"
#include "hw/holly/holly.h"
#include "hw/pvr/pixel_convert.h"
@ -55,7 +54,8 @@ struct ta {
are in a list that is searched by the context's guest address */
struct tile_ctx contexts[TA_MAX_CONTEXTS];
struct list free_contexts;
struct rb_tree live_contexts;
struct list live_contexts;
struct tile_ctx *next_context;
/* the pending context is the last context requested to be rendered by the
emulation thread. a mutex is used to synchronize access with the graphics
@ -63,11 +63,6 @@ struct ta {
mutex_t pending_mutex;
struct tile_ctx *pending_context;
/* buffers used by the tile contexts. allocating here instead of inside each
tile_ctx to avoid blowing the stack when a tile_ctx is needed temporarily
on the stack for searching */
uint8_t params[TA_MAX_CONTEXTS * TA_MAX_PARAMS];
/* debug info */
int frame;
int frames_skipped;
@ -106,22 +101,7 @@ static int ta_entry_cmp(const struct rb_node *rb_lhs,
}
}
/* rb_tree comparator for live tile contexts. orders two nodes by the addr
   field of the tile_ctx each one is embedded in (via live_it), returning
   -1, 0 or 1 */
static int ta_context_cmp(const struct rb_node *rb_lhs,
                          const struct rb_node *rb_rhs) {
  const struct tile_ctx *a = rb_entry(rb_lhs, const struct tile_ctx, live_it);
  const struct tile_ctx *b = rb_entry(rb_rhs, const struct tile_ctx, live_it);

  /* compare explicitly instead of subtracting, so the result is always
     exactly -1, 0 or 1 regardless of how far apart the addresses are */
  return (a->addr > b->addr) - (a->addr < b->addr);
}
static struct rb_callbacks ta_entry_cb = {&ta_entry_cmp, NULL, NULL};
static struct rb_callbacks ta_context_cb = {&ta_context_cmp, NULL, NULL};
/* See "57.1.1.2 Parameter Combinations" for information on the poly types. */
static int ta_get_poly_type_raw(union pcw pcw) {
@ -322,67 +302,66 @@ static void ta_palette_invalidated(const struct exception *ex, void *data) {
}
/* look up the live tile context whose addr matches the given address. the
   scan over ta->live_contexts returns the first match, or NULL when no live
   context has that address */
static struct tile_ctx *ta_get_context(struct ta *ta, uint32_t addr) {
/* rb_tree lookup keyed on a temporary tile_ctx carrying only the address */
struct tile_ctx search;
search.addr = addr;
return rb_find_entry(&ta->live_contexts, &search, struct tile_ctx, live_it,
&ta_context_cb);
/* linear search of the live list */
list_for_each_entry(ctx, &ta->live_contexts, struct tile_ctx, it) {
if (ctx->addr == addr) {
return ctx;
}
}
return NULL;
}
/* take a context off the free list, reset it for the given address and
   register it with the live contexts. returns the context that was set up.
   CHECK_NOTNULL is applied to the entry taken from the free list, so an
   exhausted free list is presumably fatal -- TODO confirm CHECK semantics */
static struct tile_ctx *ta_alloc_context(struct ta *ta, uint32_t addr) {
/* remove from free list */
struct tile_ctx *ctx =
list_first_entry(&ta->free_contexts, struct tile_ctx, free_it);
list_first_entry(&ta->free_contexts, struct tile_ctx, it);
CHECK_NOTNULL(ctx);
list_remove(&ta->free_contexts, &ctx->free_it);
list_remove(&ta->free_contexts, &ctx->it);
/* reset context */
/* the params pointer is saved across the memset, which zeroes every field of
   the context, and restored afterwards */
uint8_t *params = ctx->params;
memset(ctx, 0, sizeof(*ctx));
ctx->addr = addr;
ctx->params = params;
ctx->cursor = 0;
ctx->size = 0;
ctx->list_type = 0;
ctx->vertex_type = 0;
/* add to live contexts */
rb_insert(&ta->live_contexts, &ctx->live_it, &ta_context_cb);
list_add(&ta->live_contexts, &ctx->it);
return ctx;
}
/* remove ctx from the live contexts. the context is not returned to the free
   list here; ta_free_context does that */
static void ta_unlink_context(struct ta *ta, struct tile_ctx *ctx) {
rb_unlink(&ta->live_contexts, &ctx->live_it, &ta_context_cb);
list_remove(&ta->live_contexts, &ctx->it);
}
/* put ctx back on the free list, from which ta_alloc_context takes its
   contexts. live_contexts is not touched here */
static void ta_free_context(struct ta *ta, struct tile_ctx *ctx) {
list_add(&ta->free_contexts, &ctx->free_it);
list_add(&ta->free_contexts, &ctx->it);
}
/* continue writing to the live context at addr. the context must already
   exist (CHECK_NOTNULL on the lookup result). only list_type and vertex_type
   are reset, to TA_NUM_LISTS / TA_NUM_VERTS; cursor and size are left as they
   are. NOTE(review): TA_NUM_LISTS / TA_NUM_VERTS look like "no type selected"
   sentinels -- confirm */
static void ta_cont_context(struct ta *ta, uint32_t addr) {
struct tile_ctx *ctx = ta_get_context(ta, addr);
CHECK_NOTNULL(ctx);
ctx->list_type = TA_NUM_LISTS;
ctx->vertex_type = TA_NUM_VERTS;
}
/* get the live context for addr, allocating one through ta_alloc_context when
   no live context has that address yet */
static void ta_init_context(struct ta *ta, uint32_t addr) {
static struct tile_ctx *ta_demand_context(struct ta *ta, uint32_t addr) {
struct tile_ctx *ctx = ta_get_context(ta, addr);
if (!ctx) {
ctx = ta_alloc_context(ta, addr);
}
ctx->addr = addr;
return ctx;
}
/* continue writing to an existing context: resets list_type and vertex_type
   to TA_NUM_LISTS / TA_NUM_VERTS and leaves cursor and size alone. ta is not
   used in the body */
static void ta_cont_context(struct ta *ta, struct tile_ctx *ctx) {
ctx->list_type = TA_NUM_LISTS;
ctx->vertex_type = TA_NUM_VERTS;
}
/* reset ctx for a fresh list: cursor and size go back to 0, and list_type and
   vertex_type are set to TA_NUM_LISTS / TA_NUM_VERTS. ta is not used in the
   body */
static void ta_init_context(struct ta *ta, struct tile_ctx *ctx) {
ctx->cursor = 0;
ctx->size = 0;
ctx->list_type = TA_NUM_LISTS;
ctx->vertex_type = TA_NUM_VERTS;
}
static void ta_write_context(struct ta *ta, uint32_t addr, void *ptr,
static void ta_write_context(struct ta *ta, struct tile_ctx *ctx, void *ptr,
int size) {
struct tile_ctx *ctx = ta_get_context(ta, addr);
CHECK_NOTNULL(ctx);
CHECK_LT(ctx->size + size, TA_MAX_PARAMS);
memcpy(&ctx->params[ctx->size], ptr, size);
ctx->size += size;
@ -578,11 +557,11 @@ static void ta_save_register_state(struct ta *ta, struct tile_ctx *ctx) {
if (pvr->SPG_CONTROL->interlace ||
(!pvr->SPG_CONTROL->NTSC && !pvr->SPG_CONTROL->PAL)) {
/* interlaced and VGA mode both render at full resolution */
ctx->rb_width = 640;
ctx->rb_height = 480;
ctx->video_width = 640;
ctx->video_height = 480;
} else {
ctx->rb_width = 320;
ctx->rb_height = 240;
ctx->video_width = 320;
ctx->video_height = 240;
}
/* according to the hardware docs, this is the correct calculation of the
@ -650,10 +629,7 @@ static void ta_render_timer(void *data) {
ta_end_render(ta);
}
static void ta_start_render(struct ta *ta, uint32_t addr) {
struct tile_ctx *ctx = ta_get_context(ta, addr);
CHECK_NOTNULL(ctx);
static void ta_start_render(struct ta *ta, struct tile_ctx *ctx) {
/* save off required register state that may be modified by the time the
context is rendered */
ta_save_register_state(ta, ctx);
@ -811,7 +787,7 @@ static void ta_poly_fifo_write(struct ta *ta, uint32_t dst, void *ptr,
uint8_t *src = ptr;
uint8_t *end = src + size;
while (src < end) {
ta_write_context(ta, ta->pvr->TA_ISP_BASE->base_address, src, 32);
ta_write_context(ta, ta->next_context, src, 32);
src += 32;
}
@ -861,10 +837,7 @@ static bool ta_init(struct device *dev) {
for (int i = 0; i < array_size(ta->contexts); i++) {
struct tile_ctx *ctx = &ta->contexts[i];
ctx->params = ta->params + (TA_MAX_PARAMS * i);
list_add(&ta->free_contexts, &ctx->free_it);
list_add(&ta->free_contexts, &ctx->it);
}
return true;
@ -1024,7 +997,9 @@ REG_W32(pvr_cb, STARTRENDER) {
return;
}
ta_start_render(ta, ta->pvr->PARAM_BASE->base_address);
struct tile_ctx *ctx = ta_get_context(ta, ta->pvr->PARAM_BASE->base_address);
CHECK_NOTNULL(ctx);
ta_start_render(ta, ctx);
}
REG_W32(pvr_cb, TA_LIST_INIT) {
@ -1034,7 +1009,9 @@ REG_W32(pvr_cb, TA_LIST_INIT) {
return;
}
ta_init_context(ta, ta->pvr->TA_ISP_BASE->base_address);
struct tile_ctx *ctx = ta_demand_context(ta, ta->pvr->TA_ISP_BASE->base_address);
ta_init_context(ta, ctx);
ta->next_context = ctx;
}
REG_W32(pvr_cb, TA_LIST_CONT) {
@ -1044,7 +1021,10 @@ REG_W32(pvr_cb, TA_LIST_CONT) {
return;
}
ta_cont_context(ta, ta->pvr->TA_ISP_BASE->base_address);
struct tile_ctx *ctx = ta_get_context(ta, ta->pvr->TA_ISP_BASE->base_address);
CHECK_NOTNULL(ctx);
ta_cont_context(ta, ctx);
ta->next_context = ctx;
}
REG_W32(pvr_cb, TA_YUV_TEX_BASE) {

View File

@ -445,8 +445,8 @@ struct tile_ctx {
bool autosort;
int stride;
int pal_pxl_format;
int rb_width;
int rb_height;
int video_width;
int video_height;
union isp bg_isp;
union tsp bg_tsp;
union tcw bg_tcw;
@ -454,7 +454,7 @@ struct tile_ctx {
uint8_t bg_vertices[BG_VERTEX_SIZE];
/* parameter buffer */
uint8_t *params;
uint8_t params[TA_MAX_PARAMS];
int cursor;
int size;
@ -462,8 +462,7 @@ struct tile_ctx {
int list_type;
int vertex_type;
struct list_node free_it;
struct rb_node live_it;
struct list_node it;
};
#endif

View File

@ -478,13 +478,13 @@ static void tr_parse_bg(struct tr *tr, const struct tile_ctx *ctx,
/* override xyz values supplied by ISP_BACKGND_T. while the hardware docs act
like they should be correct, they're most definitely not in most cases */
v0->xyz[0] = 0.0f;
v0->xyz[1] = (float)ctx->rb_height;
v0->xyz[1] = (float)ctx->video_height;
v0->xyz[2] = ctx->bg_depth;
v1->xyz[0] = 0.0f;
v1->xyz[1] = 0.0f;
v1->xyz[2] = ctx->bg_depth;
v2->xyz[0] = (float)ctx->rb_width;
v2->xyz[1] = (float)ctx->rb_height;
v2->xyz[0] = (float)ctx->video_width;
v2->xyz[1] = (float)ctx->video_height;
v2->xyz[2] = ctx->bg_depth;
/* 4th vertex isn't supplied, fill it out automatically */
@ -900,13 +900,13 @@ static void tr_proj_mat(struct tr *tr, const struct tile_ctx *ctx,
/* fudge so z isn't mapped to exactly 0.0 and 1.0 */
float zdepth = (znear - zfar) * 1.1f;
rctx->projection[0] = 2.0f / (float)ctx->rb_width;
rctx->projection[0] = 2.0f / (float)ctx->video_width;
rctx->projection[4] = 0.0f;
rctx->projection[8] = 0.0f;
rctx->projection[12] = -1.0f;
rctx->projection[1] = 0.0f;
rctx->projection[5] = -2.0f / (float)ctx->rb_height;
rctx->projection[5] = -2.0f / (float)ctx->video_height;
rctx->projection[9] = 0.0f;
rctx->projection[13] = 1.0f;

View File

@ -175,8 +175,8 @@ void trace_writer_render_context(struct trace_writer *writer,
cmd.context.autosort = ctx->autosort;
cmd.context.stride = ctx->stride;
cmd.context.pal_pxl_format = ctx->pal_pxl_format;
cmd.context.rb_width = ctx->rb_width;
cmd.context.rb_height = ctx->rb_height;
cmd.context.video_width = ctx->video_width;
cmd.context.video_height = ctx->video_height;
cmd.context.bg_isp = ctx->bg_isp;
cmd.context.bg_tsp = ctx->bg_tsp;
cmd.context.bg_tcw = ctx->bg_tcw;

View File

@ -36,8 +36,8 @@ struct trace_cmd {
int8_t autosort;
uint32_t stride;
uint32_t pal_pxl_format;
uint32_t rb_width;
uint32_t rb_height;
uint32_t video_width;
uint32_t video_height;
union isp bg_isp;
union tsp bg_tsp;
union tcw bg_tcw;