From ec4e798426fb41e7aeef9a7ffa3a16517baf6916 Mon Sep 17 00:00:00 2001 From: Matt Borgerson Date: Thu, 11 Jun 2020 01:38:50 -0700 Subject: [PATCH] nv2a: Merge pfifo pusher/puller threads --- hw/xbox/nv2a/nv2a.c | 46 ++++++------ hw/xbox/nv2a/nv2a_int.h | 22 +++--- hw/xbox/nv2a/nv2a_pfifo.c | 142 +++++++++++++++++++++++++------------ hw/xbox/nv2a/nv2a_pgraph.c | 88 ++++++++++++----------- hw/xbox/nv2a/nv2a_user.c | 4 +- 5 files changed, 178 insertions(+), 124 deletions(-) diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c index 0696f53dbe..8245157c3f 100644 --- a/hw/xbox/nv2a/nv2a.c +++ b/hw/xbox/nv2a/nv2a.c @@ -3,7 +3,7 @@ * * Copyright (c) 2012 espes * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2018 Matt Borgerson + * Copyright (c) 2018-2020 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -419,20 +419,20 @@ static void nv2a_init_memory(NV2AState *d, MemoryRegion *ram) pgraph_init(d); - /* fire up puller */ - qemu_thread_create(&d->pfifo.puller_thread, "nv2a.puller_thread", - pfifo_puller_thread, - d, QEMU_THREAD_JOINABLE); - - /* fire up pusher */ - qemu_thread_create(&d->pfifo.pusher_thread, "nv2a.pusher_thread", - pfifo_pusher_thread, - d, QEMU_THREAD_JOINABLE); + /* fire up pfifo */ + qemu_thread_create(&d->pfifo.thread, "nv2a.pfifo_thread", + pfifo_thread, d, QEMU_THREAD_JOINABLE); } static void nv2a_reset(NV2AState *d) { qemu_mutex_lock(&d->pfifo.lock); + + // Wait for pfifo to become idle + qemu_cond_broadcast(&d->pfifo.fifo_cond); + qemu_mutex_unlock_iothread(); + qemu_cond_wait(&d->pfifo.fifo_idle_cond, &d->pfifo.lock); + qemu_mutex_lock_iothread(); qemu_mutex_lock(&d->pgraph.lock); memset(d->pfifo.regs, 0, sizeof(d->pfifo.regs)); @@ -446,19 +446,17 @@ static void nv2a_reset(NV2AState *d) d->pfifo.regs[NV_PFIFO_CACHE1_STATUS] |= NV_PFIFO_CACHE1_STATUS_LOW_MARK; - // PGRAPH might be blocked waiting for an increment. 
Simply simulate one - // here to continue for now. - SET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D, 1); - vga_common_reset(&d->vga); - qemu_cond_broadcast(&d->pfifo.puller_cond); - qemu_cond_broadcast(&d->pfifo.pusher_cond); - qemu_cond_broadcast(&d->pgraph.flip_3d); - qemu_cond_broadcast(&d->pgraph.interrupt_cond); + d->pgraph.waiting_for_nop = false; + d->pgraph.waiting_for_flip = false; + d->pgraph.waiting_for_fifo_access = false; + d->pgraph.waiting_for_context_switch = false; + + pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_unlock(&d->pfifo.lock); } static void nv2a_realize(PCIDevice *dev, Error **errp) @@ -499,8 +497,8 @@ static void nv2a_realize(PCIDevice *dev, Error **errp) } qemu_mutex_init(&d->pfifo.lock); - qemu_cond_init(&d->pfifo.puller_cond); - qemu_cond_init(&d->pfifo.pusher_cond); + qemu_cond_init(&d->pfifo.fifo_cond); + qemu_cond_init(&d->pfifo.fifo_idle_cond); } static void nv2a_exitfn(PCIDevice *dev) @@ -510,10 +508,8 @@ static void nv2a_exitfn(PCIDevice *dev) d->exiting = true; - qemu_cond_broadcast(&d->pfifo.puller_cond); - qemu_cond_broadcast(&d->pfifo.pusher_cond); - qemu_thread_join(&d->pfifo.puller_thread); - qemu_thread_join(&d->pfifo.pusher_thread); + qemu_cond_broadcast(&d->pfifo.fifo_cond); + qemu_thread_join(&d->pfifo.thread); pgraph_destroy(&d->pgraph); } diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index cc104bc241..e42c79fa03 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -3,7 +3,7 @@ * * Copyright (c) 2012 espes * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2018 Matt Borgerson + * Copyright (c) 2018-2020 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -173,16 +173,12 @@ typedef struct PGRAPHState { uint32_t pending_interrupts; uint32_t enabled_interrupts; - QemuCond interrupt_cond; /* subchannels state we're not 
sure the location of... */ ContextSurfaces2DState context_surfaces_2d; ImageBlitState image_blit; KelvinState kelvin; - QemuCond fifo_access_cond; - QemuCond flip_3d; - hwaddr dma_color, dma_zeta; Surface surface_color, surface_zeta; unsigned int surface_type; @@ -265,6 +261,12 @@ typedef struct PGRAPHState { GLuint gl_vertex_array; uint32_t regs[0x2000]; + + bool waiting_for_nop; + bool waiting_for_flip; + bool waiting_for_fifo_access; + bool waiting_for_context_switch; + } PGRAPHState; typedef struct NV2AState { @@ -295,10 +297,10 @@ typedef struct NV2AState { uint32_t enabled_interrupts; uint32_t regs[0x2000]; QemuMutex lock; - QemuThread puller_thread; - QemuCond puller_cond; - QemuThread pusher_thread; - QemuCond pusher_cond; + QemuThread thread; + QemuCond fifo_cond; + QemuCond fifo_idle_cond; + bool fifo_kick; } pfifo; struct { @@ -352,4 +354,6 @@ typedef struct NV2ABlockInfo { static void reg_log_read(int block, hwaddr addr, uint64_t val); static void reg_log_write(int block, hwaddr addr, uint64_t val); +void pfifo_kick(NV2AState *d); + #endif diff --git a/hw/xbox/nv2a/nv2a_pfifo.c b/hw/xbox/nv2a/nv2a_pfifo.c index dac42b08f1..14e155c4b3 100644 --- a/hw/xbox/nv2a/nv2a_pfifo.c +++ b/hw/xbox/nv2a/nv2a_pfifo.c @@ -3,7 +3,7 @@ * * Copyright (c) 2012 espes * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2018 Matt Borgerson + * Copyright (c) 2018-2020 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -82,14 +82,45 @@ void pfifo_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) break; } - qemu_cond_broadcast(&d->pfifo.pusher_cond); - qemu_cond_broadcast(&d->pfifo.puller_cond); + pfifo_kick(d); qemu_mutex_unlock(&d->pfifo.lock); } +void pfifo_kick(NV2AState *d) +{ + d->pfifo.fifo_kick = true; + qemu_cond_broadcast(&d->pfifo.fifo_cond); +} + +static bool pfifo_stall_for_flip(NV2AState *d) +{ + bool should_stall = false; + + if 
(atomic_read(&d->pgraph.waiting_for_flip)) { + qemu_mutex_lock(&d->pgraph.lock); + if (!pgraph_is_flip_stall_complete(d)) { + should_stall = true; + } else { + d->pgraph.waiting_for_flip = false; + } + qemu_mutex_unlock(&d->pgraph.lock); + } + + return should_stall; +} + static void pfifo_run_puller(NV2AState *d) { + if (pfifo_stall_for_flip(d)) return; + + // Conditions cleared by pgraph_write + if (atomic_read(&d->pgraph.waiting_for_nop) || + atomic_read(&d->pgraph.waiting_for_context_switch)) { + // Wait for events + return; + } + uint32_t *pull0 = &d->pfifo.regs[NV_PFIFO_CACHE1_PULL0]; uint32_t *pull1 = &d->pfifo.regs[NV_PFIFO_CACHE1_PULL1]; uint32_t *engine_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_ENGINE]; @@ -117,22 +148,6 @@ static void pfifo_run_puller(NV2AState *d) assert(get < 128*4 && (get % 4) == 0); uint32_t method_entry = d->pfifo.regs[NV_PFIFO_CACHE1_METHOD + get*2]; uint32_t parameter = d->pfifo.regs[NV_PFIFO_CACHE1_DATA + get*2]; - - uint32_t new_get = (get+4) & 0x1fc; - *get_reg = new_get; - - if (new_get == put) { - // set low mark - *status |= NV_PFIFO_CACHE1_STATUS_LOW_MARK; - } - if (*status & NV_PFIFO_CACHE1_STATUS_HIGH_MARK) { - // unset high mark - *status &= ~NV_PFIFO_CACHE1_STATUS_HIGH_MARK; - // signal pusher - qemu_cond_signal(&d->pfifo.pusher_cond); - } - - uint32_t method = method_entry & 0x1FFC; uint32_t subchannel = GET_MASK(method_entry, NV_PFIFO_CACHE1_METHOD_SUBCHANNEL); @@ -153,14 +168,28 @@ static void pfifo_run_puller(NV2AState *d) SET_MASK(*pull1, NV_PFIFO_CACHE1_PULL1_ENGINE, entry.engine); // NV2A_DPRINTF("engine_reg1 %d 0x%x\n", subchannel, *engine_reg); - // TODO: this is fucked + qemu_mutex_unlock(&d->pfifo.lock); qemu_mutex_lock(&d->pgraph.lock); //make pgraph busy - qemu_mutex_unlock(&d->pfifo.lock); + // Switch contexts if necessary pgraph_context_switch(d, entry.channel_id); + if (d->pgraph.waiting_for_context_switch) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + // Wait for event + return; 
+ } + pgraph_wait_fifo_access(d); + if (!pgraph_is_wait_for_access_complete(d)) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + // Wait for event + return; + } + d->pgraph.waiting_for_fifo_access = false; pgraph_method(d, subchannel, 0, entry.instance); // make pgraph not busy @@ -187,11 +216,18 @@ static void pfifo_run_puller(NV2AState *d) SET_MASK(*pull1, NV_PFIFO_CACHE1_PULL1_ENGINE, engine); // TODO: this is fucked + qemu_mutex_unlock(&d->pfifo.lock); qemu_mutex_lock(&d->pgraph.lock); //make pgraph busy - qemu_mutex_unlock(&d->pfifo.lock); pgraph_wait_fifo_access(d); + if (!pgraph_is_wait_for_access_complete(d)) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + // Wait for event + return; + } + d->pgraph.waiting_for_fifo_access = false; pgraph_method(d, subchannel, method, parameter); // make pgraph not busy @@ -201,27 +237,29 @@ static void pfifo_run_puller(NV2AState *d) assert(false); } - } -} + // Advance now that the method has executed + uint32_t new_get = (get+4) & 0x1fc; + *get_reg = new_get; -static void* pfifo_puller_thread(void *arg) -{ - NV2AState *d = (NV2AState *)arg; + if (new_get == put) { + // set low mark + *status |= NV_PFIFO_CACHE1_STATUS_LOW_MARK; + d->pfifo.fifo_kick = true; + } + if (*status & NV_PFIFO_CACHE1_STATUS_HIGH_MARK) { + // unset high mark + *status &= ~NV_PFIFO_CACHE1_STATUS_HIGH_MARK; + d->pfifo.fifo_kick = true; + } - glo_set_current(d->pgraph.gl_context); + // Stall while a flip is still pending; if the flip has already completed, do not stall + if (pfifo_stall_for_flip(d)) return; - qemu_mutex_lock(&d->pfifo.lock); - while (true) { - pfifo_run_puller(d); - qemu_cond_wait(&d->pfifo.puller_cond, &d->pfifo.lock); - - if (d->exiting) { - break; + if (atomic_read(&d->pgraph.waiting_for_nop)) { + // Wait for event + return; } } - qemu_mutex_unlock(&d->pfifo.lock); - - return NULL; } static void pfifo_run_pusher(NV2AState *d) @@ -239,11 +277,17 @@ static void
pfifo_run_pusher(NV2AState *d) uint32_t *get_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_GET]; uint32_t *put_reg = &d->pfifo.regs[NV_PFIFO_CACHE1_PUT]; - if (!GET_MASK(*push0, NV_PFIFO_CACHE1_PUSH0_ACCESS)) return; - if (!GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS)) return; + if (!GET_MASK(*push0, NV_PFIFO_CACHE1_PUSH0_ACCESS)) { + return; + } + if (!GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_ACCESS)) { + return; + } /* suspended */ - if (GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_STATUS)) return; + if (GET_MASK(*dma_push, NV_PFIFO_CACHE1_DMA_PUSH_STATUS)) { + return; + } // TODO: should we become busy here?? // NV_PFIFO_CACHE1_DMA_PUSH_STATE _BUSY @@ -328,8 +372,6 @@ static void pfifo_run_pusher(NV2AState *d) if (*status & NV_PFIFO_CACHE1_STATUS_LOW_MARK) { // unset low mark *status &= ~NV_PFIFO_CACHE1_STATUS_LOW_MARK; - // signal puller - qemu_cond_signal(&d->pfifo.puller_cond); } if (method_type == NV_PFIFO_CACHE1_DMA_STATE_METHOD_TYPE_INC) { @@ -435,14 +477,24 @@ static void pfifo_run_pusher(NV2AState *d) } } -static void* pfifo_pusher_thread(void *arg) +static void *pfifo_thread(void *arg) { NV2AState *d = (NV2AState *)arg; + glo_set_current(d->pgraph.gl_context); qemu_mutex_lock(&d->pfifo.lock); while (true) { + d->pfifo.fifo_kick = false; + pfifo_run_pusher(d); - qemu_cond_wait(&d->pfifo.pusher_cond, &d->pfifo.lock); + pfifo_run_puller(d); + + if (!d->pfifo.fifo_kick) { + qemu_cond_broadcast(&d->pfifo.fifo_idle_cond); + + // Both the pusher and puller are waiting for some action + qemu_cond_wait(&d->pfifo.fifo_cond, &d->pfifo.lock); + } if (d->exiting) { break; diff --git a/hw/xbox/nv2a/nv2a_pgraph.c b/hw/xbox/nv2a/nv2a_pgraph.c index cb25a3a5cc..4045f33114 100644 --- a/hw/xbox/nv2a/nv2a_pgraph.c +++ b/hw/xbox/nv2a/nv2a_pgraph.c @@ -3,7 +3,7 @@ * * Copyright (c) 2012 espes * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2018 Matt Borgerson + * Copyright (c) 2018-2020 Matt Borgerson * * This library is free software; you can redistribute it and/or * 
modify it under the terms of the GNU Lesser General Public @@ -387,12 +387,20 @@ void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) reg_log_write(NV_PGRAPH, addr, val); + qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here qemu_mutex_lock(&pg->lock); switch (addr) { case NV_PGRAPH_INTR: pg->pending_interrupts &= ~val; - qemu_cond_broadcast(&pg->interrupt_cond); + + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) { + pg->waiting_for_nop = false; + } + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) { + pg->waiting_for_context_switch = false; + } + pfifo_kick(d); break; case NV_PGRAPH_INTR_EN: pg->enabled_interrupts = val; @@ -405,7 +413,7 @@ void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) NV_PGRAPH_SURFACE_READ_3D)+1) % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D) ); - qemu_cond_broadcast(&pg->flip_3d); + pfifo_kick(d); } break; case NV_PGRAPH_RDI_DATA: { @@ -459,11 +467,33 @@ void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) // events switch (addr) { case NV_PGRAPH_FIFO: - qemu_cond_broadcast(&pg->fifo_access_cond); + pfifo_kick(d); break; } qemu_mutex_unlock(&pg->lock); + qemu_mutex_unlock(&d->pfifo.lock); +} + +/* If NV097_FLIP_STALL was executed, check if the flip has completed. + * This will usually happen in the VSYNC interrupt handler. 
+ */ +static int pgraph_is_flip_stall_complete(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n", + GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D), + GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D), + GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D)); + + uint32_t s = pg->regs[NV_PGRAPH_SURFACE]; + if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D) + != GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) { + return 1; + } + + return 0; } static void pgraph_method(NV2AState *d, @@ -669,16 +699,13 @@ static void pgraph_method(NV2AState *d, pg->regs[NV_PGRAPH_TRAPPED_DATA_LOW] = parameter; pg->regs[NV_PGRAPH_NSOURCE] = NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */ pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; + pg->waiting_for_nop = true; qemu_mutex_unlock(&pg->lock); qemu_mutex_lock_iothread(); update_irq(d); - qemu_mutex_lock(&pg->lock); qemu_mutex_unlock_iothread(); - - while (pg->pending_interrupts & NV_PGRAPH_INTR_ERROR) { - qemu_cond_wait(&pg->interrupt_cond, &pg->lock); - } + qemu_mutex_lock(&pg->lock); } break; @@ -686,7 +713,6 @@ static void pgraph_method(NV2AState *d, pgraph_update_surface(d, false, true, true); break; - case NV097_SET_FLIP_READ: SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D, parameter); @@ -719,21 +745,7 @@ static void pgraph_method(NV2AState *d, } case NV097_FLIP_STALL: pgraph_update_surface(d, false, true, true); - - while (true) { - NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n", - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D)); - - uint32_t s = pg->regs[NV_PGRAPH_SURFACE]; - if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D) - != GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) { - break; - } - qemu_cond_wait(&pg->flip_3d, &pg->lock); - } - NV2A_DPRINTF("flip 
stall done\n"); + pg->waiting_for_flip = true; break; // TODO: these should be loading the dma objects from ramin here? @@ -2595,25 +2607,23 @@ static void pgraph_context_switch(NV2AState *d, unsigned int channel_id) assert(!(d->pgraph.regs[NV_PGRAPH_DEBUG_3] & NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH)); + d->pgraph.waiting_for_context_switch = true; qemu_mutex_unlock(&d->pgraph.lock); qemu_mutex_lock_iothread(); d->pgraph.pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH; update_irq(d); - - qemu_mutex_lock(&d->pgraph.lock); qemu_mutex_unlock_iothread(); - - // wait for the interrupt to be serviced - while (d->pgraph.pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH) { - qemu_cond_wait(&d->pgraph.interrupt_cond, &d->pgraph.lock); - } + qemu_mutex_lock(&d->pgraph.lock); } } -static void pgraph_wait_fifo_access(NV2AState *d) { - while (!(d->pgraph.regs[NV_PGRAPH_FIFO] & NV_PGRAPH_FIFO_ACCESS)) { - qemu_cond_wait(&d->pgraph.fifo_access_cond, &d->pgraph.lock); - } +static void pgraph_wait_fifo_access(NV2AState *d) +{ + d->pgraph.waiting_for_fifo_access = true; +} + +static int pgraph_is_wait_for_access_complete(NV2AState *d) { + return !!(d->pgraph.regs[NV_PGRAPH_FIFO] & NV_PGRAPH_FIFO_ACCESS); } // static const char* nv2a_method_names[] = {}; @@ -2706,9 +2716,6 @@ static void pgraph_init(NV2AState *d) PGRAPHState *pg = &d->pgraph; qemu_mutex_init(&pg->lock); - qemu_cond_init(&pg->interrupt_cond); - qemu_cond_init(&pg->fifo_access_cond); - qemu_cond_init(&pg->flip_3d); /* fire up opengl */ @@ -2785,9 +2792,6 @@ static void pgraph_init(NV2AState *d) static void pgraph_destroy(PGRAPHState *pg) { qemu_mutex_destroy(&pg->lock); - qemu_cond_destroy(&pg->interrupt_cond); - qemu_cond_destroy(&pg->fifo_access_cond); - qemu_cond_destroy(&pg->flip_3d); glo_set_current(pg->gl_context); diff --git a/hw/xbox/nv2a/nv2a_user.c b/hw/xbox/nv2a/nv2a_user.c index cd488fdbd4..501f1f8539 100644 --- a/hw/xbox/nv2a/nv2a_user.c +++ b/hw/xbox/nv2a/nv2a_user.c @@ -102,9 +102,7 @@ void 
user_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) break; } - // kick pfifo - qemu_cond_broadcast(&d->pfifo.pusher_cond); - qemu_cond_broadcast(&d->pfifo.puller_cond); + pfifo_kick(d); } else { /* ramfc */