Interrupts fire on the right 'thread'; ringbuffer work.

This commit is contained in:
Ben Vanik 2013-10-19 11:50:01 -07:00
parent 6db8c6c961
commit 90bc6ad1a8
10 changed files with 145 additions and 48 deletions

View File

@ -11,6 +11,7 @@
#include <xenia/cpu/jit.h>
#include <xenia/cpu/ppc/disasm.h>
#include <xenia/cpu/ppc/state.h>
#include <xenia/gpu/graphics_system.h>
@ -67,6 +68,7 @@ Processor::~Processor() {
}
modules_.clear();
xe_memory_heap_free(memory_, interrupt_thread_block_, 2048);
DeallocThread(interrupt_thread_state_);
xe_mutex_free(interrupt_thread_lock_);
@ -107,6 +109,9 @@ int Processor::Setup() {
interrupt_thread_lock_ = xe_mutex_alloc(10000);
interrupt_thread_state_ = AllocThread(16 * 1024, 0, 0);
interrupt_thread_block_ = xe_memory_heap_alloc(
memory_, 0, 2048, 0);
interrupt_thread_state_->ppc_state()->r[13] = interrupt_thread_block_;
sym_table_ = new SymbolTable();
@ -259,11 +264,19 @@ uint64_t Processor::Execute(ThreadState* thread_state, uint32_t address,
return ppc_state->r[3];
}
// Runs the guest routine at |address| on the dedicated interrupt thread state
// and returns whatever Execute() returns for it. arg0/arg1 are forwarded to
// Execute() unchanged. The whole dispatch is serialized on
// interrupt_thread_lock_, so concurrent callers block until the current
// interrupt has finished executing.
// |cpu| is written into the interrupt thread block at offset 0x10C before the
// call.
// NOTE(review): |cpu| is a uint32_t but is stored with XESETUINT8BE —
// presumably only the low byte is kept; confirm that is intended.
uint64_t Processor::ExecuteInterrupt(uint32_t address,
uint64_t Processor::ExecuteInterrupt(uint32_t cpu,
uint32_t address,
uint64_t arg0, uint64_t arg1) {
// Acquire lock on interrupt thread (we can only dispatch one at a time).
xe_mutex_lock(interrupt_thread_lock_);
// Set 0x10C(r13) to the current CPU ID.
// p is the host pointer for guest address 0, so adding the block's guest
// address yields the host location of the interrupt thread block.
uint8_t* p = xe_memory_addr(memory_, 0);
XESETUINT8BE(p + interrupt_thread_block_ + 0x10C, cpu);
// Execute interrupt.
uint64_t result = Execute(interrupt_thread_state_, address, arg0, arg1);
// Release the lock only after execution completes so the shared interrupt
// thread state is never used by two dispatches at once.
xe_mutex_unlock(interrupt_thread_lock_);
return result;
}

View File

@ -62,7 +62,8 @@ public:
uint64_t Execute(ThreadState* thread_state, uint32_t address,
uint64_t arg0, uint64_t arg1);
uint64_t ExecuteInterrupt(uint32_t address, uint64_t arg0, uint64_t arg1);
uint64_t ExecuteInterrupt(
uint32_t cpu, uint32_t address, uint64_t arg0, uint64_t arg1);
sdb::FunctionSymbol* GetFunction(uint32_t address);
void* GetFunctionPointer(uint32_t address);
@ -80,6 +81,7 @@ private:
xe_mutex_t* interrupt_thread_lock_;
ThreadState* interrupt_thread_state_;
uint32_t interrupt_thread_block_;
};

View File

@ -26,27 +26,23 @@ ThreadState::ThreadState(
stack_address_ = xe_memory_heap_alloc(memory_, 0, stack_size, 0);
xe_zero_struct(&ppc_state_, sizeof(ppc_state_));
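// Allocate with 64b alignment. NOTE(review): the assert below only verifies 16-byte alignment (low 4 bits clear) — confirm which alignment is actually required.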
ppc_state_ = (xe_ppc_state_t*)xe_malloc_aligned(sizeof(xe_ppc_state_t));
XEASSERT(((uint64_t)ppc_state_ & 0xF) == 0);
xe_zero_struct(ppc_state_, sizeof(xe_ppc_state_t));
// Stash pointers to common structures that callbacks may need.
ppc_state_.membase = xe_memory_addr(memory_, 0);
ppc_state_.processor = processor;
ppc_state_.thread_state = this;
ppc_state_->membase = xe_memory_addr(memory_, 0);
ppc_state_->processor = processor;
ppc_state_->thread_state = this;
// Set initial registers.
ppc_state_.r[1] = stack_address_ + stack_size;
ppc_state_.r[13] = thread_state_address_;
ppc_state_->r[1] = stack_address_ + stack_size;
ppc_state_->r[13] = thread_state_address_;
}
// Tears down the thread state: frees the aligned PPC state buffer, frees the
// guest stack allocation, then releases memory_.
ThreadState::~ThreadState() {
// ppc_state_ was obtained from xe_malloc_aligned, so it must be returned
// through the matching aligned free.
xe_free_aligned(ppc_state_);
// NOTE(review): the size argument is 0 here, whereas Processor::~Processor
// passes the allocation size (2048) to the same function — confirm that 0 is
// accepted as "free the whole allocation".
xe_memory_heap_free(memory_, stack_address_, 0);
// Released last because the heap free above still takes memory_ as an
// argument. Presumably balances a retain taken in the constructor — verify.
xe_memory_release(memory_);
}
uint32_t ThreadState::thread_id() const {
return thread_id_;
}
xe_ppc_state_t* ThreadState::ppc_state() {
return &ppc_state_;
}

View File

@ -29,9 +29,8 @@ public:
uint32_t thread_id);
~ThreadState();
uint32_t thread_id() const;
xe_ppc_state_t* ppc_state();
uint32_t thread_id() const { return thread_id_; }
xe_ppc_state_t* ppc_state() const { return ppc_state_; }
private:
uint32_t stack_size_;
@ -42,7 +41,8 @@ private:
uint32_t thread_state_address_;
uint32_t thread_id_;
xe_ppc_state_t ppc_state_;
// NOTE: must be 64b aligned for SSE ops.
xe_ppc_state_t* ppc_state_;
};

View File

@ -25,7 +25,7 @@ GraphicsSystem::GraphicsSystem(const CreationParams* params) :
last_interrupt_time_(0), swap_pending_(false) {
memory_ = xe_memory_retain(params->memory);
worker_ = new RingBufferWorker(memory_);
worker_ = new RingBufferWorker(this, memory_);
// Set during Initialize();
driver_ = 0;
@ -160,12 +160,17 @@ void GraphicsSystem::WriteRegister(uint32_t r, uint64_t value) {
regs->values[r].u32 = (uint32_t)value;
}
// Records the time of this interrupt and, if an interrupt callback has been
// registered, runs it through Processor::ExecuteInterrupt with
// interrupt_callback_data_ as its second argument.
// |cpu| is passed through to ExecuteInterrupt; the value 0xFFFFFFFF is
// treated as "no CPU specified" and replaced with 2.
// Note that last_interrupt_time_ is updated even when no callback is
// registered and nothing is dispatched.
void GraphicsSystem::DispatchInterruptCallback() {
void GraphicsSystem::DispatchInterruptCallback(uint32_t cpu) {
// Pick a CPU if the caller did not specify one (0xFFFFFFFF). CPU 2 is an
// arbitrary guess; the code gives no reason for this particular choice.
if (cpu == 0xFFFFFFFF) {
cpu = 2;
}
// NOTE: we may be executing in some random thread.
last_interrupt_time_ = xe_pal_now();
// Nothing to dispatch until a callback has been set.
if (!interrupt_callback_) {
return;
}
processor_->ExecuteInterrupt(
interrupt_callback_, 0, interrupt_callback_data_);
cpu, interrupt_callback_, 1, interrupt_callback_data_);
}

View File

@ -52,7 +52,7 @@ public:
virtual uint64_t ReadRegister(uint32_t r);
virtual void WriteRegister(uint32_t r, uint64_t value);
void DispatchInterruptCallback();
void DispatchInterruptCallback(uint32_t cpu = 0xFFFFFFFF);
bool swap_pending() const { return swap_pending_; }
void set_swap_pending(bool value) { swap_pending_ = value; }

View File

@ -10,6 +10,7 @@
#include <xenia/gpu/ring_buffer_worker.h>
#include <xenia/gpu/graphics_driver.h>
#include <xenia/gpu/graphics_system.h>
#include <xenia/gpu/xenos/packets.h>
#include <xenia/gpu/xenos/registers.h>
@ -19,8 +20,9 @@ using namespace xe::gpu;
using namespace xe::gpu::xenos;
RingBufferWorker::RingBufferWorker(xe_memory_ref memory) :
memory_(memory), driver_(0) {
RingBufferWorker::RingBufferWorker(
GraphicsSystem* graphics_system, xe_memory_ref memory) :
graphics_system_(graphics_system), memory_(memory), driver_(0) {
write_ptr_index_event_ = CreateEvent(
NULL, FALSE, FALSE, NULL);
@ -31,6 +33,10 @@ RingBufferWorker::RingBufferWorker(xe_memory_ref memory) :
read_ptr_writeback_ptr_ = 0;
write_ptr_index_ = 0;
write_ptr_max_index_ = 0;
LARGE_INTEGER perf_counter;
QueryPerformanceCounter(&perf_counter);
counter_base_ = perf_counter.QuadPart;
}
RingBufferWorker::~RingBufferWorker() {
@ -38,6 +44,12 @@ RingBufferWorker::~RingBufferWorker() {
CloseHandle(write_ptr_index_event_);
}
// Samples the performance counter and returns the reading relative to
// counter_base_, in raw performance-counter ticks.
uint64_t RingBufferWorker::GetCounter() {
  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  const uint64_t elapsed_ticks = now.QuadPart - counter_base_;
  return elapsed_ticks;
}
void RingBufferWorker::Initialize(GraphicsDriver* driver,
uint32_t ptr, uint32_t page_count) {
driver_ = driver;
@ -268,6 +280,21 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
ADVANCE_PTR(count);
break;
case PM4_INTERRUPT:
// generate interrupt from the command stream
{
XELOGGPU("[%.8X] Packet(%.8X): PM4_INTERRUPT",
packet_ptr, packet);
LOG_DATA(count);
uint32_t cpu_mask = READ_AND_ADVANCE_PTR();
for (int n = 0; n < 6; n++) {
if (cpu_mask & (1 << n)) {
graphics_system_->DispatchInterruptCallback(n);
}
}
}
break;
case PM4_INDIRECT_BUFFER:
// indirect buffer dispatch
{
@ -302,8 +329,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
value = regs->values[poll_reg_addr].u32;
}
switch (wait_info & 0x7) {
case 0x0: // Always.
matched = true;
case 0x0: // Never.
matched = false;
break;
case 0x1: // Less than reference.
matched = (value & mask) < ref;
@ -323,14 +350,17 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
case 0x6: // Greater than reference.
matched = (value & mask) > ref;
break;
default:
XELOGE("Unsupported wait comparison type!");
XEASSERTALWAYS();
case 0x7: // Always
matched = true;
break;
}
if (!matched) {
// Wait.
SwitchToThread();
if (wait >= 0x100) {
Sleep(wait / 0x100);
} else {
SwitchToThread();
}
}
} while (!matched);
}
@ -375,7 +405,6 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
uint32_t poll_reg_addr = READ_AND_ADVANCE_PTR();
uint32_t ref = READ_AND_ADVANCE_PTR();
uint32_t mask = READ_AND_ADVANCE_PTR();
uint32_t wait = READ_AND_ADVANCE_PTR();
uint32_t write_reg_addr = READ_AND_ADVANCE_PTR();
uint32_t write_data = READ_AND_ADVANCE_PTR();
uint32_t value;
@ -389,8 +418,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
}
bool matched = false;
switch (wait_info & 0x7) {
case 0x0: // Always.
matched = true;
case 0x0: // Never.
matched = false;
break;
case 0x1: // Less than reference.
matched = (value & mask) < ref;
@ -410,9 +439,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
case 0x6: // Greater than reference.
matched = (value & mask) > ref;
break;
default:
XELOGE("Unsupported wait comparison type!");
XEASSERTALWAYS();
case 0x7: // Always
matched = true;
break;
}
if (matched) {
@ -441,12 +469,22 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
packet_ptr, packet);
LOG_DATA(count);
uint32_t d0 = READ_AND_ADVANCE_PTR(); // 3?
XEASSERT(d0 == 0x3);
uint32_t d1 = READ_AND_ADVANCE_PTR(); // ptr
uint32_t d2 = READ_AND_ADVANCE_PTR(); // value?
if (!(d1 & 0xC0000000)) {
XESETUINT32BE(p + TRANSLATE_ADDR(d1), d2);
uint32_t initiator = READ_AND_ADVANCE_PTR();
uint32_t address = READ_AND_ADVANCE_PTR();
uint32_t value = READ_AND_ADVANCE_PTR();
// Writeback initiator.
WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x1F);
uint32_t data_value;
if ((initiator >> 31) & 0x1) {
// Write counter (GPU clock counter?).
// TODO(benvanik): 64-bit write?
data_value = (uint32_t)GetCounter();
} else {
// Write value.
data_value = value;
}
if (!(address & 0xC0000000)) {
XESETUINT32BE(p + TRANSLATE_ADDR(address), data_value);
} else {
// TODO(benvanik): read up on PM4_EVENT_WRITE_SHD.
// No clue. Maybe relative write based on a register base?
@ -543,6 +581,35 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
}
break;
case PM4_SET_BIN_MASK_LO:
{
uint32_t value = READ_AND_ADVANCE_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
packet_ptr, packet, value);
}
break;
case PM4_SET_BIN_MASK_HI:
{
uint32_t value = READ_AND_ADVANCE_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
packet_ptr, packet, value);
}
break;
case PM4_SET_BIN_SELECT_LO:
{
uint32_t value = READ_AND_ADVANCE_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
packet_ptr, packet, value);
}
break;
case PM4_SET_BIN_SELECT_HI:
{
uint32_t value = READ_AND_ADVANCE_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
packet_ptr, packet, value);
}
break;
default:
XELOGGPU("[%.8X] Packet(%.8X): unknown!",
packet_ptr, packet);

View File

@ -19,14 +19,17 @@ namespace xe {
namespace gpu {
class GraphicsDriver;
class GraphicsSystem;
class RingBufferWorker {
public:
RingBufferWorker(xe_memory_ref memory);
RingBufferWorker(GraphicsSystem* graphics_system, xe_memory_ref memory);
virtual ~RingBufferWorker();
xe_memory_ref memory();
uint64_t GetCounter();
void Initialize(GraphicsDriver* driver,
uint32_t ptr, uint32_t page_count);
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
@ -49,9 +52,11 @@ private:
void WriteRegister(uint32_t index, uint32_t value);
protected:
xe_memory_ref memory_;
xe_memory_ref memory_;
GraphicsSystem* graphics_system_;
GraphicsDriver* driver_;
GraphicsDriver* driver_;
uint64_t counter_base_;
uint32_t primary_buffer_ptr_;
uint32_t primary_buffer_size_;

View File

@ -67,9 +67,16 @@ enum Type3Opcode {
PM4_SET_BIN_SELECT = 0x51, // sets the 64-bit BIN_SELECT register in the PFP
PM4_CONTEXT_UPDATE = 0x5e, // updates the current context, if needed
PM4_INTERRUPT = 0x40, // generate interrupt from the command stream
PM4_INTERRUPT = 0x54, // generate interrupt from the command stream
PM4_IM_STORE = 0x2c, // copy sequencer instruction memory to system memory
// Tiled rendering:
// https://www.google.com/patents/US20060055701
PM4_SET_BIN_MASK_LO = 0x60,
PM4_SET_BIN_MASK_HI = 0x61,
PM4_SET_BIN_SELECT_LO = 0x62,
PM4_SET_BIN_SELECT_HI = 0x63,
};

View File

@ -98,6 +98,8 @@ XE_GPU_REGISTER(0x2182, dword, SQ_INTERPOLATOR_CNTL)
XE_GPU_REGISTER(0x2183, dword, SQ_WRAPPING_0)
XE_GPU_REGISTER(0x2184, dword, SQ_WRAPPING_1)
XE_GPU_REGISTER(0x21F9, dword, VGT_EVENT_INITIATOR)
XE_GPU_REGISTER(0x2200, dword, RB_DEPTHCONTROL)
XE_GPU_REGISTER(0x2201, dword, RB_BLENDCONTROL_0)
XE_GPU_REGISTER(0x2202, dword, RB_COLORCONTROL)