Basic command buffer processing.

This commit is contained in:
Ben Vanik 2013-06-01 21:49:14 -07:00
parent ba16a0d847
commit e00e665a09
8 changed files with 326 additions and 46 deletions

View File

@ -365,6 +365,8 @@ XEEMITTER(mulli, 0x1C000000, D )(X64Emitter& e, X86Compiler& c, InstrDat
GpVar v_lo(c.newGpVar()); GpVar v_lo(c.newGpVar());
GpVar v_hi(c.newGpVar()); GpVar v_hi(c.newGpVar());
c.alloc(v_lo, rax);
c.alloc(v_hi, rdx);
c.mov(v_lo, e.get_uint64(XEEXTS16(i.D.DS))); c.mov(v_lo, e.get_uint64(XEEXTS16(i.D.DS)));
c.mul(v_hi, v_lo, e.gpr_value(i.D.RA)); c.mul(v_hi, v_lo, e.gpr_value(i.D.RA));
e.update_gpr_value(i.D.RT, v_lo); e.update_gpr_value(i.D.RT, v_lo);

View File

@ -0,0 +1,49 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_COMMAND_BUFFER_H_
#define XENIA_GPU_COMMAND_BUFFER_H_
#include <xenia/core.h>
namespace xe {
namespace gpu {
// TODO(benvanik): command packet types.
// Abstract base for GPU command buffer implementations.
//
// Takes a retained reference to the memory object passed to the constructor
// (via xe_memory_retain) and drops it again in the destructor (via
// xe_memory_release).
class CommandBuffer {
public:
  // Retains |memory|; the matching release happens in the destructor.
  // explicit: a memory reference should never silently convert to a
  // command buffer.
  explicit CommandBuffer(xe_memory_ref memory)
      : memory_(xe_memory_retain(memory)) {
  }

  // Virtual so that deleting through a CommandBuffer pointer runs the derived
  // destructor before the memory reference is released.
  virtual ~CommandBuffer() {
    xe_memory_release(memory_);
  }

  // Returns the stored reference as-is; no additional retain is performed.
  xe_memory_ref memory() const {
    return memory_;
  }

  // TODO(benvanik): command methods.
  virtual void Foo() = 0;

protected:
  xe_memory_ref memory_;

private:
  // Not copyable: a member-wise copy would duplicate memory_ without a
  // matching retain, and both destructors would then release it.
  // Declared but intentionally left undefined.
  CommandBuffer(const CommandBuffer&);
  CommandBuffer& operator=(const CommandBuffer&);
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_COMMAND_BUFFER_H_

View File

@ -9,16 +9,18 @@
#include <xenia/gpu/ring_buffer_worker.h> #include <xenia/gpu/ring_buffer_worker.h>
#include <xenia/gpu/xenos/packets.h>
#include <xenia/gpu/xenos/registers.h>
using namespace xe; using namespace xe;
using namespace xe::gpu; using namespace xe::gpu;
using namespace xe::gpu::xenos;
RingBufferWorker::RingBufferWorker(xe_memory_ref memory) : RingBufferWorker::RingBufferWorker(xe_memory_ref memory) :
memory_(memory) { memory_(memory) {
running_ = true; running_ = true;
read_ptr_index_event_ = CreateEvent(
NULL, FALSE, FALSE, NULL);
write_ptr_index_event_ = CreateEvent( write_ptr_index_event_ = CreateEvent(
NULL, FALSE, FALSE, NULL); NULL, FALSE, FALSE, NULL);
@ -79,42 +81,13 @@ void RingBufferWorker::ThreadStart() {
// Process the new commands. // Process the new commands.
XELOGGPU("Ring buffer thread work"); XELOGGPU("Ring buffer thread work");
#define READ_UINT32() \ // TODO(benvanik): handle wrapping around
XEGETUINT32BE(p + primary_buffer_ptr_ + read_ptr_index_ * 4); \ // read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4);
read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4); XEASSERT(write_ptr_index_ >= read_ptr_index_);
uint32_t length = write_ptr_index_ - read_ptr_index_;
while (true) { if (length) {
uint32_t command = READ_UINT32(); ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length);
read_ptr_index_ = write_ptr_index_;
switch (command) {
case 0xC0114800:
{
// Init packet.
// Will have 18-19 ops after it. Maybe.
XELOGGPU("Command(%.8X): init packet", command);
for (int n = 0; n < 18; n++) {
READ_UINT32();
}
}
break;
case 0xC0013F00:
{
// Kick segment.
uint32_t segment_ptr = READ_UINT32();
uint32_t length = READ_UINT32();
XELOGGPU("Command(%.8X): kick segment %.8X (%db)",
command, segment_ptr, length * 4);
ExecuteSegment(segment_ptr, length);
}
break;
default:
XELOGGPU("Command(%.8X): unknown primary buffer command", command);
break;
}
if (read_ptr_index_ == write_ptr_index_) {
break;
}
} }
// TODO(benvanik): use read_ptr_update_freq_ and only issue after moving // TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
@ -122,7 +95,6 @@ void RingBufferWorker::ThreadStart() {
if (read_ptr_writeback_ptr_) { if (read_ptr_writeback_ptr_) {
XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_); XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
} }
SetEvent(read_ptr_index_event_);
} }
} }
@ -130,13 +102,143 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
uint8_t* p = xe_memory_addr(memory_); uint8_t* p = xe_memory_addr(memory_);
// Adjust pointer base. // Adjust pointer base.
ptr += (primary_buffer_ptr_ & ~0x1FFFFFFF); ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);
XELOGGPU("CommandList(%.8X): executing %d commands", ptr, length); #define LOG_DATA(count) \
for (uint32_t __m = 0; __m < count; __m++) { \
XELOGGPU(" %.8X", XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
}
XELOGGPU("CommandList(%.8X): executing %dw", ptr, length);
// Execute commands! // Execute commands!
for (uint32_t n = 0; n < length; n++) { for (uint32_t n = 0; n < length;) {
uint32_t command = XEGETUINT32BE(p + ptr + n * 4); const uint8_t* packet_base = p + ptr + n * 4;
XELOGGPU(" Command(%.8X)", command); const uint32_t packet = XEGETUINT32BE(packet_base);
const uint32_t packet_type = packet >> 30;
switch (packet_type) {
case 0x00:
{
// Type-0 packet.
// Write count registers in sequence to the registers starting at
// (base_index << 2).
XELOGGPU("Packet(%.8X): set registers:", packet);
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
uint32_t base_index = (packet & 0xFFFF);
for (uint32_t m = 0; m < count; m++) {
uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4);
XELOGGPU(" %.4X <- %.8X", base_index + m, reg_data);
// TODO(benvanik): process register writes.
}
n += 1 + count;
}
break;
case 0x01:
{
// Type-1 packet.
// Contains two registers of data. Type-0 should be more common.
XELOGGPU("Packet(%.8X): set registers:", packet);
uint32_t reg_index_1 = packet & 0x7FF;
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4);
XELOGGPU(" %.4X <- %.8X", reg_index_1, reg_data_1);
XELOGGPU(" %.4X <- %.8X", reg_index_2, reg_data_2);
// TODO(benvanik): process register writes.
n += 1 + 2;
}
break;
case 0x02:
// Type-2 packet.
// No-op. Do nothing.
break;
case 0x03:
{
// Type-3 packet.
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
uint32_t opcode = (packet >> 8) & 0x7F;
// & 1 == predicate, maybe?
switch (opcode) {
case PM4_ME_INIT:
// initialize CP's micro-engine
XELOGGPU("Packet(%.8X): PM4_ME_INIT", packet);
LOG_DATA(count);
break;
case PM4_NOP:
// skip N 32-bit words to get to the next packet
// No-op, ignore some data.
XELOGGPU("Packet(%.8X): PM4_NOP", packet);
LOG_DATA(count);
break;
case PM4_INDIRECT_BUFFER:
// indirect buffer dispatch
{
uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4);
XELOGGPU("Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
packet, list_ptr, list_length);
ExecuteSegment(list_ptr, list_length);
}
break;
case PM4_WAIT_REG_MEM:
// wait until a register or memory location is a specific value
XELOGGPU("Packet(%.8X): PM4_WAIT_REG_MEM", packet);
LOG_DATA(count);
break;
case PM4_REG_RMW:
// register read/modify/write
// ? (used during shader upload and edram setup)
XELOGGPU("Packet(%.8X): PM4_REG_RMW", packet);
LOG_DATA(count);
break;
case PM4_EVENT_WRITE_SHD:
// generate a VS|PS_done event
{
XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE_SHD", packet);
LOG_DATA(count);
// 3?
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
// ptr
uint32_t d1 = XEGETUINT32BE(packet_base + 2 * 4);
// value?
uint32_t d2 = XEGETUINT32BE(packet_base + 3 * 4);
XESETUINT32BE(
p + d1 + (primary_buffer_ptr_ & ~0x1FFFFFFF), d2);
}
break;
case PM4_DRAW_INDX_2:
// draw using supplied indices in packet
XELOGGPU("Packet(%.8X): PM4_DRAW_INDX_2", packet);
LOG_DATA(count);
break;
case PM4_IM_LOAD_IMMEDIATE:
// load sequencer instruction memory (code embedded in packet)
XELOGGPU("Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet);
LOG_DATA(count);
break;
case PM4_INVALIDATE_STATE:
// selective invalidation of state pointers
XELOGGPU("Packet(%.8X): PM4_INVALIDATE_STATE", packet);
LOG_DATA(count);
break;
default:
XELOGGPU("Packet(%.8X): unknown!", packet);
LOG_DATA(count);
break;
}
n += 1 + count;
}
break;
}
} }
} }

View File

@ -44,7 +44,6 @@ protected:
uint32_t primary_buffer_ptr_; uint32_t primary_buffer_ptr_;
uint32_t primary_buffer_size_; uint32_t primary_buffer_size_;
HANDLE read_ptr_index_event_;
uint32_t read_ptr_index_; uint32_t read_ptr_index_;
uint32_t read_ptr_update_freq_; uint32_t read_ptr_update_freq_;
uint32_t read_ptr_writeback_ptr_; uint32_t read_ptr_writeback_ptr_;

View File

@ -1,6 +1,7 @@
# Copyright 2013 Ben Vanik. All Rights Reserved. # Copyright 2013 Ben Vanik. All Rights Reserved.
{ {
'sources': [ 'sources': [
'command_buffer.h',
'gpu-private.h', 'gpu-private.h',
'gpu.cc', 'gpu.cc',
'gpu.h', 'gpu.h',
@ -12,6 +13,7 @@
'includes': [ 'includes': [
'nop/sources.gypi', 'nop/sources.gypi',
'xenos/sources.gypi',
], ],
'conditions': [ 'conditions': [

View File

@ -0,0 +1,81 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_XENOS_PACKETS_H_
#define XENIA_GPU_XENOS_PACKETS_H_
#include <xenia/core.h>
namespace xe {
namespace gpu {
namespace xenos {
// Opcodes (IT_OPCODE) for Type-3 commands in the ringbuffer.
// https://github.com/freedreno/amd-gpu/blob/master/include/api/gsl_pm4types.h
// Not sure if all of these are used.
// The command processor extracts the opcode from a Type-3 packet header with
// (packet >> 8) & 0x7F, so every value here fits in 7 bits.
enum Type3Opcode {
  PM4_ME_INIT               = 0x48,   // initialize CP's micro-engine

  PM4_NOP                   = 0x10,   // skip N 32-bit words to get to the next packet

  PM4_INDIRECT_BUFFER       = 0x3f,   // indirect buffer dispatch. prefetch parser uses this packet type to determine whether to pre-fetch the IB
  PM4_INDIRECT_BUFFER_PFD   = 0x37,   // indirect buffer dispatch. same as IB, but init is pipelined

  PM4_WAIT_FOR_IDLE         = 0x26,   // wait for the IDLE state of the engine
  PM4_WAIT_REG_MEM          = 0x3c,   // wait until a register or memory location is a specific value
  PM4_WAIT_REG_EQ           = 0x52,   // wait until a register location is equal to a specific value
  PM4_WAIT_REG_GTE          = 0x53,   // wait until a register location is >= a specific value
  PM4_WAT_REG_GTE           = PM4_WAIT_REG_GTE,  // misspelled name, kept so existing references keep compiling
  PM4_WAIT_UNTIL_READ       = 0x5c,   // wait until a read completes
  PM4_WAIT_IB_PFD_COMPLETE  = 0x5d,   // wait until all base/size writes from an IB_PFD packet have completed

  PM4_REG_RMW               = 0x21,   // register read/modify/write
  PM4_REG_TO_MEM            = 0x3e,   // reads register in chip and writes to memory
  PM4_MEM_WRITE             = 0x3d,   // write N 32-bit words to memory
  PM4_MEM_WRITE_CNTR        = 0x4f,   // write CP_PROG_COUNTER value to memory
  PM4_COND_EXEC             = 0x44,   // conditional execution of a sequence of packets
  PM4_COND_WRITE            = 0x45,   // conditional write to memory or register

  PM4_EVENT_WRITE           = 0x46,   // generate an event that creates a write to memory when completed
  PM4_EVENT_WRITE_SHD       = 0x58,   // generate a VS|PS_done event
  PM4_EVENT_WRITE_CFL       = 0x59,   // generate a cache flush done event
  PM4_EVENT_WRITE_ZPD       = 0x5b,   // generate a z_pass done event

  PM4_DRAW_INDX             = 0x22,   // initiate fetch of index buffer and draw
  PM4_DRAW_INDX_2           = 0x36,   // draw using supplied indices in packet
  PM4_DRAW_INDX_BIN         = 0x34,   // initiate fetch of index buffer and binIDs and draw
  PM4_DRAW_INDX_2_BIN       = 0x35,   // initiate fetch of bin IDs and draw using supplied indices

  PM4_VIZ_QUERY             = 0x23,   // begin/end initiator for viz query extent processing
  PM4_SET_STATE             = 0x25,   // fetch state sub-blocks and initiate shader code DMAs
  PM4_SET_CONSTANT          = 0x2d,   // load constant into chip and to memory
  PM4_IM_LOAD               = 0x27,   // load sequencer instruction memory (pointer-based)
  PM4_IM_LOAD_IMMEDIATE     = 0x2b,   // load sequencer instruction memory (code embedded in packet)
  PM4_LOAD_CONSTANT_CONTEXT = 0x2e,   // load constants from a location in memory
  PM4_INVALIDATE_STATE      = 0x3b,   // selective invalidation of state pointers

  PM4_SET_SHADER_BASES      = 0x4A,   // dynamically changes shader instruction memory partition
  PM4_SET_BIN_BASE_OFFSET   = 0x4B,   // program an offset that will be added to the BIN_BASE value of the 3D_DRAW_INDX_BIN packet
  PM4_SET_BIN_MASK          = 0x50,   // sets the 64-bit BIN_MASK register in the PFP
  PM4_SET_BIN_SELECT        = 0x51,   // sets the 64-bit BIN_SELECT register in the PFP

  PM4_CONTEXT_UPDATE        = 0x5e,   // updates the current context, if needed
  PM4_INTERRUPT             = 0x40,   // generate interrupt from the command stream

  PM4_IM_STORE              = 0x2c,   // copy sequencer instruction memory to system memory
};
} // namespace xenos
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_XENOS_PACKETS_H_

View File

@ -0,0 +1,38 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2013 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_XENOS_REGISTERS_H_
#define XENIA_GPU_XENOS_REGISTERS_H_
#include <xenia/core.h>
namespace xe {
namespace gpu {
namespace xenos {
// A single 32-bit register slot, viewable either as a raw unsigned integer
// or as a float; both members share the same storage.
union RegisterValue {
  uint32_t uint_value;
  float float_value;
};

// Flat table of register slots indexed by register number.
struct RegisterFile {
  // TODO(benvanik): figure out the actual number.
  // Sized for a full 16-bit index space (indices 0x0000 through 0xFFFF):
  // Type-0 packets carry a 16-bit base register index, so a table with only
  // 0xFFFF slots would leave the highest index one past the end.
  static const uint32_t kRegisterCount = 0x10000;

  RegisterValue registers[kRegisterCount];
};
} // namespace xenos
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_XENOS_REGISTERS_H_

View File

@ -0,0 +1,7 @@
# Copyright 2013 Ben Vanik. All Rights Reserved.
{
'sources': [
'packets.h',
'registers.h',
],
}