diff --git a/src/xenia/cpu/x64/x64_emit_alu.cc b/src/xenia/cpu/x64/x64_emit_alu.cc
index 923b4bdbd..eb372af7f 100644
--- a/src/xenia/cpu/x64/x64_emit_alu.cc
+++ b/src/xenia/cpu/x64/x64_emit_alu.cc
@@ -365,6 +365,8 @@ XEEMITTER(mulli, 0x1C000000, D )(X64Emitter& e, X86Compiler& c, InstrDat
 
   GpVar v_lo(c.newGpVar());
   GpVar v_hi(c.newGpVar());
+  c.alloc(v_lo, rax);
+  c.alloc(v_hi, rdx);
   c.mov(v_lo, e.get_uint64(XEEXTS16(i.D.DS)));
   c.mul(v_hi, v_lo, e.gpr_value(i.D.RA));
   e.update_gpr_value(i.D.RT, v_lo);
diff --git a/src/xenia/gpu/command_buffer.h b/src/xenia/gpu/command_buffer.h
new file mode 100644
index 000000000..b601505f5
--- /dev/null
+++ b/src/xenia/gpu/command_buffer.h
@@ -0,0 +1,59 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2013 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_COMMAND_BUFFER_H_
+#define XENIA_GPU_COMMAND_BUFFER_H_
+
+#include <xenia/core.h>
+
+
+namespace xe {
+namespace gpu {
+
+
+// TODO(benvanik): command packet types.
+
+
+// Abstract command buffer (it has a pure virtual method).
+// Holds a retained memory reference from construction until destruction.
+class CommandBuffer {
+public:
+  // Retains |memory|; the matching release happens in the destructor.
+  explicit CommandBuffer(xe_memory_ref memory) {
+    memory_ = xe_memory_retain(memory);
+  }
+
+  virtual ~CommandBuffer() {
+    xe_memory_release(memory_);
+  }
+
+  // Returns the held reference as-is (no additional retain is performed).
+  xe_memory_ref memory() {
+    return memory_;
+  }
+
+  // TODO(benvanik): command methods.
+  virtual void Foo() = 0;
+
+protected:
+  xe_memory_ref memory_;
+
+private:
+  // Not copyable: a copy would share memory_ without its own retain and the
+  // reference would then be released once per copy. Declared, never defined.
+  CommandBuffer(const CommandBuffer&);
+  CommandBuffer& operator=(const CommandBuffer&);
+};
+
+
+} // namespace gpu
+} // namespace xe
+
+
+#endif // XENIA_GPU_COMMAND_BUFFER_H_
diff --git a/src/xenia/gpu/ring_buffer_worker.cc b/src/xenia/gpu/ring_buffer_worker.cc
index 57d160cec..fcccaf657 100644
--- a/src/xenia/gpu/ring_buffer_worker.cc
+++ b/src/xenia/gpu/ring_buffer_worker.cc
@@ -9,16 +9,18 @@
 
 #include <xenia/gpu/ring_buffer_worker.h>
 
+#include <xenia/gpu/xenos/packets.h>
+#include <xenia/gpu/xenos/registers.h>
+
 
 using namespace xe;
 using namespace xe::gpu;
+using namespace xe::gpu::xenos;
 
 
 RingBufferWorker::RingBufferWorker(xe_memory_ref memory) :
     memory_(memory) {
   running_ = true;
-  read_ptr_index_event_ = CreateEvent(
-      NULL, FALSE, FALSE, NULL);
   write_ptr_index_event_ = CreateEvent(
       NULL, FALSE, FALSE, NULL);
 
@@ -79,42 +81,13 @@ void RingBufferWorker::ThreadStart() {
     // Process the new commands.
     XELOGGPU("Ring buffer thread work");
 
-    #define READ_UINT32() \
-      XEGETUINT32BE(p + primary_buffer_ptr_ + read_ptr_index_ * 4); \
-      read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4);
-
-    while (true) {
-      uint32_t command = READ_UINT32();
-
-      switch (command) {
-      case 0xC0114800:
-        {
-          // Init packet.
-          // Will have 18-19 ops after it. Maybe.
-          XELOGGPU("Command(%.8X): init packet", command);
-          for (int n = 0; n < 18; n++) {
-            READ_UINT32();
-          }
-        }
-        break;
-      case 0xC0013F00:
-        {
-          // Kick segment.
-          uint32_t segment_ptr = READ_UINT32();
-          uint32_t length = READ_UINT32();
-          XELOGGPU("Command(%.8X): kick segment %.8X (%db)",
-                   command, segment_ptr, length * 4);
-          ExecuteSegment(segment_ptr, length);
-        }
-        break;
-      default:
-        XELOGGPU("Command(%.8X): unknown primary buffer command", command);
-        break;
-      }
-
-      if (read_ptr_index_ == write_ptr_index_) {
-        break;
-      }
+    // TODO(benvanik): handle wrapping around
+    // read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4);
+    XEASSERT(write_ptr_index_ >= read_ptr_index_);
+    uint32_t length = write_ptr_index_ - read_ptr_index_;
+    if (length) {
+      ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length);
+      read_ptr_index_ = write_ptr_index_;
     }
 
     // TODO(benvanik): use read_ptr_update_freq_ and only issue after moving
@@ -122,7 +95,6 @@ void RingBufferWorker::ThreadStart() {
     if (read_ptr_writeback_ptr_) {
       XESETUINT32BE(p + read_ptr_writeback_ptr_, read_ptr_index_);
     }
-    SetEvent(read_ptr_index_event_);
   }
 }
 
@@ -130,13 +102,146 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
   uint8_t* p = xe_memory_addr(memory_);
 
   // Adjust pointer base.
-  ptr += (primary_buffer_ptr_ & ~0x1FFFFFFF);
+  ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (ptr & 0x1FFFFFFF);
 
-  XELOGGPU("CommandList(%.8X): executing %d commands", ptr, length);
+#define LOG_DATA(count) \
+  for (uint32_t log_data_i = 0; log_data_i < (count); log_data_i++) { \
+    XELOGGPU(" %.8X", XEGETUINT32BE(packet_base + 1 * 4 + log_data_i * 4)); \
+  }
+
+  XELOGGPU("CommandList(%.8X): executing %dw", ptr, length);
 
   // Execute commands!
-  for (uint32_t n = 0; n < length; n++) {
-    uint32_t command = XEGETUINT32BE(p + ptr + n * 4);
-    XELOGGPU(" Command(%.8X)", command);
+  for (uint32_t n = 0; n < length;) {
+    const uint8_t* packet_base = p + ptr + n * 4;
+    const uint32_t packet = XEGETUINT32BE(packet_base);
+    const uint32_t packet_type = packet >> 30;
+    // Every case must advance n past the packet it consumed (header word
+    // plus payload); otherwise the loop would never terminate.
+    switch (packet_type) {
+    case 0x00:
+      {
+        // Type-0 packet.
+        // Write count registers in sequence to the registers starting at
+        // (base_index << 2).
+        XELOGGPU("Packet(%.8X): set registers:", packet);
+        uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
+        uint32_t base_index = (packet & 0xFFFF);
+        for (uint32_t m = 0; m < count; m++) {
+          uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4);
+          XELOGGPU(" %.4X <- %.8X", base_index + m, reg_data);
+          // TODO(benvanik): process register writes.
+        }
+        n += 1 + count;
+      }
+      break;
+    case 0x01:
+      {
+        // Type-1 packet.
+        // Contains two registers of data. Type-0 should be more common.
+        XELOGGPU("Packet(%.8X): set registers:", packet);
+        uint32_t reg_index_1 = packet & 0x7FF;
+        uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
+        uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4);
+        uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4);
+        XELOGGPU(" %.4X <- %.8X", reg_index_1, reg_data_1);
+        XELOGGPU(" %.4X <- %.8X", reg_index_2, reg_data_2);
+        // TODO(benvanik): process register writes.
+        n += 1 + 2;
+      }
+      break;
+    case 0x02:
+      // Type-2 packet.
+      // No-op with no payload: step over the header word only.
+      n += 1;
+      break;
+    case 0x03:
+      {
+        // Type-3 packet.
+        uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
+        uint32_t opcode = (packet >> 8) & 0x7F;
+        // & 1 == predicate, maybe?
+
+        switch (opcode) {
+        case PM4_ME_INIT:
+          // initialize CP's micro-engine
+          XELOGGPU("Packet(%.8X): PM4_ME_INIT", packet);
+          LOG_DATA(count);
+          break;
+
+        case PM4_NOP:
+          // skip N 32-bit words to get to the next packet
+          // No-op, ignore some data.
+          XELOGGPU("Packet(%.8X): PM4_NOP", packet);
+          LOG_DATA(count);
+          break;
+
+        case PM4_INDIRECT_BUFFER:
+          // indirect buffer dispatch
+          {
+            uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4);
+            uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4);
+            XELOGGPU("Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
+                     packet, list_ptr, list_length);
+            ExecuteSegment(list_ptr, list_length);
+          }
+          break;
+
+        case PM4_WAIT_REG_MEM:
+          // wait until a register or memory location is a specific value
+          XELOGGPU("Packet(%.8X): PM4_WAIT_REG_MEM", packet);
+          LOG_DATA(count);
+          break;
+
+        case PM4_REG_RMW:
+          // register read/modify/write
+          // ? (used during shader upload and edram setup)
+          XELOGGPU("Packet(%.8X): PM4_REG_RMW", packet);
+          LOG_DATA(count);
+          break;
+
+        case PM4_EVENT_WRITE_SHD:
+          // generate a VS|PS_done event
+          {
+            XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE_SHD", packet);
+            LOG_DATA(count);
+            // 3?
+            uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
+            // ptr
+            uint32_t d1 = XEGETUINT32BE(packet_base + 2 * 4);
+            // value? (rebased below the same way as the segment pointer)
+            uint32_t d2 = XEGETUINT32BE(packet_base + 3 * 4);
+            XESETUINT32BE(p + ((primary_buffer_ptr_ & ~0x1FFFFFFF) |
+                               (d1 & 0x1FFFFFFF)), d2);
+          }
+          break;
+
+        case PM4_DRAW_INDX_2:
+          // draw using supplied indices in packet
+          XELOGGPU("Packet(%.8X): PM4_DRAW_INDX_2", packet);
+          LOG_DATA(count);
+          break;
+
+        case PM4_IM_LOAD_IMMEDIATE:
+          // load sequencer instruction memory (code embedded in packet)
+          XELOGGPU("Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet);
+          LOG_DATA(count);
+          break;
+        case PM4_INVALIDATE_STATE:
+          // selective invalidation of state pointers
+          XELOGGPU("Packet(%.8X): PM4_INVALIDATE_STATE", packet);
+          LOG_DATA(count);
+          break;
+
+        default:
+          XELOGGPU("Packet(%.8X): unknown!", packet);
+          LOG_DATA(count);
+          break;
+        }
+
+        n += 1 + count;
+      }
+      break;
+    }
   }
-}
\ No newline at end of file
+}
diff --git a/src/xenia/gpu/ring_buffer_worker.h b/src/xenia/gpu/ring_buffer_worker.h
index d9c3e8c93..34b3d7a66 100644
--- a/src/xenia/gpu/ring_buffer_worker.h
+++ b/src/xenia/gpu/ring_buffer_worker.h
@@ -44,7 +44,6 @@ protected:
   uint32_t primary_buffer_ptr_;
   uint32_t primary_buffer_size_;
 
-  HANDLE read_ptr_index_event_;
   uint32_t read_ptr_index_;
   uint32_t read_ptr_update_freq_;
   uint32_t read_ptr_writeback_ptr_;
diff --git a/src/xenia/gpu/sources.gypi b/src/xenia/gpu/sources.gypi
index d1c157aeb..323e0b447 100644
--- a/src/xenia/gpu/sources.gypi
+++ b/src/xenia/gpu/sources.gypi
@@ -1,6 +1,7 @@
 # Copyright 2013 Ben Vanik. All Rights Reserved.
{ 'sources': [ + 'command_buffer.h', 'gpu-private.h', 'gpu.cc', 'gpu.h', @@ -12,6 +13,7 @@ 'includes': [ 'nop/sources.gypi', + 'xenos/sources.gypi', ], 'conditions': [ diff --git a/src/xenia/gpu/xenos/packets.h b/src/xenia/gpu/xenos/packets.h new file mode 100644 index 000000000..39fa75043 --- /dev/null +++ b/src/xenia/gpu/xenos/packets.h @@ -0,0 +1,81 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_XENOS_PACKETS_H_ +#define XENIA_GPU_XENOS_PACKETS_H_ + +#include + + +namespace xe { +namespace gpu { +namespace xenos { + + +// Opcodes (IT_OPCODE) for Type-3 commands in the ringbuffer. +// https://github.com/freedreno/amd-gpu/blob/master/include/api/gsl_pm4types.h +// Not sure if all of these are used. +enum Type3Opcode { + PM4_ME_INIT = 0x48, // initialize CP's micro-engine + + PM4_NOP = 0x10, // skip N 32-bit words to get to the next packet + + PM4_INDIRECT_BUFFER = 0x3f, // indirect buffer dispatch. prefetch parser uses this packet type to determine whether to pre-fetch the IB + PM4_INDIRECT_BUFFER_PFD = 0x37, // indirect buffer dispatch. 
same as IB, but init is pipelined + + PM4_WAIT_FOR_IDLE = 0x26, // wait for the IDLE state of the engine + PM4_WAIT_REG_MEM = 0x3c, // wait until a register or memory location is a specific value + PM4_WAIT_REG_EQ = 0x52, // wait until a register location is equal to a specific value + PM4_WAT_REG_GTE = 0x53, // wait until a register location is >= a specific value + PM4_WAIT_UNTIL_READ = 0x5c, // wait until a read completes + PM4_WAIT_IB_PFD_COMPLETE = 0x5d, // wait until all base/size writes from an IB_PFD packet have completed + + PM4_REG_RMW = 0x21, // register read/modify/write + PM4_REG_TO_MEM = 0x3e, // reads register in chip and writes to memory + PM4_MEM_WRITE = 0x3d, // write N 32-bit words to memory + PM4_MEM_WRITE_CNTR = 0x4f, // write CP_PROG_COUNTER value to memory + PM4_COND_EXEC = 0x44, // conditional execution of a sequence of packets + PM4_COND_WRITE = 0x45, // conditional write to memory or register + + PM4_EVENT_WRITE = 0x46, // generate an event that creates a write to memory when completed + PM4_EVENT_WRITE_SHD = 0x58, // generate a VS|PS_done event + PM4_EVENT_WRITE_CFL = 0x59, // generate a cache flush done event + PM4_EVENT_WRITE_ZPD = 0x5b, // generate a z_pass done event + + PM4_DRAW_INDX = 0x22, // initiate fetch of index buffer and draw + PM4_DRAW_INDX_2 = 0x36, // draw using supplied indices in packet + PM4_DRAW_INDX_BIN = 0x34, // initiate fetch of index buffer and binIDs and draw + PM4_DRAW_INDX_2_BIN = 0x35, // initiate fetch of bin IDs and draw using supplied indices + + PM4_VIZ_QUERY = 0x23, // begin/end initiator for viz query extent processing + PM4_SET_STATE = 0x25, // fetch state sub-blocks and initiate shader code DMAs + PM4_SET_CONSTANT = 0x2d, // load constant into chip and to memory + PM4_IM_LOAD = 0x27, // load sequencer instruction memory (pointer-based) + PM4_IM_LOAD_IMMEDIATE = 0x2b, // load sequencer instruction memory (code embedded in packet) + PM4_LOAD_CONSTANT_CONTEXT = 0x2e, // load constants from a location in 
memory + PM4_INVALIDATE_STATE = 0x3b, // selective invalidation of state pointers + + PM4_SET_SHADER_BASES = 0x4A, // dynamically changes shader instruction memory partition + PM4_SET_BIN_BASE_OFFSET = 0x4B, // program an offset that will added to the BIN_BASE value of the 3D_DRAW_INDX_BIN packet + PM4_SET_BIN_MASK = 0x50, // sets the 64-bit BIN_MASK register in the PFP + PM4_SET_BIN_SELECT = 0x51, // sets the 64-bit BIN_SELECT register in the PFP + + PM4_CONTEXT_UPDATE = 0x5e, // updates the current context, if needed + PM4_INTERRUPT = 0x40, // generate interrupt from the command stream + + PM4_IM_STORE = 0x2c, // copy sequencer instruction memory to system memory +}; + + +} // namespace xenos +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_XENOS_PACKETS_H_ diff --git a/src/xenia/gpu/xenos/registers.h b/src/xenia/gpu/xenos/registers.h new file mode 100644 index 000000000..00e38fc0c --- /dev/null +++ b/src/xenia/gpu/xenos/registers.h @@ -0,0 +1,38 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2013 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_XENOS_REGISTERS_H_ +#define XENIA_GPU_XENOS_REGISTERS_H_ + +#include + + +namespace xe { +namespace gpu { +namespace xenos { + + +union RegisterValue { + uint32_t uint_value; + float float_value; +}; + + +struct RegisterFile { + // TODO(benvanik): figure out the actual number. 
+ RegisterValue registers[0xFFFF]; +}; + + +} // namespace xenos +} // namespace gpu +} // namespace xe + + +#endif // XENIA_GPU_XENOS_REGISTERS_H_ diff --git a/src/xenia/gpu/xenos/sources.gypi b/src/xenia/gpu/xenos/sources.gypi new file mode 100644 index 000000000..c7f29b01e --- /dev/null +++ b/src/xenia/gpu/xenos/sources.gypi @@ -0,0 +1,7 @@ +# Copyright 2013 Ben Vanik. All Rights Reserved. +{ + 'sources': [ + 'packets.h', + 'registers.h', + ], +}