GL4 command processor.
This commit is contained in:
parent
42e6a44624
commit
f8325512d7
|
@ -45,6 +45,7 @@ class Control {
|
|||
virtual void ResizeToFill(int32_t pad_left, int32_t pad_top,
|
||||
int32_t pad_right, int32_t pad_bottom) = 0;
|
||||
void Layout();
|
||||
virtual void Invalidate() {}
|
||||
|
||||
// TODO(benvanik): colors/brushes/etc.
|
||||
// TODO(benvanik): fonts.
|
||||
|
|
|
@ -114,10 +114,19 @@ void Win32Control::OnResize(UIEvent& e) {
|
|||
for (auto& child_control : children_) {
|
||||
auto win32_control = static_cast<Win32Control*>(child_control.get());
|
||||
win32_control->OnResize(e);
|
||||
win32_control->Invalidate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Win32Control::Invalidate() {
|
||||
InvalidateRect(hwnd_, nullptr, FALSE);
|
||||
for (auto& child_control : children_) {
|
||||
auto win32_control = static_cast<Win32Control*>(child_control.get());
|
||||
win32_control->Invalidate();
|
||||
}
|
||||
}
|
||||
|
||||
void Win32Control::set_cursor_visible(bool value) {
|
||||
if (is_cursor_visible_ == value) {
|
||||
return;
|
||||
|
|
|
@ -33,6 +33,7 @@ class Win32Control : public Control {
|
|||
int32_t bottom) override;
|
||||
void ResizeToFill(int32_t pad_left, int32_t pad_top, int32_t pad_right,
|
||||
int32_t pad_bottom) override;
|
||||
void Invalidate() override;
|
||||
|
||||
void set_cursor_visible(bool value) override;
|
||||
void set_enabled(bool value) override;
|
||||
|
|
|
@ -0,0 +1,940 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <xenia/gpu/gl4/command_processor.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include <poly/logging.h>
|
||||
#include <xenia/gpu/gl4/gl4_graphics_system.h>
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
#include <xenia/gpu/xenos.h>
|
||||
|
||||
// Emits a GPU log line only while FLAGS_trace_ring_buffer is set.
// The body is wrapped in do/while(0) so the macro expands to exactly one
// statement: a bare `if` here would silently capture an `else` that follows
// the macro at the call site.
#define XETRACECP(fmt, ...)                                    \
  do {                                                         \
    if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__); \
  } while (0)
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
using namespace xe::gpu::xenos;
|
||||
|
||||
// Captures the memory/register-file handles from the owning graphics system,
// creates the auto-reset event used to signal write pointer updates, and
// records the performance counter value that QueryTime() measures against.
// The worker thread is not started here; see Initialize().
CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system)
    : memory_(graphics_system->memory()),
      membase_(graphics_system->memory()->membase()),
      graphics_system_(graphics_system),
      // Read through the constructor argument rather than the member: members
      // are initialized in declaration order, so going through
      // graphics_system_ would only work for one particular class layout.
      register_file_(graphics_system->register_file()),
      worker_running_(true),
      time_base_(0),
      counter_(0),
      primary_buffer_ptr_(0),
      primary_buffer_size_(0),
      read_ptr_index_(0),
      read_ptr_update_freq_(0),
      read_ptr_writeback_ptr_(0),
      write_ptr_index_event_(CreateEvent(nullptr, FALSE, FALSE, nullptr)),
      write_ptr_index_(0) {
  LARGE_INTEGER perf_counter;
  QueryPerformanceCounter(&perf_counter);
  time_base_ = perf_counter.QuadPart;
}
|
||||
|
||||
// Releases the event handle created in the constructor.
CommandProcessor::~CommandProcessor() {
  CloseHandle(write_ptr_index_event_);
}
|
||||
|
||||
uint64_t CommandProcessor::QueryTime() {
|
||||
LARGE_INTEGER perf_counter;
|
||||
QueryPerformanceCounter(&perf_counter);
|
||||
return perf_counter.QuadPart - time_base_;
|
||||
}
|
||||
|
||||
// Records where the primary ring buffer lives and starts the worker thread
// that drains it.
//   ptr:        address of the primary ring buffer.
//   page_count: encoded buffer size, converted to a byte count below.
void CommandProcessor::Initialize(uint32_t ptr, uint32_t page_count) {
  // Not sure this is correct, but it's a way to take the page_count back to
  // the number of bytes allocated by the physical alloc.
  const uint32_t size_in_bytes = 1 << (0x1C - page_count - 1);

  primary_buffer_ptr_ = ptr;
  primary_buffer_size_ = size_in_bytes;
  read_ptr_index_ = 0;

  worker_running_ = true;
  auto thread_body = [this]() {
    poly::threading::set_name("GL4 Worker");
    xe::Profiler::ThreadEnter("GL4 Worker");
    WorkerMain();
    xe::Profiler::ThreadExit();
  };
  worker_thread_ = std::thread(thread_body);
}
|
||||
|
||||
// Asks the worker thread to stop, wakes it if it is waiting for a write
// pointer update, and blocks until it has exited.
void CommandProcessor::Shutdown() {
  worker_running_ = false;
  SetEvent(write_ptr_index_event_);
  // join() on a thread that was never started (Initialize() not called) or
  // that was already joined throws std::system_error, so only join when there
  // is actually a thread to wait for.
  if (worker_thread_.joinable()) {
    worker_thread_.join();
  }
}
|
||||
|
||||
void CommandProcessor::WorkerMain() {
|
||||
while (worker_running_) {
|
||||
uint32_t write_ptr_index = write_ptr_index_.load();
|
||||
while (write_ptr_index == 0xBAADF00D ||
|
||||
read_ptr_index_ == write_ptr_index) {
|
||||
// Check if the pointer has moved.
|
||||
// We wait a short bit here to yield time. Since we are also running the
|
||||
// main window display we don't want to pause too long, though.
|
||||
// YieldProcessor();
|
||||
const int wait_time_ms = 5;
|
||||
if (WaitForSingleObject(write_ptr_index_event_, wait_time_ms) ==
|
||||
WAIT_TIMEOUT) {
|
||||
write_ptr_index = write_ptr_index_.load();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
assert_true(read_ptr_index_ != write_ptr_index);
|
||||
|
||||
// Process the new commands.
|
||||
XETRACECP("Command processor thread work");
|
||||
|
||||
// Execute. Note that we handle wraparound transparently.
|
||||
ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
|
||||
read_ptr_index_ = write_ptr_index;
|
||||
|
||||
// TODO(benvanik): use reader->Read_update_freq_ and only issue after moving
|
||||
// that many indices.
|
||||
if (read_ptr_writeback_ptr_) {
|
||||
poly::store_and_swap<uint32_t>(membase_ + read_ptr_writeback_ptr_,
|
||||
read_ptr_index_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Configures where the read pointer is written back after command execution.
//   ptr:        RB_RPTR_ADDR, pointer to write back the address to.
//   block_size: RB_BLKSZ, log2 of the number of quadwords read between
//               updates of the read pointer.
void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
                                                  uint32_t block_size) {
  // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C
  read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr;
  // CP_RB_CNTL Ring Buffer Control 0x704
  // 2^block_size / 4, computed with an integer shift instead of pow(). The
  // exponent is clamped because shifting a 32-bit value by 32 or more is
  // undefined (as was converting the oversized pow() result).
  const uint32_t exponent = std::min<uint32_t>(block_size, 31);
  read_ptr_update_freq_ = (1u << exponent) / 4;
}
|
||||
|
||||
// Publishes a new ring buffer write index and wakes the worker thread so it
// can pick the new commands up.
void CommandProcessor::UpdateWritePointer(uint32_t value) {
  write_ptr_index_.store(value);
  SetEvent(write_ptr_index_event_);
}
|
||||
|
||||
void CommandProcessor::WriteRegister(uint32_t packet_ptr, uint32_t index,
|
||||
uint32_t value) {
|
||||
RegisterFile* regs = register_file_;
|
||||
assert_true(index < RegisterFile::kRegisterCount);
|
||||
regs->values[index].u32 = value;
|
||||
|
||||
// If this is a COHER register, set the dirty flag.
|
||||
// This will block the command processor the next time it WAIT_MEM_REGs and
|
||||
// allow us to synchronize the memory.
|
||||
if (index == XE_GPU_REG_COHER_STATUS_HOST) {
|
||||
regs->values[index].u32 |= 0x80000000ul;
|
||||
}
|
||||
|
||||
// Scratch register writeback.
|
||||
if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
|
||||
uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
|
||||
if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) {
|
||||
// Enabled - write to address.
|
||||
uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
|
||||
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
|
||||
poly::store_and_swap<uint32_t>(
|
||||
membase_ + xenos::GpuToCpu(primary_buffer_ptr_, mem_addr), value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CommandProcessor::MakeCoherent() {
|
||||
// Status host often has 0x01000000 or 0x03000000.
|
||||
// This is likely toggling VC (vertex cache) or TC (texture cache).
|
||||
// Or, it also has a direction in here maybe - there is probably
|
||||
// some way to check for dest coherency (what all the COHER_DEST_BASE_*
|
||||
// registers are for).
|
||||
// Best docs I've found on this are here:
|
||||
// http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
|
||||
// http://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
|
||||
|
||||
RegisterFile* regs = register_file_;
|
||||
auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32;
|
||||
auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32;
|
||||
auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32;
|
||||
|
||||
if (!(status_host & 0x80000000ul)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO(benvanik): notify resource cache of base->size and type.
|
||||
XETRACECP("Make %.8X -> %.8X (%db) coherent", base_host,
|
||||
base_host + size_host, size_host);
|
||||
|
||||
// Mark coherent.
|
||||
status_host &= ~0x80000000ul;
|
||||
regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host;
|
||||
}
|
||||
|
||||
// Cursor over a range of 32-bit big-endian command words in guest memory.
//
// When ptr_mask is non-zero the cursor wraps inside a ring whose first word is
// at base_ptr and which holds (ptr_mask + 1) words; the range may then have
// end_ptr < start_ptr. When ptr_mask is zero the range is linear.
class CommandProcessor::RingbufferReader {
 public:
  // membase:   host pointer that guest addresses are added to.
  // base_ptr:  address of the first word of the ring (used only for wrapping).
  // ptr_mask:  word-index mask for wrapping, or 0 to disable wrapping.
  // start_ptr: address of the first word to read.
  // end_ptr:   address one past the last word to read.
  RingbufferReader(uint8_t* membase, uint32_t base_ptr, uint32_t ptr_mask,
                   uint32_t start_ptr, uint32_t end_ptr)
      : membase_(membase),
        base_ptr_(base_ptr),
        ptr_mask_(ptr_mask),
        start_ptr_(start_ptr),
        end_ptr_(end_ptr),
        ptr_(start_ptr),
        words_read_(0) {}

  // Address of the next word to be read.
  uint32_t ptr() const { return ptr_; }
  // Number of words consumed so far. Tracked as a counter (rather than
  // derived from ptr_ - start_ptr_) so it stays correct after a wrap.
  uint32_t offset() const { return words_read_; }
  // True until the cursor reaches end_ptr.
  bool can_read() const { return ptr_ != end_ptr_; }

  // Returns the next word without consuming it.
  uint32_t Peek() { return poly::load_and_swap<uint32_t>(membase_ + ptr_); }

  // Asserts that at least `words` more words remain before end_ptr.
  void CheckRead(uint32_t words) {
    if (ptr_mask_) {
      // Ring: distance to the end is taken modulo the ring size so a range
      // that wraps past the end of the ring is measured correctly.
      assert_true(words <=
                  (((end_ptr_ - ptr_) / sizeof(uint32_t)) & ptr_mask_));
    } else {
      assert_true(ptr_ + words * sizeof(uint32_t) <= end_ptr_);
    }
  }

  // Returns the next word and consumes it.
  uint32_t Read() {
    uint32_t value = poly::load_and_swap<uint32_t>(membase_ + ptr_);
    Advance(1);
    return value;
  }

  // Consumes `words` words, wrapping within the ring if enabled.
  void Advance(uint32_t words) {
    ptr_ = Wrap(static_cast<uint32_t>(ptr_ + words * sizeof(uint32_t)));
    words_read_ += words;
    // A wrapped range (end before start) legitimately has the cursor above
    // end_ptr_ until it wraps, so it needs a two-sided check.
    assert_true(end_ptr_ >= start_ptr_
                    ? ptr_ <= end_ptr_
                    : (ptr_ >= start_ptr_ || ptr_ <= end_ptr_));
  }

  // Same as Advance(); reads better at call sites that discard data.
  void Skip(uint32_t words) { Advance(words); }

  // Logs the next `words` words without consuming them.
  void TraceData(uint32_t words) {
    for (uint32_t i = 0; i < words; ++i) {
      // Wrap each address so tracing never reads past the end of the ring.
      uint32_t i_ptr =
          Wrap(static_cast<uint32_t>(ptr_ + i * sizeof(uint32_t)));
      XETRACECP("[%.8X]   %.8X", i_ptr,
                poly::load_and_swap<uint32_t>(membase_ + i_ptr));
    }
  }

 private:
  // Folds `ptr` back into the ring when wrapping is enabled; otherwise
  // returns it unchanged.
  uint32_t Wrap(uint32_t ptr) const {
    if (!ptr_mask_) {
      return ptr;
    }
    return static_cast<uint32_t>(
        base_ptr_ +
        (((ptr - base_ptr_) / sizeof(uint32_t)) & ptr_mask_) *
            sizeof(uint32_t));
  }

  uint8_t* membase_;

  uint32_t base_ptr_;
  uint32_t ptr_mask_;
  uint32_t start_ptr_;
  uint32_t end_ptr_;
  uint32_t ptr_;
  uint32_t words_read_;
};
|
||||
|
||||
void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index,
|
||||
uint32_t end_index) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Adjust pointer base.
|
||||
uint32_t start_ptr = primary_buffer_ptr_ + start_index * sizeof(uint32_t);
|
||||
start_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (start_ptr & 0x1FFFFFFF);
|
||||
uint32_t end_ptr = primary_buffer_ptr_ + end_index * sizeof(uint32_t);
|
||||
end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF);
|
||||
|
||||
XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)", start_ptr, start_index,
|
||||
end_index);
|
||||
|
||||
// Execute commands!
|
||||
uint32_t ptr_mask = (primary_buffer_size_ / sizeof(uint32_t)) - 1;
|
||||
RingbufferReader reader(membase_, primary_buffer_ptr_, ptr_mask, start_ptr,
|
||||
end_ptr);
|
||||
while (reader.can_read()) {
|
||||
ExecutePacket(&reader);
|
||||
}
|
||||
if (end_index > start_index) {
|
||||
assert_true(reader.offset() == (end_index - start_index));
|
||||
}
|
||||
|
||||
XETRACECP(" ExecutePrimaryBuffer End");
|
||||
}
|
||||
|
||||
void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
|
||||
XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);
|
||||
|
||||
// Execute commands!
|
||||
uint32_t ptr_mask = 0;
|
||||
RingbufferReader reader(membase_, primary_buffer_ptr_, ptr_mask, ptr,
|
||||
ptr + length * sizeof(uint32_t));
|
||||
while (reader.can_read()) {
|
||||
ExecutePacket(&reader);
|
||||
}
|
||||
|
||||
XETRACECP(" ExecuteIndirectBuffer End");
|
||||
}
|
||||
|
||||
// Reads one packet header from `reader` and dispatches it to the handler for
// its type (the top two bits of the header). An all-zero header is logged and
// treated as a successful no-op. Returns the handler's result.
bool CommandProcessor::ExecutePacket(RingbufferReader* reader) {
  uint32_t packet_ptr = reader->ptr();
  const uint32_t packet = reader->Read();
  const uint32_t packet_type = packet >> 30;
  if (packet == 0) {
    XETRACECP("[%.8X] Packet(%.8X): 0?", packet_ptr, packet);
    return true;
  }

  switch (packet_type) {
    case 0x00:
      return ExecutePacketType0(reader, packet_ptr, packet);
    case 0x01:
      return ExecutePacketType1(reader, packet_ptr, packet);
    case 0x02:
      return ExecutePacketType2(reader, packet_ptr, packet);
    case 0x03:
    default:
      // packet_type is a two-bit value so 0x03 is the only remaining case;
      // sharing it with `default` guarantees every path returns a value.
      return ExecutePacketType3(reader, packet_ptr, packet);
  }
}
|
||||
|
||||
// Type-0 packet.
// Writes `count` data words to registers. Header layout as decoded here:
// bits 0-14 base register index, bit 15 "write every word to the same
// register", bits 16-29 word count minus one.
bool CommandProcessor::ExecutePacketType0(RingbufferReader* reader,
                                          uint32_t packet_ptr,
                                          uint32_t packet) {
  XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet);
  const uint32_t word_count = ((packet >> 16) & 0x3FFF) + 1;
  const uint32_t base_index = packet & 0x7FFF;
  const bool single_register = ((packet >> 15) & 0x1) != 0;
  for (uint32_t i = 0; i < word_count; ++i) {
    const uint32_t data_ptr = reader->ptr();
    const uint32_t reg_data = reader->Read();
    const uint32_t target_index =
        single_register ? base_index : base_index + i;
    const char* reg_name = register_file_->GetRegisterName(target_index);
    XETRACECP("[%.8X]   %.8X -> %.4X %s", data_ptr, reg_data, target_index,
              reg_name ? reg_name : "");
    WriteRegister(packet_ptr, target_index, reg_data);
  }
  return true;
}
|
||||
|
||||
// Type-1 packet.
// Carries exactly two register writes: register indices are in header bits
// 0-10 and 11-21, followed by one data word for each.
// Type-0 should be more common.
bool CommandProcessor::ExecutePacketType1(RingbufferReader* reader,
                                          uint32_t packet_ptr,
                                          uint32_t packet) {
  XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet);

  struct PendingWrite {
    uint32_t index;
    uint32_t data_ptr;
    uint32_t data;
  };
  PendingWrite writes[2];
  writes[0].index = packet & 0x7FF;
  writes[1].index = (packet >> 11) & 0x7FF;

  // Fetch both data words first, then trace both, then apply both.
  for (auto& write : writes) {
    write.data_ptr = reader->ptr();
    write.data = reader->Read();
  }
  for (const auto& write : writes) {
    const char* reg_name = register_file_->GetRegisterName(write.index);
    XETRACECP("[%.8X]   %.8X -> %.4X %s", write.data_ptr, write.data,
              write.index, reg_name ? reg_name : "");
  }
  for (const auto& write : writes) {
    WriteRegister(packet_ptr, write.index, write.data);
  }
  return true;
}
|
||||
|
||||
// Type-2 packet.
// Padding: has no payload and changes no state; it is only logged.
bool CommandProcessor::ExecutePacketType2(RingbufferReader* reader,
                                          uint32_t packet_ptr,
                                          uint32_t packet) {
  XETRACECP("[%.8X] Packet(%.8X): padding", packet_ptr, packet);
  return true;
}
|
||||
|
||||
// Type-3 packet.
// Decodes the opcode (header bits 8-14) and payload word count (bits 16-29,
// stored minus one) and dispatches to the matching handler. Unrecognized
// opcodes are traced, skipped, and reported as false. In every case the
// handler is expected to consume exactly `count` payload words.
bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader,
                                          uint32_t packet_ptr,
                                          uint32_t packet) {
  // & 1 == predicate, maybe?
  const uint32_t opcode = (packet >> 8) & 0x7F;
  const uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
  const auto data_start_offset = reader->offset();

  // Common exit path: verify the payload was fully consumed, then hand the
  // handler's result back.
  auto finish = [&](bool result) {
    assert_true(reader->offset() == data_start_offset + count);
    return result;
  };

  switch (opcode) {
    case PM4_ME_INIT:
      return finish(
          ExecutePacketType3_ME_INIT(reader, packet_ptr, packet, count));
    case PM4_NOP:
      return finish(ExecutePacketType3_NOP(reader, packet_ptr, packet, count));
    case PM4_INTERRUPT:
      return finish(
          ExecutePacketType3_INTERRUPT(reader, packet_ptr, packet, count));
    case PM4_XE_SWAP:
      return finish(
          ExecutePacketType3_XE_SWAP(reader, packet_ptr, packet, count));
    case PM4_INDIRECT_BUFFER:
      return finish(ExecutePacketType3_INDIRECT_BUFFER(reader, packet_ptr,
                                                       packet, count));
    case PM4_WAIT_REG_MEM:
      return finish(
          ExecutePacketType3_WAIT_REG_MEM(reader, packet_ptr, packet, count));
    case PM4_REG_RMW:
      return finish(
          ExecutePacketType3_REG_RMW(reader, packet_ptr, packet, count));
    case PM4_COND_WRITE:
      return finish(
          ExecutePacketType3_COND_WRITE(reader, packet_ptr, packet, count));
    case PM4_EVENT_WRITE:
      return finish(
          ExecutePacketType3_EVENT_WRITE(reader, packet_ptr, packet, count));
    case PM4_EVENT_WRITE_SHD:
      return finish(ExecutePacketType3_EVENT_WRITE_SHD(reader, packet_ptr,
                                                       packet, count));
    case PM4_DRAW_INDX:
      return finish(
          ExecutePacketType3_DRAW_INDX(reader, packet_ptr, packet, count));
    case PM4_DRAW_INDX_2:
      return finish(
          ExecutePacketType3_DRAW_INDX_2(reader, packet_ptr, packet, count));
    case PM4_SET_CONSTANT:
      return finish(
          ExecutePacketType3_SET_CONSTANT(reader, packet_ptr, packet, count));
    case PM4_LOAD_ALU_CONSTANT:
      return finish(ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet_ptr,
                                                         packet, count));
    case PM4_IM_LOAD:
      return finish(
          ExecutePacketType3_IM_LOAD(reader, packet_ptr, packet, count));
    case PM4_IM_LOAD_IMMEDIATE:
      return finish(ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet_ptr,
                                                         packet, count));
    case PM4_INVALIDATE_STATE:
      return finish(ExecutePacketType3_INVALIDATE_STATE(reader, packet_ptr,
                                                        packet, count));

    // Bin mask/select packets: the single payload word is read and traced
    // but not stored anywhere.
    case PM4_SET_BIN_MASK_LO: {
      const uint32_t value = reader->Read();
      XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", packet_ptr,
                packet, value);
      return finish(true);
    }
    case PM4_SET_BIN_MASK_HI: {
      const uint32_t value = reader->Read();
      XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", packet_ptr,
                packet, value);
      return finish(true);
    }
    case PM4_SET_BIN_SELECT_LO: {
      const uint32_t value = reader->Read();
      XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", packet_ptr,
                packet, value);
      return finish(true);
    }
    case PM4_SET_BIN_SELECT_HI: {
      const uint32_t value = reader->Read();
      XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", packet_ptr,
                packet, value);
      return finish(true);
    }

    // Ignored packets - listed separately from `default` so a breakpoint on
    // the default label is not hit by them.
    case 0x50:  // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
      XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet);
      reader->TraceData(count);
      reader->Skip(count);
      return finish(false);

    default:
      XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet);
      reader->TraceData(count);
      reader->Skip(count);
      return finish(false);
  }
}
|
||||
|
||||
// PM4_ME_INIT: initialize CP's micro-engine.
// The payload is only traced and skipped; nothing is emulated.
bool CommandProcessor::ExecutePacketType3_ME_INIT(RingbufferReader* reader,
                                                  uint32_t packet_ptr,
                                                  uint32_t packet,
                                                  uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT", packet_ptr, packet);
  reader->TraceData(count);
  reader->Skip(count);
  return true;
}
|
||||
|
||||
// PM4_NOP: skip N 32-bit words to get to the next packet.
// The skipped words are traced but otherwise ignored.
bool CommandProcessor::ExecutePacketType3_NOP(RingbufferReader* reader,
                                              uint32_t packet_ptr,
                                              uint32_t packet, uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_NOP", packet_ptr, packet);
  reader->TraceData(count);
  reader->Skip(count);
  return true;
}
|
||||
|
||||
// PM4_INTERRUPT: generate interrupt from the command stream.
// Reads one payload word, a CPU bit mask, and dispatches an interrupt
// callback for each of bits 0-5 that is set.
bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader,
                                                    uint32_t packet_ptr,
                                                    uint32_t packet,
                                                    uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT", packet_ptr, packet);
  reader->TraceData(count);
  const uint32_t cpu_mask = reader->Read();
  for (int cpu = 0; cpu < 6; ++cpu) {
    const bool targeted = (cpu_mask & (1 << cpu)) != 0;
    if (!targeted) {
      continue;
    }
    graphics_system_->DispatchInterruptCallback(1, cpu);
  }
  return true;
}
|
||||
|
||||
// PM4_XE_SWAP: Xenia-specific VdSwap hook.
// VdSwap will post this to tell us we need to swap the screen/fire an
// interrupt. The payload is skipped and the swap handler, if one is
// installed, is invoked.
bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
                                                  uint32_t packet_ptr,
                                                  uint32_t packet,
                                                  uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet);
  reader->TraceData(count);
  reader->Skip(count);
  if (!swap_handler_) {
    return true;
  }
  swap_handler_();
  return true;
}
|
||||
|
||||
// PM4_INDIRECT_BUFFER: indirect buffer dispatch.
// Payload: the address of another command list and its length in words. The
// list is executed to completion before this call returns.
bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER(
    RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
    uint32_t count) {
  const uint32_t list_ptr = reader->Read();
  const uint32_t list_length = reader->Read();
  XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", packet_ptr,
            packet, list_ptr, list_length);
  const auto list_cpu_ptr = GpuToCpu(list_ptr);
  ExecuteIndirectBuffer(list_cpu_ptr, list_length);
  return true;
}
|
||||
|
||||
// PM4_WAIT_REG_MEM: wait until a register or memory location is a specific
// value.
// Payload words, in order: wait_info, poll address/register, reference value,
// mask, wait interval. wait_info bit 4 selects memory (set) or register
// (clear) polling; bits 0-2 select the comparison applied to (value & mask).
// NOTE: this blocks the calling thread until the condition matches; there is
// no shutdown or timeout escape.
bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
                                                       uint32_t packet_ptr,
                                                       uint32_t packet,
                                                       uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", packet_ptr, packet);
  reader->TraceData(count);
  uint32_t wait_info = reader->Read();
  uint32_t poll_reg_addr = reader->Read();
  uint32_t ref = reader->Read();
  uint32_t mask = reader->Read();
  uint32_t wait = reader->Read();

  // For memory polls the low two address bits carry the endianness. Decode
  // and strip them once, before the loop: doing it per iteration would see
  // the already-stripped address and use endianness 0 on every retry.
  const bool poll_memory = (wait_info & 0x10) != 0;
  const auto endianness = static_cast<Endian>(poll_reg_addr & 0x3);
  if (poll_memory) {
    poll_reg_addr &= ~0x3;
  }

  bool matched = false;
  do {
    uint32_t value;
    if (poll_memory) {
      // Memory.
      value =
          poly::load<uint32_t>(membase_ + GpuToCpu(packet_ptr, poll_reg_addr));
      value = GpuSwap(value, endianness);
    } else {
      // Register.
      assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
      value = register_file_->values[poll_reg_addr].u32;
      if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
        // Polling the coherency status: clear the dirty flag, then re-read.
        MakeCoherent();
        value = register_file_->values[poll_reg_addr].u32;
      }
    }
    switch (wait_info & 0x7) {
      case 0x0:  // Never.
        matched = false;
        break;
      case 0x1:  // Less than reference.
        matched = (value & mask) < ref;
        break;
      case 0x2:  // Less than or equal to reference.
        matched = (value & mask) <= ref;
        break;
      case 0x3:  // Equal to reference.
        matched = (value & mask) == ref;
        break;
      case 0x4:  // Not equal to reference.
        matched = (value & mask) != ref;
        break;
      case 0x5:  // Greater than or equal to reference.
        matched = (value & mask) >= ref;
        break;
      case 0x6:  // Greater than reference.
        matched = (value & mask) > ref;
        break;
      case 0x7:  // Always
        matched = true;
        break;
    }
    if (!matched) {
      // Wait: sleep for larger intervals, otherwise just yield.
      if (wait >= 0x100) {
        Sleep(wait / 0x100);
      } else {
        SwitchToThread();
      }
    }
  } while (!matched);
  return true;
}
|
||||
|
||||
// PM4_REG_RMW: register read/modify/write.
// ? (used during shader upload and edram setup)
// Payload: rmw_info, and_mask, or_mask. The target register is the low 13
// bits of rmw_info. Bit 30 / bit 31 of rmw_info select whether the OR / AND
// operand is an immediate (clear) or the index of a register to read (set).
// The OR is applied before the AND.
bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader,
                                                  uint32_t packet_ptr,
                                                  uint32_t packet,
                                                  uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW", packet_ptr, packet);
  reader->TraceData(count);
  const uint32_t rmw_info = reader->Read();
  const uint32_t and_mask = reader->Read();
  const uint32_t or_mask = reader->Read();

  const uint32_t target_index = rmw_info & 0x1FFF;
  const bool or_with_register = ((rmw_info >> 30) & 0x1) != 0;
  const bool and_with_register = ((rmw_info >> 31) & 0x1) != 0;

  uint32_t value = register_file_->values[target_index].u32;

  uint32_t or_operand = or_mask;  // | imm
  if (or_with_register) {
    or_operand = register_file_->values[or_mask & 0x1FFF].u32;  // | reg
  }
  value |= or_operand;

  uint32_t and_operand = and_mask;  // & imm
  if (and_with_register) {
    and_operand = register_file_->values[and_mask & 0x1FFF].u32;  // & reg
  }
  value &= and_operand;

  WriteRegister(packet_ptr, target_index, value);
  return true;
}
|
||||
|
||||
// PM4_COND_WRITE: conditional write to memory or register.
// Payload words, in order: wait_info, poll address/register, reference value,
// mask, write address/register, write data. wait_info bit 4 selects a memory
// (set) or register (clear) source, bits 0-2 the comparison applied to
// (value & mask), and bit 8 a memory (set) or register (clear) destination.
bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader,
                                                     uint32_t packet_ptr,
                                                     uint32_t packet,
                                                     uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE", packet_ptr, packet);
  reader->TraceData(count);
  const uint32_t wait_info = reader->Read();
  uint32_t poll_reg_addr = reader->Read();
  const uint32_t ref = reader->Read();
  const uint32_t mask = reader->Read();
  uint32_t write_reg_addr = reader->Read();
  uint32_t write_data = reader->Read();

  // Fetch the value to test.
  uint32_t value;
  if (wait_info & 0x10) {
    // Memory: low two address bits carry the endianness.
    auto endianness = static_cast<Endian>(poll_reg_addr & 0x3);
    poll_reg_addr &= ~0x3;
    value =
        poly::load<uint32_t>(membase_ + GpuToCpu(packet_ptr, poll_reg_addr));
    value = GpuSwap(value, endianness);
  } else {
    // Register.
    assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
    value = register_file_->values[poll_reg_addr].u32;
  }

  // Evaluate the condition.
  const uint32_t masked_value = value & mask;
  bool matched;
  switch (wait_info & 0x7) {
    case 0x1:  // Less than reference.
      matched = masked_value < ref;
      break;
    case 0x2:  // Less than or equal to reference.
      matched = masked_value <= ref;
      break;
    case 0x3:  // Equal to reference.
      matched = masked_value == ref;
      break;
    case 0x4:  // Not equal to reference.
      matched = masked_value != ref;
      break;
    case 0x5:  // Greater than or equal to reference.
      matched = masked_value >= ref;
      break;
    case 0x6:  // Greater than reference.
      matched = masked_value > ref;
      break;
    case 0x7:  // Always
      matched = true;
      break;
    default:  // 0x0: Never.
      matched = false;
      break;
  }
  if (!matched) {
    return true;
  }

  // Write.
  if (wait_info & 0x100) {
    // Memory: low two address bits carry the endianness.
    auto endianness = static_cast<Endian>(write_reg_addr & 0x3);
    write_reg_addr &= ~0x3;
    write_data = GpuSwap(write_data, endianness);
    poly::store(membase_ + GpuToCpu(packet_ptr, write_reg_addr), write_data);
  } else {
    // Register.
    WriteRegister(packet_ptr, write_reg_addr, write_data);
  }
  return true;
}
|
||||
|
||||
// PM4_EVENT_WRITE: generate an event that creates a write to memory when
// completed. Unimplemented: the payload is consumed and nothing is written.
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader,
                                                      uint32_t packet_ptr,
                                                      uint32_t packet,
                                                      uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)", packet_ptr,
            packet);
  reader->TraceData(count);
  // First word is the initiator; it is read but currently unused.
  const uint32_t initiator = reader->Read();
  (void)initiator;
  if (count != 1) {
    // Write to an address - not handled; skip the rest of the payload.
    assert_always();
    reader->Advance(count - 1);
  }
  // Otherwise: just an event flag? Where does this write?
  return true;
}
|
||||
|
||||
// PM4_EVENT_WRITE_SHD: generate a VS|PS_done event.
// Payload: initiator, address, value. The low 6 bits of the initiator are
// written to VGT_EVENT_INITIATOR, then either `value` or counter_ (when
// initiator bit 31 is set) is stored to `address`, whose low two bits select
// the endianness of the store.
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD(
    RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
    uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", packet_ptr, packet);
  reader->TraceData(count);
  const uint32_t initiator = reader->Read();
  uint32_t address = reader->Read();
  const uint32_t value = reader->Read();

  // Writeback initiator.
  WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F);

  // Default is to write the supplied value; bit 31 switches to the counter
  // (GPU vblank counter?).
  uint32_t data_value = value;
  const bool write_counter = ((initiator >> 31) & 0x1) != 0;
  if (write_counter) {
    data_value = counter_;
  }

  const auto endianness = static_cast<Endian>(address & 0x3);
  address &= ~0x3;
  data_value = GpuSwap(data_value, endianness);
  poly::store(membase_ + GpuToCpu(address), data_value);
  return true;
}
|
||||
|
||||
// PM4_DRAW_INDX: initiate fetch of index buffer and draw.
// Currently only decodes the packet: the fields are parsed (and the payload
// consumed) but no draw is issued. The intended driver hookup is kept below
// as a commented-out sketch.
bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader,
                                                    uint32_t packet_ptr,
                                                    uint32_t packet,
                                                    uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX", packet_ptr, packet);
  reader->TraceData(count);
  // dword0 = viz query info
  uint32_t dword0 = reader->Read();
  uint32_t dword1 = reader->Read();
  uint32_t index_count = dword1 >> 16;
  auto prim_type = static_cast<PrimitiveType>(dword1 & 0x3F);
  const uint32_t src_sel = (dword1 >> 6) & 0x3;
  switch (src_sel) {
    case 0x0: {
      // Indexed draw: two more words give the index buffer base and size.
      uint32_t index_base = reader->Read();
      uint32_t index_size = reader->Read();
      auto endianness = static_cast<Endian>(index_size >> 30);
      index_size &= 0x00FFFFFF;
      bool index_32bit = (dword1 >> 11) & 0x1;
      // Convert the index count into a byte size.
      index_size *= index_32bit ? 4 : 2;
    } break;
    case 0x2:
      // Auto draw.
      break;
    default:
      // Unknown source select.
      assert_always();
      break;
  }
  // if (!driver_->PrepareDraw(draw_command_)) {
  //   draw_command_.prim_type = prim_type;
  //   draw_command_.start_index = 0;
  //   draw_command_.index_count = index_count;
  //   draw_command_.base_vertex = 0;
  //   if (src_sel == 0x0) {
  //     // Indexed draw.
  //     // TODO(benvanik): detect subregions of larger index buffers!
  //     driver_->PrepareDrawIndexBuffer(
  //         draw_command_, index_base, index_size, endianness,
  //         index_32bit ? INDEX_FORMAT_32BIT : INDEX_FORMAT_16BIT);
  //   } else if (src_sel == 0x2) {
  //     // Auto draw.
  //     draw_command_.index_buffer = nullptr;
  //   } else {
  //     // Unknown source select.
  //     assert_always();
  //   }
  //   driver_->Draw(draw_command_);
  // } else {
  //   if (src_sel == 0x0) {
  //     reader->Advance(2);  // skip
  //   }
  // }
  return true;
}
|
||||
|
||||
// PM4_DRAW_INDX_2: draw using supplied indices in packet.
// Currently only decodes the header word and skips the rest of the payload;
// no draw is issued. The intended driver hookup is kept below as a
// commented-out sketch.
bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader,
                                                      uint32_t packet_ptr,
                                                      uint32_t packet,
                                                      uint32_t count) {
  XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", packet_ptr, packet);
  reader->TraceData(count);
  const uint32_t dword0 = reader->Read();
  const uint32_t index_count = dword0 >> 16;
  auto prim_type = static_cast<PrimitiveType>(dword0 & 0x3F);
  const uint32_t src_sel = (dword0 >> 6) & 0x3;
  assert_true(src_sel == 0x2);  // 'SrcSel=AutoIndex'
  const bool index_32bit = (dword0 >> 11) & 0x1;
  const uint32_t bytes_per_index = index_32bit ? 4 : 2;
  const uint32_t indices_size = index_count * bytes_per_index;
  // Sanity check that the inline index data fits in what is left to read.
  reader->CheckRead(indices_size / sizeof(uint32_t));
  /*if (!driver_->PrepareDraw(draw_command_)) {
    draw_command_.prim_type = prim_type;
    draw_command_.start_index = 0;
    draw_command_.index_count = index_count;
    draw_command_.base_vertex = 0;
    draw_command_.index_buffer = nullptr;
    driver_->Draw(draw_command_);
  }*/
  // One word (dword0) was read above; skip the remaining payload.
  reader->Advance(count - 1);
  return true;
}
|
||||
|
||||
// Handles PM4_SET_CONSTANT: writes the packet's payload dwords into
// consecutive registers.
//   reader:     ringbuffer cursor; all `count` payload dwords are consumed.
//   packet_ptr: packet address, used for tracing and passed to WriteRegister.
//   packet:     raw packet dword, used only for tracing.
//   count:      payload dword count, including the leading offset/type dword.
// Always returns true.
bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader,
                                                       uint32_t packet_ptr,
                                                       uint32_t packet,
                                                       uint32_t count) {
  // load constant into chip and to memory
  XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT", packet_ptr, packet);
  // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
  // reg - 0x2000
  uint32_t offset_type = reader->Read();
  uint32_t index = offset_type & 0x7FF;
  uint32_t type = (offset_type >> 16) & 0xFF;
  switch (type) {
    case 0x4:  // REGISTER
      index += 0x2000;  // registers
      for (uint32_t n = 0; n < count - 1; n++, index++) {
        uint32_t data = reader->Read();
        const char* reg_name = register_file_->GetRegisterName(index);
        XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr + (1 + n) * 4, data,
                  index, reg_name ? reg_name : "");
        WriteRegister(packet_ptr, index, data);
      }
      break;
    default:
      assert_always();
      // Unknown constant type: still consume the rest of the payload so the
      // reader stays aligned on the next packet when asserts are compiled
      // out.
      reader->Advance(count - 1);
      break;
  }
  return true;
}
|
||||
|
||||
// Handles PM4_LOAD_ALU_CONSTANT: copies a run of dwords from guest memory
// into consecutive registers starting at 0x4000 + the packet's index field.
// Three payload dwords are read: source address, offset/type, and size.
// Always returns true.
bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT(
    RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
    uint32_t count) {
  // load constants from memory
  XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT", packet_ptr, packet);

  const uint32_t source_address = reader->Read() & 0x3FFFFFFF;
  const uint32_t offset_type = reader->Read();
  const uint32_t dword_count = reader->Read() & 0xFFF;

  // 0x4000 is the base of the ALU constant registers.
  const uint32_t first_register = (offset_type & 0x7FF) + 0x4000;

  for (uint32_t i = 0; i != dword_count; ++i) {
    const uint32_t target = first_register + i;
    // Guest memory is byte-swapped relative to the host.
    const uint32_t value = poly::load_and_swap<uint32_t>(
        membase_ + GpuToCpu(packet_ptr, source_address + i * 4));
    const char* target_name = register_file_->GetRegisterName(target);
    XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr, value, target,
              target_name ? target_name : "");
    WriteRegister(packet_ptr, target, value);
  }
  return true;
}
|
||||
|
||||
// Handles PM4_IM_LOAD: a shader load that references the shader code by
// address. The load itself is still disabled (see the commented-out call),
// so this only decodes the two payload dwords. Always returns true.
bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader,
                                                  uint32_t packet_ptr,
                                                  uint32_t packet,
                                                  uint32_t count) {
  // load sequencer instruction memory (pointer-based)
  XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD", packet_ptr, packet);
  reader->TraceData(count);

  // Dword 0: shader type in the low two bits, address in the rest.
  const uint32_t addr_type = reader->Read();
  auto shader_type = static_cast<ShaderType>(addr_type & 0x3);
  uint32_t addr = addr_type & ~0x3;

  // Dword 1: start offset in the high half, size (in dwords) in the low half.
  const uint32_t start_size = reader->Read();
  uint32_t start = start_size >> 16;
  uint32_t size = start_size & 0xFFFF;  // dwords
  assert_true(start == 0);

  /*driver_->LoadShader(shader_type,
                        GpuToCpu(packet_ptr, addr), size * 4, start);*/
  return true;
}
|
||||
|
||||
// Handles PM4_IM_LOAD_IMMEDIATE: a shader load whose code is embedded in the
// packet. The load itself is still disabled (see the commented-out call), so
// this decodes the two header dwords and skips over the embedded code.
// Always returns true.
bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE(
    RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
    uint32_t count) {
  // load sequencer instruction memory (code embedded in packet)
  XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet_ptr, packet);
  reader->TraceData(count);

  const uint32_t type_dword = reader->Read();
  const uint32_t start_size = reader->Read();
  auto shader_type = static_cast<ShaderType>(type_dword);

  // Start offset in the high half, size (in dwords) in the low half.
  uint32_t start = start_size >> 16;
  uint32_t size = start_size & 0xFFFF;  // dwords
  assert_true(start == 0);

  // TODO(benvanik): figure out if this could wrap.
  reader->CheckRead(size);
  /*driver_->LoadShader(shader_type, reader->ptr(), size * 4,
                        start);*/
  reader->Advance(size);
  return true;
}
|
||||
|
||||
// Handles PM4_INVALIDATE_STATE. The invalidation itself is still disabled
// (see the commented-out call); the mask dword is read and discarded.
// Always returns true.
bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE(
    RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
    uint32_t count) {
  // selective invalidation of state pointers
  XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", packet_ptr, packet);
  reader->TraceData(count);

  // Consume the mask so the reader moves past this packet's payload.
  uint32_t mask = reader->Read();
  // driver_->InvalidateState(mask);
  return true;
}
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
|
@ -0,0 +1,138 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2014 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_GL4_COMMAND_PROCESSOR_H_
|
||||
#define XENIA_GPU_GL4_COMMAND_PROCESSOR_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include <thread>
|
||||
|
||||
#include <xenia/gpu/register_file.h>
|
||||
#include <xenia/gpu/xenos.h>
|
||||
#include <xenia/memory.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
class GL4GraphicsSystem;
|
||||
|
||||
class CommandProcessor {
|
||||
public:
|
||||
CommandProcessor(GL4GraphicsSystem* graphics_system);
|
||||
~CommandProcessor();
|
||||
|
||||
void set_swap_handler(std::function<void()> fn) { swap_handler_ = fn; }
|
||||
|
||||
uint64_t QueryTime();
|
||||
uint32_t counter() const { return counter_; }
|
||||
void increment_counter() { counter_++; }
|
||||
|
||||
void Initialize(uint32_t ptr, uint32_t page_count);
|
||||
void Shutdown();
|
||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
|
||||
|
||||
void UpdateWritePointer(uint32_t value);
|
||||
|
||||
private:
|
||||
class RingbufferReader;
|
||||
|
||||
void WorkerMain();
|
||||
|
||||
void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
|
||||
void MakeCoherent();
|
||||
|
||||
void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
|
||||
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
|
||||
bool ExecutePacket(RingbufferReader* reader);
|
||||
bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_NOP(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_INDIRECT_BUFFER(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_EVENT_WRITE_SHD(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_LOAD_ALU_CONSTANT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_IM_LOAD_IMMEDIATE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_INVALIDATE_STATE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
|
||||
Memory* memory_;
|
||||
uint8_t* membase_;
|
||||
GL4GraphicsSystem* graphics_system_;
|
||||
RegisterFile* register_file_;
|
||||
|
||||
std::thread worker_thread_;
|
||||
std::atomic<bool> worker_running_;
|
||||
|
||||
std::function<void()> swap_handler_;
|
||||
|
||||
uint64_t time_base_;
|
||||
uint32_t counter_;
|
||||
|
||||
uint32_t primary_buffer_ptr_;
|
||||
uint32_t primary_buffer_size_;
|
||||
|
||||
uint32_t read_ptr_index_;
|
||||
uint32_t read_ptr_update_freq_;
|
||||
uint32_t read_ptr_writeback_ptr_;
|
||||
|
||||
HANDLE write_ptr_index_event_;
|
||||
std::atomic<uint32_t> write_ptr_index_;
|
||||
};
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_GL4_COMMAND_PROCESSOR_H_
|
|
@ -9,26 +9,148 @@
|
|||
|
||||
#include <xenia/gpu/gl4/gl4_graphics_system.h>
|
||||
|
||||
#include <poly/threading.h>
|
||||
#include <xenia/cpu/processor.h>
|
||||
#include <xenia/gpu/gpu-private.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
// Constructs the graphics system with null timer handles; the timer queue
// and vsync timer are created later in Setup().
GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator)
    : GraphicsSystem(emulator), timer_queue_(nullptr), vsync_timer_(nullptr) {}
|
||||
|
||||
// Defaulted destructor; the timer handles, command processor, and control
// are released explicitly in Shutdown().
GL4GraphicsSystem::~GL4GraphicsSystem() = default;
|
||||
|
||||
// Brings up the GL4 backend: base-class setup, the WGL rendering control
// (created on the UI loop), the command processor, the MMIO register range,
// and the vsync timer.
// Returns the base class's status if that fails, X_STATUS_SUCCESS otherwise.
X_STATUS GL4GraphicsSystem::Setup() {
  auto result = GraphicsSystem::Setup();
  if (result) {
    return result;
  }

  // Create rendering control.
  // This must happen on the UI thread.
  poly::threading::Fence control_ready_fence;
  auto loop = emulator_->main_window()->loop();
  // The lambda captures the fence and `loop` by reference; this relies on
  // Wait() below not returning until the lambda has called Signal(), so both
  // locals outlive it.
  loop->Post([&]() {
    control_ = std::make_unique<WGLControl>(loop);
    emulator_->main_window()->AddChild(control_.get());
    control_ready_fence.Signal();
  });
  control_ready_fence.Wait();

  // Create command processor. This will spin up a thread to process all
  // incoming ringbuffer packets.
  command_processor_ = std::make_unique<CommandProcessor>(this);
  command_processor_->set_swap_handler(
      std::bind(&GL4GraphicsSystem::SwapHandler, this));

  // Let the processor know we want register access callbacks.
  emulator_->memory()->AddMappedRange(
      0x7FC80000, 0xFFFF0000, 0x0000FFFF, this,
      reinterpret_cast<cpu::MMIOReadCallback>(MMIOReadRegisterThunk),
      reinterpret_cast<cpu::MMIOWriteCallback>(MMIOWriteRegisterThunk));

  // 60hz vsync timer (16ms initial delay, 16ms period).
  timer_queue_ = CreateTimerQueue();
  CreateTimerQueueTimer(&vsync_timer_, timer_queue_,
                        (WAITORTIMERCALLBACK)VsyncCallbackThunk, this, 16, 16,
                        WT_EXECUTEINTIMERTHREAD);

  return X_STATUS_SUCCESS;
}
|
||||
|
||||
void GL4GraphicsSystem::Shutdown() {
|
||||
DeleteTimerQueueTimer(timer_queue_, vsync_timer_, nullptr);
|
||||
DeleteTimerQueue(timer_queue_);
|
||||
|
||||
command_processor_->Shutdown();
|
||||
|
||||
// TODO(benvanik): remove mapped range.
|
||||
|
||||
command_processor_.reset();
|
||||
control_.reset();
|
||||
|
||||
GraphicsSystem::Shutdown();
|
||||
}
|
||||
|
||||
// Hands the ring buffer location and page count to the command processor.
void GL4GraphicsSystem::InitializeRingBuffer(uint32_t ptr,
                                             uint32_t page_count) {
  auto* processor = command_processor_.get();
  processor->Initialize(ptr, page_count);
}
|
||||
|
||||
// Hands the read-pointer writeback address and block size to the command
// processor.
void GL4GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,
                                                   uint32_t block_size) {
  auto* processor = command_processor_.get();
  processor->EnableReadPointerWriteBack(ptr, block_size);
}
|
||||
|
||||
void GL4GraphicsSystem::MarkVblank() {
|
||||
static bool thread_name_set = false;
|
||||
if (!thread_name_set) {
|
||||
thread_name_set = true;
|
||||
Profiler::ThreadEnter("GL4 Vsync Timer");
|
||||
}
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// Increment vblank counter (so the game sees us making progress).
|
||||
command_processor_->increment_counter();
|
||||
|
||||
// TODO(benvanik): we shouldn't need to do the dispatch here, but there's
|
||||
// something wrong and the CP will block waiting for code that
|
||||
// needs to be run in the interrupt.
|
||||
DispatchInterruptCallback(0, 2);
|
||||
}
|
||||
|
||||
// Swap callback installed on the command processor in Setup(): repaints the
// control, then marks a vblank.
void GL4GraphicsSystem::SwapHandler() {
  SCOPE_profile_cpu_f("gpu");

  // Swap requested. Synchronously post a request to the loop so that
  // we do the swap in the right thread.
  auto* target = control_.get();
  target->SynchronousRepaint();

  // Roll over vblank.
  MarkVblank();
}
|
||||
|
||||
uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) {
|
||||
uint32_t r = addr & 0xFFFF;
|
||||
if (FLAGS_trace_ring_buffer) {
|
||||
XELOGGPU("ReadRegister(%.4X)", r);
|
||||
}
|
||||
|
||||
switch (r) {
|
||||
case 0x6530: // ????
|
||||
return 1;
|
||||
case 0x6544: // ? vblank pending?
|
||||
return 1;
|
||||
case 0x6584: // ????
|
||||
return 1;
|
||||
}
|
||||
|
||||
assert_true(r >= 0 && r < RegisterFile::kRegisterCount);
|
||||
return register_file_.values[r].u32;
|
||||
}
|
||||
|
||||
void GL4GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) {
|
||||
uint32_t r = addr & 0xFFFF;
|
||||
if (FLAGS_trace_ring_buffer) {
|
||||
XELOGGPU("WriteRegister(%.4X, %.8X)", r, value);
|
||||
}
|
||||
|
||||
switch (r) {
|
||||
case 0x0714: // CP_RB_WPTR
|
||||
command_processor_->UpdateWritePointer(static_cast<uint32_t>(value));
|
||||
break;
|
||||
case 0x6110: // ? swap related?
|
||||
XELOGW("Unimplemented GPU register %.4X write: %.8X", r, value);
|
||||
return;
|
||||
default:
|
||||
XELOGW("Unknown GPU register %.4X write: %.8X", r, value);
|
||||
break;
|
||||
}
|
||||
|
||||
assert_true(r >= 0 && r < RegisterFile::kRegisterCount);
|
||||
register_file_.values[r].u32 = static_cast<uint32_t>(value);
|
||||
}
|
||||
|
||||
} // namespace gl4
|
||||
|
|
|
@ -13,8 +13,10 @@
|
|||
#include <memory>
|
||||
|
||||
#include <xenia/common.h>
|
||||
#include <xenia/gpu/gl4/command_processor.h>
|
||||
#include <xenia/gpu/gl4/wgl_control.h>
|
||||
#include <xenia/gpu/graphics_system.h>
|
||||
#include <xenia/gpu/register_file.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -28,8 +30,34 @@ class GL4GraphicsSystem : public GraphicsSystem {
|
|||
X_STATUS Setup() override;
|
||||
void Shutdown() override;
|
||||
|
||||
// Returns a non-owning pointer to the register file owned by this object.
RegisterFile* register_file() { return &register_file_; }
|
||||
|
||||
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) override;
|
||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) override;
|
||||
|
||||
private:
|
||||
void MarkVblank();
|
||||
void SwapHandler();
|
||||
uint64_t ReadRegister(uint64_t addr);
|
||||
void WriteRegister(uint64_t addr, uint64_t value);
|
||||
|
||||
// Static trampoline registered as the MMIO read callback in Setup(); forwards
// to the instance's ReadRegister().
static uint64_t MMIOReadRegisterThunk(GL4GraphicsSystem* gs, uint64_t addr) {
  const uint64_t register_value = gs->ReadRegister(addr);
  return register_value;
}
|
||||
// Static trampoline registered as the MMIO write callback in Setup();
// forwards to the instance's WriteRegister().
static void MMIOWriteRegisterThunk(GL4GraphicsSystem* gs, uint64_t addr,
                                   uint64_t value) {
  GL4GraphicsSystem& self = *gs;
  self.WriteRegister(addr, value);
}
|
||||
// Timer-queue callback for the vsync timer created in Setup(). The signature
// matches WAITORTIMERCALLBACK exactly (void*, BOOLEAN), so the function
// pointer cast at the registration site is an identity conversion rather
// than a call through an incompatible function type.
//   context: the GL4GraphicsSystem passed as the timer parameter.
static void __stdcall VsyncCallbackThunk(void* context, BOOLEAN) {
  auto gs = static_cast<GL4GraphicsSystem*>(context);
  gs->MarkVblank();
}
|
||||
|
||||
RegisterFile register_file_;
|
||||
std::unique_ptr<CommandProcessor> command_processor_;
|
||||
std::unique_ptr<WGLControl> control_;
|
||||
|
||||
HANDLE timer_queue_;
|
||||
HANDLE vsync_timer_;
|
||||
};
|
||||
|
||||
} // namespace gl4
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
# Copyright 2014 Ben Vanik. All Rights Reserved.
|
||||
{
|
||||
'sources': [
|
||||
'command_processor.cc',
|
||||
'command_processor.h',
|
||||
'gl4_gpu-private.h',
|
||||
'gl4_gpu.cc',
|
||||
'gl4_gpu.h',
|
||||
|
|
|
@ -9,14 +9,17 @@
|
|||
|
||||
#include <xenia/gpu/gl4/wgl_control.h>
|
||||
|
||||
#include <poly/assert.h>
|
||||
#include <poly/logging.h>
|
||||
#include <xenia/profiling.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
// Constructs a self-painting control (kFlagOwnPaint) with no associated
// loop. loop_ is set to nullptr so it never holds an indeterminate value.
WGLControl::WGLControl()
    : poly::ui::win32::Win32Control(Flags::kFlagOwnPaint), loop_(nullptr) {}
|
||||
WGLControl::WGLControl(poly::ui::Loop* loop)
|
||||
: poly::ui::win32::Win32Control(Flags::kFlagOwnPaint),
|
||||
loop_(loop) {}
|
||||
|
||||
// Defaulted destructor; members clean up through their own destructors.
WGLControl::~WGLControl() = default;
|
||||
|
||||
|
@ -68,9 +71,7 @@ bool WGLControl::Create() {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Layout handler: delegates to Control::ResizeToFill(). The event argument
// is not used.
void WGLControl::OnLayout(poly::ui::UIEvent& e) { Control::ResizeToFill(); }
|
||||
|
||||
LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
|
||||
LPARAM lParam) {
|
||||
|
@ -78,14 +79,23 @@ LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
|
|||
case WM_PAINT:
|
||||
context_.MakeCurrent();
|
||||
glViewport(0, 0, width_, height_);
|
||||
glClearColor(1.0f, 0, 0, 1.0f);
|
||||
glClearColor(rand() / (float)RAND_MAX, 1.0f, 0, 1.0f);
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
// TODO(benvanik): profiler present.
|
||||
// Profiler::Present();
|
||||
SwapBuffers(context_.dc());
|
||||
return 0;
|
||||
break;
|
||||
}
|
||||
return Win32Control::WndProc(hWnd, message, wParam, lParam);
|
||||
}
|
||||
|
||||
void WGLControl::SynchronousRepaint() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
// This will not return until the WM_PAINT has completed.
|
||||
RedrawWindow(hwnd(), nullptr, nullptr,
|
||||
RDW_INTERNALPAINT | RDW_UPDATENOW | RDW_ALLCHILDREN);
|
||||
}
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
#ifndef XENIA_GPU_GL4_WGL_CONTROL_H_
|
||||
#define XENIA_GPU_GL4_WGL_CONTROL_H_
|
||||
|
||||
#include <poly/threading.h>
|
||||
#include <poly/ui/loop.h>
|
||||
#include <poly/ui/win32/win32_control.h>
|
||||
#include <xenia/gpu/gl4/gl_context.h>
|
||||
|
||||
|
@ -19,11 +21,13 @@ namespace gl4 {
|
|||
|
||||
class WGLControl : public poly::ui::win32::Win32Control {
|
||||
public:
|
||||
WGLControl();
|
||||
WGLControl(poly::ui::Loop* loop);
|
||||
~WGLControl() override;
|
||||
|
||||
// Returns a non-owning pointer to the GL context embedded in this control.
GLContext* context() { return &context_; }
|
||||
|
||||
void SynchronousRepaint();
|
||||
|
||||
protected:
|
||||
bool Create() override;
|
||||
|
||||
|
@ -33,6 +37,7 @@ class WGLControl : public poly::ui::win32::Win32Control {
|
|||
LPARAM lParam) override;
|
||||
|
||||
private:
|
||||
poly::ui::Loop* loop_;
|
||||
GLContext context_;
|
||||
};
|
||||
|
||||
|
|
|
@ -13,15 +13,15 @@
|
|||
// TODO(benvanik): based on platform.
|
||||
#include <xenia/gpu/gl4/gl4_gpu.h>
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
DEFINE_string(gpu, "any", "Graphics system. Use: [any, gl4]");
|
||||
|
||||
DEFINE_bool(trace_ring_buffer, false, "Trace GPU ring buffer packets.");
|
||||
DEFINE_string(dump_shaders, "",
|
||||
"Path to write GPU shaders to as they are compiled.");
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
std::unique_ptr<GraphicsSystem> Create(Emulator* emulator) {
|
||||
if (FLAGS_gpu.compare("gl4") == 0) {
|
||||
return xe::gpu::gl4::Create(emulator);
|
||||
|
|
|
@ -40,15 +40,6 @@ void GraphicsSystem::SetInterruptCallback(uint32_t callback,
|
|||
XELOGGPU("SetInterruptCallback(%.4X, %.4X)", callback, user_data);
|
||||
}
|
||||
|
||||
// Base-class placeholder: does nothing and ignores both arguments.
void GraphicsSystem::InitializeRingBuffer(uint32_t ptr,
                                          uint32_t page_count) {
  // Intentionally empty.
}
|
||||
|
||||
// Base-class placeholder: does nothing and ignores both arguments.
void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,
                                                uint32_t block_size) {
  // Intentionally empty.
}
|
||||
|
||||
void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) {
|
||||
// Pick a CPU, if needed. We're going to guess 2. Because.
|
||||
if (cpu == 0xFFFFFFFF) {
|
||||
|
|
|
@ -32,14 +32,14 @@ class GraphicsSystem {
|
|||
virtual void Shutdown();
|
||||
|
||||
void SetInterruptCallback(uint32_t callback, uint32_t user_data);
|
||||
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count);
|
||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
|
||||
virtual void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) = 0;
|
||||
virtual void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) = 0;
|
||||
|
||||
void DispatchInterruptCallback(uint32_t source, uint32_t cpu);
|
||||
|
||||
protected:
|
||||
GraphicsSystem(Emulator* emulator);
|
||||
|
||||
void DispatchInterruptCallback(uint32_t source, uint32_t cpu);
|
||||
|
||||
Emulator* emulator_;
|
||||
Memory* memory_;
|
||||
cpu::Processor* processor_;
|
||||
|
|
Loading…
Reference in New Issue