GPU recording (--trace_gpu=file) and playback (gpu-trace-viewer file).
This commit is contained in:
parent
c4aeedd0a3
commit
130c11a2ca
|
@ -18,8 +18,8 @@ namespace poly {
|
|||
class MappedMemory {
|
||||
public:
|
||||
enum class Mode {
|
||||
READ,
|
||||
READ_WRITE,
|
||||
kRead,
|
||||
kReadWrite,
|
||||
};
|
||||
|
||||
virtual ~MappedMemory() = default;
|
||||
|
|
|
@ -45,14 +45,14 @@ std::unique_ptr<MappedMemory> MappedMemory::Open(const std::wstring& path,
|
|||
DWORD mapping_protect = 0;
|
||||
DWORD view_access = 0;
|
||||
switch (mode) {
|
||||
case Mode::READ:
|
||||
case Mode::kRead:
|
||||
file_access |= GENERIC_READ;
|
||||
file_share |= FILE_SHARE_READ;
|
||||
create_mode |= OPEN_EXISTING;
|
||||
mapping_protect |= PAGE_READONLY;
|
||||
view_access |= FILE_MAP_READ;
|
||||
break;
|
||||
case Mode::READ_WRITE:
|
||||
case Mode::kReadWrite:
|
||||
file_access |= GENERIC_READ | GENERIC_WRITE;
|
||||
file_share |= 0;
|
||||
create_mode |= OPEN_EXISTING;
|
||||
|
|
|
@ -102,7 +102,7 @@ X_STATUS Emulator::Setup() {
|
|||
}
|
||||
|
||||
// Initialize the GPU.
|
||||
graphics_system_ = std::move(xe::gpu::Create(this));
|
||||
graphics_system_ = std::move(xe::gpu::Create());
|
||||
if (!graphics_system_) {
|
||||
return X_STATUS_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
@ -122,7 +122,8 @@ X_STATUS Emulator::Setup() {
|
|||
if (result) {
|
||||
return result;
|
||||
}
|
||||
result = graphics_system_->Setup();
|
||||
result = graphics_system_->Setup(processor_.get(), main_window_->loop(),
|
||||
main_window_.get());
|
||||
if (result) {
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -22,9 +22,6 @@
|
|||
|
||||
#include "third_party/xxhash/xxhash.h"
|
||||
|
||||
#define XETRACECP(fmt, ...) \
|
||||
if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__)
|
||||
|
||||
#define FINE_GRAINED_DRAW_SCOPES 1
|
||||
|
||||
namespace xe {
|
||||
|
@ -56,6 +53,7 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system)
|
|||
membase_(graphics_system->memory()->membase()),
|
||||
graphics_system_(graphics_system),
|
||||
register_file_(graphics_system_->register_file()),
|
||||
trace_writer_(graphics_system->memory()->membase()),
|
||||
worker_running_(true),
|
||||
time_base_(0),
|
||||
counter_(0),
|
||||
|
@ -94,6 +92,8 @@ uint64_t CommandProcessor::QueryTime() {
|
|||
bool CommandProcessor::Initialize(std::unique_ptr<GLContext> context) {
|
||||
context_ = std::move(context);
|
||||
|
||||
pending_fn_event_ = CreateEvent(nullptr, TRUE, FALSE, nullptr);
|
||||
|
||||
worker_running_ = true;
|
||||
worker_thread_ = std::thread([this]() {
|
||||
poly::threading::set_name("GL4 Worker");
|
||||
|
@ -106,6 +106,8 @@ bool CommandProcessor::Initialize(std::unique_ptr<GLContext> context) {
|
|||
}
|
||||
|
||||
void CommandProcessor::Shutdown() {
|
||||
EndTracing();
|
||||
|
||||
worker_running_ = false;
|
||||
SetEvent(write_ptr_index_event_);
|
||||
worker_thread_.join();
|
||||
|
@ -115,6 +117,22 @@ void CommandProcessor::Shutdown() {
|
|||
shader_cache_.clear();
|
||||
|
||||
context_.reset();
|
||||
|
||||
CloseHandle(pending_fn_event_);
|
||||
}
|
||||
|
||||
// Begins GPU trace recording by opening trace_writer_ on a file named
// "gpu_trace" located under root_path.
void CommandProcessor::BeginTracing(const std::wstring& root_path) {
|
||||
// Fixed file name joined onto the caller-supplied root directory.
std::wstring path = poly::join_paths(root_path, L"gpu_trace");
|
||||
// NOTE(review): nothing returned by Open() is inspected here - confirm
// whether it can report failure and whether callers need to know.
trace_writer_.Open(path);
|
||||
}
|
||||
|
||||
// Ends GPU trace recording by closing trace_writer_. Shutdown() calls this
// before it stops the worker thread.
void CommandProcessor::EndTracing() { trace_writer_.Close(); }
|
||||
|
||||
// Hands fn to the command processor worker thread and blocks the caller until
// the worker has run it.
//
// The callback is stored in pending_fn_; WorkerMain() moves it out, invokes
// it, and then signals pending_fn_event_, which releases the wait below.
// NOTE(review): pending_fn_ is written here and read/cleared on the worker
// thread with no synchronization visible in this view - confirm this is safe.
void CommandProcessor::CallInThread(std::function<void()> fn) {
|
||||
// A previous callback must not still be pending when a new one is posted.
assert_null(pending_fn_);
|
||||
pending_fn_ = std::move(fn);
|
||||
// Block (no timeout) until the worker signals that fn() has returned.
WaitForSingleObject(pending_fn_event_, INFINITE);
|
||||
// The event is created as manual-reset in Initialize(), so it has to be
// cleared explicitly before the next call can wait on it.
ResetEvent(pending_fn_event_);
|
||||
}
|
||||
|
||||
void CommandProcessor::WorkerMain() {
|
||||
|
@ -125,6 +143,13 @@ void CommandProcessor::WorkerMain() {
|
|||
}
|
||||
|
||||
while (worker_running_) {
|
||||
if (pending_fn_) {
|
||||
auto fn = std::move(pending_fn_);
|
||||
pending_fn_ = nullptr;
|
||||
fn();
|
||||
SetEvent(pending_fn_event_);
|
||||
}
|
||||
|
||||
uint32_t write_ptr_index = write_ptr_index_.load();
|
||||
if (write_ptr_index == 0xBAADF00D || read_ptr_index_ == write_ptr_index) {
|
||||
SCOPE_profile_cpu_i("gpu", "xe::gpu::gl4::CommandProcessor::Stall");
|
||||
|
@ -140,15 +165,15 @@ void CommandProcessor::WorkerMain() {
|
|||
SwitchToThread();
|
||||
MemoryBarrier();
|
||||
write_ptr_index = write_ptr_index_.load();
|
||||
} while (write_ptr_index == 0xBAADF00D ||
|
||||
read_ptr_index_ == write_ptr_index);
|
||||
} while (!pending_fn_ && (write_ptr_index == 0xBAADF00D ||
|
||||
read_ptr_index_ == write_ptr_index));
|
||||
// ReturnFromWait();
|
||||
if (pending_fn_) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
assert_true(read_ptr_index_ != write_ptr_index);
|
||||
|
||||
// Process the new commands.
|
||||
XETRACECP("Command processor thread work");
|
||||
|
||||
// Execute. Note that we handle wraparound transparently.
|
||||
ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
|
||||
read_ptr_index_ = write_ptr_index;
|
||||
|
@ -378,8 +403,7 @@ void CommandProcessor::UpdateWritePointer(uint32_t value) {
|
|||
SetEvent(write_ptr_index_event_);
|
||||
}
|
||||
|
||||
void CommandProcessor::WriteRegister(uint32_t packet_ptr, uint32_t index,
|
||||
uint32_t value) {
|
||||
void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
||||
RegisterFile* regs = register_file_;
|
||||
assert_true(index < RegisterFile::kRegisterCount);
|
||||
regs->values[index].u32 = value;
|
||||
|
@ -398,8 +422,8 @@ void CommandProcessor::WriteRegister(uint32_t packet_ptr, uint32_t index,
|
|||
// Enabled - write to address.
|
||||
uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
|
||||
uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
|
||||
poly::store_and_swap<uint32_t>(
|
||||
membase_ + xenos::GpuToCpu(primary_buffer_ptr_, mem_addr), value);
|
||||
poly::store_and_swap<uint32_t>(membase_ + xenos::GpuToCpu(mem_addr),
|
||||
value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -426,8 +450,8 @@ void CommandProcessor::MakeCoherent() {
|
|||
}
|
||||
|
||||
// TODO(benvanik): notify resource cache of base->size and type.
|
||||
XETRACECP("Make %.8X -> %.8X (%db) coherent", base_host,
|
||||
base_host + size_host, size_host);
|
||||
// XELOGD("Make %.8X -> %.8X (%db) coherent", base_host, base_host +
|
||||
// size_host, size_host);
|
||||
|
||||
// Mark coherent.
|
||||
status_host &= ~0x80000000ul;
|
||||
|
@ -437,6 +461,8 @@ void CommandProcessor::MakeCoherent() {
|
|||
void CommandProcessor::PrepareForWait() {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
trace_writer_.Flush();
|
||||
|
||||
// TODO(benvanik): fences and fancy stuff. We should figure out a way to
|
||||
// make interrupt callbacks from the GPU so that we don't have to do a full
|
||||
// synchronize here.
|
||||
|
@ -494,14 +520,6 @@ class CommandProcessor::RingbufferReader {
|
|||
|
||||
void Skip(uint32_t words) { Advance(words); }
|
||||
|
||||
void TraceData(uint32_t words) {
|
||||
for (uint32_t i = 0; i < words; ++i) {
|
||||
uint32_t i_ptr = ptr_ + i * sizeof(uint32_t);
|
||||
XETRACECP("[%.8X] %.8X", i_ptr,
|
||||
poly::load_and_swap<uint32_t>(membase_ + i_ptr));
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t* membase_;
|
||||
|
||||
|
@ -523,8 +541,7 @@ void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index,
|
|||
uint32_t end_ptr = primary_buffer_ptr_ + end_index * sizeof(uint32_t);
|
||||
end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF);
|
||||
|
||||
XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)", start_ptr, start_index,
|
||||
end_index);
|
||||
trace_writer_.WritePrimaryBufferStart(start_ptr, end_index - start_index);
|
||||
|
||||
// Execute commands!
|
||||
uint32_t ptr_mask = (primary_buffer_size_ / sizeof(uint32_t)) - 1;
|
||||
|
@ -537,13 +554,13 @@ void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index,
|
|||
assert_true(reader.offset() == (end_index - start_index));
|
||||
}
|
||||
|
||||
XETRACECP(" ExecutePrimaryBuffer End");
|
||||
trace_writer_.WritePrimaryBufferEnd();
|
||||
}
|
||||
|
||||
void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length);
|
||||
trace_writer_.WriteIndirectBufferStart(ptr, length / sizeof(uint32_t));
|
||||
|
||||
// Execute commands!
|
||||
uint32_t ptr_mask = 0;
|
||||
|
@ -553,29 +570,38 @@ void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) {
|
|||
ExecutePacket(&reader);
|
||||
}
|
||||
|
||||
XETRACECP(" ExecuteIndirectBuffer End");
|
||||
trace_writer_.WriteIndirectBufferEnd();
|
||||
}
|
||||
|
||||
// Executes the packets stored in the count 32-bit words that start at ptr.
// A RingbufferReader is set up over [ptr, ptr + count * 4) and every packet in
// that range is dispatched through ExecutePacket(RingbufferReader*).
void CommandProcessor::ExecutePacket(uint32_t ptr, uint32_t count) {
|
||||
// ExecuteIndirectBuffer also starts from a mask of 0; presumably this turns
// off ring wraparound in the reader - TODO confirm against RingbufferReader.
uint32_t ptr_mask = 0;
|
||||
RingbufferReader reader(membase_, primary_buffer_ptr_, ptr_mask, ptr,
|
||||
ptr + count * sizeof(uint32_t));
|
||||
// Keep dispatching until the reader reports nothing left to read. The bool
// result of each dispatch is not checked.
while (reader.can_read()) {
|
||||
ExecutePacket(&reader);
|
||||
}
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacket(RingbufferReader* reader) {
|
||||
RegisterFile* regs = register_file_;
|
||||
|
||||
uint32_t packet_ptr = reader->ptr();
|
||||
const uint32_t packet = reader->Read();
|
||||
const uint32_t packet_type = packet >> 30;
|
||||
if (packet == 0) {
|
||||
XETRACECP("[%.8X] Packet(%.8X): 0?", packet_ptr, packet);
|
||||
trace_writer_.WritePacketStart(reader->ptr() - 4, 1);
|
||||
trace_writer_.WritePacketEnd();
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (packet_type) {
|
||||
case 0x00:
|
||||
return ExecutePacketType0(reader, packet_ptr, packet);
|
||||
return ExecutePacketType0(reader, packet);
|
||||
case 0x01:
|
||||
return ExecutePacketType1(reader, packet_ptr, packet);
|
||||
return ExecutePacketType1(reader, packet);
|
||||
case 0x02:
|
||||
return ExecutePacketType2(reader, packet_ptr, packet);
|
||||
return ExecutePacketType2(reader, packet);
|
||||
case 0x03:
|
||||
return ExecutePacketType3(reader, packet_ptr, packet);
|
||||
return ExecutePacketType3(reader, packet);
|
||||
default:
|
||||
assert_unhandled_case(packet_type);
|
||||
return false;
|
||||
|
@ -583,75 +609,66 @@ bool CommandProcessor::ExecutePacket(RingbufferReader* reader) {
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType0(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
uint32_t packet) {
|
||||
// Type-0 packet.
|
||||
// Write count registers in sequence to the registers starting at
|
||||
// (base_index << 2).
|
||||
XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet);
|
||||
|
||||
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
||||
trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count);
|
||||
|
||||
uint32_t base_index = (packet & 0x7FFF);
|
||||
uint32_t write_one_reg = (packet >> 15) & 0x1;
|
||||
for (uint32_t m = 0; m < count; m++) {
|
||||
uint32_t reg_data = reader->Peek();
|
||||
uint32_t reg_data = reader->Read();
|
||||
uint32_t target_index = write_one_reg ? base_index : base_index + m;
|
||||
const char* reg_name = register_file_->GetRegisterName(target_index);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s", reader->ptr(), reg_data, target_index,
|
||||
reg_name ? reg_name : "");
|
||||
reader->Advance(1);
|
||||
WriteRegister(packet_ptr, target_index, reg_data);
|
||||
WriteRegister(target_index, reg_data);
|
||||
}
|
||||
|
||||
trace_writer_.WritePacketEnd();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType1(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
uint32_t packet) {
|
||||
// Type-1 packet.
|
||||
// Contains two registers of data. Type-0 should be more common.
|
||||
XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet);
|
||||
trace_writer_.WritePacketStart(reader->ptr() - 4, 3);
|
||||
uint32_t reg_index_1 = packet & 0x7FF;
|
||||
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
|
||||
uint32_t reg_ptr_1 = reader->ptr();
|
||||
uint32_t reg_data_1 = reader->Read();
|
||||
uint32_t reg_ptr_2 = reader->ptr();
|
||||
uint32_t reg_data_2 = reader->Read();
|
||||
const char* reg_name_1 = register_file_->GetRegisterName(reg_index_1);
|
||||
const char* reg_name_2 = register_file_->GetRegisterName(reg_index_2);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s", reg_ptr_1, reg_data_1, reg_index_1,
|
||||
reg_name_1 ? reg_name_1 : "");
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s", reg_ptr_2, reg_data_2, reg_index_2,
|
||||
reg_name_2 ? reg_name_2 : "");
|
||||
WriteRegister(packet_ptr, reg_index_1, reg_data_1);
|
||||
WriteRegister(packet_ptr, reg_index_2, reg_data_2);
|
||||
WriteRegister(reg_index_1, reg_data_1);
|
||||
WriteRegister(reg_index_2, reg_data_2);
|
||||
trace_writer_.WritePacketEnd();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType2(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
uint32_t packet) {
|
||||
// Type-2 packet.
|
||||
// No-op. Do nothing.
|
||||
XETRACECP("[%.8X] Packet(%.8X): padding", packet_ptr, packet);
|
||||
trace_writer_.WritePacketStart(reader->ptr() - 4, 1);
|
||||
trace_writer_.WritePacketEnd();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
uint32_t packet) {
|
||||
// Type-3 packet.
|
||||
uint32_t opcode = (packet >> 8) & 0x7F;
|
||||
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
||||
auto data_start_offset = reader->offset();
|
||||
|
||||
trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count);
|
||||
|
||||
// & 1 == predicate - when set, we do bin check to see if we should execute
|
||||
// the packet. Only type 3 packets are affected.
|
||||
if (packet & 1) {
|
||||
bool any_pass = (bin_select_ & bin_mask_) != 0;
|
||||
if (!any_pass) {
|
||||
XETRACECP("[%.8X] Packet(%.8X): SKIPPED (predicate fail)", packet_ptr,
|
||||
packet);
|
||||
reader->Skip(count);
|
||||
trace_writer_.WritePacketEnd();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -659,96 +676,78 @@ bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader,
|
|||
bool result = false;
|
||||
switch (opcode) {
|
||||
case PM4_ME_INIT:
|
||||
result = ExecutePacketType3_ME_INIT(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_ME_INIT(reader, packet, count);
|
||||
break;
|
||||
case PM4_NOP:
|
||||
result = ExecutePacketType3_NOP(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_NOP(reader, packet, count);
|
||||
break;
|
||||
case PM4_INTERRUPT:
|
||||
result = ExecutePacketType3_INTERRUPT(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_INTERRUPT(reader, packet, count);
|
||||
break;
|
||||
case PM4_XE_SWAP:
|
||||
result = ExecutePacketType3_XE_SWAP(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_XE_SWAP(reader, packet, count);
|
||||
break;
|
||||
case PM4_INDIRECT_BUFFER:
|
||||
result =
|
||||
ExecutePacketType3_INDIRECT_BUFFER(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_INDIRECT_BUFFER(reader, packet, count);
|
||||
break;
|
||||
case PM4_WAIT_REG_MEM:
|
||||
result =
|
||||
ExecutePacketType3_WAIT_REG_MEM(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_WAIT_REG_MEM(reader, packet, count);
|
||||
break;
|
||||
case PM4_REG_RMW:
|
||||
result = ExecutePacketType3_REG_RMW(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_REG_RMW(reader, packet, count);
|
||||
break;
|
||||
case PM4_COND_WRITE:
|
||||
result = ExecutePacketType3_COND_WRITE(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_COND_WRITE(reader, packet, count);
|
||||
break;
|
||||
case PM4_EVENT_WRITE:
|
||||
result =
|
||||
ExecutePacketType3_EVENT_WRITE(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_EVENT_WRITE(reader, packet, count);
|
||||
break;
|
||||
case PM4_EVENT_WRITE_SHD:
|
||||
result =
|
||||
ExecutePacketType3_EVENT_WRITE_SHD(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_EVENT_WRITE_SHD(reader, packet, count);
|
||||
break;
|
||||
case PM4_EVENT_WRITE_EXT:
|
||||
result =
|
||||
ExecutePacketType3_EVENT_WRITE_EXT(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_EVENT_WRITE_EXT(reader, packet, count);
|
||||
break;
|
||||
case PM4_DRAW_INDX:
|
||||
result = ExecutePacketType3_DRAW_INDX(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_DRAW_INDX(reader, packet, count);
|
||||
break;
|
||||
case PM4_DRAW_INDX_2:
|
||||
result =
|
||||
ExecutePacketType3_DRAW_INDX_2(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_DRAW_INDX_2(reader, packet, count);
|
||||
break;
|
||||
case PM4_SET_CONSTANT:
|
||||
result =
|
||||
ExecutePacketType3_SET_CONSTANT(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_SET_CONSTANT(reader, packet, count);
|
||||
break;
|
||||
case PM4_LOAD_ALU_CONSTANT:
|
||||
result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet_ptr, packet,
|
||||
count);
|
||||
result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet, count);
|
||||
break;
|
||||
case PM4_IM_LOAD:
|
||||
result = ExecutePacketType3_IM_LOAD(reader, packet_ptr, packet, count);
|
||||
result = ExecutePacketType3_IM_LOAD(reader, packet, count);
|
||||
break;
|
||||
case PM4_IM_LOAD_IMMEDIATE:
|
||||
result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet_ptr, packet,
|
||||
count);
|
||||
result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet, count);
|
||||
break;
|
||||
case PM4_INVALIDATE_STATE:
|
||||
result = ExecutePacketType3_INVALIDATE_STATE(reader, packet_ptr, packet,
|
||||
count);
|
||||
result = ExecutePacketType3_INVALIDATE_STATE(reader, packet, count);
|
||||
break;
|
||||
|
||||
case PM4_SET_BIN_MASK_LO: {
|
||||
uint32_t value = reader->Read();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", packet_ptr,
|
||||
packet, value);
|
||||
bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value;
|
||||
result = true;
|
||||
} break;
|
||||
case PM4_SET_BIN_MASK_HI: {
|
||||
uint32_t value = reader->Read();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", packet_ptr,
|
||||
packet, value);
|
||||
bin_mask_ =
|
||||
(bin_mask_ & 0xFFFFFFFFull) | (static_cast<uint64_t>(value) << 32);
|
||||
result = true;
|
||||
} break;
|
||||
case PM4_SET_BIN_SELECT_LO: {
|
||||
uint32_t value = reader->Read();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", packet_ptr,
|
||||
packet, value);
|
||||
bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value;
|
||||
result = true;
|
||||
} break;
|
||||
case PM4_SET_BIN_SELECT_HI: {
|
||||
uint32_t value = reader->Read();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", packet_ptr,
|
||||
packet, value);
|
||||
bin_select_ =
|
||||
(bin_select_ & 0xFFFFFFFFull) | (static_cast<uint64_t>(value) << 32);
|
||||
result = true;
|
||||
|
@ -757,53 +756,44 @@ bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader,
|
|||
// Ignored packets - useful if breaking on the default handler below.
|
||||
case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000
|
||||
case 0x51: // 0xC0015100 usually 2 words, 0xFFFFFFFF / 0xFFFFFFFF
|
||||
XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
reader->Skip(count);
|
||||
break;
|
||||
|
||||
default:
|
||||
XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
reader->Skip(count);
|
||||
break;
|
||||
}
|
||||
|
||||
trace_writer_.WritePacketEnd();
|
||||
assert_true(reader->offset() == data_start_offset + count);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_ME_INIT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// initialize CP's micro-engine
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
reader->Advance(count);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_NOP(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet, uint32_t count) {
|
||||
// skip N 32-bit words to get to the next packet
|
||||
// No-op, ignore some data.
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_NOP", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
reader->Advance(count);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// generate interrupt from the command stream
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t cpu_mask = reader->Read();
|
||||
for (int n = 0; n < 6; n++) {
|
||||
if (cpu_mask & (1 << n)) {
|
||||
|
@ -814,7 +804,7 @@ bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader,
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
@ -826,9 +816,7 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
|
|||
// Xenia-specific VdSwap hook.
|
||||
// VdSwap will post this to tell us we need to swap the screen/fire an
|
||||
// interrupt.
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet);
|
||||
// 63 words here, but only the first has any data.
|
||||
reader->TraceData(1);
|
||||
uint32_t frontbuffer_ptr = reader->Read();
|
||||
reader->Advance(count - 1);
|
||||
|
||||
|
@ -868,30 +856,28 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
|
|||
// Remove any dead textures, etc.
|
||||
texture_cache_.Scavenge();
|
||||
}
|
||||
|
||||
trace_writer_.WriteEvent(EventType::kSwap);
|
||||
trace_writer_.Flush();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER(
|
||||
RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count) {
|
||||
RingbufferReader* reader, uint32_t packet, uint32_t count) {
|
||||
// indirect buffer dispatch
|
||||
uint32_t list_ptr = reader->Read();
|
||||
uint32_t list_length = reader->Read();
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", packet_ptr,
|
||||
packet, list_ptr, list_length);
|
||||
ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
|
||||
// wait until a register or memory location is a specific value
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t wait_info = reader->Read();
|
||||
uint32_t poll_reg_addr = reader->Read();
|
||||
uint32_t ref = reader->Read();
|
||||
|
@ -904,9 +890,9 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
|
|||
// Memory.
|
||||
auto endianness = static_cast<Endian>(poll_reg_addr & 0x3);
|
||||
poll_reg_addr &= ~0x3;
|
||||
value =
|
||||
poly::load<uint32_t>(membase_ + GpuToCpu(packet_ptr, poll_reg_addr));
|
||||
value = poly::load<uint32_t>(membase_ + GpuToCpu(poll_reg_addr));
|
||||
value = GpuSwap(value, endianness);
|
||||
trace_writer_.WriteMemoryRead(poll_reg_addr, 4);
|
||||
} else {
|
||||
// Register.
|
||||
assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
|
||||
|
@ -963,13 +949,11 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// register read/modify/write
|
||||
// ? (used during shader upload and edram setup)
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t rmw_info = reader->Read();
|
||||
uint32_t and_mask = reader->Read();
|
||||
uint32_t or_mask = reader->Read();
|
||||
|
@ -988,17 +972,15 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader,
|
|||
// & imm
|
||||
value &= and_mask;
|
||||
}
|
||||
WriteRegister(packet_ptr, rmw_info & 0x1FFF, value);
|
||||
WriteRegister(rmw_info & 0x1FFF, value);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// conditional write to memory or register
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t wait_info = reader->Read();
|
||||
uint32_t poll_reg_addr = reader->Read();
|
||||
uint32_t ref = reader->Read();
|
||||
|
@ -1010,8 +992,8 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader,
|
|||
// Memory.
|
||||
auto endianness = static_cast<Endian>(poll_reg_addr & 0x3);
|
||||
poll_reg_addr &= ~0x3;
|
||||
value =
|
||||
poly::load<uint32_t>(membase_ + GpuToCpu(packet_ptr, poll_reg_addr));
|
||||
trace_writer_.WriteMemoryRead(poll_reg_addr, 4);
|
||||
value = poly::load<uint32_t>(membase_ + GpuToCpu(poll_reg_addr));
|
||||
value = GpuSwap(value, endianness);
|
||||
} else {
|
||||
// Register.
|
||||
|
@ -1052,23 +1034,21 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader,
|
|||
auto endianness = static_cast<Endian>(write_reg_addr & 0x3);
|
||||
write_reg_addr &= ~0x3;
|
||||
write_data = GpuSwap(write_data, endianness);
|
||||
poly::store(membase_ + GpuToCpu(packet_ptr, write_reg_addr), write_data);
|
||||
poly::store(membase_ + GpuToCpu(write_reg_addr), write_data);
|
||||
trace_writer_.WriteMemoryWrite(write_reg_addr, 4);
|
||||
} else {
|
||||
// Register.
|
||||
WriteRegister(packet_ptr, write_reg_addr, write_data);
|
||||
WriteRegister(write_reg_addr, write_data);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// generate an event that creates a write to memory when completed
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)", packet_ptr,
|
||||
packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t initiator = reader->Read();
|
||||
if (count == 1) {
|
||||
// Just an event flag? Where does this write?
|
||||
|
@ -1081,16 +1061,13 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader,
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD(
|
||||
RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count) {
|
||||
RingbufferReader* reader, uint32_t packet, uint32_t count) {
|
||||
// generate a VS|PS_done event
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t initiator = reader->Read();
|
||||
uint32_t address = reader->Read();
|
||||
uint32_t value = reader->Read();
|
||||
// Writeback initiator.
|
||||
WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F);
|
||||
WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F);
|
||||
uint32_t data_value;
|
||||
if ((initiator >> 31) & 0x1) {
|
||||
// Write counter (GPU vblank counter?).
|
||||
|
@ -1103,27 +1080,23 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD(
|
|||
address &= ~0x3;
|
||||
data_value = GpuSwap(data_value, endianness);
|
||||
poly::store(membase_ + GpuToCpu(address), data_value);
|
||||
trace_writer_.WriteMemoryWrite(address, 4);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT(
|
||||
RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count) {
|
||||
RingbufferReader* reader, uint32_t packet, uint32_t count) {
|
||||
// generate a screen extent event
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_EXT", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t unk0 = reader->Read();
|
||||
uint32_t unk1 = reader->Read();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// initiate fetch of index buffer and draw
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
// dword0 = viz query info
|
||||
uint32_t dword0 = reader->Read();
|
||||
uint32_t dword1 = reader->Read();
|
||||
|
@ -1172,12 +1145,10 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader,
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// draw using supplied indices in packet
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t dword0 = reader->Read();
|
||||
uint32_t index_count = dword0 >> 16;
|
||||
auto prim_type = static_cast<PrimitiveType>(dword0 & 0x3F);
|
||||
|
@ -1198,11 +1169,10 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader,
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// load constant into chip and to memory
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT", packet_ptr, packet);
|
||||
// PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg)))
|
||||
// reg - 0x2000
|
||||
uint32_t offset_type = reader->Read();
|
||||
|
@ -1213,10 +1183,7 @@ bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader,
|
|||
index += 0x2000; // registers
|
||||
for (uint32_t n = 0; n < count - 1; n++, index++) {
|
||||
uint32_t data = reader->Read();
|
||||
const char* reg_name = register_file_->GetRegisterName(index);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr + (1 + n) * 4, data,
|
||||
index, reg_name ? reg_name : "");
|
||||
WriteRegister(packet_ptr, index, data);
|
||||
WriteRegister(index, data);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
@ -1227,10 +1194,8 @@ bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader,
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT(
|
||||
RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count) {
|
||||
RingbufferReader* reader, uint32_t packet, uint32_t count) {
|
||||
// load constants from memory
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT", packet_ptr, packet);
|
||||
uint32_t address = reader->Read();
|
||||
address &= 0x3FFFFFFF;
|
||||
uint32_t offset_type = reader->Read();
|
||||
|
@ -1238,24 +1203,20 @@ bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT(
|
|||
uint32_t size = reader->Read();
|
||||
size &= 0xFFF;
|
||||
index += 0x4000; // alu constants
|
||||
trace_writer_.WriteMemoryRead(address, size * 4);
|
||||
for (uint32_t n = 0; n < size; n++, index++) {
|
||||
uint32_t data = poly::load_and_swap<uint32_t>(
|
||||
membase_ + GpuToCpu(packet_ptr, address + n * 4));
|
||||
const char* reg_name = register_file_->GetRegisterName(index);
|
||||
XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr, data, index,
|
||||
reg_name ? reg_name : "");
|
||||
WriteRegister(packet_ptr, index, data);
|
||||
uint32_t data =
|
||||
poly::load_and_swap<uint32_t>(membase_ + GpuToCpu(address + n * 4));
|
||||
WriteRegister(index, data);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet,
|
||||
uint32_t count) {
|
||||
// load sequencer instruction memory (pointer-based)
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t addr_type = reader->Read();
|
||||
auto shader_type = static_cast<ShaderType>(addr_type & 0x3);
|
||||
uint32_t addr = addr_type & ~0x3;
|
||||
|
@ -1263,18 +1224,16 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader,
|
|||
uint32_t start = start_size >> 16;
|
||||
uint32_t size_dwords = start_size & 0xFFFF; // dwords
|
||||
assert_true(start == 0);
|
||||
trace_writer_.WriteMemoryRead(addr, size_dwords * 4);
|
||||
LoadShader(shader_type,
|
||||
reinterpret_cast<uint32_t*>(membase_ + GpuToCpu(packet_ptr, addr)),
|
||||
reinterpret_cast<uint32_t*>(membase_ + GpuToCpu(addr)),
|
||||
size_dwords);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE(
|
||||
RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count) {
|
||||
RingbufferReader* reader, uint32_t packet, uint32_t count) {
|
||||
// load sequencer instruction memory (code embedded in packet)
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t dword0 = reader->Read();
|
||||
uint32_t dword1 = reader->Read();
|
||||
auto shader_type = static_cast<ShaderType>(dword0);
|
||||
|
@ -1290,11 +1249,8 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE(
|
|||
}
|
||||
|
||||
bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE(
|
||||
RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count) {
|
||||
RingbufferReader* reader, uint32_t packet, uint32_t count) {
|
||||
// selective invalidation of state pointers
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
uint32_t mask = reader->Read();
|
||||
// driver_->InvalidateState(mask);
|
||||
return true;
|
||||
|
@ -1382,7 +1338,6 @@ bool CommandProcessor::IssueDraw() {
|
|||
// No framebuffer, so nothing we do will actually have an effect.
|
||||
// Treat it as a no-op.
|
||||
// TODO(benvanik): if we have a vs export, still allow it to go.
|
||||
XETRACECP("No-op draw (no framebuffer set)");
|
||||
draw_batcher_.DiscardDraw();
|
||||
return true;
|
||||
}
|
||||
|
@ -2066,6 +2021,7 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateIndexBuffer() {
|
|||
: sizeof(uint16_t));
|
||||
auto allocation = scratch_buffer_.Acquire(total_size);
|
||||
|
||||
trace_writer_.WriteMemoryRead(info.guest_base, info.length);
|
||||
if (info.format == IndexFormat::kInt32) {
|
||||
auto dest = reinterpret_cast<uint32_t*>(allocation.host_ptr);
|
||||
auto src = reinterpret_cast<const uint32_t*>(membase_ + info.guest_base);
|
||||
|
@ -2125,6 +2081,8 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateVertexBuffers() {
|
|||
|
||||
auto allocation = scratch_buffer_.Acquire(valid_range);
|
||||
|
||||
trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range);
|
||||
|
||||
// Copy and byte swap the entire buffer.
|
||||
// We could be smart about this to save GPU bandwidth by building a CRC
|
||||
// as we copy and only if it differs from the previous value committing
|
||||
|
@ -2236,6 +2194,9 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateSampler(
|
|||
return UpdateStatus::kCompatible; // invalid texture used
|
||||
}
|
||||
|
||||
trace_writer_.WriteMemoryRead(texture_info.guest_address,
|
||||
texture_info.input_length);
|
||||
|
||||
auto entry_view = texture_cache_.Demand(texture_info, sampler_info);
|
||||
if (!entry_view) {
|
||||
// Unable to create/fetch/etc.
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "xenia/gpu/gl4/gl4_shader.h"
|
||||
#include "xenia/gpu/gl4/texture_cache.h"
|
||||
#include "xenia/gpu/register_file.h"
|
||||
#include "xenia/gpu/tracing.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/memory.h"
|
||||
|
||||
|
@ -56,12 +57,18 @@ class CommandProcessor {
|
|||
|
||||
bool Initialize(std::unique_ptr<GLContext> context);
|
||||
void Shutdown();
|
||||
void CallInThread(std::function<void()> fn);
|
||||
|
||||
void BeginTracing(const std::wstring& root_path);
|
||||
void EndTracing();
|
||||
|
||||
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count);
|
||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
|
||||
|
||||
void UpdateWritePointer(uint32_t value);
|
||||
|
||||
void ExecutePacket(uint32_t ptr, uint32_t count);
|
||||
|
||||
private:
|
||||
class RingbufferReader;
|
||||
|
||||
|
@ -109,7 +116,7 @@ class CommandProcessor {
|
|||
void ShutdownGL();
|
||||
GLuint CreateGeometryProgram(const std::string& source);
|
||||
|
||||
void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
|
||||
void WriteRegister(uint32_t index, uint32_t value);
|
||||
void MakeCoherent();
|
||||
void PrepareForWait();
|
||||
void ReturnFromWait();
|
||||
|
@ -117,63 +124,48 @@ class CommandProcessor {
|
|||
void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
|
||||
void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
|
||||
bool ExecutePacket(RingbufferReader* reader);
|
||||
bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet);
|
||||
bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_NOP(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet);
|
||||
bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet);
|
||||
bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet);
|
||||
bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet);
|
||||
bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_NOP(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_INDIRECT_BUFFER(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_EVENT_WRITE_SHD(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_EVENT_WRITE_EXT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_LOAD_ALU_CONSTANT(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet_ptr,
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet,
|
||||
uint32_t count);
|
||||
bool ExecutePacketType3_IM_LOAD_IMMEDIATE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr,
|
||||
|
||||
uint32_t packet, uint32_t count);
|
||||
bool ExecutePacketType3_INVALIDATE_STATE(RingbufferReader* reader,
|
||||
uint32_t packet_ptr, uint32_t packet,
|
||||
uint32_t count);
|
||||
uint32_t packet, uint32_t count);
|
||||
|
||||
bool LoadShader(ShaderType shader_type, const uint32_t* address,
|
||||
uint32_t dword_count);
|
||||
|
@ -206,10 +198,14 @@ class CommandProcessor {
|
|||
GL4GraphicsSystem* graphics_system_;
|
||||
RegisterFile* register_file_;
|
||||
|
||||
TraceWriter trace_writer_;
|
||||
|
||||
std::thread worker_thread_;
|
||||
std::atomic<bool> worker_running_;
|
||||
std::unique_ptr<GLContext> context_;
|
||||
SwapHandler swap_handler_;
|
||||
std::function<void()> pending_fn_;
|
||||
HANDLE pending_fn_event_;
|
||||
|
||||
uint64_t time_base_;
|
||||
uint32_t counter_;
|
||||
|
|
|
@ -47,9 +47,9 @@ void InitializeIfNeeded() {
|
|||
|
||||
// Currently a no-op: the body is empty, so nothing is released here.
void CleanupOnShutdown() {}
|
||||
|
||||
std::unique_ptr<GraphicsSystem> Create(Emulator* emulator) {
|
||||
std::unique_ptr<GraphicsSystem> Create() {
|
||||
InitializeIfNeeded();
|
||||
return std::make_unique<GL4GraphicsSystem>(emulator);
|
||||
return std::make_unique<GL4GraphicsSystem>();
|
||||
}
|
||||
|
||||
} // namespace gl4
|
||||
|
|
|
@ -19,7 +19,7 @@ namespace xe {
|
|||
namespace gpu {
|
||||
namespace gl4 {
|
||||
|
||||
std::unique_ptr<GraphicsSystem> Create(Emulator* emulator);
|
||||
std::unique_ptr<GraphicsSystem> Create();
|
||||
|
||||
} // namespace gl4
|
||||
} // namespace gpu
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "xenia/gpu/gl4/gl4_gpu-private.h"
|
||||
#include "xenia/gpu/gl4/gl4_profiler_display.h"
|
||||
#include "xenia/gpu/gpu-private.h"
|
||||
#include "xenia/gpu/tracing.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
@ -21,13 +22,15 @@ namespace gl4 {
|
|||
|
||||
extern "C" GLEWContext* glewGetContext();
|
||||
|
||||
GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator)
|
||||
: GraphicsSystem(emulator), timer_queue_(nullptr), vsync_timer_(nullptr) {}
|
||||
GL4GraphicsSystem::GL4GraphicsSystem()
|
||||
: GraphicsSystem(), timer_queue_(nullptr), vsync_timer_(nullptr) {}
|
||||
|
||||
GL4GraphicsSystem::~GL4GraphicsSystem() = default;
|
||||
|
||||
X_STATUS GL4GraphicsSystem::Setup() {
|
||||
auto result = GraphicsSystem::Setup();
|
||||
X_STATUS GL4GraphicsSystem::Setup(cpu::Processor* processor,
|
||||
ui::PlatformLoop* target_loop,
|
||||
ui::PlatformWindow* target_window) {
|
||||
auto result = GraphicsSystem::Setup(processor, target_loop, target_window);
|
||||
if (result) {
|
||||
return result;
|
||||
}
|
||||
|
@ -35,14 +38,13 @@ X_STATUS GL4GraphicsSystem::Setup() {
|
|||
// Create rendering control.
|
||||
// This must happen on the UI thread.
|
||||
poly::threading::Fence control_ready_fence;
|
||||
auto loop = emulator_->main_window()->loop();
|
||||
std::unique_ptr<GLContext> processor_context;
|
||||
loop->Post([&]() {
|
||||
target_loop_->Post([&]() {
|
||||
// Setup the GL control that actually does the drawing.
|
||||
// We run here in the loop and only touch it (and its context) on this
|
||||
// thread. That means some sync-fu when we want to swap.
|
||||
control_ = std::make_unique<WGLControl>(loop);
|
||||
emulator_->main_window()->AddChild(control_.get());
|
||||
control_ = std::make_unique<WGLControl>(target_loop_);
|
||||
target_window_->AddChild(control_.get());
|
||||
|
||||
// Setup the GL context the command processor will do all its drawing in.
|
||||
// It's shared with the control context so that we can resolve framebuffers
|
||||
|
@ -70,8 +72,12 @@ X_STATUS GL4GraphicsSystem::Setup() {
|
|||
command_processor_->set_swap_handler(
|
||||
[this](const SwapParameters& swap_params) { SwapHandler(swap_params); });
|
||||
|
||||
if (!FLAGS_trace_gpu.empty()) {
|
||||
command_processor_->BeginTracing(poly::to_wstring(FLAGS_trace_gpu));
|
||||
}
|
||||
|
||||
// Let the processor know we want register access callbacks.
|
||||
emulator_->memory()->AddMappedRange(
|
||||
memory_->AddMappedRange(
|
||||
0x7FC80000, 0xFFFF0000, 0x0000FFFF, this,
|
||||
reinterpret_cast<cpu::MMIOReadCallback>(MMIOReadRegisterThunk),
|
||||
reinterpret_cast<cpu::MMIOWriteCallback>(MMIOWriteRegisterThunk));
|
||||
|
@ -91,6 +97,8 @@ X_STATUS GL4GraphicsSystem::Setup() {
|
|||
}
|
||||
|
||||
void GL4GraphicsSystem::Shutdown() {
|
||||
command_processor_->EndTracing();
|
||||
|
||||
DeleteTimerQueueTimer(timer_queue_, vsync_timer_, nullptr);
|
||||
DeleteTimerQueue(timer_queue_);
|
||||
|
||||
|
@ -114,6 +122,101 @@ void GL4GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,
|
|||
command_processor_->EnableReadPointerWriteBack(ptr, block_size);
|
||||
}
|
||||
|
||||
const uint8_t* GL4GraphicsSystem::PlayTrace(const uint8_t* trace_data,
|
||||
size_t trace_size,
|
||||
TracePlaybackMode playback_mode) {
|
||||
auto trace_ptr = trace_data;
|
||||
command_processor_->CallInThread([&]() {
|
||||
bool pending_break = false;
|
||||
const PacketStartCommand* pending_packet = nullptr;
|
||||
while (trace_ptr < trace_data + trace_size) {
|
||||
auto type =
|
||||
static_cast<TraceCommandType>(poly::load<uint32_t>(trace_ptr));
|
||||
switch (type) {
|
||||
case TraceCommandType::kPrimaryBufferStart: {
|
||||
auto cmd =
|
||||
reinterpret_cast<const PrimaryBufferStartCommand*>(trace_ptr);
|
||||
//
|
||||
trace_ptr += sizeof(*cmd) + cmd->count * 4;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kPrimaryBufferEnd: {
|
||||
auto cmd =
|
||||
reinterpret_cast<const PrimaryBufferEndCommand*>(trace_ptr);
|
||||
//
|
||||
trace_ptr += sizeof(*cmd);
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kIndirectBufferStart: {
|
||||
auto cmd =
|
||||
reinterpret_cast<const IndirectBufferStartCommand*>(trace_ptr);
|
||||
//
|
||||
trace_ptr += sizeof(*cmd) + cmd->count * 4;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kIndirectBufferEnd: {
|
||||
auto cmd =
|
||||
reinterpret_cast<const IndirectBufferEndCommand*>(trace_ptr);
|
||||
//
|
||||
trace_ptr += sizeof(*cmd);
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kPacketStart: {
|
||||
auto cmd = reinterpret_cast<const PacketStartCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
std::memcpy(memory()->Translate(cmd->base_ptr), trace_ptr,
|
||||
cmd->count * 4);
|
||||
trace_ptr += cmd->count * 4;
|
||||
pending_packet = cmd;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kPacketEnd: {
|
||||
auto cmd = reinterpret_cast<const PacketEndCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
if (pending_packet) {
|
||||
command_processor_->ExecutePacket(pending_packet->base_ptr,
|
||||
pending_packet->count);
|
||||
pending_packet = nullptr;
|
||||
}
|
||||
if (pending_break) {
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kMemoryRead: {
|
||||
auto cmd = reinterpret_cast<const MemoryReadCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
std::memcpy(memory()->Translate(cmd->base_ptr), trace_ptr,
|
||||
cmd->length);
|
||||
trace_ptr += cmd->length;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kMemoryWrite: {
|
||||
auto cmd = reinterpret_cast<const MemoryWriteCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
// ?
|
||||
trace_ptr += cmd->length;
|
||||
break;
|
||||
}
|
||||
case TraceCommandType::kEvent: {
|
||||
auto cmd = reinterpret_cast<const EventCommand*>(trace_ptr);
|
||||
trace_ptr += sizeof(*cmd);
|
||||
switch (cmd->event_type) {
|
||||
case EventType::kSwap: {
|
||||
if (playback_mode == TracePlaybackMode::kBreakOnSwap) {
|
||||
pending_break = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
return trace_ptr;
|
||||
}
|
||||
|
||||
void GL4GraphicsSystem::MarkVblank() {
|
||||
static bool thread_name_set = false;
|
||||
if (!thread_name_set) {
|
||||
|
@ -147,9 +250,6 @@ void GL4GraphicsSystem::SwapHandler(const SwapParameters& swap_params) {
|
|||
|
||||
uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) {
|
||||
uint32_t r = addr & 0xFFFF;
|
||||
if (FLAGS_trace_ring_buffer) {
|
||||
XELOGGPU("ReadRegister(%.4X)", r);
|
||||
}
|
||||
|
||||
switch (r) {
|
||||
case 0x3C00: // ?
|
||||
|
@ -170,9 +270,6 @@ uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) {
|
|||
|
||||
void GL4GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) {
|
||||
uint32_t r = addr & 0xFFFF;
|
||||
if (FLAGS_trace_ring_buffer) {
|
||||
XELOGGPU("WriteRegister(%.4X, %.8X)", r, value);
|
||||
}
|
||||
|
||||
switch (r) {
|
||||
case 0x0714: // CP_RB_WPTR
|
||||
|
|
|
@ -24,10 +24,11 @@ namespace gl4 {
|
|||
|
||||
class GL4GraphicsSystem : public GraphicsSystem {
|
||||
public:
|
||||
GL4GraphicsSystem(Emulator* emulator);
|
||||
GL4GraphicsSystem();
|
||||
~GL4GraphicsSystem() override;
|
||||
|
||||
X_STATUS Setup() override;
|
||||
X_STATUS Setup(cpu::Processor* processor, ui::PlatformLoop* target_loop,
|
||||
ui::PlatformWindow* target_window) override;
|
||||
void Shutdown() override;
|
||||
|
||||
RegisterFile* register_file() { return ®ister_file_; }
|
||||
|
@ -35,6 +36,9 @@ class GL4GraphicsSystem : public GraphicsSystem {
|
|||
void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) override;
|
||||
void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) override;
|
||||
|
||||
const uint8_t* PlayTrace(const uint8_t* trace_data, size_t trace_size,
|
||||
TracePlaybackMode playback_mode) override;
|
||||
|
||||
private:
|
||||
void MarkVblank();
|
||||
void SwapHandler(const SwapParameters& swap_params);
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
|
||||
DECLARE_string(gpu);
|
||||
|
||||
DECLARE_bool(trace_ring_buffer);
|
||||
DECLARE_string(trace_gpu);
|
||||
|
||||
DECLARE_string(dump_shaders);
|
||||
|
||||
DECLARE_bool(vsync);
|
||||
|
|
|
@ -15,7 +15,8 @@
|
|||
|
||||
DEFINE_string(gpu, "any", "Graphics system. Use: [any, gl4]");
|
||||
|
||||
DEFINE_bool(trace_ring_buffer, false, "Trace GPU ring buffer packets.");
|
||||
DEFINE_string(trace_gpu, "", "Trace GPU data to the given root path.");
|
||||
|
||||
DEFINE_string(dump_shaders, "",
|
||||
"Path to write GPU shaders to as they are compiled.");
|
||||
|
||||
|
@ -24,14 +25,14 @@ DEFINE_bool(vsync, true, "Enable VSYNC.");
|
|||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
std::unique_ptr<GraphicsSystem> Create(Emulator* emulator) {
|
||||
std::unique_ptr<GraphicsSystem> Create() {
|
||||
if (FLAGS_gpu.compare("gl4") == 0) {
|
||||
return xe::gpu::gl4::Create(emulator);
|
||||
return xe::gpu::gl4::Create();
|
||||
} else {
|
||||
// Create best available.
|
||||
std::unique_ptr<GraphicsSystem> best;
|
||||
|
||||
best = xe::gpu::gl4::Create(emulator);
|
||||
best = xe::gpu::gl4::Create();
|
||||
if (best) {
|
||||
return best;
|
||||
}
|
||||
|
|
|
@ -21,9 +21,9 @@ class Emulator;
|
|||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
std::unique_ptr<GraphicsSystem> Create(Emulator* emulator);
|
||||
std::unique_ptr<GraphicsSystem> Create();
|
||||
|
||||
std::unique_ptr<GraphicsSystem> CreateGL4(Emulator* emulator);
|
||||
std::unique_ptr<GraphicsSystem> CreateGL4();
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
|
|
@ -10,23 +10,29 @@
|
|||
#include "xenia/gpu/graphics_system.h"
|
||||
|
||||
#include "poly/poly.h"
|
||||
#include "xenia/emulator.h"
|
||||
#include "xenia/cpu/processor.h"
|
||||
#include "xenia/gpu/gpu-private.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
GraphicsSystem::GraphicsSystem(Emulator* emulator)
|
||||
: emulator_(emulator),
|
||||
memory_(emulator->memory()),
|
||||
GraphicsSystem::GraphicsSystem()
|
||||
: memory_(nullptr),
|
||||
processor_(nullptr),
|
||||
target_loop_(nullptr),
|
||||
target_window_(nullptr),
|
||||
interrupt_callback_(0),
|
||||
interrupt_callback_data_(0) {}
|
||||
|
||||
GraphicsSystem::~GraphicsSystem() {}
|
||||
GraphicsSystem::~GraphicsSystem() = default;
|
||||
|
||||
X_STATUS GraphicsSystem::Setup() {
|
||||
processor_ = emulator_->processor();
|
||||
X_STATUS GraphicsSystem::Setup(cpu::Processor* processor,
|
||||
ui::PlatformLoop* target_loop,
|
||||
ui::PlatformWindow* target_window) {
|
||||
processor_ = processor;
|
||||
memory_ = processor->memory();
|
||||
target_loop_ = target_loop;
|
||||
target_window_ = target_window;
|
||||
|
||||
return X_STATUS_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,9 @@
|
|||
#include <thread>
|
||||
|
||||
#include "xenia/common.h"
|
||||
#include "xenia/emulator.h"
|
||||
#include "xenia/cpu/processor.h"
|
||||
#include "xenia/memory.h"
|
||||
#include "xenia/ui/main_window.h"
|
||||
#include "xenia/xbox.h"
|
||||
|
||||
namespace xe {
|
||||
|
@ -24,25 +26,37 @@ class GraphicsSystem {
|
|||
public:
|
||||
virtual ~GraphicsSystem();
|
||||
|
||||
Emulator* emulator() const { return emulator_; }
|
||||
Memory* memory() const { return memory_; }
|
||||
cpu::Processor* processor() const { return processor_; }
|
||||
|
||||
virtual X_STATUS Setup();
|
||||
virtual X_STATUS Setup(cpu::Processor* processor,
|
||||
ui::PlatformLoop* target_loop,
|
||||
ui::PlatformWindow* target_window);
|
||||
virtual void Shutdown();
|
||||
|
||||
void SetInterruptCallback(uint32_t callback, uint32_t user_data);
|
||||
virtual void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) = 0;
|
||||
virtual void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) = 0;
|
||||
virtual void EnableReadPointerWriteBack(uint32_t ptr,
|
||||
uint32_t block_size) = 0;
|
||||
|
||||
void DispatchInterruptCallback(uint32_t source, uint32_t cpu);
|
||||
|
||||
protected:
|
||||
GraphicsSystem(Emulator* emulator);
|
||||
enum class TracePlaybackMode {
|
||||
kUntilEnd,
|
||||
kBreakOnSwap,
|
||||
};
|
||||
virtual const uint8_t* PlayTrace(const uint8_t* trace_data, size_t trace_size,
|
||||
TracePlaybackMode playback_mode) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
protected:
|
||||
GraphicsSystem();
|
||||
|
||||
Emulator* emulator_;
|
||||
Memory* memory_;
|
||||
cpu::Processor* processor_;
|
||||
ui::PlatformLoop* target_loop_;
|
||||
ui::PlatformWindow* target_window_;
|
||||
|
||||
uint32_t interrupt_callback_;
|
||||
uint32_t interrupt_callback_data_;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
'shader.h',
|
||||
'texture_info.cc',
|
||||
'texture_info.h',
|
||||
'tracing.h',
|
||||
'ucode.h',
|
||||
'ucode_disassembler.cc',
|
||||
'ucode_disassembler.h',
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#include <gflags/gflags.h>
|
||||
#include "poly/main.h"
|
||||
#include "poly/mapped_memory.h"
|
||||
#include "xenia/gpu/graphics_system.h"
|
||||
#include "xenia/gpu/tracing.h"
|
||||
#include "xenia/emulator.h"
|
||||
#include "xenia/ui/main_window.h"
|
||||
|
||||
DEFINE_string(target_trace_file, "", "Specifies the trace file to load.");
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
int trace_viewer_main(std::vector<std::wstring>& args) {
|
||||
// Create the emulator.
|
||||
auto emulator = std::make_unique<Emulator>(L"");
|
||||
X_STATUS result = emulator->Setup();
|
||||
if (XFAILED(result)) {
|
||||
XELOGE("Failed to setup emulator: %.8X", result);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Grab path from the flag or unnamed argument.
|
||||
if (!FLAGS_target_trace_file.empty() || args.size() >= 2) {
|
||||
std::wstring path;
|
||||
if (!FLAGS_target_trace_file.empty()) {
|
||||
// Passed as a named argument.
|
||||
// TODO(benvanik): find something better than gflags that supports
|
||||
// unicode.
|
||||
path = poly::to_wstring(FLAGS_target_trace_file);
|
||||
} else {
|
||||
// Passed as an unnamed argument.
|
||||
path = args[1];
|
||||
}
|
||||
// Normalize the path and make absolute.
|
||||
std::wstring abs_path = poly::to_absolute_path(path);
|
||||
|
||||
// TODO(benvanik): UI? replay control on graphics system?
|
||||
auto graphics_system = emulator->graphics_system();
|
||||
auto mmap =
|
||||
poly::MappedMemory::Open(abs_path, poly::MappedMemory::Mode::kRead);
|
||||
auto trace_data = reinterpret_cast<const uint8_t*>(mmap->data());
|
||||
auto trace_size = mmap->size();
|
||||
|
||||
auto trace_ptr = trace_data;
|
||||
while (trace_ptr < trace_data + trace_size) {
|
||||
trace_ptr = graphics_system->PlayTrace(
|
||||
trace_ptr, trace_size - (trace_ptr - trace_data),
|
||||
GraphicsSystem::TracePlaybackMode::kBreakOnSwap);
|
||||
}
|
||||
|
||||
// Wait until we are exited.
|
||||
emulator->main_window()->loop()->AwaitQuit();
|
||||
}
|
||||
|
||||
emulator.reset();
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
DEFINE_ENTRY_POINT(L"gpu_trace_viewer", L"gpu_trace_viewer some.trace",
|
||||
xe::gpu::trace_viewer_main);
|
|
@ -0,0 +1,211 @@
|
|||
/**
|
||||
******************************************************************************
|
||||
* Xenia : Xbox 360 Emulator Research Project *
|
||||
******************************************************************************
|
||||
* Copyright 2015 Ben Vanik. All rights reserved. *
|
||||
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||
******************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef XENIA_GPU_TRACING_H_
|
||||
#define XENIA_GPU_TRACING_H_
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "xenia/memory.h"
|
||||
|
||||
namespace xe {
|
||||
namespace gpu {
|
||||
|
||||
// Tag stored in the first 32 bits of every record in a GPU trace file.
// The numeric values are part of the file format: append new entries at the
// end rather than reordering.
enum class TraceCommandType : uint32_t {
  kPrimaryBufferStart,
  kPrimaryBufferEnd,
  kIndirectBufferStart,
  kIndirectBufferEnd,
  kPacketStart,
  kPacketEnd,
  kMemoryRead,
  kMemoryWrite,
  kEvent,
};

// Record header; TraceWriter follows it with |count| 32-bit words copied from
// membase + base_ptr.
struct PrimaryBufferStartCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t count;
};

// Record with no payload.
struct PrimaryBufferEndCommand {
  TraceCommandType type;
};

// Record header; TraceWriter follows it with |count| 32-bit words copied from
// membase + base_ptr.
struct IndirectBufferStartCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t count;
};

// Record with no payload.
struct IndirectBufferEndCommand {
  TraceCommandType type;
};

// Record header; TraceWriter follows it with |count| 32-bit words copied from
// membase + base_ptr.
struct PacketStartCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t count;
};

// Record with no payload.
struct PacketEndCommand {
  TraceCommandType type;
};

// Record header; TraceWriter follows it with |length| bytes copied from
// membase + base_ptr.
struct MemoryReadCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t length;
};

// Record header; TraceWriter follows it with |length| bytes copied from
// membase + base_ptr.
struct MemoryWriteCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t length;
};

// Kind of event carried by an EventCommand. The underlying type is fixed
// because the value is written to disk as part of EventCommand, matching
// TraceCommandType.
enum class EventType : uint32_t {
  kSwap,
};

// Record with no payload beyond the event kind.
struct EventCommand {
  TraceCommandType type;
  EventType event_type;
};

// These structs are written to and read from trace files byte-for-byte, so
// their sizes must not change silently (e.g. through padding).
static_assert(sizeof(PrimaryBufferStartCommand) == 12,
              "trace record layout changed");
static_assert(sizeof(PrimaryBufferEndCommand) == 4,
              "trace record layout changed");
static_assert(sizeof(IndirectBufferStartCommand) == 12,
              "trace record layout changed");
static_assert(sizeof(IndirectBufferEndCommand) == 4,
              "trace record layout changed");
static_assert(sizeof(PacketStartCommand) == 12,
              "trace record layout changed");
static_assert(sizeof(PacketEndCommand) == 4, "trace record layout changed");
static_assert(sizeof(MemoryReadCommand) == 12,
              "trace record layout changed");
static_assert(sizeof(MemoryWriteCommand) == 12,
              "trace record layout changed");
static_assert(sizeof(EventCommand) == 8, "trace record layout changed");
|
||||
|
||||
class TraceWriter {
|
||||
public:
|
||||
TraceWriter(uint8_t* membase) : membase_(membase), file_(nullptr) {}
|
||||
~TraceWriter() = default;
|
||||
|
||||
bool Open(const std::wstring& path) {
|
||||
Close();
|
||||
file_ = _wfopen(path.c_str(), L"wb");
|
||||
return file_ != nullptr;
|
||||
}
|
||||
|
||||
void Flush() {
|
||||
if (file_) {
|
||||
fflush(file_);
|
||||
}
|
||||
}
|
||||
|
||||
void Close() {
|
||||
if (file_) {
|
||||
fflush(file_);
|
||||
fclose(file_);
|
||||
file_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void WritePrimaryBufferStart(uint32_t base_ptr, uint32_t count) {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = PrimaryBufferStartCommand({
|
||||
TraceCommandType::kPrimaryBufferStart, base_ptr, count,
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(membase_ + base_ptr, 4, count, file_);
|
||||
}
|
||||
|
||||
void WritePrimaryBufferEnd() {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = PrimaryBufferEndCommand({
|
||||
TraceCommandType::kPrimaryBufferEnd,
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
}
|
||||
|
||||
void WriteIndirectBufferStart(uint32_t base_ptr, uint32_t count) {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = IndirectBufferStartCommand({
|
||||
TraceCommandType::kIndirectBufferStart, base_ptr, count,
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(membase_ + base_ptr, 4, count, file_);
|
||||
}
|
||||
|
||||
void WriteIndirectBufferEnd() {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = IndirectBufferEndCommand({
|
||||
TraceCommandType::kIndirectBufferEnd,
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
}
|
||||
|
||||
void WritePacketStart(uint32_t base_ptr, uint32_t count) {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = PacketStartCommand({
|
||||
TraceCommandType::kPacketStart, base_ptr, count,
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(membase_ + base_ptr, 4, count, file_);
|
||||
}
|
||||
|
||||
void WritePacketEnd() {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = PacketEndCommand({
|
||||
TraceCommandType::kPacketEnd,
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
}
|
||||
|
||||
void WriteMemoryRead(uint32_t base_ptr, size_t length) {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = MemoryReadCommand({
|
||||
TraceCommandType::kMemoryRead, base_ptr, uint32_t(length),
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(membase_ + base_ptr, 1, length, file_);
|
||||
}
|
||||
|
||||
void WriteMemoryWrite(uint32_t base_ptr, size_t length) {
|
||||
if (!file_) {
|
||||
return;
|
||||
}
|
||||
auto cmd = MemoryWriteCommand({
|
||||
TraceCommandType::kMemoryWrite, base_ptr, uint32_t(length),
|
||||
});
|
||||
fwrite(&cmd, 1, sizeof(cmd), file_);
|
||||
fwrite(membase_ + base_ptr, 1, length, file_);
|
||||
}
|
||||
|
||||
// Appends an EventCommand record carrying `event_type` to the trace file.
// Does nothing when no file is open (file_ is null).
void WriteEvent(EventType event_type) {
  if (file_) {
    const EventCommand event_record = {
        TraceCommandType::kEvent, event_type,
    };
    fwrite(&event_record, 1, sizeof(event_record), file_);
  }
}
|
||||
|
||||
private:
|
||||
uint8_t* membase_;
|
||||
FILE* file_;
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace xe
|
||||
|
||||
#endif // XENIA_GPU_TRACING_H_
|
|
@ -234,15 +234,6 @@ inline uint32_t GpuToCpu(uint32_t p) {
|
|||
return p;
|
||||
}
|
||||
|
||||
// Returns |p| unchanged; |base| is accepted but currently ignored.
//
// Some AMD docs say the address is relative to the base pointer, some say it
// is used as-is. Some games use some crazy shift magic, but it seems to nop.
// Variants that were tried and disabled:
//   upper = base & 0xFF000000
//   lower = p & 0x01FFFFFF
//   result -= (((base >> 20) + 0x200) & 0x1000)
inline uint32_t GpuToCpu(uint32_t base, uint32_t p) {
  (void)base;  // Intentionally unused; kept so existing callers still compile.
  return p;
}
|
||||
|
||||
// XE_GPU_REG_SQ_PROGRAM_CNTL
|
||||
typedef union {
|
||||
XEPACKEDSTRUCTANONYMOUS({
|
||||
|
|
|
@ -24,7 +24,8 @@ DiscImageDevice::DiscImageDevice(const std::string& path,
|
|||
// Destroys the device, releasing gdfx_ with delete.
DiscImageDevice::~DiscImageDevice() {
  delete gdfx_;
}
|
||||
|
||||
int DiscImageDevice::Init() {
|
||||
mmap_ = poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::READ);
|
||||
mmap_ =
|
||||
poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::kRead);
|
||||
if (!mmap_) {
|
||||
XELOGE("Disc image could not be mapped");
|
||||
return 1;
|
||||
|
|
|
@ -125,8 +125,8 @@ std::unique_ptr<MemoryMapping> HostPathEntry::CreateMemoryMapping(
|
|||
Mode map_mode, const size_t offset, const size_t length) {
|
||||
auto mmap = poly::MappedMemory::Open(
|
||||
local_path_,
|
||||
map_mode == Mode::READ ? poly::MappedMemory::Mode::READ
|
||||
: poly::MappedMemory::Mode::READ_WRITE,
|
||||
map_mode == Mode::READ ? poly::MappedMemory::Mode::kRead
|
||||
: poly::MappedMemory::Mode::kReadWrite,
|
||||
offset, length);
|
||||
if (!mmap) {
|
||||
return nullptr;
|
||||
|
|
|
@ -25,7 +25,8 @@ STFSContainerDevice::STFSContainerDevice(const std::string& path,
|
|||
// Destroys the device, releasing stfs_ with delete.
STFSContainerDevice::~STFSContainerDevice() {
  delete stfs_;
}
|
||||
|
||||
int STFSContainerDevice::Init() {
|
||||
mmap_ = poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::READ);
|
||||
mmap_ =
|
||||
poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::kRead);
|
||||
if (!mmap_) {
|
||||
XELOGE("STFS container could not be mapped");
|
||||
return 1;
|
||||
|
|
|
@ -30,9 +30,9 @@ int xenia_main(std::vector<std::wstring>& args) {
|
|||
}
|
||||
|
||||
// Grab path from the flag or unnamed argument.
|
||||
if (FLAGS_target.size() || args.size() >= 2) {
|
||||
if (!FLAGS_target.empty() || args.size() >= 2) {
|
||||
std::wstring path;
|
||||
if (FLAGS_target.size()) {
|
||||
if (!FLAGS_target.empty()) {
|
||||
// Passed as a named argument.
|
||||
// TODO(benvanik): find something better than gflags that supports
|
||||
// unicode.
|
||||
|
@ -49,10 +49,10 @@ int xenia_main(std::vector<std::wstring>& args) {
|
|||
XELOGE("Failed to launch target: %.8X", result);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Wait until we are exited.
|
||||
emulator->main_window()->loop()->AwaitQuit();
|
||||
// Wait until we are exited.
|
||||
emulator->main_window()->loop()->AwaitQuit();
|
||||
}
|
||||
|
||||
emulator.reset();
|
||||
Profiler::Dump();
|
||||
|
|
23
xenia.gyp
23
xenia.gyp
|
@ -470,5 +470,28 @@
|
|||
'src/xenia/xenia_main.cc',
|
||||
],
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'gpu-trace-viewer',
|
||||
'type': 'executable',
|
||||
|
||||
'msvs_settings': {
|
||||
'VCLinkerTool': {
|
||||
'SubSystem': '2'
|
||||
},
|
||||
},
|
||||
|
||||
'dependencies': [
|
||||
'libxenia',
|
||||
],
|
||||
|
||||
'include_dirs': [
|
||||
'.',
|
||||
],
|
||||
|
||||
'sources': [
|
||||
'src/xenia/gpu/trace_viewer_main.cc',
|
||||
],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue