Cleaning up packet reading/endianness.

This commit is contained in:
Ben Vanik 2013-10-22 20:47:27 -07:00
parent 725cde55d4
commit e0b6d0f689
2 changed files with 61 additions and 51 deletions

View File

@ -185,10 +185,10 @@ void RingBufferWorker::AdvancePtr(PacketArgs& args, uint32_t n) {
} }
} }
#define ADVANCE_PTR(n) AdvancePtr(args, n) #define ADVANCE_PTR(n) AdvancePtr(args, n)
#define PEEK_PTR() \
XEGETUINT32BE(p + args.ptr)
#define READ_PTR() \ #define READ_PTR() \
XEGETUINT32BE(p + args.ptr) XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1);
#define READ_AND_ADVANCE_PTR() \
XEGETUINT32BE(p + args.ptr); ADVANCE_PTR(1);
uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) { uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
uint8_t* p = xe_memory_addr(memory_); uint8_t* p = xe_memory_addr(memory_);
@ -196,7 +196,7 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
uint32_t packet_ptr = args.ptr; uint32_t packet_ptr = args.ptr;
const uint8_t* packet_base = p + packet_ptr; const uint8_t* packet_base = p + packet_ptr;
const uint32_t packet = READ_PTR(); const uint32_t packet = PEEK_PTR();
ADVANCE_PTR(1); ADVANCE_PTR(1);
const uint32_t packet_type = packet >> 30; const uint32_t packet_type = packet >> 30;
if (packet == 0) { if (packet == 0) {
@ -217,7 +217,7 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
uint32_t base_index = (packet & 0x7FFF); uint32_t base_index = (packet & 0x7FFF);
uint32_t write_one_reg = (packet >> 15) & 0x1; uint32_t write_one_reg = (packet >> 15) & 0x1;
for (uint32_t m = 0; m < count; m++) { for (uint32_t m = 0; m < count; m++) {
uint32_t reg_data = READ_PTR(); uint32_t reg_data = PEEK_PTR();
uint32_t target_index = write_one_reg ? base_index : base_index + m; uint32_t target_index = write_one_reg ? base_index : base_index + m;
const char* reg_name = xenos::GetRegisterName(target_index); const char* reg_name = xenos::GetRegisterName(target_index);
XELOGGPU("[%.8X] %.8X -> %.4X %s", XELOGGPU("[%.8X] %.8X -> %.4X %s",
@ -238,9 +238,9 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
uint32_t reg_index_1 = packet & 0x7FF; uint32_t reg_index_1 = packet & 0x7FF;
uint32_t reg_index_2 = (packet >> 11) & 0x7FF; uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
uint32_t reg_ptr_1 = args.ptr; uint32_t reg_ptr_1 = args.ptr;
uint32_t reg_data_1 = READ_AND_ADVANCE_PTR(); uint32_t reg_data_1 = READ_PTR();
uint32_t reg_ptr_2 = args.ptr; uint32_t reg_ptr_2 = args.ptr;
uint32_t reg_data_2 = READ_AND_ADVANCE_PTR(); uint32_t reg_data_2 = READ_PTR();
const char* reg_name_1 = xenos::GetRegisterName(reg_index_1); const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
const char* reg_name_2 = xenos::GetRegisterName(reg_index_2); const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
XELOGGPU("[%.8X] %.8X -> %.4X %s", XELOGGPU("[%.8X] %.8X -> %.4X %s",
@ -291,7 +291,7 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_INTERRUPT", XELOGGPU("[%.8X] Packet(%.8X): PM4_INTERRUPT",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t cpu_mask = READ_AND_ADVANCE_PTR(); uint32_t cpu_mask = READ_PTR();
for (int n = 0; n < 6; n++) { for (int n = 0; n < 6; n++) {
if (cpu_mask & (1 << n)) { if (cpu_mask & (1 << n)) {
graphics_system_->DispatchInterruptCallback(1, n); graphics_system_->DispatchInterruptCallback(1, n);
@ -303,8 +303,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
case PM4_INDIRECT_BUFFER: case PM4_INDIRECT_BUFFER:
// indirect buffer dispatch // indirect buffer dispatch
{ {
uint32_t list_ptr = READ_AND_ADVANCE_PTR(); uint32_t list_ptr = READ_PTR();
uint32_t list_length = READ_AND_ADVANCE_PTR(); uint32_t list_length = READ_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", XELOGGPU("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
packet_ptr, packet, list_ptr, list_length); packet_ptr, packet, list_ptr, list_length);
ExecuteIndirectBuffer(list_ptr, list_length); ExecuteIndirectBuffer(list_ptr, list_length);
@ -317,18 +317,19 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", XELOGGPU("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t wait_info = READ_AND_ADVANCE_PTR(); uint32_t wait_info = READ_PTR();
uint32_t poll_reg_addr = READ_AND_ADVANCE_PTR(); uint32_t poll_reg_addr = READ_PTR();
uint32_t ref = READ_AND_ADVANCE_PTR(); uint32_t ref = READ_PTR();
uint32_t mask = READ_AND_ADVANCE_PTR(); uint32_t mask = READ_PTR();
uint32_t wait = READ_AND_ADVANCE_PTR(); uint32_t wait = READ_PTR();
bool matched = false; bool matched = false;
do { do {
uint32_t value; uint32_t value;
if (wait_info & 0x10) { if (wait_info & 0x10) {
// Memory. // Memory.
XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
poll_reg_addr &= ~0x3;
value = XEGETUINT32LE(p + TRANSLATE_ADDR(poll_reg_addr)); value = XEGETUINT32LE(p + TRANSLATE_ADDR(poll_reg_addr));
uint32_t endianness = poll_reg_addr & 0x3;
value = GpuSwap(value, endianness); value = GpuSwap(value, endianness);
} else { } else {
// Register. // Register.
@ -380,9 +381,9 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_REG_RMW", XELOGGPU("[%.8X] Packet(%.8X): PM4_REG_RMW",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t rmw_info = READ_AND_ADVANCE_PTR(); uint32_t rmw_info = READ_PTR();
uint32_t and_mask = READ_AND_ADVANCE_PTR(); uint32_t and_mask = READ_PTR();
uint32_t or_mask = READ_AND_ADVANCE_PTR(); uint32_t or_mask = READ_PTR();
uint32_t value = regs->values[rmw_info & 0x1FFF].u32; uint32_t value = regs->values[rmw_info & 0x1FFF].u32;
if ((rmw_info >> 30) & 0x1) { if ((rmw_info >> 30) & 0x1) {
// | reg // | reg
@ -408,17 +409,18 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_COND_WRITE", XELOGGPU("[%.8X] Packet(%.8X): PM4_COND_WRITE",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t wait_info = READ_AND_ADVANCE_PTR(); uint32_t wait_info = READ_PTR();
uint32_t poll_reg_addr = READ_AND_ADVANCE_PTR(); uint32_t poll_reg_addr = READ_PTR();
uint32_t ref = READ_AND_ADVANCE_PTR(); uint32_t ref = READ_PTR();
uint32_t mask = READ_AND_ADVANCE_PTR(); uint32_t mask = READ_PTR();
uint32_t write_reg_addr = READ_AND_ADVANCE_PTR(); uint32_t write_reg_addr = READ_PTR();
uint32_t write_data = READ_AND_ADVANCE_PTR(); uint32_t write_data = READ_PTR();
uint32_t value; uint32_t value;
if (wait_info & 0x10) { if (wait_info & 0x10) {
// Memory. // Memory.
value = XEGETUINT32LE(p + TRANSLATE_ADDR(poll_reg_addr)); value = XEGETUINT32LE(p + TRANSLATE_ADDR(poll_reg_addr));
uint32_t endianness = poll_reg_addr & 0x3; XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(poll_reg_addr & 0x3);
poll_reg_addr &= ~0x3;
value = GpuSwap(value, endianness); value = GpuSwap(value, endianness);
} else { } else {
// Register. // Register.
@ -456,7 +458,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
// Write. // Write.
if (wait_info & 0x100) { if (wait_info & 0x100) {
// Memory. // Memory.
uint32_t endianness = write_reg_addr & 0x3; XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(write_reg_addr & 0x3);
write_reg_addr &= ~0x3;
write_data = GpuSwap(write_data, endianness); write_data = GpuSwap(write_data, endianness);
XESETUINT32LE(p + TRANSLATE_ADDR(write_reg_addr), write_data); XESETUINT32LE(p + TRANSLATE_ADDR(write_reg_addr), write_data);
} else { } else {
@ -473,7 +476,7 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)", XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t initiator = READ_AND_ADVANCE_PTR(); uint32_t initiator = READ_PTR();
if (count == 1) { if (count == 1) {
// Just an event flag? Where does this write? // Just an event flag? Where does this write?
} else { } else {
@ -489,9 +492,9 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t initiator = READ_AND_ADVANCE_PTR(); uint32_t initiator = READ_PTR();
uint32_t address = READ_AND_ADVANCE_PTR(); uint32_t address = READ_PTR();
uint32_t value = READ_AND_ADVANCE_PTR(); uint32_t value = READ_PTR();
// Writeback initiator. // Writeback initiator.
WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x1F); WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x1F);
uint32_t data_value; uint32_t data_value;
@ -502,7 +505,7 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
// Write value. // Write value.
data_value = value; data_value = value;
} }
uint32_t endianness = address & 0x3; XE_GPU_ENDIAN endianness = (XE_GPU_ENDIAN)(address & 0x3);
address &= ~0x3; address &= ~0x3;
data_value = GpuSwap(data_value, endianness); data_value = GpuSwap(data_value, endianness);
XESETUINT32LE(p + TRANSLATE_ADDR(address), data_value); XESETUINT32LE(p + TRANSLATE_ADDR(address), data_value);
@ -516,14 +519,14 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
// d0 = viz query info // d0 = viz query info
uint32_t d0 = READ_AND_ADVANCE_PTR(); uint32_t d0 = READ_PTR();
uint32_t d1 = READ_AND_ADVANCE_PTR(); uint32_t d1 = READ_PTR();
uint32_t index_count = d1 >> 16; uint32_t index_count = d1 >> 16;
uint32_t prim_type = d1 & 0x3F; uint32_t prim_type = d1 & 0x3F;
uint32_t src_sel = (d1 >> 6) & 0x3; uint32_t src_sel = (d1 >> 6) & 0x3;
if (src_sel == 0x0) { if (src_sel == 0x0) {
uint32_t index_base = READ_AND_ADVANCE_PTR(); uint32_t index_base = READ_PTR();
uint32_t index_size = READ_AND_ADVANCE_PTR(); uint32_t index_size = READ_PTR();
uint32_t endianness = index_size >> 29; uint32_t endianness = index_size >> 29;
index_size &= 0x00FFFFFF; index_size &= 0x00FFFFFF;
bool index_32bit = (d1 >> 11) & 0x1; bool index_32bit = (d1 >> 11) & 0x1;
@ -547,7 +550,7 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t d0 = READ_AND_ADVANCE_PTR(); uint32_t d0 = READ_PTR();
uint32_t index_count = d0 >> 16; uint32_t index_count = d0 >> 16;
uint32_t prim_type = d0 & 0x3F; uint32_t prim_type = d0 & 0x3F;
uint32_t src_sel = (d0 >> 6) & 0x3; uint32_t src_sel = (d0 >> 6) & 0x3;
@ -564,10 +567,10 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD", XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t addr_type = READ_AND_ADVANCE_PTR(); uint32_t addr_type = READ_PTR();
uint32_t type = addr_type & 0x3; uint32_t type = addr_type & 0x3;
uint32_t addr = addr_type & ~0x3; uint32_t addr = addr_type & ~0x3;
uint32_t start_size = READ_AND_ADVANCE_PTR(); uint32_t start_size = READ_PTR();
uint32_t start = start_size >> 16; uint32_t start = start_size >> 16;
uint32_t size = start_size & 0xFFFF; // dwords uint32_t size = start_size & 0xFFFF; // dwords
XEASSERT(start == 0); XEASSERT(start == 0);
@ -584,8 +587,8 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t type = READ_AND_ADVANCE_PTR(); uint32_t type = READ_PTR();
uint32_t start_size = READ_AND_ADVANCE_PTR(); uint32_t start_size = READ_PTR();
uint32_t start = start_size >> 16; uint32_t start = start_size >> 16;
uint32_t size = start_size & 0xFFFF; // dwords uint32_t size = start_size & 0xFFFF; // dwords
XEASSERT(start == 0); XEASSERT(start == 0);
@ -606,35 +609,35 @@ uint32_t RingBufferWorker::ExecutePacket(PacketArgs& args) {
XELOGGPU("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", XELOGGPU("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
packet_ptr, packet); packet_ptr, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t mask = READ_AND_ADVANCE_PTR(); uint32_t mask = READ_PTR();
driver_->InvalidateState(mask); driver_->InvalidateState(mask);
} }
break; break;
case PM4_SET_BIN_MASK_LO: case PM4_SET_BIN_MASK_LO:
{ {
uint32_t value = READ_AND_ADVANCE_PTR(); uint32_t value = READ_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X",
packet_ptr, packet, value); packet_ptr, packet, value);
} }
break; break;
case PM4_SET_BIN_MASK_HI: case PM4_SET_BIN_MASK_HI:
{ {
uint32_t value = READ_AND_ADVANCE_PTR(); uint32_t value = READ_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X",
packet_ptr, packet, value); packet_ptr, packet, value);
} }
break; break;
case PM4_SET_BIN_SELECT_LO: case PM4_SET_BIN_SELECT_LO:
{ {
uint32_t value = READ_AND_ADVANCE_PTR(); uint32_t value = READ_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X",
packet_ptr, packet, value); packet_ptr, packet, value);
} }
break; break;
case PM4_SET_BIN_SELECT_HI: case PM4_SET_BIN_SELECT_HI:
{ {
uint32_t value = READ_AND_ADVANCE_PTR(); uint32_t value = READ_PTR();
XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", XELOGGPU("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X",
packet_ptr, packet, value); packet_ptr, packet, value);
} }

View File

@ -42,18 +42,25 @@ typedef enum {
XE_GPU_PRIMITIVE_TYPE_LINE_LOOP = 0x0C, XE_GPU_PRIMITIVE_TYPE_LINE_LOOP = 0x0C,
} XE_GPU_PRIMITIVE_TYPE; } XE_GPU_PRIMITIVE_TYPE;
XEFORCEINLINE uint32_t GpuSwap(uint32_t value, uint32_t endianness) { typedef enum {
XE_GPU_ENDIAN_NONE = 0x0,
XE_GPU_ENDIAN_8IN16 = 0x1,
XE_GPU_ENDIAN_8IN32 = 0x2,
XE_GPU_ENDIAN_16IN32 = 0x3,
} XE_GPU_ENDIAN;
XEFORCEINLINE uint32_t GpuSwap(uint32_t value, XE_GPU_ENDIAN endianness) {
switch (endianness) { switch (endianness) {
default: default:
case 0x0: // No swap. case XE_GPU_ENDIAN_NONE: // No swap.
return value; return value;
case 0x1: // Swap bytes in half words. case XE_GPU_ENDIAN_8IN16: // Swap bytes in half words.
return ((value << 8) & 0xFF00FF00) | return ((value << 8) & 0xFF00FF00) |
((value >> 8) & 0x00FF00FF); ((value >> 8) & 0x00FF00FF);
case 0x2: // Swap bytes. case XE_GPU_ENDIAN_8IN32: // Swap bytes.
// NOTE: we are likely doing two swaps here. Wasteful. Oh well. // NOTE: we are likely doing two swaps here. Wasteful. Oh well.
return XESWAP32(value); return XESWAP32(value);
case 0x3: // Swap half words. case XE_GPU_ENDIAN_16IN32: // Swap half words.
return ((value >> 16) & 0xFFFF) | (value << 16); return ((value >> 16) & 0xFFFF) | (value << 16);
} }
} }