Fixing ring buffer wrap around. I think.
This commit is contained in:
parent
e0af361d74
commit
4887234bba
|
@ -41,7 +41,10 @@ void RingBufferWorker::Initialize(GraphicsDriver* driver,
|
||||||
uint32_t ptr, uint32_t page_count) {
|
uint32_t ptr, uint32_t page_count) {
|
||||||
driver_ = driver;
|
driver_ = driver;
|
||||||
primary_buffer_ptr_ = ptr;
|
primary_buffer_ptr_ = ptr;
|
||||||
primary_buffer_size_ = page_count * 4 * 1024;
|
// Not sure this is correct, but it's a way to take the page_count back to
|
||||||
|
// the number of bytes allocated by the physical alloc.
|
||||||
|
uint32_t original_size = 1 << (0x1C - page_count - 1);
|
||||||
|
primary_buffer_size_ = original_size;
|
||||||
read_ptr_index_ = 0;
|
read_ptr_index_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,9 +89,19 @@ void RingBufferWorker::Pump() {
|
||||||
// Process the new commands.
|
// Process the new commands.
|
||||||
XELOGGPU("Ring buffer thread work");
|
XELOGGPU("Ring buffer thread work");
|
||||||
|
|
||||||
// TODO(benvanik): handle wrapping around
|
// Handle wrapping around.
|
||||||
// read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4);
|
// TODO(benvanik): verify this is correct.
|
||||||
XEASSERT(write_ptr_index > read_ptr_index_);
|
if (write_ptr_index_ < read_ptr_index_) {
|
||||||
|
// We wrapped. Execute all instructions until the end and go back to 0.
|
||||||
|
XELOGGPU("Ring buffer wrapped back to zero (read %0.8X, write %0.8X)",
|
||||||
|
read_ptr_index_, write_ptr_index);
|
||||||
|
uint32_t pre_length = (primary_buffer_size_ / 4) - read_ptr_index_;
|
||||||
|
if (pre_length) {
|
||||||
|
ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, pre_length);
|
||||||
|
}
|
||||||
|
read_ptr_index_ = 0;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t length = write_ptr_index - read_ptr_index_;
|
uint32_t length = write_ptr_index - read_ptr_index_;
|
||||||
if (length) {
|
if (length) {
|
||||||
ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length);
|
ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length);
|
||||||
|
@ -114,12 +127,14 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
|
|
||||||
#define LOG_DATA(count) \
|
#define LOG_DATA(count) \
|
||||||
for (uint32_t __m = 0; __m < count; __m++) { \
|
for (uint32_t __m = 0; __m < count; __m++) { \
|
||||||
XELOGGPU(" %.8X", XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
|
XELOGGPU("[%.8X] %.8X", \
|
||||||
|
ptr + (n + 1 + __m) * 4, \
|
||||||
|
XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
|
||||||
}
|
}
|
||||||
#define TRANSLATE_ADDR(p) \
|
#define TRANSLATE_ADDR(p) \
|
||||||
((p & ~0x3) + (primary_buffer_ptr_ & ~0x1FFFFFFF))
|
((p & ~0x3) + (primary_buffer_ptr_ & ~0x1FFFFFFF))
|
||||||
|
|
||||||
XELOGGPU("CommandList(%.8X): executing %dw", ptr, length);
|
XELOGGPU("[%.8X] CommandList(): executing %dw", ptr, length);
|
||||||
|
|
||||||
// Execute commands!
|
// Execute commands!
|
||||||
for (uint32_t n = 0; n < length;) {
|
for (uint32_t n = 0; n < length;) {
|
||||||
|
@ -138,14 +153,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
// Type-0 packet.
|
// Type-0 packet.
|
||||||
// Write count registers in sequence to the registers starting at
|
// Write count registers in sequence to the registers starting at
|
||||||
// (base_index << 2).
|
// (base_index << 2).
|
||||||
XELOGGPU("Packet(%.8X): set registers:", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): set registers:", ptr + n * 4,
|
||||||
|
ptr + n * 4, packet);
|
||||||
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
|
||||||
uint32_t base_index = (packet & 0xFFFF);
|
uint32_t base_index = (packet & 0xFFFF);
|
||||||
for (uint32_t m = 0; m < count; m++) {
|
for (uint32_t m = 0; m < count; m++) {
|
||||||
uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4);
|
uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4);
|
||||||
const char* reg_name = xenos::GetRegisterName(base_index + m);
|
const char* reg_name = xenos::GetRegisterName(base_index + m);
|
||||||
XELOGGPU(" %.8X -> %.4X %s", reg_data, base_index + m,
|
XELOGGPU("[%.8X] %.8X -> %.4X %s",
|
||||||
reg_name ? reg_name : "");
|
ptr + (n + 1 + m) * 4,
|
||||||
|
reg_data, base_index + m, reg_name ? reg_name : "");
|
||||||
// TODO(benvanik): exec write handler (if special).
|
// TODO(benvanik): exec write handler (if special).
|
||||||
if (base_index + m < kXEGpuRegisterCount) {
|
if (base_index + m < kXEGpuRegisterCount) {
|
||||||
regs->values[base_index + m].u32 = reg_data;
|
regs->values[base_index + m].u32 = reg_data;
|
||||||
|
@ -158,17 +175,20 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
{
|
{
|
||||||
// Type-1 packet.
|
// Type-1 packet.
|
||||||
// Contains two registers of data. Type-0 should be more common.
|
// Contains two registers of data. Type-0 should be more common.
|
||||||
XELOGGPU("Packet(%.8X): set registers:", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): set registers:",
|
||||||
|
ptr + n * 4, packet);
|
||||||
uint32_t reg_index_1 = packet & 0x7FF;
|
uint32_t reg_index_1 = packet & 0x7FF;
|
||||||
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
|
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
|
||||||
uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4);
|
uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4);
|
||||||
const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
|
const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
|
||||||
const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
|
const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
|
||||||
XELOGGPU(" %.8X -> %.4X %s", reg_data_1, reg_index_1,
|
XELOGGPU("[%.8X] %.8X -> %.4X %s",
|
||||||
reg_name_1 ? reg_name_1 : "");
|
ptr + (n + 1) * 4,
|
||||||
XELOGGPU(" %.8X -> %.4X %s", reg_data_2, reg_index_2,
|
reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
|
||||||
reg_name_2 ? reg_name_2 : "");
|
XELOGGPU("[%.8X] %.8X -> %.4X %s",
|
||||||
|
ptr + (n + 2) * 4,
|
||||||
|
reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
|
||||||
// TODO(benvanik): exec write handler (if special).
|
// TODO(benvanik): exec write handler (if special).
|
||||||
if (reg_index_1 < kXEGpuRegisterCount) {
|
if (reg_index_1 < kXEGpuRegisterCount) {
|
||||||
regs->values[reg_index_1].u32 = reg_data_1;
|
regs->values[reg_index_1].u32 = reg_data_1;
|
||||||
|
@ -195,14 +215,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
case PM4_ME_INIT:
|
case PM4_ME_INIT:
|
||||||
// initialize CP's micro-engine
|
// initialize CP's micro-engine
|
||||||
XELOGGPU("Packet(%.8X): PM4_ME_INIT", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_ME_INIT",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PM4_NOP:
|
case PM4_NOP:
|
||||||
// skip N 32-bit words to get to the next packet
|
// skip N 32-bit words to get to the next packet
|
||||||
// No-op, ignore some data.
|
// No-op, ignore some data.
|
||||||
XELOGGPU("Packet(%.8X): PM4_NOP", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_NOP",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -211,8 +233,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
{
|
{
|
||||||
uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4);
|
uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4);
|
||||||
XELOGGPU("Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
|
||||||
packet, list_ptr, list_length);
|
ptr + n * 4, packet, list_ptr, list_length);
|
||||||
ExecuteSegment(list_ptr, list_length);
|
ExecuteSegment(list_ptr, list_length);
|
||||||
driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
|
driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
|
||||||
}
|
}
|
||||||
|
@ -220,32 +242,37 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
|
|
||||||
case PM4_WAIT_REG_MEM:
|
case PM4_WAIT_REG_MEM:
|
||||||
// wait until a register or memory location is a specific value
|
// wait until a register or memory location is a specific value
|
||||||
XELOGGPU("Packet(%.8X): PM4_WAIT_REG_MEM", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PM4_REG_RMW:
|
case PM4_REG_RMW:
|
||||||
// register read/modify/write
|
// register read/modify/write
|
||||||
// ? (used during shader upload and edram setup)
|
// ? (used during shader upload and edram setup)
|
||||||
XELOGGPU("Packet(%.8X): PM4_REG_RMW", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_REG_RMW",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PM4_COND_WRITE:
|
case PM4_COND_WRITE:
|
||||||
// conditional write to memory or register
|
// conditional write to memory or register
|
||||||
XELOGGPU("Packet(%.8X): PM4_COND_WRITE", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_COND_WRITE",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PM4_EVENT_WRITE:
|
case PM4_EVENT_WRITE:
|
||||||
// generate an event that creates a write to memory when completed
|
// generate an event that creates a write to memory when completed
|
||||||
XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
break;
|
break;
|
||||||
case PM4_EVENT_WRITE_SHD:
|
case PM4_EVENT_WRITE_SHD:
|
||||||
// generate a VS|PS_done event
|
// generate a VS|PS_done event
|
||||||
{
|
{
|
||||||
XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE_SHD", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
// 3?
|
// 3?
|
||||||
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
|
@ -260,7 +287,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
case PM4_DRAW_INDX:
|
case PM4_DRAW_INDX:
|
||||||
// initiate fetch of index buffer and draw
|
// initiate fetch of index buffer and draw
|
||||||
{
|
{
|
||||||
XELOGGPU("Packet(%.8X): PM4_DRAW_INDX", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
// d0 = viz query info
|
// d0 = viz query info
|
||||||
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
|
@ -277,7 +305,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
case PM4_DRAW_INDX_2:
|
case PM4_DRAW_INDX_2:
|
||||||
// draw using supplied indices in packet
|
// draw using supplied indices in packet
|
||||||
{
|
{
|
||||||
XELOGGPU("Packet(%.8X): PM4_DRAW_INDX_2", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
uint32_t index_count = d0 >> 16;
|
uint32_t index_count = d0 >> 16;
|
||||||
|
@ -293,7 +322,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
case PM4_IM_LOAD:
|
case PM4_IM_LOAD:
|
||||||
// load sequencer instruction memory (pointer-based)
|
// load sequencer instruction memory (pointer-based)
|
||||||
{
|
{
|
||||||
XELOGGPU("Packet(%.8X): PM4_IM_LOAD", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
uint32_t addr_type = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t addr_type = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
uint32_t type = addr_type & 0x3;
|
uint32_t type = addr_type & 0x3;
|
||||||
|
@ -312,7 +342,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
case PM4_IM_LOAD_IMMEDIATE:
|
case PM4_IM_LOAD_IMMEDIATE:
|
||||||
// load sequencer instruction memory (code embedded in packet)
|
// load sequencer instruction memory (code embedded in packet)
|
||||||
{
|
{
|
||||||
XELOGGPU("Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
|
||||||
|
ptr + n * 4, packet);
|
||||||
uint32_t type = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t type = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
uint32_t start_size = XEGETUINT32BE(packet_base + 2 * 4);
|
uint32_t start_size = XEGETUINT32BE(packet_base + 2 * 4);
|
||||||
uint32_t start = start_size >> 16;
|
uint32_t start = start_size >> 16;
|
||||||
|
@ -330,7 +361,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
case PM4_INVALIDATE_STATE:
|
case PM4_INVALIDATE_STATE:
|
||||||
// selective invalidation of state pointers
|
// selective invalidation of state pointers
|
||||||
{
|
{
|
||||||
XELOGGPU("Packet(%.8X): PM4_INVALIDATE_STATE", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
uint32_t mask = XEGETUINT32BE(packet_base + 1 * 4);
|
uint32_t mask = XEGETUINT32BE(packet_base + 1 * 4);
|
||||||
driver_->InvalidateState(mask);
|
driver_->InvalidateState(mask);
|
||||||
|
@ -338,7 +370,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
XELOGGPU("Packet(%.8X): unknown!", packet);
|
XELOGGPU("[%.8X] Packet(%.8X): unknown!",
|
||||||
|
ptr + n * 4, packet);
|
||||||
LOG_DATA(count);
|
LOG_DATA(count);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue