Fixing ring buffer wrap around. I think.

This commit is contained in:
Ben Vanik 2013-10-13 13:39:46 -07:00
parent e0af361d74
commit 4887234bba
1 changed file with 62 additions and 29 deletions

View File

@ -41,7 +41,10 @@ void RingBufferWorker::Initialize(GraphicsDriver* driver,
uint32_t ptr, uint32_t page_count) {
driver_ = driver;
primary_buffer_ptr_ = ptr;
primary_buffer_size_ = page_count * 4 * 1024;
// Not sure this is correct, but it's a way to take the page_count back to
// the number of bytes allocated by the physical alloc.
uint32_t original_size = 1 << (0x1C - page_count - 1);
primary_buffer_size_ = original_size;
read_ptr_index_ = 0;
}
@ -86,9 +89,19 @@ void RingBufferWorker::Pump() {
// Process the new commands.
XELOGGPU("Ring buffer thread work");
// TODO(benvanik): handle wrapping around
// read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4);
XEASSERT(write_ptr_index > read_ptr_index_);
// Handle wrapping around.
// TODO(benvanik): verify this is correct.
if (write_ptr_index_ < read_ptr_index_) {
// We wrapped. Execute all instructions until the end and go back to 0.
XELOGGPU("Ring buffer wrapped back to zero (read %0.8X, write %0.8X)",
read_ptr_index_, write_ptr_index);
uint32_t pre_length = (primary_buffer_size_ / 4) - read_ptr_index_;
if (pre_length) {
ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, pre_length);
}
read_ptr_index_ = 0;
}
uint32_t length = write_ptr_index - read_ptr_index_;
if (length) {
ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length);
@ -114,12 +127,14 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
#define LOG_DATA(count) \
for (uint32_t __m = 0; __m < count; __m++) { \
XELOGGPU(" %.8X", XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
XELOGGPU("[%.8X] %.8X", \
ptr + (n + 1 + __m) * 4, \
XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
}
#define TRANSLATE_ADDR(p) \
((p & ~0x3) + (primary_buffer_ptr_ & ~0x1FFFFFFF))
XELOGGPU("CommandList(%.8X): executing %dw", ptr, length);
XELOGGPU("[%.8X] CommandList(): executing %dw", ptr, length);
// Execute commands!
for (uint32_t n = 0; n < length;) {
@ -138,14 +153,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
// Type-0 packet.
// Write count registers in sequence to the registers starting at
// (base_index << 2).
XELOGGPU("Packet(%.8X): set registers:", packet);
XELOGGPU("[%.8X] Packet(%.8X): set registers:", ptr + n * 4,
ptr + n * 4, packet);
uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
uint32_t base_index = (packet & 0xFFFF);
for (uint32_t m = 0; m < count; m++) {
uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4);
const char* reg_name = xenos::GetRegisterName(base_index + m);
XELOGGPU(" %.8X -> %.4X %s", reg_data, base_index + m,
reg_name ? reg_name : "");
XELOGGPU("[%.8X] %.8X -> %.4X %s",
ptr + (n + 1 + m) * 4,
reg_data, base_index + m, reg_name ? reg_name : "");
// TODO(benvanik): exec write handler (if special).
if (base_index + m < kXEGpuRegisterCount) {
regs->values[base_index + m].u32 = reg_data;
@ -158,17 +175,20 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
{
// Type-1 packet.
// Contains two registers of data. Type-0 should be more common.
XELOGGPU("Packet(%.8X): set registers:", packet);
XELOGGPU("[%.8X] Packet(%.8X): set registers:",
ptr + n * 4, packet);
uint32_t reg_index_1 = packet & 0x7FF;
uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4);
const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
XELOGGPU(" %.8X -> %.4X %s", reg_data_1, reg_index_1,
reg_name_1 ? reg_name_1 : "");
XELOGGPU(" %.8X -> %.4X %s", reg_data_2, reg_index_2,
reg_name_2 ? reg_name_2 : "");
XELOGGPU("[%.8X] %.8X -> %.4X %s",
ptr + (n + 1) * 4,
reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
XELOGGPU("[%.8X] %.8X -> %.4X %s",
ptr + (n + 2) * 4,
reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
// TODO(benvanik): exec write handler (if special).
if (reg_index_1 < kXEGpuRegisterCount) {
regs->values[reg_index_1].u32 = reg_data_1;
@ -195,14 +215,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
switch (opcode) {
case PM4_ME_INIT:
// initialize CP's micro-engine
XELOGGPU("Packet(%.8X): PM4_ME_INIT", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_ME_INIT",
ptr + n * 4, packet);
LOG_DATA(count);
break;
case PM4_NOP:
// skip N 32-bit words to get to the next packet
// No-op, ignore some data.
XELOGGPU("Packet(%.8X): PM4_NOP", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_NOP",
ptr + n * 4, packet);
LOG_DATA(count);
break;
@ -211,8 +233,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
{
uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4);
XELOGGPU("Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
packet, list_ptr, list_length);
XELOGGPU("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
ptr + n * 4, packet, list_ptr, list_length);
ExecuteSegment(list_ptr, list_length);
driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
}
@ -220,32 +242,37 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_WAIT_REG_MEM:
// wait until a register or memory location is a specific value
XELOGGPU("Packet(%.8X): PM4_WAIT_REG_MEM", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
ptr + n * 4, packet);
LOG_DATA(count);
break;
case PM4_REG_RMW:
// register read/modify/write
// ? (used during shader upload and edram setup)
XELOGGPU("Packet(%.8X): PM4_REG_RMW", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_REG_RMW",
ptr + n * 4, packet);
LOG_DATA(count);
break;
case PM4_COND_WRITE:
// conditional write to memory or register
XELOGGPU("Packet(%.8X): PM4_COND_WRITE", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_COND_WRITE",
ptr + n * 4, packet);
LOG_DATA(count);
break;
case PM4_EVENT_WRITE:
// generate an event that creates a write to memory when completed
XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE",
ptr + n * 4, packet);
LOG_DATA(count);
break;
case PM4_EVENT_WRITE_SHD:
// generate a VS|PS_done event
{
XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE_SHD", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
ptr + n * 4, packet);
LOG_DATA(count);
// 3?
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
@ -260,7 +287,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_DRAW_INDX:
// initiate fetch of index buffer and draw
{
XELOGGPU("Packet(%.8X): PM4_DRAW_INDX", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
ptr + n * 4, packet);
LOG_DATA(count);
// d0 = viz query info
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
@ -277,7 +305,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_DRAW_INDX_2:
// draw using supplied indices in packet
{
XELOGGPU("Packet(%.8X): PM4_DRAW_INDX_2", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
ptr + n * 4, packet);
LOG_DATA(count);
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t index_count = d0 >> 16;
@ -293,7 +322,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_IM_LOAD:
// load sequencer instruction memory (pointer-based)
{
XELOGGPU("Packet(%.8X): PM4_IM_LOAD", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD",
ptr + n * 4, packet);
LOG_DATA(count);
uint32_t addr_type = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t type = addr_type & 0x3;
@ -312,7 +342,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_IM_LOAD_IMMEDIATE:
// load sequencer instruction memory (code embedded in packet)
{
XELOGGPU("Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
ptr + n * 4, packet);
uint32_t type = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t start_size = XEGETUINT32BE(packet_base + 2 * 4);
uint32_t start = start_size >> 16;
@ -330,7 +361,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_INVALIDATE_STATE:
// selective invalidation of state pointers
{
XELOGGPU("Packet(%.8X): PM4_INVALIDATE_STATE", packet);
XELOGGPU("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
ptr + n * 4, packet);
LOG_DATA(count);
uint32_t mask = XEGETUINT32BE(packet_base + 1 * 4);
driver_->InvalidateState(mask);
@ -338,7 +370,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
break;
default:
XELOGGPU("Packet(%.8X): unknown!", packet);
XELOGGPU("[%.8X] Packet(%.8X): unknown!",
ptr + n * 4, packet);
LOG_DATA(count);
break;
}