Deciphering copy packets.

This commit is contained in:
Ben Vanik 2014-12-24 03:58:04 -08:00
parent 3dffc72e59
commit 9c6be1edba
3 changed files with 197 additions and 24 deletions

View File

@ -592,8 +592,11 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
// VdSwap will post this to tell us we need to swap the screen/fire an
// interrupt.
XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet);
reader->TraceData(count);
reader->Advance(count);
// 63 words here, but only the first has any data.
reader->TraceData(1);
uint32_t frontbuffer_ptr = reader->Read();
// TODO(benvanik): something with the frontbuffer ptr.
reader->Advance(count - 1);
if (swap_handler_) {
swap_handler_();
}
@ -1074,7 +1077,14 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
if (enable_mode == ModeControl::kIgnore) {
// Ignored.
return true;
} else if (enable_mode == ModeControl::kCopy) {
}
if (!UpdateRenderTargets(draw_command)) {
PLOGE("Unable to setup render targets");
return false;
}
if (enable_mode == ModeControl::kCopy) {
// Special copy handling.
return IssueCopy(draw_command);
}
@ -1083,10 +1093,6 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
PLOGE("Unable to setup render state");
return false;
}
if (!UpdateRenderTargets(draw_command)) {
PLOGE("Unable to setup render targets");
return false;
}
// if (!PopulateShaders(draw_command)) {
// XELOGE("Unable to prepare draw shaders");
@ -1470,9 +1476,123 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
}
// Resolves the currently bound render target into guest memory as described
// by the RB_COPY_* registers.
//
// Only CopyCommand::kConvert with a kColor_8_8_8_8 destination and
// kUnspecified/k8in32 endianness is handled; any other combination hits
// assert_unhandled_case and returns false. The clear flags are decoded and
// sanity checked, but no clear is performed yet. draw_command is not used.
//
// Returns true once the readback has been issued.
bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
  auto& regs = *register_file_;

  // This is used to resolve surfaces, taking them from EDRAM render targets
  // to system memory. It can optionally clear color/depth surfaces, too.
  // The command buffer has stuff for actually doing this by drawing, however
  // we should be able to do it without that much easier.

  uint32_t copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
  // Render targets 0-3, 4 = depth
  uint32_t copy_src_select = copy_control & 0x7;
  bool color_clear_enabled = (copy_control >> 8) & 0x1;
  bool depth_clear_enabled = (copy_control >> 9) & 0x1;
  auto copy_command = static_cast<CopyCommand>((copy_control >> 20) & 0x3);

  uint32_t copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
  auto copy_dest_endian = static_cast<Endian128>(copy_dest_info & 0x7);
  uint32_t copy_dest_array = (copy_dest_info >> 3) & 0x1;
  assert_true(copy_dest_array == 0);
  uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7;
  assert_true(copy_dest_slice == 0);
  auto copy_dest_format =
      static_cast<ColorFormat>((copy_dest_info >> 7) & 0x3F);
  uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7;
  assert_true(copy_dest_number == 0);
  uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F;
  assert_true(copy_dest_bias == 0);
  uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1;

  uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
  // The pitch register packs the height in bits 16-29 and the pitch in the
  // low 14 bits.
  uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
  uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF;
  copy_dest_pitch &= 0x3FFF;

  // None of this is supported yet:
  uint32_t copy_surface_slice = regs[XE_GPU_REG_RB_COPY_SURFACE_SLICE].u32;
  assert_true(copy_surface_slice == 0);
  uint32_t copy_func = regs[XE_GPU_REG_RB_COPY_FUNC].u32;
  assert_true(copy_func == 0);
  uint32_t copy_ref = regs[XE_GPU_REG_RB_COPY_REF].u32;
  assert_true(copy_ref == 0);
  uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32;
  assert_true(copy_mask == 0);

  // Pick the GL format/type pair used when reading color data back.
  GLenum read_format;
  GLenum read_type;
  switch (copy_dest_format) {
    case ColorFormat::kColor_8_8_8_8:
      read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
      read_type = GL_UNSIGNED_BYTE;
      break;
    default:
      assert_unhandled_case(copy_dest_format);
      return false;
  }

  // TODO(benvanik): swap channel ordering on copy_dest_swap
  //                 Can we use GL swizzles for this?

  // Swap byte order during read.
  // TODO(benvanik): handle other endian modes.
  switch (copy_dest_endian) {
    case Endian128::kUnspecified:
      glPixelStorei(GL_PACK_SWAP_BYTES, GL_FALSE);
      break;
    case Endian128::k8in32:
      glPixelStorei(GL_PACK_SWAP_BYTES, GL_TRUE);
      break;
    default:
      assert_unhandled_case(copy_dest_endian);
      return false;
  }

  // Destination pointer in guest memory.
  // We have GL throw bytes directly into it.
  // TODO(benvanik): copy to staging texture then PBO back?
  void* ptr = membase_ + GpuToCpu(copy_dest_base);

  // The destination pitch/height are used directly as the read rectangle.
  uint32_t x = 0;
  uint32_t y = 0;
  uint32_t w = copy_dest_pitch;
  uint32_t h = copy_dest_height;

  switch (copy_command) {
    case CopyCommand::kConvert:
      if (copy_src_select <= 3) {
        // Source from a bound render target.
        glReadBuffer(GL_COLOR_ATTACHMENT0 + copy_src_select);
        glReadPixels(x, y, w, h, read_format, read_type, ptr);
      } else {
        // Source from the bound depth/stencil target.
        // GL_DEPTH_STENCIL is only valid with a packed depth/stencil type;
        // reusing the color read_type (GL_UNSIGNED_BYTE) makes glReadPixels
        // fail without writing anything.
        glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ptr);
      }
      break;
    case CopyCommand::kRaw:
    case CopyCommand::kConstantOne:
    case CopyCommand::kNull:
    default:
      assert_unhandled_case(copy_command);
      return false;
  }

  if (color_clear_enabled || depth_clear_enabled) {
    // Clear requested, so let's setup for that.
    uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
    uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
    uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
    assert_true(copy_color_clear == copy_color_clear_low);

    if (color_clear_enabled) {
      // Clear the render target we selected for copy.
      // Color targets are 0-3 (4 selects depth), so 3 is a valid source.
      assert_true(copy_src_select <= 3);
      // TODO: the actual color clear is not implemented yet.
    }

    if (depth_clear_enabled) {
      // Clear the current depth buffer.
      // TODO: the actual depth clear is not implemented yet.
    }
  }

  return true;
}
@ -1567,11 +1687,11 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
CachedFramebuffer* cached = nullptr;
for (auto& it = cached_framebuffers_.begin();
it != cached_framebuffers_.end(); ++it) {
if (it->depth_target == depth_target &&
it->color_targets[0] == color_targets[0] &&
it->color_targets[1] == color_targets[1] &&
it->color_targets[2] == color_targets[2] &&
it->color_targets[3] == color_targets[3]) {
if ((depth_target == -1u || it->depth_target == depth_target) &&
(color_targets[0] == -1u || it->color_targets[0] == color_targets[0]) &&
(color_targets[1] == -1u || it->color_targets[1] == color_targets[1]) &&
(color_targets[2] == -1u || it->color_targets[2] == color_targets[2]) &&
(color_targets[3] == -1u || it->color_targets[3] == color_targets[3])) {
return it->framebuffer;
}
}
@ -1579,9 +1699,16 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
cached = &cached_framebuffers_.back();
glCreateFramebuffers(1, &cached->framebuffer);
for (int i = 0; i < 4; ++i) {
cached->color_targets[i] = color_targets[i];
uint32_t color_target = color_targets[i];
if (color_target == -1u) {
color_target = 0;
}
cached->color_targets[i] = color_target;
glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i,
color_targets[i], 0);
color_target, 0);
}
if (depth_target == -1u) {
depth_target = 0;
}
cached->depth_target = depth_target;
glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT,

View File

@ -51,6 +51,15 @@ enum class Endian : uint32_t {
k16in32 = 3,
};
// Endianness selector with entries up to 128-bit containers.
// NOTE(review): enumerator names read as "<swap unit> in <container>" bit
// widths - confirm against the hardware documentation.
enum class Endian128 : uint32_t {
  kUnspecified = 0,  // No swap mode given.
  k8in16 = 1,
  k8in32 = 2,
  k16in32 = 3,
  k8in64 = 4,
  k8in128 = 5,
};
enum class IndexFormat : uint32_t {
kInt16,
kInt32,
@ -80,6 +89,40 @@ enum class ModeControl : uint32_t {
kCopy = 6,
};
// Copy operation selector; values fit in two bits.
enum class CopyCommand : uint32_t {
  kRaw = 0,
  kConvert = 1,
  kConstantOne = 2,
  kNull = 3,  // ? (meaning not yet confirmed)
};
// Color surface formats. Subset of a2xx_sq_surfaceformat; the numeric values
// are not contiguous, so every enumerator is spelled out explicitly.
enum class ColorFormat : uint32_t {
  // 8-bit and packed 16/32-bit formats.
  kColor_8 = 2,
  kColor_1_5_5_5 = 3,
  kColor_5_6_5 = 4,
  kColor_6_5_5 = 5,
  kColor_8_8_8_8 = 6,
  kColor_2_10_10_10 = 7,
  kColor_8_A = 8,
  kColor_8_B = 9,
  kColor_8_8 = 10,
  kColor_8_8_8_8_A = 14,
  kColor_4_4_4_4 = 15,
  kColor_10_11_11 = 16,
  kColor_11_11_10 = 17,

  // 16-bit-per-channel formats.
  kColor_16 = 24,
  kColor_16_16 = 25,
  kColor_16_16_16_16 = 26,

  // Floating point formats.
  kColor_16_FLOAT = 30,
  kColor_16_16_FLOAT = 31,
  kColor_16_16_16_16_FLOAT = 32,
  kColor_32_FLOAT = 36,
  kColor_32_32_FLOAT = 37,
  kColor_32_32_32_32_FLOAT = 38,
  kColor_2_10_10_10_FLOAT = 62,
};
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w) \
(((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))

View File

@ -369,14 +369,16 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
uint32_t unk0 = SHIM_GET_ARG_32(0); // ptr into primary ringbuffer
uint32_t unk1 = SHIM_GET_ARG_32(1);
uint32_t unk2 = SHIM_GET_ARG_32(2);
uint32_t unk3 = SHIM_GET_ARG_32(3); // ptr to 0xBEEF0000
uint32_t unk4 = SHIM_GET_ARG_32(4); // 0xBEEF0001
uint32_t unk5 = SHIM_GET_ARG_32(5);
uint32_t unk6 = SHIM_GET_ARG_32(6); // ptr to 6?
uint32_t unk3 = SHIM_GET_ARG_32(3); // ptr to 0xBEEF0000
uint32_t unk4 = SHIM_GET_ARG_32(4); // 0xBEEF0001
uint32_t frontbuffer_ptr = SHIM_GET_ARG_32(5); // ptr to frontbuffer address
uint32_t unk6 = SHIM_GET_ARG_32(6); // ptr to 6?
uint32_t unk7 = SHIM_GET_ARG_32(7);
XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X)", unk0, unk1,
unk2, unk3, unk4, unk5, unk6, unk7);
uint32_t frontbuffer = SHIM_MEM_32(frontbuffer_ptr);
XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X(%.8X), %.8X, %.8X)", unk0,
unk1, unk2, unk3, unk4, frontbuffer_ptr, frontbuffer, unk6, unk7);
// The caller seems to reserve 64 words (256b) in the primary ringbuffer
// for this method to do what it needs. We just zero them out and send a
@ -385,8 +387,9 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
// use this method.
memset(SHIM_MEM_ADDR(unk0), 0, 64 * 4);
auto dwords = reinterpret_cast<uint32_t*>(SHIM_MEM_ADDR(unk0));
dwords[0] = poly::byte_swap((0x03 << 30) | ((1 - 1) << 16) |
dwords[0] = poly::byte_swap((0x03 << 30) | ((63 - 1) << 16) |
(xe::gpu::xenos::PM4_XE_SWAP << 8));
dwords[1] = poly::byte_swap(frontbuffer);
SHIM_SET_RETURN_64(0);
}