Deciphering copy packets.
This commit is contained in:
parent
3dffc72e59
commit
9c6be1edba
|
@ -592,8 +592,11 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
|
|||
// VdSwap will post this to tell us we need to swap the screen/fire an
|
||||
// interrupt.
|
||||
XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet);
|
||||
reader->TraceData(count);
|
||||
reader->Advance(count);
|
||||
// 63 words here, but only the first has any data.
|
||||
reader->TraceData(1);
|
||||
uint32_t frontbuffer_ptr = reader->Read();
|
||||
// TODO(benvanik): something with the frontbuffer ptr.
|
||||
reader->Advance(count - 1);
|
||||
if (swap_handler_) {
|
||||
swap_handler_();
|
||||
}
|
||||
|
@ -1074,7 +1077,14 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
|
|||
if (enable_mode == ModeControl::kIgnore) {
|
||||
// Ignored.
|
||||
return true;
|
||||
} else if (enable_mode == ModeControl::kCopy) {
|
||||
}
|
||||
|
||||
if (!UpdateRenderTargets(draw_command)) {
|
||||
PLOGE("Unable to setup render targets");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (enable_mode == ModeControl::kCopy) {
|
||||
// Special copy handling.
|
||||
return IssueCopy(draw_command);
|
||||
}
|
||||
|
@ -1083,10 +1093,6 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
|
|||
PLOGE("Unable to setup render state");
|
||||
return false;
|
||||
}
|
||||
if (!UpdateRenderTargets(draw_command)) {
|
||||
PLOGE("Unable to setup render targets");
|
||||
return false;
|
||||
}
|
||||
|
||||
// if (!PopulateShaders(draw_command)) {
|
||||
// XELOGE("Unable to prepare draw shaders");
|
||||
|
@ -1470,9 +1476,123 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
|
|||
}
|
||||
|
||||
// Performs a resolve ("copy") operation: reads back the render target chosen
// by RB_COPY_CONTROL and writes the pixels into guest memory at
// RB_COPY_DEST_BASE.
//
// draw_command is currently unused; all inputs come from the register file.
//
// Returns true when the copy was issued. Returns false (after asserting) when
// the destination format, endian mode, or copy command is not handled yet.
bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
  auto& regs = *register_file_;

  // This is used to resolve surfaces, taking them from EDRAM render targets
  // to system memory. It can optionally clear color/depth surfaces, too.
  // The command buffer also carries the state required to do this with a real
  // draw, but reading the pixels back directly is much simpler.

  const uint32_t copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
  // Render targets 0-3, 4 = depth
  const uint32_t copy_src_select = copy_control & 0x7;
  const bool color_clear_enabled = ((copy_control >> 8) & 0x1) != 0;
  const bool depth_clear_enabled = ((copy_control >> 9) & 0x1) != 0;
  const auto copy_command =
      static_cast<CopyCommand>((copy_control >> 20) & 0x3);

  const uint32_t copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
  const auto copy_dest_endian = static_cast<Endian128>(copy_dest_info & 0x7);
  const uint32_t copy_dest_array = (copy_dest_info >> 3) & 0x1;
  assert_true(copy_dest_array == 0);
  const uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7;
  assert_true(copy_dest_slice == 0);
  const auto copy_dest_format =
      static_cast<ColorFormat>((copy_dest_info >> 7) & 0x3F);
  const uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7;
  assert_true(copy_dest_number == 0);
  const uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F;
  assert_true(copy_dest_bias == 0);
  const uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1;

  const uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
  // RB_COPY_DEST_PITCH packs the pitch in bits 0-13 and the height in
  // bits 16-29.
  const uint32_t copy_dest_pitch_height =
      regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
  const uint32_t copy_dest_pitch = copy_dest_pitch_height & 0x3FFF;
  const uint32_t copy_dest_height = (copy_dest_pitch_height >> 16) & 0x3FFF;

  // None of this is supported yet:
  const uint32_t copy_surface_slice =
      regs[XE_GPU_REG_RB_COPY_SURFACE_SLICE].u32;
  assert_true(copy_surface_slice == 0);
  const uint32_t copy_func = regs[XE_GPU_REG_RB_COPY_FUNC].u32;
  assert_true(copy_func == 0);
  const uint32_t copy_ref = regs[XE_GPU_REG_RB_COPY_REF].u32;
  assert_true(copy_ref == 0);
  const uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32;
  assert_true(copy_mask == 0);

  // Pick the GL readback format/type matching the guest destination format.
  GLenum read_format;
  GLenum read_type;
  switch (copy_dest_format) {
    case ColorFormat::kColor_8_8_8_8:
      read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
      read_type = GL_UNSIGNED_BYTE;
      break;
    default:
      assert_unhandled_case(copy_dest_format);
      return false;
  }

  // TODO(benvanik): swap channel ordering on copy_dest_swap
  //                 Can we use GL swizzles for this?

  // Swap byte order during read.
  // TODO(benvanik): handle other endian modes.
  // NOTE(review): GL_PACK_SWAP_BYTES only reorders bytes inside multi-byte
  // components, so it likely has no effect on GL_UNSIGNED_BYTE color reads -
  // confirm. The setting is also left in place after this function returns.
  switch (copy_dest_endian) {
    case Endian128::kUnspecified:
      glPixelStorei(GL_PACK_SWAP_BYTES, GL_FALSE);
      break;
    case Endian128::k8in32:
      glPixelStorei(GL_PACK_SWAP_BYTES, GL_TRUE);
      break;
    default:
      assert_unhandled_case(copy_dest_endian);
      return false;
  }

  // Destination pointer in guest memory.
  // We have GL throw bytes directly into it.
  // TODO(benvanik): copy to staging texture then PBO back?
  void* ptr = membase_ + GpuToCpu(copy_dest_base);

  // The whole destination surface is read; the pitch is used as the width.
  const uint32_t x = 0;
  const uint32_t y = 0;
  const uint32_t w = copy_dest_pitch;
  const uint32_t h = copy_dest_height;
  switch (copy_command) {
    case CopyCommand::kConvert:
      if (copy_src_select <= 3) {
        // Source from a bound render target.
        glReadBuffer(GL_COLOR_ATTACHMENT0 + copy_src_select);
        glReadPixels(x, y, w, h, read_format, read_type, ptr);
      } else {
        // Source from the bound depth/stencil target.
        // GL_DEPTH_STENCIL readback is only valid with a packed
        // depth/stencil type; the color read_type (GL_UNSIGNED_BYTE) makes
        // glReadPixels fail with a GL error and write nothing.
        glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ptr);
      }
      break;
    case CopyCommand::kRaw:
    case CopyCommand::kConstantOne:
    case CopyCommand::kNull:
    default:
      assert_unhandled_case(copy_command);
      return false;
  }

  if (color_clear_enabled || depth_clear_enabled) {
    // Clear requested, so let's setup for that.
    // The clears themselves are not implemented yet; only the register
    // values are read and sanity checked.
    const uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
    const uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
    const uint32_t copy_color_clear_low =
        regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
    assert_true(copy_color_clear == copy_color_clear_low);

    if (color_clear_enabled) {
      // Clear the render target we selected for copy.
      // Color targets are 0-3 (4 selects depth), so target 3 is valid here.
      assert_true(copy_src_select <= 3);
    }

    if (depth_clear_enabled) {
      // Clear the current depth buffer.
    }
  }

  return true;
}
|
||||
|
||||
|
@ -1567,11 +1687,11 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
|
|||
CachedFramebuffer* cached = nullptr;
|
||||
for (auto& it = cached_framebuffers_.begin();
|
||||
it != cached_framebuffers_.end(); ++it) {
|
||||
if (it->depth_target == depth_target &&
|
||||
it->color_targets[0] == color_targets[0] &&
|
||||
it->color_targets[1] == color_targets[1] &&
|
||||
it->color_targets[2] == color_targets[2] &&
|
||||
it->color_targets[3] == color_targets[3]) {
|
||||
if ((depth_target == -1u || it->depth_target == depth_target) &&
|
||||
(color_targets[0] == -1u || it->color_targets[0] == color_targets[0]) &&
|
||||
(color_targets[1] == -1u || it->color_targets[1] == color_targets[1]) &&
|
||||
(color_targets[2] == -1u || it->color_targets[2] == color_targets[2]) &&
|
||||
(color_targets[3] == -1u || it->color_targets[3] == color_targets[3])) {
|
||||
return it->framebuffer;
|
||||
}
|
||||
}
|
||||
|
@ -1579,9 +1699,16 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
|
|||
cached = &cached_framebuffers_.back();
|
||||
glCreateFramebuffers(1, &cached->framebuffer);
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
cached->color_targets[i] = color_targets[i];
|
||||
uint32_t color_target = color_targets[i];
|
||||
if (color_target == -1u) {
|
||||
color_target = 0;
|
||||
}
|
||||
cached->color_targets[i] = color_target;
|
||||
glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i,
|
||||
color_targets[i], 0);
|
||||
color_target, 0);
|
||||
}
|
||||
if (depth_target == -1u) {
|
||||
depth_target = 0;
|
||||
}
|
||||
cached->depth_target = depth_target;
|
||||
glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT,
|
||||
|
|
|
@ -51,6 +51,15 @@ enum class Endian : uint32_t {
|
|||
k16in32 = 3,
|
||||
};
|
||||
|
||||
// Endian mode for resolve (copy) destinations. IssueCopy decodes this from the
// low three bits of RB_COPY_DEST_INFO. The numeric values are the raw register
// encodings and must not be changed.
// NOTE(review): the names suggest "swap N-bit units within M-bit words";
// confirm against hardware documentation.
enum class Endian128 : uint32_t {
  kUnspecified = 0,
  k8in16 = 1,
  k8in32 = 2,
  k16in32 = 3,
  k8in64 = 4,
  k8in128 = 5,
};
|
||||
|
||||
enum class IndexFormat : uint32_t {
|
||||
kInt16,
|
||||
kInt32,
|
||||
|
@ -80,6 +89,40 @@ enum class ModeControl : uint32_t {
|
|||
kCopy = 6,
|
||||
};
|
||||
|
||||
// Operation selected for a resolve (copy). IssueCopy decodes this from bits
// 20-21 of RB_COPY_CONTROL; only kConvert is handled there so far. The numeric
// values are the raw register encodings and must not be changed.
enum class CopyCommand : uint32_t {
  kRaw = 0,
  kConvert = 1,
  kConstantOne = 2,
  kNull = 3,  // ? (meaning not yet confirmed)
};
|
||||
|
||||
// Subset of a2xx_sq_surfaceformat.
// Color surface formats. IssueCopy decodes the resolve destination format from
// a 6-bit field of RB_COPY_DEST_INFO, so every value here fits in 0-63. The
// numeric values are raw hardware encodings; gaps are formats not listed in
// this subset.
enum class ColorFormat : uint32_t {
  kColor_8 = 2,
  kColor_1_5_5_5 = 3,
  kColor_5_6_5 = 4,
  kColor_6_5_5 = 5,
  kColor_8_8_8_8 = 6,
  kColor_2_10_10_10 = 7,
  kColor_8_A = 8,
  kColor_8_B = 9,
  kColor_8_8 = 10,
  kColor_8_8_8_8_A = 14,
  kColor_4_4_4_4 = 15,
  kColor_10_11_11 = 16,
  kColor_11_11_10 = 17,
  kColor_16 = 24,
  kColor_16_16 = 25,
  kColor_16_16_16_16 = 26,
  kColor_16_FLOAT = 30,
  kColor_16_16_FLOAT = 31,
  kColor_16_16_16_16_FLOAT = 32,
  kColor_32_FLOAT = 36,
  kColor_32_32_FLOAT = 37,
  kColor_32_32_32_32_FLOAT = 38,
  kColor_2_10_10_10_FLOAT = 62,
};
|
||||
|
||||
// Builds a packed swizzle value from four component selectors. Each argument
// is pasted onto the XE_GPU_SWIZZLE_ prefix, so callers pass the bare suffix
// of a name that resolves at the point of use. The four selectors are placed
// at bit offsets 0, 3, 6 and 9 for x, y, z and w respectively.
#define XE_GPU_MAKE_SWIZZLE(x, y, z, w)                        \
  (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
   ((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
|
||||
|
|
|
@ -369,14 +369,16 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
|
|||
uint32_t unk0 = SHIM_GET_ARG_32(0); // ptr into primary ringbuffer
|
||||
uint32_t unk1 = SHIM_GET_ARG_32(1);
|
||||
uint32_t unk2 = SHIM_GET_ARG_32(2);
|
||||
uint32_t unk3 = SHIM_GET_ARG_32(3); // ptr to 0xBEEF0000
|
||||
uint32_t unk4 = SHIM_GET_ARG_32(4); // 0xBEEF0001
|
||||
uint32_t unk5 = SHIM_GET_ARG_32(5);
|
||||
uint32_t unk6 = SHIM_GET_ARG_32(6); // ptr to 6?
|
||||
uint32_t unk3 = SHIM_GET_ARG_32(3); // ptr to 0xBEEF0000
|
||||
uint32_t unk4 = SHIM_GET_ARG_32(4); // 0xBEEF0001
|
||||
uint32_t frontbuffer_ptr = SHIM_GET_ARG_32(5); // ptr to frontbuffer address
|
||||
uint32_t unk6 = SHIM_GET_ARG_32(6); // ptr to 6?
|
||||
uint32_t unk7 = SHIM_GET_ARG_32(7);
|
||||
|
||||
XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X)", unk0, unk1,
|
||||
unk2, unk3, unk4, unk5, unk6, unk7);
|
||||
uint32_t frontbuffer = SHIM_MEM_32(frontbuffer_ptr);
|
||||
|
||||
XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X(%.8X), %.8X, %.8X)", unk0,
|
||||
unk1, unk2, unk3, unk4, frontbuffer_ptr, frontbuffer, unk6, unk7);
|
||||
|
||||
// The caller seems to reserve 64 words (256b) in the primary ringbuffer
|
||||
// for this method to do what it needs. We just zero them out and send a
|
||||
|
@ -385,8 +387,9 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
|
|||
// use this method.
|
||||
memset(SHIM_MEM_ADDR(unk0), 0, 64 * 4);
|
||||
auto dwords = reinterpret_cast<uint32_t*>(SHIM_MEM_ADDR(unk0));
|
||||
dwords[0] = poly::byte_swap((0x03 << 30) | ((1 - 1) << 16) |
|
||||
dwords[0] = poly::byte_swap((0x03 << 30) | ((63 - 1) << 16) |
|
||||
(xe::gpu::xenos::PM4_XE_SWAP << 8));
|
||||
dwords[1] = poly::byte_swap(frontbuffer);
|
||||
|
||||
SHIM_SET_RETURN_64(0);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue