Deciphering copy packets.

2014-12-24 03:58:04 -08:00 · 2014-12-24 03:58:04 -08:00 · 9c6be1edba
parent 3dffc72e59
commit 9c6be1edba
3 changed files with 197 additions and 24 deletions
--- a/src/xenia/gpu/gl4/command_processor.cc
+++ b/src/xenia/gpu/gl4/command_processor.cc
@ -592,8 +592,11 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
  // VdSwap will post this to tell us we need to swap the screen/fire an
  // interrupt.
  XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet);
-  reader->TraceData(count);
-  reader->Advance(count);
+  // 63 words here, but only the first has any data.
+  reader->TraceData(1);
+  uint32_t frontbuffer_ptr = reader->Read();
+  // TODO(benvanik): something with the frontbuffer ptr.
+  reader->Advance(count - 1);
  if (swap_handler_) {
    swap_handler_();
  }
@ -1074,7 +1077,14 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
  if (enable_mode == ModeControl::kIgnore) {
    // Ignored.
    return true;
-  } else if (enable_mode == ModeControl::kCopy) {
+  }
+
+  if (!UpdateRenderTargets(draw_command)) {
+    PLOGE("Unable to setup render targets");
+    return false;
+  }
+
+  if (enable_mode == ModeControl::kCopy) {
    // Special copy handling.
    return IssueCopy(draw_command);
  }
@ -1083,10 +1093,6 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
    PLOGE("Unable to setup render state");
    return false;
  }
-  if (!UpdateRenderTargets(draw_command)) {
-    PLOGE("Unable to setup render targets");
-    return false;
-  }

  // if (!PopulateShaders(draw_command)) {
  //  XELOGE("Unable to prepare draw shaders");
@ -1470,9 +1476,123 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
 }

 bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
-  // uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
-  // uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF;
-  // copy_dest_pitch &= 0x3FFF;
+  auto& regs = *register_file_;
+
+  // Resolves a surface: copies the selected EDRAM render target (color 0-3
+  // or depth/stencil) to guest memory with glReadPixels rather than replaying
+  // the draw the command buffer supplies for it. Clear requests are decoded
+  // but not performed yet. Returns false for unhandled formats/commands.
+
+  uint32_t copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
+  // Render targets 0-3, 4 = depth
+  uint32_t copy_src_select = copy_control & 0x7;
+  bool color_clear_enabled = (copy_control >> 8) & 0x1;
+  bool depth_clear_enabled = (copy_control >> 9) & 0x1;
+  auto copy_command = static_cast<CopyCommand>((copy_control >> 20) & 0x3);
+
+  uint32_t copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
+  auto copy_dest_endian = static_cast<Endian128>(copy_dest_info & 0x7);
+  uint32_t copy_dest_array = (copy_dest_info >> 3) & 0x1;
+  assert_true(copy_dest_array == 0);
+  uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7;
+  assert_true(copy_dest_slice == 0);
+  auto copy_dest_format =
+      static_cast<ColorFormat>((copy_dest_info >> 7) & 0x3F);
+  uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7;
+  assert_true(copy_dest_number == 0);
+  uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F;
+  assert_true(copy_dest_bias == 0);
+  uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1;
+
+  uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
+  uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
+  uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF;
+  copy_dest_pitch &= 0x3FFF;
+
+  // None of this is supported yet:
+  uint32_t copy_surface_slice = regs[XE_GPU_REG_RB_COPY_SURFACE_SLICE].u32;
+  assert_true(copy_surface_slice == 0);
+  uint32_t copy_func = regs[XE_GPU_REG_RB_COPY_FUNC].u32;
+  assert_true(copy_func == 0);
+  uint32_t copy_ref = regs[XE_GPU_REG_RB_COPY_REF].u32;
+  assert_true(copy_ref == 0);
+  uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32;
+  assert_true(copy_mask == 0);
+
+  GLenum read_format;
+  GLenum read_type;
+  switch (copy_dest_format) {
+    case ColorFormat::kColor_8_8_8_8:
+      read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
+      read_type = GL_UNSIGNED_BYTE;
+      break;
+    default:
+      assert_unhandled_case(copy_dest_format);
+      return false;
+  }
+
+  // TODO(benvanik): swap channel ordering on copy_dest_swap
+  //                 Can we use GL swizzles for this?
+
+  // Swap byte order during read.
+  // TODO(benvanik): handle other endian modes.
+  switch (copy_dest_endian) {
+    case Endian128::kUnspecified:
+      glPixelStorei(GL_PACK_SWAP_BYTES, GL_FALSE);
+      break;
+    case Endian128::k8in32:
+      glPixelStorei(GL_PACK_SWAP_BYTES, GL_TRUE);
+      break;
+    default:
+      assert_unhandled_case(copy_dest_endian);
+      return false;
+  }
+
+  // Destination pointer in guest memory.
+  // We have GL throw bytes directly into it.
+  // TODO(benvanik): copy to staging texture then PBO back?
+  void* ptr = membase_ + GpuToCpu(copy_dest_base);
+
+  uint32_t x = 0;
+  uint32_t y = 0;
+  uint32_t w = copy_dest_pitch;
+  uint32_t h = copy_dest_height;
+  switch (copy_command) {
+    case CopyCommand::kConvert:
+      if (copy_src_select <= 3) {
+        // Source from a bound render target.
+        glReadBuffer(GL_COLOR_ATTACHMENT0 + copy_src_select);
+        glReadPixels(x, y, w, h, read_format, read_type, ptr);
+      } else {
+        // Depth/stencil source: GL_DEPTH_STENCIL needs a packed type.
+        glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ptr);
+      }
+      break;
+    case CopyCommand::kRaw:
+    case CopyCommand::kConstantOne:
+    case CopyCommand::kNull:
+    default:
+      assert_unhandled_case(copy_command);
+      return false;
+  }
+
+  if (color_clear_enabled || depth_clear_enabled) {
+    // Clear requested, so let's setup for that.
+    uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
+    uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
+    uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
+    assert_true(copy_color_clear == copy_color_clear_low);
+
+    if (color_clear_enabled) {
+      // Clear the render target we selected for copy (not implemented yet).
+      assert_true(copy_src_select <= 3);
+    }
+
+    if (depth_clear_enabled) {
+      // Clear the current depth buffer (not implemented yet).
+    }
+  }
+
  return true;
 }

@ -1567,11 +1687,11 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
  CachedFramebuffer* cached = nullptr;
  for (auto& it = cached_framebuffers_.begin();
       it != cached_framebuffers_.end(); ++it) {
-    if (it->depth_target == depth_target &&
-        it->color_targets[0] == color_targets[0] &&
-        it->color_targets[1] == color_targets[1] &&
-        it->color_targets[2] == color_targets[2] &&
-        it->color_targets[3] == color_targets[3]) {
+    if ((depth_target == -1u || it->depth_target == depth_target) &&
+        (color_targets[0] == -1u || it->color_targets[0] == color_targets[0]) &&
+        (color_targets[1] == -1u || it->color_targets[1] == color_targets[1]) &&
+        (color_targets[2] == -1u || it->color_targets[2] == color_targets[2]) &&
+        (color_targets[3] == -1u || it->color_targets[3] == color_targets[3])) {
      return it->framebuffer;
    }
  }
@ -1579,9 +1699,16 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
  cached = &cached_framebuffers_.back();
  glCreateFramebuffers(1, &cached->framebuffer);
  for (int i = 0; i < 4; ++i) {
-    cached->color_targets[i] = color_targets[i];
+    uint32_t color_target = color_targets[i];
+    if (color_target == -1u) {
+      color_target = 0;
+    }
+    cached->color_targets[i] = color_target;
    glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i,
-                              color_targets[i], 0);
+                              color_target, 0);
+  }
+  if (depth_target == -1u) {
+    depth_target = 0;
  }
  cached->depth_target = depth_target;
  glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT,
--- a/src/xenia/gpu/xenos.h
+++ b/src/xenia/gpu/xenos.h
@ -51,6 +51,15 @@ enum class Endian : uint32_t {
  k16in32 = 3,
 };

+enum class Endian128 : uint32_t {  // Decoded from RB_COPY_DEST_INFO bits 0-2.
+  kUnspecified = 0,  // IssueCopy reads back with GL_PACK_SWAP_BYTES off.
+  k8in16 = 1,        // Not handled by IssueCopy yet.
+  k8in32 = 2,        // IssueCopy reads back with GL_PACK_SWAP_BYTES on.
+  k16in32 = 3,       // Not handled by IssueCopy yet.
+  k8in64 = 4,        // Not handled by IssueCopy yet.
+  k8in128 = 5,       // Not handled by IssueCopy yet.
+};
+
 enum class IndexFormat : uint32_t {
  kInt16,
  kInt32,
@ -80,6 +89,40 @@ enum class ModeControl : uint32_t {
  kCopy = 6,
 };

+enum class CopyCommand : uint32_t {  // Decoded from RB_COPY_CONTROL bits 20-21.
+  kRaw = 0,          // Not handled by IssueCopy yet.
+  kConvert = 1,      // IssueCopy reads the source target via glReadPixels.
+  kConstantOne = 2,  // Not handled by IssueCopy yet.
+  kNull = 3,  // ? (meaning unconfirmed); not handled by IssueCopy yet.
+};
+
+// Subset of a2xx_sq_surfaceformat; decoded from RB_COPY_DEST_INFO bits 7-12.
+enum class ColorFormat : uint32_t {
+  kColor_8 = 2,
+  kColor_1_5_5_5 = 3,
+  kColor_5_6_5 = 4,
+  kColor_6_5_5 = 5,
+  kColor_8_8_8_8 = 6,  // Only format IssueCopy reads back so far.
+  kColor_2_10_10_10 = 7,
+  kColor_8_A = 8,
+  kColor_8_B = 9,
+  kColor_8_8 = 10,
+  kColor_8_8_8_8_A = 14,
+  kColor_4_4_4_4 = 15,
+  kColor_10_11_11 = 16,
+  kColor_11_11_10 = 17,
+  kColor_16 = 24,
+  kColor_16_16 = 25,
+  kColor_16_16_16_16 = 26,
+  kColor_16_FLOAT = 30,
+  kColor_16_16_FLOAT = 31,
+  kColor_16_16_16_16_FLOAT = 32,
+  kColor_32_FLOAT = 36,
+  kColor_32_32_FLOAT = 37,
+  kColor_32_32_32_32_FLOAT = 38,
+  kColor_2_10_10_10_FLOAT = 62,
+};
+
 #define XE_GPU_MAKE_SWIZZLE(x, y, z, w)                        \
  (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
   ((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
--- a/src/xenia/kernel/xboxkrnl_video.cc
+++ b/src/xenia/kernel/xboxkrnl_video.cc
@ -369,14 +369,16 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
  uint32_t unk0 = SHIM_GET_ARG_32(0);  // ptr into primary ringbuffer
  uint32_t unk1 = SHIM_GET_ARG_32(1);
  uint32_t unk2 = SHIM_GET_ARG_32(2);
-  uint32_t unk3 = SHIM_GET_ARG_32(3);  // ptr to 0xBEEF0000
-  uint32_t unk4 = SHIM_GET_ARG_32(4);  // 0xBEEF0001
-  uint32_t unk5 = SHIM_GET_ARG_32(5);
-  uint32_t unk6 = SHIM_GET_ARG_32(6);  // ptr to 6?
+  uint32_t unk3 = SHIM_GET_ARG_32(3);             // ptr to 0xBEEF0000
+  uint32_t unk4 = SHIM_GET_ARG_32(4);             // 0xBEEF0001
+  uint32_t frontbuffer_ptr = SHIM_GET_ARG_32(5);  // ptr to frontbuffer address
+  uint32_t unk6 = SHIM_GET_ARG_32(6);             // ptr to 6?
  uint32_t unk7 = SHIM_GET_ARG_32(7);

-  XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X)", unk0, unk1,
-         unk2, unk3, unk4, unk5, unk6, unk7);
+  uint32_t frontbuffer = SHIM_MEM_32(frontbuffer_ptr);
+
+  XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X(%.8X), %.8X, %.8X)", unk0,
+         unk1, unk2, unk3, unk4, frontbuffer_ptr, frontbuffer, unk6, unk7);

  // The caller seems to reserve 64 words (256b) in the primary ringbuffer
  // for this method to do what it needs. We just zero them out and send a
@ -385,8 +387,9 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
  // use this method.
  memset(SHIM_MEM_ADDR(unk0), 0, 64 * 4);
  auto dwords = reinterpret_cast<uint32_t*>(SHIM_MEM_ADDR(unk0));
-  dwords[0] = poly::byte_swap((0x03 << 30) | ((1 - 1) << 16) |
+  dwords[0] = poly::byte_swap((0x03 << 30) | ((63 - 1) << 16) |
                              (xe::gpu::xenos::PM4_XE_SWAP << 8));
+  dwords[1] = poly::byte_swap(frontbuffer);

  SHIM_SET_RETURN_64(0);
 }