diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc
index 628b2d871..27247288a 100644
--- a/src/xenia/gpu/gl4/command_processor.cc
+++ b/src/xenia/gpu/gl4/command_processor.cc
@@ -592,8 +592,11 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader,
   // VdSwap will post this to tell us we need to swap the screen/fire an
   // interrupt.
   XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet);
-  reader->TraceData(count);
-  reader->Advance(count);
+  // 63 words here, but only the first has any data.
+  reader->TraceData(1);
+  uint32_t frontbuffer_ptr = reader->Read();
+  // TODO(benvanik): something with the frontbuffer ptr.
+  reader->Advance(count - 1);
   if (swap_handler_) {
     swap_handler_();
   }
@@ -1074,7 +1077,14 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
   if (enable_mode == ModeControl::kIgnore) {
     // Ignored.
     return true;
-  } else if (enable_mode == ModeControl::kCopy) {
+  }
+
+  if (!UpdateRenderTargets(draw_command)) {
+    PLOGE("Unable to setup render targets");
+    return false;
+  }
+
+  if (enable_mode == ModeControl::kCopy) {
     // Special copy handling.
     return IssueCopy(draw_command);
   }
@@ -1083,10 +1093,6 @@ bool CommandProcessor::IssueDraw(DrawCommand* draw_command) {
     PLOGE("Unable to setup render state");
     return false;
   }
-  if (!UpdateRenderTargets(draw_command)) {
-    PLOGE("Unable to setup render targets");
-    return false;
-  }
 
   // if (!PopulateShaders(draw_command)) {
   //  XELOGE("Unable to prepare draw shaders");
@@ -1470,9 +1476,126 @@ bool CommandProcessor::UpdateRenderTargets(DrawCommand* draw_command) {
 }
 
 bool CommandProcessor::IssueCopy(DrawCommand* draw_command) {
-  // uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
-  // uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF;
-  // copy_dest_pitch &= 0x3FFF;
+  auto& regs = *register_file_;
+
+  // This is used to resolve surfaces, taking them from EDRAM render targets
+  // to system memory. It can optionally clear color/depth surfaces, too.
+  // The command buffer has stuff for actually doing this by drawing, however
+  // we should be able to do it without that much easier.
+
+  uint32_t copy_control = regs[XE_GPU_REG_RB_COPY_CONTROL].u32;
+  // Render targets 0-3, 4 = depth
+  uint32_t copy_src_select = copy_control & 0x7;
+  bool color_clear_enabled = (copy_control >> 8) & 0x1;
+  bool depth_clear_enabled = (copy_control >> 9) & 0x1;
+  auto copy_command = static_cast<CopyCommand>((copy_control >> 20) & 0x3);
+
+  uint32_t copy_dest_info = regs[XE_GPU_REG_RB_COPY_DEST_INFO].u32;
+  auto copy_dest_endian = static_cast<Endian128>(copy_dest_info & 0x7);
+  uint32_t copy_dest_array = (copy_dest_info >> 3) & 0x1;
+  assert_true(copy_dest_array == 0);
+  uint32_t copy_dest_slice = (copy_dest_info >> 4) & 0x7;
+  assert_true(copy_dest_slice == 0);
+  auto copy_dest_format =
+      static_cast<ColorFormat>((copy_dest_info >> 7) & 0x3F);
+  uint32_t copy_dest_number = (copy_dest_info >> 13) & 0x7;
+  assert_true(copy_dest_number == 0);
+  uint32_t copy_dest_bias = (copy_dest_info >> 16) & 0x3F;
+  assert_true(copy_dest_bias == 0);
+  uint32_t copy_dest_swap = (copy_dest_info >> 25) & 0x1;
+
+  uint32_t copy_dest_base = regs[XE_GPU_REG_RB_COPY_DEST_BASE].u32;
+  uint32_t copy_dest_pitch = regs[XE_GPU_REG_RB_COPY_DEST_PITCH].u32;
+  uint32_t copy_dest_height = (copy_dest_pitch >> 16) & 0x3FFF;
+  copy_dest_pitch &= 0x3FFF;
+
+  // None of this is supported yet:
+  uint32_t copy_surface_slice = regs[XE_GPU_REG_RB_COPY_SURFACE_SLICE].u32;
+  assert_true(copy_surface_slice == 0);
+  uint32_t copy_func = regs[XE_GPU_REG_RB_COPY_FUNC].u32;
+  assert_true(copy_func == 0);
+  uint32_t copy_ref = regs[XE_GPU_REG_RB_COPY_REF].u32;
+  assert_true(copy_ref == 0);
+  uint32_t copy_mask = regs[XE_GPU_REG_RB_COPY_MASK].u32;
+  assert_true(copy_mask == 0);
+
+  GLenum read_format;
+  GLenum read_type;
+  switch (copy_dest_format) {
+    case ColorFormat::kColor_8_8_8_8:
+      read_format = copy_dest_swap ? GL_BGRA : GL_RGBA;
+      read_type = GL_UNSIGNED_BYTE;
+      break;
+    default:
+      assert_unhandled_case(copy_dest_format);
+      return false;
+  }
+
+  // TODO(benvanik): swap channel ordering on copy_dest_swap
+  //                 Can we use GL swizzles for this?
+
+  // Swap byte order during read.
+  // TODO(benvanik): handle other endian modes.
+  switch (copy_dest_endian) {
+    case Endian128::kUnspecified:
+      glPixelStorei(GL_PACK_SWAP_BYTES, GL_FALSE);
+      break;
+    case Endian128::k8in32:
+      glPixelStorei(GL_PACK_SWAP_BYTES, GL_TRUE);
+      break;
+    default:
+      assert_unhandled_case(copy_dest_endian);
+      return false;
+  }
+
+  // Destination pointer in guest memory.
+  // We have GL throw bytes directly into it.
+  // TODO(benvanik): copy to staging texture then PBO back?
+  void* ptr = membase_ + GpuToCpu(copy_dest_base);
+
+  uint32_t x = 0;
+  uint32_t y = 0;
+  uint32_t w = copy_dest_pitch;
+  uint32_t h = copy_dest_height;
+  switch (copy_command) {
+    case CopyCommand::kConvert:
+      if (copy_src_select <= 3) {
+        // Source from a bound render target.
+        glReadBuffer(GL_COLOR_ATTACHMENT0 + copy_src_select);
+        glReadPixels(x, y, w, h, read_format, read_type, ptr);
+      } else {
+        // Source from the bound depth/stencil target.
+        // GL only accepts packed depth/stencil types with GL_DEPTH_STENCIL,
+        // so the color read_type selected above cannot be reused here.
+        glReadPixels(x, y, w, h, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ptr);
+      }
+      break;
+    case CopyCommand::kRaw:
+    case CopyCommand::kConstantOne:
+    case CopyCommand::kNull:
+    default:
+      assert_unhandled_case(copy_command);
+      return false;
+  }
+
+  if (color_clear_enabled || depth_clear_enabled) {
+    // Clear requested, so let's setup for that.
+    uint32_t copy_depth_clear = regs[XE_GPU_REG_RB_DEPTH_CLEAR].u32;
+    uint32_t copy_color_clear = regs[XE_GPU_REG_RB_COLOR_CLEAR].u32;
+    uint32_t copy_color_clear_low = regs[XE_GPU_REG_RB_COLOR_CLEAR_LOW].u32;
+    assert_true(copy_color_clear == copy_color_clear_low);
+
+    if (color_clear_enabled) {
+      // Clear the render target we selected for copy.
+      // Selects 0-3 are color targets (4 is depth), matching the read above.
+      assert_true(copy_src_select <= 3);
+    }
+
+    if (depth_clear_enabled) {
+      // Clear the current depth buffer.
+    }
+  }
+
   return true;
 }
 
@@ -1567,11 +1690,12 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
   CachedFramebuffer* cached = nullptr;
   for (auto& it = cached_framebuffers_.begin();
        it != cached_framebuffers_.end(); ++it) {
-    if (it->depth_target == depth_target &&
-        it->color_targets[0] == color_targets[0] &&
-        it->color_targets[1] == color_targets[1] &&
-        it->color_targets[2] == color_targets[2] &&
-        it->color_targets[3] == color_targets[3]) {
+    // A requested target of -1u is a wildcard that matches any cached value.
+    if ((depth_target == -1u || it->depth_target == depth_target) &&
+        (color_targets[0] == -1u || it->color_targets[0] == color_targets[0]) &&
+        (color_targets[1] == -1u || it->color_targets[1] == color_targets[1]) &&
+        (color_targets[2] == -1u || it->color_targets[2] == color_targets[2]) &&
+        (color_targets[3] == -1u || it->color_targets[3] == color_targets[3])) {
       return it->framebuffer;
     }
   }
@@ -1579,9 +1703,17 @@ GLuint CommandProcessor::GetFramebuffer(GLuint color_targets[4],
   cached = &cached_framebuffers_.back();
   glCreateFramebuffers(1, &cached->framebuffer);
   for (int i = 0; i < 4; ++i) {
-    cached->color_targets[i] = color_targets[i];
+    // Wildcard (-1u) slots are recorded and attached as texture 0.
+    uint32_t color_target = color_targets[i];
+    if (color_target == -1u) {
+      color_target = 0;
+    }
+    cached->color_targets[i] = color_target;
     glNamedFramebufferTexture(cached->framebuffer, GL_COLOR_ATTACHMENT0 + i,
-                              color_targets[i], 0);
+                              color_target, 0);
+  }
+  if (depth_target == -1u) {
+    depth_target = 0;
   }
   cached->depth_target = depth_target;
   glNamedFramebufferTexture(cached->framebuffer, GL_DEPTH_STENCIL_ATTACHMENT,
diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h
index 54e706688..97c48254c 100644
--- a/src/xenia/gpu/xenos.h
+++ b/src/xenia/gpu/xenos.h
@@ -51,6 +51,16 @@ enum class Endian : uint32_t {
   k16in32 = 3,
 };
 
+// Endian modes decoded from the low 3 bits of RB_COPY_DEST_INFO.
+enum class Endian128 : uint32_t {
+  kUnspecified = 0,
+  k8in16 = 1,
+  k8in32 = 2,
+  k16in32 = 3,
+  k8in64 = 4,
+  k8in128 = 5,
+};
+
 enum class IndexFormat : uint32_t {
   kInt16,
   kInt32,
@@ -80,6 +90,41 @@ enum class ModeControl : uint32_t {
   kCopy = 6,
 };
 
+// Copy operation decoded from bits 20-21 of RB_COPY_CONTROL.
+enum class CopyCommand : uint32_t {
+  kRaw = 0,
+  kConvert = 1,
+  kConstantOne = 2,
+  kNull = 3,  // ?
+};
+
+// Subset of a2xx_sq_surfaceformat.
+enum class ColorFormat : uint32_t {
+  kColor_8 = 2,
+  kColor_1_5_5_5 = 3,
+  kColor_5_6_5 = 4,
+  kColor_6_5_5 = 5,
+  kColor_8_8_8_8 = 6,
+  kColor_2_10_10_10 = 7,
+  kColor_8_A = 8,
+  kColor_8_B = 9,
+  kColor_8_8 = 10,
+  kColor_8_8_8_8_A = 14,
+  kColor_4_4_4_4 = 15,
+  kColor_10_11_11 = 16,
+  kColor_11_11_10 = 17,
+  kColor_16 = 24,
+  kColor_16_16 = 25,
+  kColor_16_16_16_16 = 26,
+  kColor_16_FLOAT = 30,
+  kColor_16_16_FLOAT = 31,
+  kColor_16_16_16_16_FLOAT = 32,
+  kColor_32_FLOAT = 36,
+  kColor_32_32_FLOAT = 37,
+  kColor_32_32_32_32_FLOAT = 38,
+  kColor_2_10_10_10_FLOAT = 62,
+};
+
 #define XE_GPU_MAKE_SWIZZLE(x, y, z, w)                        \
   (((XE_GPU_SWIZZLE_##x) << 0) | ((XE_GPU_SWIZZLE_##y) << 3) | \
    ((XE_GPU_SWIZZLE_##z) << 6) | ((XE_GPU_SWIZZLE_##w) << 9))
diff --git a/src/xenia/kernel/xboxkrnl_video.cc b/src/xenia/kernel/xboxkrnl_video.cc
index e21d02ace..36bc76069 100644
--- a/src/xenia/kernel/xboxkrnl_video.cc
+++ b/src/xenia/kernel/xboxkrnl_video.cc
@@ -369,14 +369,16 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
   uint32_t unk0 = SHIM_GET_ARG_32(0);  // ptr into primary ringbuffer
   uint32_t unk1 = SHIM_GET_ARG_32(1);
   uint32_t unk2 = SHIM_GET_ARG_32(2);
-  uint32_t unk3 = SHIM_GET_ARG_32(3);  // ptr to 0xBEEF0000
-  uint32_t unk4 = SHIM_GET_ARG_32(4);  // 0xBEEF0001
-  uint32_t unk5 = SHIM_GET_ARG_32(5);
-  uint32_t unk6 = SHIM_GET_ARG_32(6);  // ptr to 6?
+  uint32_t unk3 = SHIM_GET_ARG_32(3);             // ptr to 0xBEEF0000
+  uint32_t unk4 = SHIM_GET_ARG_32(4);             // 0xBEEF0001
+  uint32_t frontbuffer_ptr = SHIM_GET_ARG_32(5);  // ptr to frontbuffer address
+  uint32_t unk6 = SHIM_GET_ARG_32(6);             // ptr to 6?
   uint32_t unk7 = SHIM_GET_ARG_32(7);
 
-  XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X, %.8X)", unk0, unk1,
-         unk2, unk3, unk4, unk5, unk6, unk7);
+  uint32_t frontbuffer = SHIM_MEM_32(frontbuffer_ptr);
+
+  XELOGD("VdSwap(%.8X, %.8X, %.8X, %.8X, %.8X, %.8X(%.8X), %.8X, %.8X)", unk0,
+         unk1, unk2, unk3, unk4, frontbuffer_ptr, frontbuffer, unk6, unk7);
 
   // The caller seems to reserve 64 words (256b) in the primary ringbuffer
   // for this method to do what it needs. We just zero them out and send a
@@ -385,8 +387,9 @@ SHIM_CALL VdSwap_shim(PPCContext* ppc_state, KernelState* state) {
   // use this method.
   memset(SHIM_MEM_ADDR(unk0), 0, 64 * 4);
   auto dwords = reinterpret_cast<uint32_t*>(SHIM_MEM_ADDR(unk0));
-  dwords[0] = poly::byte_swap((0x03 << 30) | ((1 - 1) << 16) |
+  dwords[0] = poly::byte_swap((0x03 << 30) | ((63 - 1) << 16) |
                               (xe::gpu::xenos::PM4_XE_SWAP << 8));
+  dwords[1] = poly::byte_swap(frontbuffer);
 
   SHIM_SET_RETURN_64(0);
 }