diff --git a/src/xenia/app/emulator_window.cc b/src/xenia/app/emulator_window.cc index 3367dc228..90257d45d 100644 --- a/src/xenia/app/emulator_window.cc +++ b/src/xenia/app/emulator_window.cc @@ -1147,8 +1147,8 @@ const std::map controller_hotkey_map = { "A + Guide = Toggle Readback Resolve", true)}, {X_INPUT_GAMEPAD_B | X_INPUT_GAMEPAD_GUIDE, EmulatorWindow::ControllerHotKey( - EmulatorWindow::ButtonFunctions::CloseWindow, - "B + Guide = Close Window")}, + EmulatorWindow::ButtonFunctions::ToggleLogging, + "B + Guide = Toggle between loglevel set in config and the 'Disabled' loglevel.", true, true)}, {X_INPUT_GAMEPAD_Y | X_INPUT_GAMEPAD_GUIDE, EmulatorWindow::ControllerHotKey( EmulatorWindow::ButtonFunctions::ToggleFullscreen, @@ -1190,7 +1190,7 @@ const std::map controller_hotkey_map = { "Start = Run Selected Title", false, false)}, {X_INPUT_GAMEPAD_BACK | X_INPUT_GAMEPAD_START, EmulatorWindow::ControllerHotKey( - EmulatorWindow::ButtonFunctions::CloseWindow, + EmulatorWindow::ButtonFunctions::ToggleLogging, "Back + Start = Close Window", false, false)}, {X_INPUT_GAMEPAD_DPAD_DOWN, EmulatorWindow::ControllerHotKey( @@ -1296,8 +1296,8 @@ EmulatorWindow::ControllerHotKey EmulatorWindow::ProcessControllerHotkey( case ButtonFunctions::DecTitleSelect: selected_title_index--; break; - case ButtonFunctions::CloseWindow: - window_->RequestClose(); + case ButtonFunctions::ToggleLogging: + logging::internal::ToggleLogLevel(); break; case ButtonFunctions::Unknown: default: diff --git a/src/xenia/app/emulator_window.h b/src/xenia/app/emulator_window.h index e1bf297b4..b25941826 100644 --- a/src/xenia/app/emulator_window.h +++ b/src/xenia/app/emulator_window.h @@ -85,7 +85,7 @@ class EmulatorWindow { ToggleControllerVibration, ClearMemoryPageState, ReadbackResolve, - CloseWindow, + ToggleLogging, IncTitleSelect, DecTitleSelect, Unknown diff --git a/src/xenia/apu/xma_decoder.cc b/src/xenia/apu/xma_decoder.cc index eac5d3d53..6562266fa 100644 --- a/src/xenia/apu/xma_decoder.cc +++ b/src/xenia/apu/xma_decoder.cc @@ -101,7 +101,8 @@ void av_log_callback(void* avcl, int level, const char* fmt, va_list va) { StringBuffer buff; buff.AppendVarargs(fmt, va); - xe::logging::AppendLogLineFormat(log_level, level_char, "ffmpeg: {}", + xe::logging::AppendLogLineFormat(LogSrc::Apu, log_level, level_char, + "ffmpeg: {}", buff.to_string_view()); } diff --git a/src/xenia/base/logging.cc b/src/xenia/base/logging.cc index 08463bb0b..eec0fb93b 100644 --- a/src/xenia/base/logging.cc +++ b/src/xenia/base/logging.cc @@ -55,6 +55,12 @@ DEFINE_bool(log_to_debugprint, false, "Dump the log to DebugPrint.", "Logging"); #endif // XE_PLATFORM_ANDROID DEFINE_bool(flush_log, true, "Flush log file after each log line batch.", "Logging"); + +DEFINE_uint32(log_mask, 0, + "Disables specific categorizes for more granular debug logging. " + "Kernel = 1, Apu = 2, Cpu = 4.", + "Logging"); + DEFINE_int32( log_level, 2, "Maximum level to be logged. (0=error, 1=warning, 2=info, 3=debug)", @@ -465,8 +471,16 @@ void ShutdownLogging() { memory::AlignedFree(logger); } -bool logging::internal::ShouldLog(LogLevel log_level) { - return static_cast(log_level) <= cvars::log_level; +static int g_saved_loglevel = static_cast(LogLevel::Disabled); +void logging::internal::ToggleLogLevel() { + auto swap = g_saved_loglevel; + + g_saved_loglevel = cvars::log_level; + cvars::log_level = swap; +} +bool logging::internal::ShouldLog(LogLevel log_level, uint32_t log_mask) { + return static_cast(log_level) <= cvars::log_level && + (log_mask & cvars::log_mask) == 0; } std::pair logging::internal::GetThreadBuffer() { @@ -483,8 +497,8 @@ void logging::internal::AppendLogLine(LogLevel log_level, } void logging::AppendLogLine(LogLevel log_level, const char prefix_char, - const std::string_view str) { - if (!internal::ShouldLog(log_level) || !str.size()) { + const std::string_view str, uint32_t log_mask) { + if (!internal::ShouldLog(log_level, log_mask) || !str.size()) { return; } logger_->AppendLine(xe::threading::current_thread_id(), prefix_char, diff --git a/src/xenia/base/logging.h b/src/xenia/base/logging.h index 5f6331d48..9401a59de 100644 --- a/src/xenia/base/logging.h +++ b/src/xenia/base/logging.h @@ -27,6 +27,7 @@ namespace xe { // may be related to. These names should not be taken as fact as what a given // log line from any log level actually is. enum class LogLevel { + Disabled = -1, Error = 0, Warning, Info, @@ -34,6 +35,15 @@ enum class LogLevel { Trace, }; +// bitmasks! +namespace LogSrc { +enum : uint32_t { + Uncategorized = 0, + Kernel = 1, + Apu = 2, + Cpu = 4, +}; +} class LogSink { public: virtual ~LogSink() = default; @@ -72,32 +82,38 @@ void ShutdownLogging(); namespace logging { namespace internal { -bool ShouldLog(LogLevel log_level); +void ToggleLogLevel(); + +bool ShouldLog(LogLevel log_level, + uint32_t log_mask = xe::LogSrc::Uncategorized); std::pair GetThreadBuffer(); XE_NOALIAS void AppendLogLine(LogLevel log_level, const char prefix_char, size_t written); } // namespace internal -//technically, noalias is incorrect here, these functions do in fact alias global memory, -//but msvc will not optimize the calls away, and the global memory modified by the calls is limited to internal logging variables, -//so it might as well be noalias +// technically, noalias is incorrect here, these functions do in fact alias +// global memory, but msvc will not optimize the calls away, and the global +// memory modified by the calls is limited to internal logging variables, so it +// might as well be noalias template -XE_NOALIAS -XE_NOINLINE XE_COLD static void AppendLogLineFormat_Impl(LogLevel log_level, - const char prefix_char, - const char* format, - const Args&... args) { +XE_NOALIAS XE_NOINLINE XE_COLD static void AppendLogLineFormat_Impl( + LogLevel log_level, const char prefix_char, const char* format, + const Args&... args) { auto target = internal::GetThreadBuffer(); auto result = fmt::format_to_n(target.first, target.second, format, args...); internal::AppendLogLine(log_level, prefix_char, result.size); } - // Appends a line to the log with {fmt}-style formatting. -//chrispy: inline the initial check, outline the append. the append should happen rarely for end users +// Appends a line to the log with {fmt}-style formatting. +// chrispy: inline the initial check, outline the append. the append should +// happen rarely for end users template -XE_FORCEINLINE static void AppendLogLineFormat(LogLevel log_level, const char prefix_char, - const char* format, const Args&... args) { - if (!internal::ShouldLog(log_level)) { +XE_FORCEINLINE static void AppendLogLineFormat(uint32_t log_src_mask, + LogLevel log_level, + const char prefix_char, + const char* format, + const Args&... args) { + if (!internal::ShouldLog(log_level, log_src_mask)) { return; } AppendLogLineFormat_Impl(log_level, prefix_char, format, args...); @@ -105,7 +121,36 @@ XE_FORCEINLINE static void AppendLogLineFormat(LogLevel log_level, const char pr // Appends a line to the log. void AppendLogLine(LogLevel log_level, const char prefix_char, - const std::string_view str); + const std::string_view str, + uint32_t log_mask = LogSrc::Uncategorized); + +template +struct LoggerBatch { + char* thrd_buf; // current position in thread buffer + size_t thrd_buf_rem; // num left in thrd buffer + size_t total_size; + + void reset() { + auto target = logging::internal::GetThreadBuffer(); + + thrd_buf = target.first; + thrd_buf_rem = target.second; + total_size = 0; + } + + LoggerBatch() { reset(); } + template + void operator()(const char (&fmt)[fmtlen], Ts&&... args) { + auto tmpres = fmt::format_to_n(thrd_buf, thrd_buf_rem, fmt, args...); + thrd_buf_rem -= tmpres.size; + thrd_buf = tmpres.out; + total_size += tmpres.size; + } + + void submit(char prefix_char) { + logging::internal::AppendLogLine(ll, prefix_char, total_size); + } +}; } // namespace logging @@ -118,48 +163,56 @@ void AppendLogLine(LogLevel log_level, const char prefix_char, template XE_COLD void XELOGE(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Error, '!', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized, + xe::LogLevel::Error, '!', format, args...); } template -XE_COLD -void XELOGW(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Warning, 'w', format, args...); +XE_COLD void XELOGW(const char* format, const Args&... args) { + xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized, + xe::LogLevel::Warning, 'w', format, args...); } template void XELOGI(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'i', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized, + xe::LogLevel::Info, 'i', format, args...); } template void XELOGD(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 'd', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized, + xe::LogLevel::Debug, 'd', format, args...); } template void XELOGCPU(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'C', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Cpu, xe::LogLevel::Info, 'C', + format, args...); } template void XELOGAPU(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 'A', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Apu, xe::LogLevel::Debug, 'A', + format, args...); } template void XELOGGPU(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 'G', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized, + xe::LogLevel::Debug, 'G', format, args...); } template void XELOGKERNEL(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'K', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Kernel, xe::LogLevel::Info, 'K', + format, args...); } template void XELOGFS(const char* format, const Args&... args) { - xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'F', format, args...); + xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized, + xe::LogLevel::Info, 'F', format, args...); } #else diff --git a/src/xenia/cpu/backend/backend.h b/src/xenia/cpu/backend/backend.h index 2e167b0f8..565dde35d 100644 --- a/src/xenia/cpu/backend/backend.h +++ b/src/xenia/cpu/backend/backend.h @@ -28,7 +28,14 @@ class Processor; namespace xe { namespace cpu { namespace backend { +static constexpr uint32_t MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES = 32; +struct GuestPseudoStackTrace { + uint32_t count; + uint32_t truncated_flag; // set to 1 if there were more than + // MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES entries. + uint32_t return_addrs[MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES]; +}; class Assembler; class CodeCache; @@ -84,6 +91,10 @@ class Backend { * */ virtual void PrepareForReentry(void* ctx) {} + // returns true if populated st + virtual bool PopulatePseudoStacktrace(GuestPseudoStackTrace* st) { + return false; + } protected: Processor* processor_ = nullptr; MachineInfo machine_info_; diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 3969f91a4..1e1f3fedb 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -1131,6 +1131,40 @@ void X64Backend::SetGuestRoundingMode(void* ctx, unsigned int mode) { ((ppc::PPCContext*)ctx)->fpscr.bits.rn = control; } +bool X64Backend::PopulatePseudoStacktrace(GuestPseudoStackTrace* st) { + if (!cvars::enable_host_guest_stack_synchronization) { + return false; + } + + ThreadState* thrd_state = ThreadState::Get(); + if (!thrd_state) { + return false; // we're not a guest! + } + ppc::PPCContext* ctx = thrd_state->context(); + + X64BackendContext* backend_ctx = BackendContextForGuestContext(ctx); + + uint32_t depth = backend_ctx->current_stackpoint_depth - 1; + if (static_cast(depth) < 1) { + return false; + } + uint32_t num_entries_to_populate = + std::min(MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES, depth); + + st->count = num_entries_to_populate; + st->truncated_flag = num_entries_to_populate < depth ? 1 : 0; + + X64BackendStackpoint* current_stackpoint = + &backend_ctx->stackpoints[backend_ctx->current_stackpoint_depth - 1]; + + for (uint32_t stp_index = 0; stp_index < num_entries_to_populate; + ++stp_index) { + st->return_addrs[stp_index] = current_stackpoint->guest_return_address_; + current_stackpoint--; + } + return true; +} + #if XE_X64_PROFILER_AVAILABLE == 1 uint64_t* X64Backend::GetProfilerRecordForFunction(uint32_t guest_address) { // who knows, we might want to compile different versions of a function one diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h index 55ef7da6d..97b37a692 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.h +++ b/src/xenia/cpu/backend/x64/x64_backend.h @@ -145,7 +145,7 @@ class X64Backend : public Backend { reinterpret_cast(ctx) - sizeof(X64BackendContext)); } virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override; - + virtual bool PopulatePseudoStacktrace(GuestPseudoStackTrace* st) override; void RecordMMIOExceptionForGuestInstruction(void* host_address); #if XE_X64_PROFILER_AVAILABLE == 1 uint64_t* GetProfilerRecordForFunction(uint32_t guest_address); diff --git a/src/xenia/cpu/backend/x64/x64_tracers.cc b/src/xenia/cpu/backend/x64/x64_tracers.cc index 325589bad..afe27ec21 100644 --- a/src/xenia/cpu/backend/x64/x64_tracers.cc +++ b/src/xenia/cpu/backend/x64/x64_tracers.cc @@ -34,11 +34,12 @@ bool trace_enabled = true; #define IFLUSH() #define IPRINT(s) \ if (trace_enabled && THREAD_MATCH) \ - xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s) + xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s, xe::LogSrc::Cpu) #define DFLUSH() #define DPRINT(...) \ if (trace_enabled && THREAD_MATCH) \ - xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 't', __VA_ARGS__) + xe::logging::AppendLogLineFormat(xe::LogSrc::Cpu, xe::LogLevel::Debug, 't', \ + __VA_ARGS__) uint32_t GetTracingMode() { uint32_t mode = 0; diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index d759e97b3..21fcbe061 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -13,14 +13,37 @@ #include "third_party/fmt/include/fmt/format.h" #include "xenia/base/byte_stream.h" +#include "xenia/base/cvar.h" #include "xenia/base/logging.h" #include "xenia/base/profiling.h" #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/graphics_system.h" +#include "xenia/gpu/packet_disassembler.h" #include "xenia/gpu/sampler_info.h" #include "xenia/gpu/texture_info.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" +#if !defined(NDEBUG) + +#define XE_ENABLE_GPU_REG_WRITE_LOGGING 1 +#endif +DEFINE_bool( + log_guest_driven_gpu_register_written_values, false, + "Only does anything in debug builds, if set will log every write to a gpu " + "register done by a guest. Does not log writes that are done by the CP on " + "its own, just ones the guest makes or instructs it to make.", + "GPU"); + +DEFINE_bool(disassemble_pm4, false, + "Only does anything in debug builds, if set will disassemble and " + "log all PM4 packets sent to the CP.", + "GPU"); + +DEFINE_bool( + log_ringbuffer_kickoff_initiator_bts, false, + "Only does anything in debug builds, if set will log the pseudo-stacktrace " + "of the guest thread that wrote the new read position.", + "GPU"); namespace xe { namespace gpu { @@ -234,6 +257,7 @@ void CommandProcessor::WorkerThreadMain() { // TODO(benvanik): use reader->Read_update_freq_ and only issue after moving // that many indices. + // Keep in mind that the gpu also updates the cpu-side copy if the write pointer and read pointer would be equal if (read_ptr_writeback_ptr_) { xe::store_and_swap( memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_); @@ -320,10 +344,94 @@ void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr, read_ptr_update_freq_ = uint32_t(1) << block_size_log2 >> 2; } +XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) { + cpu::backend::GuestPseudoStackTrace st; + + if (logging::internal::ShouldLog(LogLevel::Debug) && kernel_state_->processor() + ->backend() + ->PopulatePseudoStacktrace(&st)) { + logging::LoggerBatch log_initiator{}; + + log_initiator("Updating read ptr to {}, initiator stacktrace below\n", + value); + + for (uint32_t i = 0; i < st.count; ++i) { + log_initiator("\t{:08X}\n", st.return_addrs[i]); + } + + if (st.truncated_flag) { + log_initiator("\t(Truncated stacktrace to {} entries)\n", + cpu::backend::MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES); + } + log_initiator.submit('d'); + } +} + void CommandProcessor::UpdateWritePointer(uint32_t value) { + XE_UNLIKELY_IF (cvars::log_ringbuffer_kickoff_initiator_bts) { + LogKickoffInitator(value); + } write_ptr_index_ = value; write_ptr_index_event_->SetBoostPriority(); } + +void CommandProcessor::LogRegisterSet(uint32_t register_index, uint32_t value) { +#if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1 + if (cvars::log_guest_driven_gpu_register_written_values && logging::internal::ShouldLog(LogLevel::Debug)) { + const RegisterInfo* reginfo = RegisterFile::GetRegisterInfo(register_index); + + if (!reginfo) { + XELOGD("Unknown_Reg{:04X} <- {:08X}\n", register_index, value); + } else { + XELOGD("{} <- {:08X}\n", reginfo->name, value); + } + } +#endif +} + +void CommandProcessor::LogRegisterSets(uint32_t base_register_index, + const uint32_t* values, + uint32_t n_values) { +#if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1 + if (cvars::log_guest_driven_gpu_register_written_values && + logging::internal::ShouldLog(LogLevel::Debug)) { + auto target = logging::internal::GetThreadBuffer(); + + auto target_ptr = target.first; + + size_t total_size = 0; + + size_t rem_size = target.second; + + for (uint32_t i = 0; i < n_values; ++i) { + uint32_t register_index = base_register_index + i; + + uint32_t value = xe::load_and_swap(&values[i]); + + const RegisterInfo* reginfo = + RegisterFile::GetRegisterInfo(register_index); + + if (!reginfo) { + auto tmpres = fmt::format_to_n(target_ptr, rem_size, + "Unknown_Reg{:04X} <- {:08X}\n", + register_index, value); + target_ptr = tmpres.out; + rem_size -= tmpres.size; + total_size += tmpres.size; + + } else { + auto tmpres = fmt::format_to_n(target_ptr, rem_size, "{} <- {:08X}\n", + reginfo->name, value); + rem_size -= tmpres.size; + target_ptr = tmpres.out; + total_size += tmpres.size; + } + } + logging::internal::AppendLogLine(LogLevel::Debug, 'd', total_size); + } +#endif +} + void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index, uint32_t value) { RegisterFile& regs = *register_file_; diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 0553a4184..46ce73d9e 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -131,7 +131,9 @@ class CommandProcessor { void UpdateWritePointer(uint32_t value); - + void LogRegisterSet(uint32_t register_index, uint32_t value); + void LogRegisterSets(uint32_t base_register_index, const uint32_t* values, + uint32_t n_values); bool is_paused() const { return paused_; } void Pause(); @@ -296,6 +298,8 @@ class CommandProcessor { reg::DC_LUT_30_COLOR gamma_ramp_256_entry_table_[256] = {}; reg::DC_LUT_PWL_DATA gamma_ramp_pwl_rgb_[128][3] = {}; uint32_t gamma_ramp_rw_component_ = 0; + + XE_NOINLINE XE_COLD void LogKickoffInitator(uint32_t value); }; } // namespace gpu diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 164de4a0a..7da4a74a9 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -24,6 +24,7 @@ #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/registers.h" #include "xenia/gpu/xenos.h" +#include "xenia/gpu/packet_disassembler.h" #include "xenia/ui/d3d12/d3d12_presenter.h" #include "xenia/ui/d3d12/d3d12_util.h" @@ -1816,6 +1817,7 @@ void D3D12CommandProcessor::WriteRegisterForceinline(uint32_t index, } // todo: bit-pack the bools and use bitarith to reduce branches void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { + LogRegisterSet(index, value); WriteRegisterForceinline(index, value); } @@ -2073,7 +2075,7 @@ XE_FORCEINLINE void D3D12CommandProcessor::WriteRegisterRangeFromMem_WithKnownBound( uint32_t start_index, uint32_t* base, uint32_t num_registers) { uint32_t end = start_index + num_registers; - + LogRegisterSets(start_index, base, num_registers); uint32_t current_index = start_index; auto get_end_before_qty = [&end, current_index](uint32_t regnum) { diff --git a/src/xenia/gpu/gpu_flags.h b/src/xenia/gpu/gpu_flags.h index 2a675f5a1..9a6ef3343 100644 --- a/src/xenia/gpu/gpu_flags.h +++ b/src/xenia/gpu/gpu_flags.h @@ -26,4 +26,6 @@ DECLARE_bool(half_pixel_offset); DECLARE_int32(query_occlusion_fake_sample_count); +DECLARE_bool(disassemble_pm4); + #endif // XENIA_GPU_GPU_FLAGS_H_ diff --git a/src/xenia/gpu/packet_disassembler.h b/src/xenia/gpu/packet_disassembler.h index 942a88409..8aa60a4a2 100644 --- a/src/xenia/gpu/packet_disassembler.h +++ b/src/xenia/gpu/packet_disassembler.h @@ -36,6 +36,31 @@ struct PacketAction { kRegisterWrite, kSetBinMask, kSetBinSelect, + kSetBinMaskLo, + kSetBinMaskHi, + kSetBinSelectLo, + kSetBinSelectHi, + kMeInit, + kGenInterrupt, + kWaitRegMem, + kRegRmw, + kCondWrite, + kEventWrite, + kEventWriteSHD, + kEventWriteExt, + kDrawIndx, + kDrawIndx2, + kInvalidateState, + kImLoad, + kImLoadImmediate, + kContextUpdate, + kWaitForIdle, + kVizQuery, + kEventWriteZPD, + kMemWrite, + kRegToMem, + kIndirBuffer, + kXeSwap }; Type type; @@ -50,8 +75,121 @@ struct PacketAction { struct { uint64_t value; } set_bin_select; + struct { + uint32_t cpu_mask; + } gen_interrupt; + + struct { + uint32_t wait_info; + uint32_t poll_reg_addr; + uint32_t ref; + uint32_t mask; + uint32_t wait; + } wait_reg_mem; + struct { + uint32_t rmw_info, and_mask, or_mask; + } reg_rmw; + + struct { + uint32_t wait_info; + uint32_t poll_reg_addr; + uint32_t ref; + uint32_t mask; + uint32_t write_reg_addr; + uint32_t write_data; + } cond_write; + + struct { + uint32_t initiator; + } event_write; + + struct { + uint32_t initiator, address, value; + } event_write_shd; + + struct { + uint32_t unk0, unk1; + } event_write_ext; + + struct { + uint32_t initiator; + } event_write_zpd; + struct { + uint32_t dword0, dword1, index_count; + xenos::PrimitiveType prim_type; + uint32_t src_sel; + uint32_t guest_base; + uint32_t index_size; + xenos::Endian endianness; + + } draw_indx; + + struct { + uint32_t dword0; + uint32_t index_count; + xenos::PrimitiveType prim_type; + uint32_t src_sel; + uint32_t indices_size; + + } draw_indx2; + + struct { + uint32_t state_mask; + } invalidate_state; + + struct { + xenos::ShaderType shader_type; + uint32_t addr; + uint32_t start; + uint32_t size_dwords; + } im_load; + struct { + xenos::ShaderType shader_type; + uint32_t start; + uint32_t size_dwords; + } im_load_imm; + + // for bin select hi/lo binmask hi/lo + struct { + uint32_t value; + } lohi_op; + struct { + // i think waitforidle is just being pad to a qword + uint32_t probably_unused; + } wait_for_idle; + + struct { + uint32_t maybe_unused; + } context_update; + + struct { + uint32_t dword0; + + uint32_t id; + bool end; + } vizquery; + + struct { + uint32_t addr; + xenos::Endian endianness; + } mem_write; + + struct { + uint32_t reg_addr; + uint32_t mem_addr; + xenos::Endian endianness; + } reg2mem; + struct { + uint32_t frontbuffer_ptr; + } xe_swap; + struct { + uint32_t list_ptr; + uint32_t list_length; + } indir_buffer; }; + std::vector words; + static PacketAction RegisterWrite(uint32_t index, uint32_t value) { PacketAction action; action.type = Type::kRegisterWrite; @@ -73,6 +211,32 @@ struct PacketAction { action.set_bin_select.value = value; return action; } + + void InjectBeWords(const uint32_t* values, uint32_t count) { + for (unsigned i = 0; i < count; ++i) { + words.push_back(xe::load_and_swap(&values[i])); + } + } + void InjectBeHalfwordsAsWords(const uint16_t* values, uint32_t count) { + for (unsigned i = 0; i < count; ++i) { + words.push_back( + static_cast(xe::load_and_swap(&values[i]))); + } + } + + static PacketAction MeInit(const uint32_t* values, uint32_t count) { + PacketAction action; + action.type = Type::kMeInit; + action.InjectBeWords(values, count); + return action; + } + + static PacketAction Interrupt(uint32_t cpu_mask) { + PacketAction action; + action.type = Type::kGenInterrupt; + action.gen_interrupt.cpu_mask = cpu_mask; + return action; + } }; struct PacketInfo { diff --git a/src/xenia/gpu/pm4_command_processor_declare.h b/src/xenia/gpu/pm4_command_processor_declare.h index de6b66f6f..5f3e45a76 100644 --- a/src/xenia/gpu/pm4_command_processor_declare.h +++ b/src/xenia/gpu/pm4_command_processor_declare.h @@ -15,7 +15,9 @@ public: void ExecutePacket(uint32_t ptr, uint32_t count); protected: - XE_NOINLINE +XE_NOINLINE +void DisassembleCurrentPacket() XE_RESTRICT; +XE_NOINLINE bool ExecutePacketType0( uint32_t packet) XE_RESTRICT; XE_NOINLINE bool ExecutePacketType1( uint32_t packet) XE_RESTRICT; diff --git a/src/xenia/gpu/pm4_command_processor_implement.h b/src/xenia/gpu/pm4_command_processor_implement.h index 4f0aaa330..62e2b1f64 100644 --- a/src/xenia/gpu/pm4_command_processor_implement.h +++ b/src/xenia/gpu/pm4_command_processor_implement.h @@ -1,4 +1,9 @@ #pragma once + +#if !defined(NDEBUG) +#define XE_ENABLE_PM4_DISASM 1 +#endif + using namespace xe::gpu::xenos; void COMMAND_PROCESSOR::ExecuteIndirectBuffer(uint32_t ptr, uint32_t count) XE_RESTRICT { @@ -35,8 +40,237 @@ void COMMAND_PROCESSOR::ExecuteIndirectBuffer(uint32_t ptr, return; } } +XE_NOINLINE +static void LOGU32s(logging::LoggerBatch& logger, + const std::vector& values) { + bool first = true; +#if 0 + XELOGD("[ "); + for (auto&& val : values) { + if (first) { + XELOGD("0x{:08X}", val); + first = false; + } else { + XELOGD(", 0x{:08X}", val); + } + } + XELOGD(" ]"); +#else + for (auto&& val : values) { + if (first) { + logger("0x{:08X}", val); + first = false; + } else { + logger(", 0x{:08X}", val); + } + } +#endif +} + +std::string GenerateRegnameForPm4Print(uint32_t reg) { + auto reg_info = RegisterFile::GetRegisterInfo(reg); + + if (reg_info) { + return reg_info->name; + } else { + return fmt::format("Unknown_Reg_{:04X}", reg); + } +} + +XE_NOINLINE +void COMMAND_PROCESSOR::DisassembleCurrentPacket() XE_RESTRICT { + xe::gpu::PacketInfo packet_info; + + logging::LoggerBatch logger{}; + + if (PacketDisassembler::DisasmPacket(reader_.buffer() + reader_.read_offset(), + &packet_info)) { + logger("CP - {}, count {}, predicated = {}\n", packet_info.type_info->name, + packet_info.count, packet_info.predicated); + +#define LOG_ACTION_FIELD(__type, name) \ + logger("\t" #name " = {:08X}\n", action.__type.name) +#define LOG_ACTION_FIELD_DEC(__type, name) \ + logger("\t" #name " = {}\n", action.__type.name) + +#define LOG_ENDIANNESS(__type, name) \ + logger("\t" #name " = {}\n", \ + xenos::GetEndianEnglishDescription(action.__type.name)) +#define LOG_PRIMTYPE(__type, name) \ + logger("\t" #name " = {}\n", \ + xenos::GetPrimitiveTypeEnglishDescription(action.__type.name)) + for (auto&& action : packet_info.actions) { + using PType = PacketAction::Type; + switch (action.type) { + case PType::kRegisterWrite: + break; + case PType::kSetBinMask: + logger("\tSetBinMask {}\n", action.set_bin_mask.value); + break; + case PType::kSetBinSelect: + logger("\tSetBinSelect {}\n", action.set_bin_select.value); + break; + case PType::kMeInit: + logger("\tMeInit - "); + LOGU32s(logger, action.words); + logger("\n"); + break; + case PType::kGenInterrupt: + logger("\tGenInterrupt for cpu mask {:04X}\n", + action.gen_interrupt.cpu_mask); + break; + case PType::kSetBinMaskHi: + case PType::kSetBinMaskLo: + case PType::kSetBinSelectHi: + case PType::kSetBinSelectLo: + LOG_ACTION_FIELD(lohi_op, value); + break; + case PType::kWaitRegMem: + LOG_ACTION_FIELD(wait_reg_mem, wait_info); + LOG_ACTION_FIELD(wait_reg_mem, poll_reg_addr); + LOG_ACTION_FIELD(wait_reg_mem, ref); + LOG_ACTION_FIELD(wait_reg_mem, mask); + LOG_ACTION_FIELD(wait_reg_mem, wait); + break; + case PType::kRegRmw: { + uint32_t rmw_info = action.reg_rmw.rmw_info; + uint32_t and_mask = action.reg_rmw.and_mask; + uint32_t or_mask = action.reg_rmw.or_mask; + + uint32_t and_mask_is_reg = (rmw_info >> 31) & 0x1; + uint32_t or_mask_is_reg = (rmw_info >> 30) & 0x1; + + std::string and_mask_str; + + if (and_mask_is_reg) { + and_mask_str = GenerateRegnameForPm4Print(and_mask & 0x1FFF); + } else { + and_mask_str = fmt::format("0x{:08X}", and_mask); + } + std::string or_mask_str; + + if (or_mask_is_reg) { + or_mask_str = GenerateRegnameForPm4Print(or_mask & 0x1FFF); + } else { + or_mask_str = fmt::format("0x{:08X}", or_mask); + } + + std::string dest = GenerateRegnameForPm4Print(rmw_info & 0x1FFF); + + logger("\t{} = ({} & {}) | {}\n", dest, dest, and_mask_str, + or_mask_str); + LOG_ACTION_FIELD(reg_rmw, rmw_info); + LOG_ACTION_FIELD(reg_rmw, and_mask); + LOG_ACTION_FIELD(reg_rmw, or_mask); + + break; + } + case PType::kCondWrite: + LOG_ACTION_FIELD(cond_write, wait_info); + LOG_ACTION_FIELD(cond_write, poll_reg_addr); + LOG_ACTION_FIELD(cond_write, ref); + LOG_ACTION_FIELD(cond_write, mask); + LOG_ACTION_FIELD(cond_write, write_reg_addr); + LOG_ACTION_FIELD(cond_write, write_data); + break; + + case PType::kEventWrite: + LOG_ACTION_FIELD(event_write, initiator); + break; + case PType::kEventWriteSHD: + LOG_ACTION_FIELD(event_write_shd, initiator); + LOG_ACTION_FIELD(event_write_shd, address); + LOG_ACTION_FIELD(event_write_shd, value); + break; + case PType::kEventWriteExt: + LOG_ACTION_FIELD(event_write_ext, unk0); + LOG_ACTION_FIELD(event_write_ext, unk1); + break; + case PType::kDrawIndx: + LOG_ACTION_FIELD(draw_indx, dword0); + LOG_ACTION_FIELD(draw_indx, dword1); + LOG_ACTION_FIELD_DEC(draw_indx, index_count); + LOG_PRIMTYPE(draw_indx, prim_type); + LOG_ACTION_FIELD(draw_indx, src_sel); + LOG_ACTION_FIELD(draw_indx, guest_base); + LOG_ACTION_FIELD_DEC(draw_indx, index_size); + LOG_ENDIANNESS(draw_indx, endianness); + break; + case PType::kDrawIndx2: + LOG_ACTION_FIELD(draw_indx2, dword0); + LOG_ACTION_FIELD_DEC(draw_indx2, index_count); + LOG_PRIMTYPE(draw_indx2, prim_type); + LOG_ACTION_FIELD(draw_indx2, src_sel); + LOG_ACTION_FIELD_DEC(draw_indx2, indices_size); + logger("Indices = "); + LOGU32s(logger, action.words); + logger("\n"); + break; + case PType::kInvalidateState: + LOG_ACTION_FIELD(invalidate_state, state_mask); + break; + case PType::kImLoad: + LOG_ACTION_FIELD(im_load, shader_type); + LOG_ACTION_FIELD(im_load, addr); + LOG_ACTION_FIELD(im_load, start); + LOG_ACTION_FIELD_DEC(im_load, size_dwords); + break; + case PType::kImLoadImmediate: + LOG_ACTION_FIELD_DEC(im_load_imm, shader_type); + LOG_ACTION_FIELD(im_load_imm, start); + LOG_ACTION_FIELD_DEC(im_load_imm, size_dwords); + logger("Shader instruction words = "); + LOGU32s(logger, action.words); + logger("\n"); + break; + case PType::kWaitForIdle: + LOG_ACTION_FIELD(wait_for_idle, probably_unused); + break; + case PType::kContextUpdate: + LOG_ACTION_FIELD(context_update, maybe_unused); + break; + case PType::kVizQuery: + LOG_ACTION_FIELD_DEC(vizquery, id); + LOG_ACTION_FIELD_DEC(vizquery, end); + LOG_ACTION_FIELD(vizquery, dword0); + break; + case PType::kEventWriteZPD: + LOG_ACTION_FIELD(event_write_zpd, initiator); + + break; + case PType::kMemWrite: + LOG_ACTION_FIELD(mem_write, addr); + LOG_ENDIANNESS(mem_write, endianness); + logger("Values to write (with GpuSwap pre-applied) = "); + LOGU32s(logger, action.words); + logger("\n"); + break; + case PType::kRegToMem: + logger("{}\n", GenerateRegnameForPm4Print(action.reg2mem.reg_addr)); + LOG_ACTION_FIELD(reg2mem, mem_addr); + LOG_ENDIANNESS(reg2mem, endianness); + break; + case PType::kIndirBuffer: + LOG_ACTION_FIELD(indir_buffer, list_ptr); + LOG_ACTION_FIELD_DEC(indir_buffer, list_length); + break; + case PType::kXeSwap: + LOG_ACTION_FIELD(xe_swap, frontbuffer_ptr); + break; + } + } + } else { + logger("Unknown packet! Failed to disassemble.\n"); + } + logger.submit('d'); +} bool COMMAND_PROCESSOR::ExecutePacket() { +#if XE_ENABLE_PM4_DISASM == 1 + if (cvars::disassemble_pm4 && logging::internal::ShouldLog(LogLevel::Debug)) { + COMMAND_PROCESSOR::DisassembleCurrentPacket(); + } +#endif const uint32_t packet = reader_.ReadAndSwap(); const uint32_t packet_type = packet >> 30; diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc index aa22558eb..bfe7549d3 100644 --- a/src/xenia/gpu/register_table.inc +++ b/src/xenia/gpu/register_table.inc @@ -19,6 +19,8 @@ XE_GPU_REGISTER(0x0048, kDword, BIF_PERFCOUNTER0_SELECT) XE_GPU_REGISTER(0x0049, kDword, BIF_PERFCOUNTER0_HI) XE_GPU_REGISTER(0x004A, kDword, BIF_PERFCOUNTER0_LOW) +XE_GPU_REGISTER(0x1C5, kDword, CP_RB_WPTR) + XE_GPU_REGISTER(0x01DD, kDword, SCRATCH_ADDR) XE_GPU_REGISTER(0x01DC, kDword, SCRATCH_UMSK) @@ -34,19 +36,31 @@ XE_GPU_REGISTER(0x0398, kDword, RBBM_PERFCOUNTER0_HI) XE_GPU_REGISTER(0x0399, kDword, RBBM_PERFCOUNTER1_LOW) XE_GPU_REGISTER(0x039A, kDword, RBBM_PERFCOUNTER1_HI) +//XAM reads this directly and stores it to a struct, have not tracked where it goes from there +//PM4 command PM4_MEM_WRITE_CNTR is supposed to write this to memory +//XE_GPU_REGISTER(0x44b, kDword,CP_PROG_COUNTER ) XE_GPU_REGISTER(0x045E, kDword, CALLBACK_ACK) XE_GPU_REGISTER(0x0578, kDword, SCRATCH_REG0) // interrupt sync XE_GPU_REGISTER(0x0579, kDword, SCRATCH_REG1) // present interval XE_GPU_REGISTER(0x057A, kDword, SCRATCH_REG2) XE_GPU_REGISTER(0x057B, kDword, SCRATCH_REG3) -XE_GPU_REGISTER(0x057C, kDword, CALLBACK_ADDRESS) -XE_GPU_REGISTER(0x057D, kDword, CALLBACK_CONTEXT) +XE_GPU_REGISTER(0x057C, kDword, SCRATCH_REG4) //originally this was named CALLBACK_ADDRESS, but that didnt make sense +XE_GPU_REGISTER(0x057D, kDword, SCRATCH_REG5) XE_GPU_REGISTER(0x057E, kDword, SCRATCH_REG6) XE_GPU_REGISTER(0x057F, kDword, SCRATCH_REG7) XE_GPU_REGISTER(0x05C8, kDword, WAIT_UNTIL) + +//update count = 6 bit field, bits 8- 14 +//there are several other fields here, they have an unknown purpose +//XE_GPU_REGISTER(0x704, kDword, CP_RB_CNTL) + +//address must be 4 byte aligned +//low 2 bits encode two different fields? +//xboxkrnl just does addr |2 when assigning +//XE_GPU_REGISTER(0x70C, kDword, CP_RB_RPTR_ADDR) XE_GPU_REGISTER(0x0815, kDword, MC0_PERFCOUNTER0_SELECT) XE_GPU_REGISTER(0x0816, kDword, MC0_PERFCOUNTER0_HI) XE_GPU_REGISTER(0x0817, kDword, MC0_PERFCOUNTER0_LOW) @@ -142,7 +156,7 @@ XE_GPU_REGISTER(0x0DD8, kDword, SX_PERFCOUNTER0_LOW) XE_GPU_REGISTER(0x0DD9, kDword, SX_PERFCOUNTER0_HI) // Set with WAIT_UNTIL = WAIT_3D_IDLECLEAN -XE_GPU_REGISTER(0x0E00, kDword, UNKNOWN_0E00) +XE_GPU_REGISTER(0x0E00, kDword, TC_CNTL_STATUS) XE_GPU_REGISTER(0x0E05, kDword, TCR_PERFCOUNTER0_SELECT) XE_GPU_REGISTER(0x0E06, kDword, TCR_PERFCOUNTER0_HI) diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index e9c15a0c1..c527b81be 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -32,6 +32,7 @@ #include "xenia/gpu/vulkan/vulkan_shader.h" #include "xenia/gpu/vulkan/vulkan_shared_memory.h" #include "xenia/gpu/xenos.h" +#include "xenia/gpu/packet_disassembler.h" #include "xenia/kernel/kernel_state.h" #include "xenia/kernel/user_module.h" #include "xenia/ui/vulkan/vulkan_presenter.h" diff --git a/src/xenia/gpu/xenos.cc b/src/xenia/gpu/xenos.cc index 05bb30014..f1a3dff34 100644 --- a/src/xenia/gpu/xenos.cc +++ b/src/xenia/gpu/xenos.cc @@ -206,7 +206,39 @@ const char* GetDepthRenderTargetFormatName(DepthRenderTargetFormat format) { return "kUnknown"; } } +static const char* const g_endian_names[] = {"none", "8 in 16", "8 in 32", + "16 in 32"}; +const char* GetEndianEnglishDescription(xenos::Endian endian) { + return g_endian_names[static_cast(endian)]; +} +static const char* const g_primtype_human_names[] = {"none", + "point list", + "line list", + "line strip", + "triangle list", + "triangle fan", + "triangle strip", + "triangle with flags", + "rectangle list", + "unused1", + "unused2", + "unused3", + "line loop", + "quad list", + "quad strip", + "polygon", + "2D copy rect list v0", + "2D copy rect list v1", + "2D copy rect list v2", + "2D copy rect list v3", + "2D fillrect list", + "2D line strip", + "2D triangle strip"}; + +const char* GetPrimitiveTypeEnglishDescription(xenos::PrimitiveType prim_type) { + return g_primtype_human_names[static_cast(prim_type)]; +} } // namespace xenos } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 6c39daf45..8f16690b3 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -48,6 +48,9 @@ enum class PrimitiveType : uint32_t { kTriangleStrip = 0x06, kTriangleWithWFlags = 0x07, kRectangleList = 0x08, + kUnused1 = 0x09, + kUnused2 = 0x0A, + kUnused3 = 0x0B, kLineLoop = 0x0C, kQuadList = 0x0D, kQuadStrip = 0x0E, @@ -1596,7 +1599,11 @@ inline uint32_t MakePacketType3(Type3Opcode opcode, uint16_t count, return (3u << 30) | (((count - 1) & 0x3FFF) << 16) | ((opcode & 0x7F) << 8) | (predicate ? 1 : 0); } - +/* + * pretty english descriptions of enumeration values for the packet disassembler + */ +const char* GetEndianEnglishDescription(xenos::Endian endian); +const char* GetPrimitiveTypeEnglishDescription(xenos::PrimitiveType prim_type); } // namespace xenos } // namespace gpu } // namespace xe diff --git a/src/xenia/kernel/util/shim_utils.h b/src/xenia/kernel/util/shim_utils.h index 976b73514..20c677f91 100644 --- a/src/xenia/kernel/util/shim_utils.h +++ b/src/xenia/kernel/util/shim_utils.h @@ -521,10 +521,10 @@ XE_NOALIAS void PrintKernelCall(cpu::Export* export_entry, string_buffer.Append(')'); if (export_entry->tags & xe::cpu::ExportTag::kImportant) { xe::logging::AppendLogLine(xe::LogLevel::Info, 'i', - string_buffer.to_string_view()); + string_buffer.to_string_view(), LogSrc::Kernel); } else { xe::logging::AppendLogLine(xe::LogLevel::Debug, 'd', - string_buffer.to_string_view()); + string_buffer.to_string_view(), LogSrc::Kernel); } } /*