Merge pull request #151 from chrisps/extreme_logging_helpers

Command Processor debug logging changes
This commit is contained in:
chrisps 2023-04-16 13:20:14 -04:00 committed by GitHub
commit 779be8283d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 733 additions and 49 deletions

View File

@ -1147,8 +1147,8 @@ const std::map<int, EmulatorWindow::ControllerHotKey> controller_hotkey_map = {
"A + Guide = Toggle Readback Resolve", true)},
{X_INPUT_GAMEPAD_B | X_INPUT_GAMEPAD_GUIDE,
EmulatorWindow::ControllerHotKey(
EmulatorWindow::ButtonFunctions::CloseWindow,
"B + Guide = Close Window")},
EmulatorWindow::ButtonFunctions::ToggleLogging,
"B + Guide = Toggle between loglevel set in config and the 'Disabled' loglevel.", true, true)},
{X_INPUT_GAMEPAD_Y | X_INPUT_GAMEPAD_GUIDE,
EmulatorWindow::ControllerHotKey(
EmulatorWindow::ButtonFunctions::ToggleFullscreen,
@ -1190,7 +1190,7 @@ const std::map<int, EmulatorWindow::ControllerHotKey> controller_hotkey_map = {
"Start = Run Selected Title", false, false)},
{X_INPUT_GAMEPAD_BACK | X_INPUT_GAMEPAD_START,
EmulatorWindow::ControllerHotKey(
EmulatorWindow::ButtonFunctions::CloseWindow,
EmulatorWindow::ButtonFunctions::ToggleLogging,
"Back + Start = Close Window", false, false)},
{X_INPUT_GAMEPAD_DPAD_DOWN,
EmulatorWindow::ControllerHotKey(
@ -1296,8 +1296,8 @@ EmulatorWindow::ControllerHotKey EmulatorWindow::ProcessControllerHotkey(
case ButtonFunctions::DecTitleSelect:
selected_title_index--;
break;
case ButtonFunctions::CloseWindow:
window_->RequestClose();
case ButtonFunctions::ToggleLogging:
logging::internal::ToggleLogLevel();
break;
case ButtonFunctions::Unknown:
default:

View File

@ -85,7 +85,7 @@ class EmulatorWindow {
ToggleControllerVibration,
ClearMemoryPageState,
ReadbackResolve,
CloseWindow,
ToggleLogging,
IncTitleSelect,
DecTitleSelect,
Unknown

View File

@ -101,7 +101,8 @@ void av_log_callback(void* avcl, int level, const char* fmt, va_list va) {
StringBuffer buff;
buff.AppendVarargs(fmt, va);
xe::logging::AppendLogLineFormat(log_level, level_char, "ffmpeg: {}",
xe::logging::AppendLogLineFormat(LogSrc::Apu, log_level, level_char,
"ffmpeg: {}",
buff.to_string_view());
}

View File

@ -55,6 +55,12 @@ DEFINE_bool(log_to_debugprint, false, "Dump the log to DebugPrint.", "Logging");
#endif // XE_PLATFORM_ANDROID
DEFINE_bool(flush_log, true, "Flush log file after each log line batch.",
"Logging");
DEFINE_uint32(log_mask, 0,
"Disables specific categorizes for more granular debug logging. "
"Kernel = 1, Apu = 2, Cpu = 4.",
"Logging");
DEFINE_int32(
log_level, 2,
"Maximum level to be logged. (0=error, 1=warning, 2=info, 3=debug)",
@ -465,8 +471,16 @@ void ShutdownLogging() {
memory::AlignedFree(logger);
}
bool logging::internal::ShouldLog(LogLevel log_level) {
return static_cast<int32_t>(log_level) <= cvars::log_level;
static int g_saved_loglevel = static_cast<int>(LogLevel::Disabled);
void logging::internal::ToggleLogLevel() {
auto swap = g_saved_loglevel;
g_saved_loglevel = cvars::log_level;
cvars::log_level = swap;
}
bool logging::internal::ShouldLog(LogLevel log_level, uint32_t log_mask) {
return static_cast<int32_t>(log_level) <= cvars::log_level &&
(log_mask & cvars::log_mask) == 0;
}
std::pair<char*, size_t> logging::internal::GetThreadBuffer() {
@ -483,8 +497,8 @@ void logging::internal::AppendLogLine(LogLevel log_level,
}
void logging::AppendLogLine(LogLevel log_level, const char prefix_char,
const std::string_view str) {
if (!internal::ShouldLog(log_level) || !str.size()) {
const std::string_view str, uint32_t log_mask) {
if (!internal::ShouldLog(log_level, log_mask) || !str.size()) {
return;
}
logger_->AppendLine(xe::threading::current_thread_id(), prefix_char,

View File

@ -27,6 +27,7 @@ namespace xe {
// may be related to. These names should not be taken as fact as what a given
// log line from any log level actually is.
enum class LogLevel {
Disabled = -1,
Error = 0,
Warning,
Info,
@ -34,6 +35,15 @@ enum class LogLevel {
Trace,
};
// bitmasks!
namespace LogSrc {
enum : uint32_t {
Uncategorized = 0,
Kernel = 1,
Apu = 2,
Cpu = 4,
};
}
class LogSink {
public:
virtual ~LogSink() = default;
@ -72,32 +82,38 @@ void ShutdownLogging();
namespace logging {
namespace internal {
bool ShouldLog(LogLevel log_level);
void ToggleLogLevel();
bool ShouldLog(LogLevel log_level,
uint32_t log_mask = xe::LogSrc::Uncategorized);
std::pair<char*, size_t> GetThreadBuffer();
XE_NOALIAS
void AppendLogLine(LogLevel log_level, const char prefix_char, size_t written);
} // namespace internal
//technically, noalias is incorrect here, these functions do in fact alias global memory,
//but msvc will not optimize the calls away, and the global memory modified by the calls is limited to internal logging variables,
//so it might as well be noalias
// technically, noalias is incorrect here, these functions do in fact alias
// global memory, but msvc will not optimize the calls away, and the global
// memory modified by the calls is limited to internal logging variables, so it
// might as well be noalias
template <typename... Args>
XE_NOALIAS
XE_NOINLINE XE_COLD static void AppendLogLineFormat_Impl(LogLevel log_level,
const char prefix_char,
const char* format,
const Args&... args) {
XE_NOALIAS XE_NOINLINE XE_COLD static void AppendLogLineFormat_Impl(
LogLevel log_level, const char prefix_char, const char* format,
const Args&... args) {
auto target = internal::GetThreadBuffer();
auto result = fmt::format_to_n(target.first, target.second, format, args...);
internal::AppendLogLine(log_level, prefix_char, result.size);
}
// Appends a line to the log with {fmt}-style formatting.
//chrispy: inline the initial check, outline the append. the append should happen rarely for end users
// Appends a line to the log with {fmt}-style formatting.
// chrispy: inline the initial check, outline the append. the append should
// happen rarely for end users
template <typename... Args>
XE_FORCEINLINE static void AppendLogLineFormat(LogLevel log_level, const char prefix_char,
const char* format, const Args&... args) {
if (!internal::ShouldLog(log_level)) {
XE_FORCEINLINE static void AppendLogLineFormat(uint32_t log_src_mask,
LogLevel log_level,
const char prefix_char,
const char* format,
const Args&... args) {
if (!internal::ShouldLog(log_level, log_src_mask)) {
return;
}
AppendLogLineFormat_Impl(log_level, prefix_char, format, args...);
@ -105,7 +121,36 @@ XE_FORCEINLINE static void AppendLogLineFormat(LogLevel log_level, const char pr
// Appends a line to the log.
void AppendLogLine(LogLevel log_level, const char prefix_char,
const std::string_view str);
const std::string_view str,
uint32_t log_mask = LogSrc::Uncategorized);
template <LogLevel ll>
struct LoggerBatch {
char* thrd_buf; // current position in thread buffer
size_t thrd_buf_rem; // num left in thrd buffer
size_t total_size;
void reset() {
auto target = logging::internal::GetThreadBuffer();
thrd_buf = target.first;
thrd_buf_rem = target.second;
total_size = 0;
}
LoggerBatch() { reset(); }
template <size_t fmtlen, typename... Ts>
void operator()(const char (&fmt)[fmtlen], Ts&&... args) {
auto tmpres = fmt::format_to_n(thrd_buf, thrd_buf_rem, fmt, args...);
thrd_buf_rem -= tmpres.size;
thrd_buf = tmpres.out;
total_size += tmpres.size;
}
void submit(char prefix_char) {
logging::internal::AppendLogLine(ll, prefix_char, total_size);
}
};
} // namespace logging
@ -118,48 +163,56 @@ void AppendLogLine(LogLevel log_level, const char prefix_char,
template <typename... Args>
XE_COLD void XELOGE(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Error, '!', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized,
xe::LogLevel::Error, '!', format, args...);
}
template <typename... Args>
XE_COLD
void XELOGW(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Warning, 'w', format, args...);
XE_COLD void XELOGW(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized,
xe::LogLevel::Warning, 'w', format, args...);
}
template <typename... Args>
void XELOGI(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'i', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized,
xe::LogLevel::Info, 'i', format, args...);
}
template <typename... Args>
void XELOGD(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 'd', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized,
xe::LogLevel::Debug, 'd', format, args...);
}
template <typename... Args>
void XELOGCPU(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'C', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Cpu, xe::LogLevel::Info, 'C',
format, args...);
}
template <typename... Args>
void XELOGAPU(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 'A', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Apu, xe::LogLevel::Debug, 'A',
format, args...);
}
template <typename... Args>
void XELOGGPU(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 'G', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized,
xe::LogLevel::Debug, 'G', format, args...);
}
template <typename... Args>
void XELOGKERNEL(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'K', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Kernel, xe::LogLevel::Info, 'K',
format, args...);
}
template <typename... Args>
void XELOGFS(const char* format, const Args&... args) {
xe::logging::AppendLogLineFormat(xe::LogLevel::Info, 'F', format, args...);
xe::logging::AppendLogLineFormat(xe::LogSrc::Uncategorized,
xe::LogLevel::Info, 'F', format, args...);
}
#else

View File

@ -28,7 +28,14 @@ class Processor;
namespace xe {
namespace cpu {
namespace backend {
static constexpr uint32_t MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES = 32;
struct GuestPseudoStackTrace {
uint32_t count;
uint32_t truncated_flag; // set to 1 if there were more than
// MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES entries.
uint32_t return_addrs[MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES];
};
class Assembler;
class CodeCache;
@ -84,6 +91,10 @@ class Backend {
* */
virtual void PrepareForReentry(void* ctx) {}
// returns true if populated st
virtual bool PopulatePseudoStacktrace(GuestPseudoStackTrace* st) {
return false;
}
protected:
Processor* processor_ = nullptr;
MachineInfo machine_info_;

View File

@ -1131,6 +1131,40 @@ void X64Backend::SetGuestRoundingMode(void* ctx, unsigned int mode) {
((ppc::PPCContext*)ctx)->fpscr.bits.rn = control;
}
bool X64Backend::PopulatePseudoStacktrace(GuestPseudoStackTrace* st) {
if (!cvars::enable_host_guest_stack_synchronization) {
return false;
}
ThreadState* thrd_state = ThreadState::Get();
if (!thrd_state) {
return false; // we're not a guest!
}
ppc::PPCContext* ctx = thrd_state->context();
X64BackendContext* backend_ctx = BackendContextForGuestContext(ctx);
uint32_t depth = backend_ctx->current_stackpoint_depth - 1;
if (static_cast<int32_t>(depth) < 1) {
return false;
}
uint32_t num_entries_to_populate =
std::min(MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES, depth);
st->count = num_entries_to_populate;
st->truncated_flag = num_entries_to_populate < depth ? 1 : 0;
X64BackendStackpoint* current_stackpoint =
&backend_ctx->stackpoints[backend_ctx->current_stackpoint_depth - 1];
for (uint32_t stp_index = 0; stp_index < num_entries_to_populate;
++stp_index) {
st->return_addrs[stp_index] = current_stackpoint->guest_return_address_;
current_stackpoint--;
}
return true;
}
#if XE_X64_PROFILER_AVAILABLE == 1
uint64_t* X64Backend::GetProfilerRecordForFunction(uint32_t guest_address) {
// who knows, we might want to compile different versions of a function one

View File

@ -145,7 +145,7 @@ class X64Backend : public Backend {
reinterpret_cast<intptr_t>(ctx) - sizeof(X64BackendContext));
}
virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override;
virtual bool PopulatePseudoStacktrace(GuestPseudoStackTrace* st) override;
void RecordMMIOExceptionForGuestInstruction(void* host_address);
#if XE_X64_PROFILER_AVAILABLE == 1
uint64_t* GetProfilerRecordForFunction(uint32_t guest_address);

View File

@ -34,11 +34,12 @@ bool trace_enabled = true;
#define IFLUSH()
#define IPRINT(s) \
if (trace_enabled && THREAD_MATCH) \
xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s)
xe::logging::AppendLogLine(xe::LogLevel::Debug, 't', s, xe::LogSrc::Cpu)
#define DFLUSH()
#define DPRINT(...) \
if (trace_enabled && THREAD_MATCH) \
xe::logging::AppendLogLineFormat(xe::LogLevel::Debug, 't', __VA_ARGS__)
xe::logging::AppendLogLineFormat(xe::LogSrc::Cpu, xe::LogLevel::Debug, 't', \
__VA_ARGS__)
uint32_t GetTracingMode() {
uint32_t mode = 0;

View File

@ -13,14 +13,37 @@
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/byte_stream.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/graphics_system.h"
#include "xenia/gpu/packet_disassembler.h"
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/user_module.h"
#if !defined(NDEBUG)
#define XE_ENABLE_GPU_REG_WRITE_LOGGING 1
#endif
DEFINE_bool(
log_guest_driven_gpu_register_written_values, false,
"Only does anything in debug builds, if set will log every write to a gpu "
"register done by a guest. Does not log writes that are done by the CP on "
"its own, just ones the guest makes or instructs it to make.",
"GPU");
DEFINE_bool(disassemble_pm4, false,
"Only does anything in debug builds, if set will disassemble and "
"log all PM4 packets sent to the CP.",
"GPU");
DEFINE_bool(
log_ringbuffer_kickoff_initiator_bts, false,
"Only does anything in debug builds, if set will log the pseudo-stacktrace "
"of the guest thread that wrote the new read position.",
"GPU");
namespace xe {
namespace gpu {
@ -234,6 +257,7 @@ void CommandProcessor::WorkerThreadMain() {
// TODO(benvanik): use reader->Read_update_freq_ and only issue after moving
// that many indices.
// Keep in mind that the gpu also updates the cpu-side copy if the write pointer and read pointer would be equal
if (read_ptr_writeback_ptr_) {
xe::store_and_swap<uint32_t>(
memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_);
@ -320,10 +344,94 @@ void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
read_ptr_update_freq_ = uint32_t(1) << block_size_log2 >> 2;
}
XE_NOINLINE XE_COLD void CommandProcessor::LogKickoffInitator(uint32_t value) {
cpu::backend::GuestPseudoStackTrace st;
if (logging::internal::ShouldLog(LogLevel::Debug) && kernel_state_->processor()
->backend()
->PopulatePseudoStacktrace(&st)) {
logging::LoggerBatch<LogLevel::Debug> log_initiator{};
log_initiator("Updating read ptr to {}, initiator stacktrace below\n",
value);
for (uint32_t i = 0; i < st.count; ++i) {
log_initiator("\t{:08X}\n", st.return_addrs[i]);
}
if (st.truncated_flag) {
log_initiator("\t(Truncated stacktrace to {} entries)\n",
cpu::backend::MAX_GUEST_PSEUDO_STACKTRACE_ENTRIES);
}
log_initiator.submit('d');
}
}
void CommandProcessor::UpdateWritePointer(uint32_t value) {
XE_UNLIKELY_IF (cvars::log_ringbuffer_kickoff_initiator_bts) {
LogKickoffInitator(value);
}
write_ptr_index_ = value;
write_ptr_index_event_->SetBoostPriority();
}
void CommandProcessor::LogRegisterSet(uint32_t register_index, uint32_t value) {
#if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1
if (cvars::log_guest_driven_gpu_register_written_values && logging::internal::ShouldLog(LogLevel::Debug)) {
const RegisterInfo* reginfo = RegisterFile::GetRegisterInfo(register_index);
if (!reginfo) {
XELOGD("Unknown_Reg{:04X} <- {:08X}\n", register_index, value);
} else {
XELOGD("{} <- {:08X}\n", reginfo->name, value);
}
}
#endif
}
void CommandProcessor::LogRegisterSets(uint32_t base_register_index,
const uint32_t* values,
uint32_t n_values) {
#if XE_ENABLE_GPU_REG_WRITE_LOGGING == 1
if (cvars::log_guest_driven_gpu_register_written_values &&
logging::internal::ShouldLog(LogLevel::Debug)) {
auto target = logging::internal::GetThreadBuffer();
auto target_ptr = target.first;
size_t total_size = 0;
size_t rem_size = target.second;
for (uint32_t i = 0; i < n_values; ++i) {
uint32_t register_index = base_register_index + i;
uint32_t value = xe::load_and_swap<uint32_t>(&values[i]);
const RegisterInfo* reginfo =
RegisterFile::GetRegisterInfo(register_index);
if (!reginfo) {
auto tmpres = fmt::format_to_n(target_ptr, rem_size,
"Unknown_Reg{:04X} <- {:08X}\n",
register_index, value);
target_ptr = tmpres.out;
rem_size -= tmpres.size;
total_size += tmpres.size;
} else {
auto tmpres = fmt::format_to_n(target_ptr, rem_size, "{} <- {:08X}\n",
reginfo->name, value);
rem_size -= tmpres.size;
target_ptr = tmpres.out;
total_size += tmpres.size;
}
}
logging::internal::AppendLogLine(LogLevel::Debug, 'd', total_size);
}
#endif
}
void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
uint32_t value) {
RegisterFile& regs = *register_file_;

View File

@ -131,7 +131,9 @@ class CommandProcessor {
void UpdateWritePointer(uint32_t value);
void LogRegisterSet(uint32_t register_index, uint32_t value);
void LogRegisterSets(uint32_t base_register_index, const uint32_t* values,
uint32_t n_values);
bool is_paused() const { return paused_; }
void Pause();
@ -296,6 +298,8 @@ class CommandProcessor {
reg::DC_LUT_30_COLOR gamma_ramp_256_entry_table_[256] = {};
reg::DC_LUT_PWL_DATA gamma_ramp_pwl_rgb_[128][3] = {};
uint32_t gamma_ramp_rw_component_ = 0;
XE_NOINLINE XE_COLD void LogKickoffInitator(uint32_t value);
};
} // namespace gpu

View File

@ -24,6 +24,7 @@
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/xenos.h"
#include "xenia/gpu/packet_disassembler.h"
#include "xenia/ui/d3d12/d3d12_presenter.h"
#include "xenia/ui/d3d12/d3d12_util.h"
@ -1816,6 +1817,7 @@ void D3D12CommandProcessor::WriteRegisterForceinline(uint32_t index,
}
// todo: bit-pack the bools and use bitarith to reduce branches
void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
LogRegisterSet(index, value);
WriteRegisterForceinline(index, value);
}
@ -2073,7 +2075,7 @@ XE_FORCEINLINE void
D3D12CommandProcessor::WriteRegisterRangeFromMem_WithKnownBound(
uint32_t start_index, uint32_t* base, uint32_t num_registers) {
uint32_t end = start_index + num_registers;
LogRegisterSets(start_index, base, num_registers);
uint32_t current_index = start_index;
auto get_end_before_qty = [&end, current_index](uint32_t regnum) {

View File

@ -26,4 +26,6 @@ DECLARE_bool(half_pixel_offset);
DECLARE_int32(query_occlusion_fake_sample_count);
DECLARE_bool(disassemble_pm4);
#endif // XENIA_GPU_GPU_FLAGS_H_

View File

@ -36,6 +36,31 @@ struct PacketAction {
kRegisterWrite,
kSetBinMask,
kSetBinSelect,
kSetBinMaskLo,
kSetBinMaskHi,
kSetBinSelectLo,
kSetBinSelectHi,
kMeInit,
kGenInterrupt,
kWaitRegMem,
kRegRmw,
kCondWrite,
kEventWrite,
kEventWriteSHD,
kEventWriteExt,
kDrawIndx,
kDrawIndx2,
kInvalidateState,
kImLoad,
kImLoadImmediate,
kContextUpdate,
kWaitForIdle,
kVizQuery,
kEventWriteZPD,
kMemWrite,
kRegToMem,
kIndirBuffer,
kXeSwap
};
Type type;
@ -50,8 +75,121 @@ struct PacketAction {
struct {
uint64_t value;
} set_bin_select;
struct {
uint32_t cpu_mask;
} gen_interrupt;
struct {
uint32_t wait_info;
uint32_t poll_reg_addr;
uint32_t ref;
uint32_t mask;
uint32_t wait;
} wait_reg_mem;
struct {
uint32_t rmw_info, and_mask, or_mask;
} reg_rmw;
struct {
uint32_t wait_info;
uint32_t poll_reg_addr;
uint32_t ref;
uint32_t mask;
uint32_t write_reg_addr;
uint32_t write_data;
} cond_write;
struct {
uint32_t initiator;
} event_write;
struct {
uint32_t initiator, address, value;
} event_write_shd;
struct {
uint32_t unk0, unk1;
} event_write_ext;
struct {
uint32_t initiator;
} event_write_zpd;
struct {
uint32_t dword0, dword1, index_count;
xenos::PrimitiveType prim_type;
uint32_t src_sel;
uint32_t guest_base;
uint32_t index_size;
xenos::Endian endianness;
} draw_indx;
struct {
uint32_t dword0;
uint32_t index_count;
xenos::PrimitiveType prim_type;
uint32_t src_sel;
uint32_t indices_size;
} draw_indx2;
struct {
uint32_t state_mask;
} invalidate_state;
struct {
xenos::ShaderType shader_type;
uint32_t addr;
uint32_t start;
uint32_t size_dwords;
} im_load;
struct {
xenos::ShaderType shader_type;
uint32_t start;
uint32_t size_dwords;
} im_load_imm;
// for bin select hi/lo binmask hi/lo
struct {
uint32_t value;
} lohi_op;
struct {
// i think waitforidle is just being pad to a qword
uint32_t probably_unused;
} wait_for_idle;
struct {
uint32_t maybe_unused;
} context_update;
struct {
uint32_t dword0;
uint32_t id;
bool end;
} vizquery;
struct {
uint32_t addr;
xenos::Endian endianness;
} mem_write;
struct {
uint32_t reg_addr;
uint32_t mem_addr;
xenos::Endian endianness;
} reg2mem;
struct {
uint32_t frontbuffer_ptr;
} xe_swap;
struct {
uint32_t list_ptr;
uint32_t list_length;
} indir_buffer;
};
std::vector<uint32_t> words;
static PacketAction RegisterWrite(uint32_t index, uint32_t value) {
PacketAction action;
action.type = Type::kRegisterWrite;
@ -73,6 +211,32 @@ struct PacketAction {
action.set_bin_select.value = value;
return action;
}
void InjectBeWords(const uint32_t* values, uint32_t count) {
for (unsigned i = 0; i < count; ++i) {
words.push_back(xe::load_and_swap<uint32_t>(&values[i]));
}
}
void InjectBeHalfwordsAsWords(const uint16_t* values, uint32_t count) {
for (unsigned i = 0; i < count; ++i) {
words.push_back(
static_cast<uint32_t>(xe::load_and_swap<uint16_t>(&values[i])));
}
}
static PacketAction MeInit(const uint32_t* values, uint32_t count) {
PacketAction action;
action.type = Type::kMeInit;
action.InjectBeWords(values, count);
return action;
}
static PacketAction Interrupt(uint32_t cpu_mask) {
PacketAction action;
action.type = Type::kGenInterrupt;
action.gen_interrupt.cpu_mask = cpu_mask;
return action;
}
};
struct PacketInfo {

View File

@ -15,7 +15,9 @@ public:
void ExecutePacket(uint32_t ptr, uint32_t count);
protected:
XE_NOINLINE
XE_NOINLINE
void DisassembleCurrentPacket() XE_RESTRICT;
XE_NOINLINE
bool ExecutePacketType0( uint32_t packet) XE_RESTRICT;
XE_NOINLINE
bool ExecutePacketType1( uint32_t packet) XE_RESTRICT;

View File

@ -1,4 +1,9 @@
#pragma once
#if !defined(NDEBUG)
#define XE_ENABLE_PM4_DISASM 1
#endif
using namespace xe::gpu::xenos;
void COMMAND_PROCESSOR::ExecuteIndirectBuffer(uint32_t ptr,
uint32_t count) XE_RESTRICT {
@ -35,8 +40,237 @@ void COMMAND_PROCESSOR::ExecuteIndirectBuffer(uint32_t ptr,
return;
}
}
XE_NOINLINE
static void LOGU32s(logging::LoggerBatch<LogLevel::Debug>& logger,
const std::vector<uint32_t>& values) {
bool first = true;
#if 0
XELOGD("[ ");
for (auto&& val : values) {
if (first) {
XELOGD("0x{:08X}", val);
first = false;
} else {
XELOGD(", 0x{:08X}", val);
}
}
XELOGD(" ]");
#else
for (auto&& val : values) {
if (first) {
logger("0x{:08X}", val);
first = false;
} else {
logger(", 0x{:08X}", val);
}
}
#endif
}
std::string GenerateRegnameForPm4Print(uint32_t reg) {
auto reg_info = RegisterFile::GetRegisterInfo(reg);
if (reg_info) {
return reg_info->name;
} else {
return fmt::format("Unknown_Reg_{:04X}", reg);
}
}
XE_NOINLINE
void COMMAND_PROCESSOR::DisassembleCurrentPacket() XE_RESTRICT {
xe::gpu::PacketInfo packet_info;
logging::LoggerBatch<LogLevel::Debug> logger{};
if (PacketDisassembler::DisasmPacket(reader_.buffer() + reader_.read_offset(),
&packet_info)) {
logger("CP - {}, count {}, predicated = {}\n", packet_info.type_info->name,
packet_info.count, packet_info.predicated);
#define LOG_ACTION_FIELD(__type, name) \
logger("\t" #name " = {:08X}\n", action.__type.name)
#define LOG_ACTION_FIELD_DEC(__type, name) \
logger("\t" #name " = {}\n", action.__type.name)
#define LOG_ENDIANNESS(__type, name) \
logger("\t" #name " = {}\n", \
xenos::GetEndianEnglishDescription(action.__type.name))
#define LOG_PRIMTYPE(__type, name) \
logger("\t" #name " = {}\n", \
xenos::GetPrimitiveTypeEnglishDescription(action.__type.name))
for (auto&& action : packet_info.actions) {
using PType = PacketAction::Type;
switch (action.type) {
case PType::kRegisterWrite:
break;
case PType::kSetBinMask:
logger("\tSetBinMask {}\n", action.set_bin_mask.value);
break;
case PType::kSetBinSelect:
logger("\tSetBinSelect {}\n", action.set_bin_select.value);
break;
case PType::kMeInit:
logger("\tMeInit - ");
LOGU32s(logger, action.words);
logger("\n");
break;
case PType::kGenInterrupt:
logger("\tGenInterrupt for cpu mask {:04X}\n",
action.gen_interrupt.cpu_mask);
break;
case PType::kSetBinMaskHi:
case PType::kSetBinMaskLo:
case PType::kSetBinSelectHi:
case PType::kSetBinSelectLo:
LOG_ACTION_FIELD(lohi_op, value);
break;
case PType::kWaitRegMem:
LOG_ACTION_FIELD(wait_reg_mem, wait_info);
LOG_ACTION_FIELD(wait_reg_mem, poll_reg_addr);
LOG_ACTION_FIELD(wait_reg_mem, ref);
LOG_ACTION_FIELD(wait_reg_mem, mask);
LOG_ACTION_FIELD(wait_reg_mem, wait);
break;
case PType::kRegRmw: {
uint32_t rmw_info = action.reg_rmw.rmw_info;
uint32_t and_mask = action.reg_rmw.and_mask;
uint32_t or_mask = action.reg_rmw.or_mask;
uint32_t and_mask_is_reg = (rmw_info >> 31) & 0x1;
uint32_t or_mask_is_reg = (rmw_info >> 30) & 0x1;
std::string and_mask_str;
if (and_mask_is_reg) {
and_mask_str = GenerateRegnameForPm4Print(and_mask & 0x1FFF);
} else {
and_mask_str = fmt::format("0x{:08X}", and_mask);
}
std::string or_mask_str;
if (or_mask_is_reg) {
or_mask_str = GenerateRegnameForPm4Print(or_mask & 0x1FFF);
} else {
or_mask_str = fmt::format("0x{:08X}", or_mask);
}
std::string dest = GenerateRegnameForPm4Print(rmw_info & 0x1FFF);
logger("\t{} = ({} & {}) | {}\n", dest, dest, and_mask_str,
or_mask_str);
LOG_ACTION_FIELD(reg_rmw, rmw_info);
LOG_ACTION_FIELD(reg_rmw, and_mask);
LOG_ACTION_FIELD(reg_rmw, or_mask);
break;
}
case PType::kCondWrite:
LOG_ACTION_FIELD(cond_write, wait_info);
LOG_ACTION_FIELD(cond_write, poll_reg_addr);
LOG_ACTION_FIELD(cond_write, ref);
LOG_ACTION_FIELD(cond_write, mask);
LOG_ACTION_FIELD(cond_write, write_reg_addr);
LOG_ACTION_FIELD(cond_write, write_data);
break;
case PType::kEventWrite:
LOG_ACTION_FIELD(event_write, initiator);
break;
case PType::kEventWriteSHD:
LOG_ACTION_FIELD(event_write_shd, initiator);
LOG_ACTION_FIELD(event_write_shd, address);
LOG_ACTION_FIELD(event_write_shd, value);
break;
case PType::kEventWriteExt:
LOG_ACTION_FIELD(event_write_ext, unk0);
LOG_ACTION_FIELD(event_write_ext, unk1);
break;
case PType::kDrawIndx:
LOG_ACTION_FIELD(draw_indx, dword0);
LOG_ACTION_FIELD(draw_indx, dword1);
LOG_ACTION_FIELD_DEC(draw_indx, index_count);
LOG_PRIMTYPE(draw_indx, prim_type);
LOG_ACTION_FIELD(draw_indx, src_sel);
LOG_ACTION_FIELD(draw_indx, guest_base);
LOG_ACTION_FIELD_DEC(draw_indx, index_size);
LOG_ENDIANNESS(draw_indx, endianness);
break;
case PType::kDrawIndx2:
LOG_ACTION_FIELD(draw_indx2, dword0);
LOG_ACTION_FIELD_DEC(draw_indx2, index_count);
LOG_PRIMTYPE(draw_indx2, prim_type);
LOG_ACTION_FIELD(draw_indx2, src_sel);
LOG_ACTION_FIELD_DEC(draw_indx2, indices_size);
logger("Indices = ");
LOGU32s(logger, action.words);
logger("\n");
break;
case PType::kInvalidateState:
LOG_ACTION_FIELD(invalidate_state, state_mask);
break;
case PType::kImLoad:
LOG_ACTION_FIELD(im_load, shader_type);
LOG_ACTION_FIELD(im_load, addr);
LOG_ACTION_FIELD(im_load, start);
LOG_ACTION_FIELD_DEC(im_load, size_dwords);
break;
case PType::kImLoadImmediate:
LOG_ACTION_FIELD_DEC(im_load_imm, shader_type);
LOG_ACTION_FIELD(im_load_imm, start);
LOG_ACTION_FIELD_DEC(im_load_imm, size_dwords);
logger("Shader instruction words = ");
LOGU32s(logger, action.words);
logger("\n");
break;
case PType::kWaitForIdle:
LOG_ACTION_FIELD(wait_for_idle, probably_unused);
break;
case PType::kContextUpdate:
LOG_ACTION_FIELD(context_update, maybe_unused);
break;
case PType::kVizQuery:
LOG_ACTION_FIELD_DEC(vizquery, id);
LOG_ACTION_FIELD_DEC(vizquery, end);
LOG_ACTION_FIELD(vizquery, dword0);
break;
case PType::kEventWriteZPD:
LOG_ACTION_FIELD(event_write_zpd, initiator);
break;
case PType::kMemWrite:
LOG_ACTION_FIELD(mem_write, addr);
LOG_ENDIANNESS(mem_write, endianness);
logger("Values to write (with GpuSwap pre-applied) = ");
LOGU32s(logger, action.words);
logger("\n");
break;
case PType::kRegToMem:
logger("{}\n", GenerateRegnameForPm4Print(action.reg2mem.reg_addr));
LOG_ACTION_FIELD(reg2mem, mem_addr);
LOG_ENDIANNESS(reg2mem, endianness);
break;
case PType::kIndirBuffer:
LOG_ACTION_FIELD(indir_buffer, list_ptr);
LOG_ACTION_FIELD_DEC(indir_buffer, list_length);
break;
case PType::kXeSwap:
LOG_ACTION_FIELD(xe_swap, frontbuffer_ptr);
break;
}
}
} else {
logger("Unknown packet! Failed to disassemble.\n");
}
logger.submit('d');
}
bool COMMAND_PROCESSOR::ExecutePacket() {
#if XE_ENABLE_PM4_DISASM == 1
if (cvars::disassemble_pm4 && logging::internal::ShouldLog(LogLevel::Debug)) {
COMMAND_PROCESSOR::DisassembleCurrentPacket();
}
#endif
const uint32_t packet = reader_.ReadAndSwap<uint32_t>();
const uint32_t packet_type = packet >> 30;

View File

@ -19,6 +19,8 @@ XE_GPU_REGISTER(0x0048, kDword, BIF_PERFCOUNTER0_SELECT)
XE_GPU_REGISTER(0x0049, kDword, BIF_PERFCOUNTER0_HI)
XE_GPU_REGISTER(0x004A, kDword, BIF_PERFCOUNTER0_LOW)
XE_GPU_REGISTER(0x1C5, kDword, CP_RB_WPTR)
XE_GPU_REGISTER(0x01DD, kDword, SCRATCH_ADDR)
XE_GPU_REGISTER(0x01DC, kDword, SCRATCH_UMSK)
@ -34,19 +36,31 @@ XE_GPU_REGISTER(0x0398, kDword, RBBM_PERFCOUNTER0_HI)
XE_GPU_REGISTER(0x0399, kDword, RBBM_PERFCOUNTER1_LOW)
XE_GPU_REGISTER(0x039A, kDword, RBBM_PERFCOUNTER1_HI)
//XAM reads this directly and stores it to a struct, have not tracked where it goes from there
//PM4 command PM4_MEM_WRITE_CNTR is supposed to write this to memory
//XE_GPU_REGISTER(0x44b, kDword,CP_PROG_COUNTER )
XE_GPU_REGISTER(0x045E, kDword, CALLBACK_ACK)
XE_GPU_REGISTER(0x0578, kDword, SCRATCH_REG0) // interrupt sync
XE_GPU_REGISTER(0x0579, kDword, SCRATCH_REG1) // present interval
XE_GPU_REGISTER(0x057A, kDword, SCRATCH_REG2)
XE_GPU_REGISTER(0x057B, kDword, SCRATCH_REG3)
XE_GPU_REGISTER(0x057C, kDword, CALLBACK_ADDRESS)
XE_GPU_REGISTER(0x057D, kDword, CALLBACK_CONTEXT)
XE_GPU_REGISTER(0x057C, kDword, SCRATCH_REG4) //originally this was named CALLBACK_ADDRESS, but that didnt make sense
XE_GPU_REGISTER(0x057D, kDword, SCRATCH_REG5)
XE_GPU_REGISTER(0x057E, kDword, SCRATCH_REG6)
XE_GPU_REGISTER(0x057F, kDword, SCRATCH_REG7)
XE_GPU_REGISTER(0x05C8, kDword, WAIT_UNTIL)
//update count = 6 bit field, bits 8- 14
//there are several other fields here, they have an unknown purpose
//XE_GPU_REGISTER(0x704, kDword, CP_RB_CNTL)
//address must be 4 byte aligned
//low 2 bits encode two different fields?
//xboxkrnl just does addr |2 when assigning
//XE_GPU_REGISTER(0x70C, kDword, CP_RB_RPTR_ADDR)
XE_GPU_REGISTER(0x0815, kDword, MC0_PERFCOUNTER0_SELECT)
XE_GPU_REGISTER(0x0816, kDword, MC0_PERFCOUNTER0_HI)
XE_GPU_REGISTER(0x0817, kDword, MC0_PERFCOUNTER0_LOW)
@ -142,7 +156,7 @@ XE_GPU_REGISTER(0x0DD8, kDword, SX_PERFCOUNTER0_LOW)
XE_GPU_REGISTER(0x0DD9, kDword, SX_PERFCOUNTER0_HI)
// Set with WAIT_UNTIL = WAIT_3D_IDLECLEAN
XE_GPU_REGISTER(0x0E00, kDword, UNKNOWN_0E00)
XE_GPU_REGISTER(0x0E00, kDword, TC_CNTL_STATUS)
XE_GPU_REGISTER(0x0E05, kDword, TCR_PERFCOUNTER0_SELECT)
XE_GPU_REGISTER(0x0E06, kDword, TCR_PERFCOUNTER0_HI)

View File

@ -32,6 +32,7 @@
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/xenos.h"
#include "xenia/gpu/packet_disassembler.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/user_module.h"
#include "xenia/ui/vulkan/vulkan_presenter.h"

View File

@ -206,7 +206,39 @@ const char* GetDepthRenderTargetFormatName(DepthRenderTargetFormat format) {
return "kUnknown";
}
}
static const char* const g_endian_names[] = {"none", "8 in 16", "8 in 32",
"16 in 32"};
const char* GetEndianEnglishDescription(xenos::Endian endian) {
return g_endian_names[static_cast<uint32_t>(endian)];
}
static const char* const g_primtype_human_names[] = {"none",
"point list",
"line list",
"line strip",
"triangle list",
"triangle fan",
"triangle strip",
"triangle with flags",
"rectangle list",
"unused1",
"unused2",
"unused3",
"line loop",
"quad list",
"quad strip",
"polygon",
"2D copy rect list v0",
"2D copy rect list v1",
"2D copy rect list v2",
"2D copy rect list v3",
"2D fillrect list",
"2D line strip",
"2D triangle strip"};
const char* GetPrimitiveTypeEnglishDescription(xenos::PrimitiveType prim_type) {
return g_primtype_human_names[static_cast<uint32_t>(prim_type)];
}
} // namespace xenos
} // namespace gpu
} // namespace xe

View File

@ -48,6 +48,9 @@ enum class PrimitiveType : uint32_t {
kTriangleStrip = 0x06,
kTriangleWithWFlags = 0x07,
kRectangleList = 0x08,
kUnused1 = 0x09,
kUnused2 = 0x0A,
kUnused3 = 0x0B,
kLineLoop = 0x0C,
kQuadList = 0x0D,
kQuadStrip = 0x0E,
@ -1596,7 +1599,11 @@ inline uint32_t MakePacketType3(Type3Opcode opcode, uint16_t count,
return (3u << 30) | (((count - 1) & 0x3FFF) << 16) | ((opcode & 0x7F) << 8) |
(predicate ? 1 : 0);
}
/*
* pretty english descriptions of enumeration values for the packet disassembler
*/
const char* GetEndianEnglishDescription(xenos::Endian endian);
const char* GetPrimitiveTypeEnglishDescription(xenos::PrimitiveType prim_type);
} // namespace xenos
} // namespace gpu
} // namespace xe

View File

@ -521,10 +521,10 @@ XE_NOALIAS void PrintKernelCall(cpu::Export* export_entry,
string_buffer.Append(')');
if (export_entry->tags & xe::cpu::ExportTag::kImportant) {
xe::logging::AppendLogLine(xe::LogLevel::Info, 'i',
string_buffer.to_string_view());
string_buffer.to_string_view(), LogSrc::Kernel);
} else {
xe::logging::AppendLogLine(xe::LogLevel::Debug, 'd',
string_buffer.to_string_view());
string_buffer.to_string_view(), LogSrc::Kernel);
}
}
/*