From 7e58a3b3207bfaec763d933d917d14b40df3f55a Mon Sep 17 00:00:00 2001 From: "chss95cs@gmail.com" Date: Thu, 29 Sep 2022 07:04:17 -0700 Subject: [PATCH 1/3] Fix compiler errors i introduced under clang-cl remove xe_kernel_export_shim_fn field of Export function_data, trampoline is now the only way exports get invoked Remove kernelstate argument from string functions in order to conform to the trampoline signature (the argument was unused anyway) Constant-evaluated initialization of ppc_opcode_disasm_table, removal of unused std::vector fields Constant-evaluated initialization of export tables name field on export is just a const char* now, only immutable static strings are ever passed to it Remove unused callcount field of export. PM4 compare op function extracted Globally apply /Oy, /GS-, /Gw on msvc windows Remove imgui testwindow code call, it took up like 300 kb --- premake5.lua | 6 ++ src/xenia/apu/conversion.h | 32 ++----- src/xenia/base/dma.cc | 38 +++----- src/xenia/base/memory.h | 10 +- src/xenia/base/platform_amd64.cc | 2 +- src/xenia/cpu/export_resolver.cc | 2 +- src/xenia/cpu/export_resolver.h | 66 ++++++------- src/xenia/cpu/ppc/ppc_opcode_disasm_gen.cc | 6 +- src/xenia/cpu/ppc/ppc_opcode_info.h | 8 +- src/xenia/cpu/xex_module.cc | 9 +- src/xenia/debug/ui/debug_window.cc | 3 +- src/xenia/gpu/command_processor.cc | 8 -- src/xenia/gpu/command_processor.h | 10 -- .../gpu/pm4_command_processor_implement.h | 95 +++++++------------ src/xenia/kernel/kernel_module.cc | 8 +- src/xenia/kernel/user_module.cc | 2 +- src/xenia/kernel/util/shim_utils.h | 1 - src/xenia/kernel/xam/xam_input.cc | 2 +- src/xenia/kernel/xam/xam_module.cc | 16 ++-- src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc | 20 ++-- 20 files changed, 140 insertions(+), 204 deletions(-) diff --git a/premake5.lua b/premake5.lua index 0371cbee9..71efb4a85 100644 --- a/premake5.lua +++ b/premake5.lua @@ -62,6 +62,12 @@ filter({"configurations:Checked", "platforms:Linux"}) defines({ "_GLIBCXX_DEBUG", -- libstdc++ debug mode }) +filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Gw", + "/GS-", + "/Oy" + }) filter("configurations:Debug") runtime("Release") diff --git a/src/xenia/apu/conversion.h b/src/xenia/apu/conversion.h index 0f807d67b..da9e761f3 100644 --- a/src/xenia/apu/conversion.h +++ b/src/xenia/apu/conversion.h @@ -21,29 +21,7 @@ namespace conversion { #if XE_ARCH_AMD64 -#if 0 -inline void sequential_6_BE_to_interleaved_6_LE(float* output, - const float* input, - size_t ch_sample_count) { - const uint32_t* in = reinterpret_cast(input); - uint32_t* out = reinterpret_cast(output); - const __m128i byte_swap_shuffle = - _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); - for (size_t sample = 0; sample < ch_sample_count; sample++) { - __m128i sample0 = _mm_set_epi32( - in[3 * ch_sample_count + sample], in[2 * ch_sample_count + sample], - in[1 * ch_sample_count + sample], in[0 * ch_sample_count + sample]); - uint32_t sample1 = in[4 * ch_sample_count + sample]; - uint32_t sample2 = in[5 * ch_sample_count + sample]; - sample0 = _mm_shuffle_epi8(sample0, byte_swap_shuffle); - _mm_storeu_si128(reinterpret_cast<__m128i*>(&out[sample * 6]), sample0); - sample1 = xe::byte_swap(sample1); - out[sample * 6 + 4] = sample1; - sample2 = xe::byte_swap(sample2); - out[sample * 6 + 5] = sample2; - } -} -#else + XE_NOINLINE static void _generic_sequential_6_BE_to_interleaved_6_LE( float* XE_RESTRICT output, const float* XE_RESTRICT input, @@ -58,6 +36,8 @@ static void _generic_sequential_6_BE_to_interleaved_6_LE( } } } +#if XE_COMPILER_CLANG_CL != 1 +// load_be_u32 unavailable on clang-cl XE_NOINLINE static void _movbe_sequential_6_BE_to_interleaved_6_LE( float* XE_RESTRICT output, const float* XE_RESTRICT input, @@ -80,7 +60,13 @@ inline static void sequential_6_BE_to_interleaved_6_LE( ch_sample_count); } } +#else +inline static void sequential_6_BE_to_interleaved_6_LE( + float* output, const float* input, unsigned ch_sample_count) { + _generic_sequential_6_BE_to_interleaved_6_LE(output, input, ch_sample_count); +} #endif + inline void sequential_6_BE_to_interleaved_2_LE(float* output, const float* input, size_t ch_sample_count) { diff --git a/src/xenia/base/dma.cc b/src/xenia/base/dma.cc index 664f96054..520cbdbc3 100644 --- a/src/xenia/base/dma.cc +++ b/src/xenia/base/dma.cc @@ -2,7 +2,6 @@ #include "logging.h" #include "mutex.h" #include "platform_win.h" -#include "xbyak/xbyak/xbyak_util.h" XE_NTDLL_IMPORT(NtDelayExecution, cls_NtDelayExecution, NtDelayExecutionPointer); @@ -22,7 +21,13 @@ static void xedmaloghelper(const char (&fmt)[N], Ts... args) { #define XEDMALOG(...) static_cast(0) using xe::swcache::CacheLine; static constexpr unsigned NUM_CACHELINES_IN_PAGE = 4096 / sizeof(CacheLine); - +#if defined(__clang__) +XE_FORCEINLINE +static void mvdir64b(void* to, const void* from) { + __asm__("movdir64b %1, %0" : : "r"(to), "m"(*(char*)from) : "memory"); +} +#define _movdir64b mvdir64b +#endif XE_FORCEINLINE static void XeCopy16384StreamingAVX(CacheLine* XE_RESTRICT to, CacheLine* XE_RESTRICT from) { @@ -140,6 +145,7 @@ static void vastcpy_impl_avx(CacheLine* XE_RESTRICT physaddr, xe::swcache::WriteLineNT(physaddr + i, &line0); } } + static void vastcpy_impl_movdir64m(CacheLine* XE_RESTRICT physaddr, CacheLine* XE_RESTRICT rdmapping, uint32_t written_length) { @@ -171,24 +177,6 @@ static void vastcpy_impl_movdir64m(CacheLine* XE_RESTRICT physaddr, } } -static class DMAFeatures { - public: - uint32_t has_fast_rep_movsb : 1; - uint32_t has_movdir64b : 1; - - DMAFeatures() { - unsigned int data[4]; - memset(data, 0, sizeof(data)); - // intel extended features - Xbyak::util::Cpu::getCpuidEx(7, 0, data); - if (data[2] & (1 << 28)) { - has_movdir64b = 1; - } - if (data[1] & (1 << 9)) { - has_fast_rep_movsb = 1; - } - } -} dma_x86_features; XE_COLD static void first_vastcpy(CacheLine* XE_RESTRICT physaddr, CacheLine* XE_RESTRICT rdmapping, @@ -201,7 +189,7 @@ static void first_vastcpy(CacheLine* XE_RESTRICT physaddr, CacheLine* XE_RESTRICT rdmapping, uint32_t written_length) { VastCpyDispatch dispatch_to_use = nullptr; - if (dma_x86_features.has_movdir64b) { + if (amd64::GetFeatureFlags() & amd64::kX64EmitMovdir64M) { XELOGI("Selecting MOVDIR64M vastcpy."); dispatch_to_use = vastcpy_impl_movdir64m; } else { @@ -271,10 +259,10 @@ class XeDMACGeneric : public XeDMAC { virtual void WaitJobDone(DMACJobHandle handle) override { while (WaitForSingleObject((HANDLE)handle, 2) == WAIT_TIMEOUT) { // NtAlertThreadByThreadId.invoke(thrd_->system_id()); - // while (SignalObjectAndWait(gotjob_event, (HANDLE)handle, 2, false) == - // WAIT_TIMEOUT) { - // ; - } + // while (SignalObjectAndWait(gotjob_event, (HANDLE)handle, 2, false) == + // WAIT_TIMEOUT) { + // ; + } //} // SignalObjectAndWait(gotjob_event, (HANDLE)handle, INFINITE, false); diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h index b5574b815..b20b33b97 100644 --- a/src/xenia/base/memory.h +++ b/src/xenia/base/memory.h @@ -616,23 +616,23 @@ static void Prefetch(const void* addr) { } template <> -static void Prefetch(const void* addr) { +void Prefetch(const void* addr) { PrefetchW(addr); } template <> -static void Prefetch(const void* addr) { +void Prefetch(const void* addr) { PrefetchNTA(addr); } template <> -static void Prefetch(const void* addr) { +void Prefetch(const void* addr) { PrefetchL3(addr); } template <> -static void Prefetch(const void* addr) { +void Prefetch(const void* addr) { PrefetchL2(addr); } template <> -static void Prefetch(const void* addr) { +void Prefetch(const void* addr) { PrefetchL1(addr); } // todo: does aarch64 have streaming stores/loads? diff --git a/src/xenia/base/platform_amd64.cc b/src/xenia/base/platform_amd64.cc index 7005420e5..e852ea1bf 100644 --- a/src/xenia/base/platform_amd64.cc +++ b/src/xenia/base/platform_amd64.cc @@ -9,7 +9,7 @@ #include "xenia/base/cvar.h" #include "xenia/base/platform.h" - +#define XBYAK_NO_OP_NAMES #include "third_party/xbyak/xbyak/xbyak.h" #include "third_party/xbyak/xbyak/xbyak_util.h" DEFINE_int64(x64_extension_mask, -1LL, diff --git a/src/xenia/cpu/export_resolver.cc b/src/xenia/cpu/export_resolver.cc index b05df5d83..1b60e46a7 100644 --- a/src/xenia/cpu/export_resolver.cc +++ b/src/xenia/cpu/export_resolver.cc @@ -81,7 +81,7 @@ void ExportResolver::SetFunctionMapping(const std::string_view module_name, auto export_entry = GetExportByOrdinal(module_name, ordinal); assert_not_null(export_entry); export_entry->tags |= ExportTag::kImplemented; - export_entry->function_data.shim = shim; + export_entry->function_data.trampoline = (ExportTrampoline)(void*)shim; } void ExportResolver::SetFunctionMapping(const std::string_view module_name, diff --git a/src/xenia/cpu/export_resolver.h b/src/xenia/cpu/export_resolver.h index 216f47548..65884e31f 100644 --- a/src/xenia/cpu/export_resolver.h +++ b/src/xenia/cpu/export_resolver.h @@ -44,57 +44,50 @@ struct ExportTag { // packed like so: // ll...... cccccccc ........ ..bihssi - static const int CategoryShift = 16; + static constexpr int CategoryShift = 16; // Export is implemented in some form and can be used. - static const type kImplemented = 1u << 0; + static constexpr type kImplemented = 1u << 0; // Export is a stub and is probably bad. - static const type kStub = 1u << 1; + static constexpr type kStub = 1u << 1; // Export is known to cause problems, or may not be complete. - static const type kSketchy = 1u << 2; + static constexpr type kSketchy = 1u << 2; // Export is called *a lot*. - static const type kHighFrequency = 1u << 3; + static constexpr type kHighFrequency = 1u << 3; // Export is important and should always be logged. - static const type kImportant = 1u << 4; + static constexpr type kImportant = 1u << 4; // Export blocks the calling thread - static const type kBlocking = 1u << 5; - + static constexpr type kBlocking = 1u << 5; + static constexpr type kIsVariable = 1u << 6; // Export will be logged on each call. - static const type kLog = 1u << 30; + static constexpr type kLog = 1u << 30; // Export's result will be logged on each call. - static const type kLogResult = 1u << 31; + static constexpr type kLogResult = 1u << 31; }; // DEPRECATED typedef void (*xe_kernel_export_shim_fn)(void*, void*); typedef void (*ExportTrampoline)(ppc::PPCContext* ppc_context); - +#pragma pack(push, 1) class Export { public: enum class Type { kFunction = 0, kVariable = 1, }; - - Export(uint16_t ordinal, Type type, const char* name, - ExportTag::type tags = 0) - : ordinal(ordinal), - type(type), + constexpr Export(uint16_t ordinal, Type type, const char* name, + ExportTag::type tags = 0) + : function_data({nullptr}), + name(name ? name : ""), tags(tags), - function_data({nullptr, nullptr, 0}) { - std::strncpy(this->name, name, xe::countof(this->name)); + ordinal(ordinal) + + { + if (type == Type::kVariable) { + this->tags |= ExportTag::kIsVariable; + } } - - uint16_t ordinal; - Type type; - char name[96]; - ExportTag::type tags; - - bool is_implemented() const { - return (tags & ExportTag::kImplemented) == ExportTag::kImplemented; - } - union { // Variable data. Only valid when kXEKernelExportFlagVariable is set. // This is an address in the client memory space that the variable can @@ -102,17 +95,26 @@ class Export { uint32_t variable_ptr; struct { - // DEPRECATED - xe_kernel_export_shim_fn shim; // Trampoline that is called from the guest-to-host thunk. // Expects only PPC context as first arg. ExportTrampoline trampoline; - uint64_t call_count; } function_data; }; -}; + const char* const name; + ExportTag::type tags; + uint16_t ordinal; + // Type type; + constexpr bool is_implemented() const { + return (tags & ExportTag::kImplemented) == ExportTag::kImplemented; + } + constexpr Type get_type() const { + return (this->tags & ExportTag::kIsVariable) ? Type::kVariable + : Type::kFunction; + } +}; +#pragma pack(pop) class ExportResolver { public: class Table { diff --git a/src/xenia/cpu/ppc/ppc_opcode_disasm_gen.cc b/src/xenia/cpu/ppc/ppc_opcode_disasm_gen.cc index 3a2772bd5..a66c4c2a2 100644 --- a/src/xenia/cpu/ppc/ppc_opcode_disasm_gen.cc +++ b/src/xenia/cpu/ppc/ppc_opcode_disasm_gen.cc @@ -4948,8 +4948,8 @@ void PrintDisasm_xorx(const PPCDecodeData& d, StringBuffer* str) { } #define INIT_LIST(...) {__VA_ARGS__} #define INSTRUCTION(opcode, mnem, form, group, type, desc, reads, writes, fn) \ - {PPCOpcodeGroup::group, PPCOpcodeFormat::form, opcode, mnem, desc, INIT_LIST reads, INIT_LIST writes, fn} -PPCOpcodeDisasmInfo ppc_opcode_disasm_table[] = { + {PPCOpcodeGroup::group, PPCOpcodeFormat::form, opcode, mnem, desc, fn} +static constexpr PPCOpcodeDisasmInfo ppc_opcode_disasm_table[] = { INSTRUCTION(0x7c000014, "addcx" , kXO , kI, kGeneral, "Add Carrying" , (PPCOpcodeField::kRA,PPCOpcodeField::kRB), (PPCOpcodeField::kRD,PPCOpcodeField::kCA,PPCOpcodeField::kOEcond,PPCOpcodeField::kCRcond), PrintDisasm_addcx), INSTRUCTION(0x7c000114, "addex" , kXO , kI, kGeneral, "Add Extended" , (PPCOpcodeField::kRA,PPCOpcodeField::kRB,PPCOpcodeField::kCA), (PPCOpcodeField::kRD,PPCOpcodeField::kOEcond,PPCOpcodeField::kCRcond), PrintDisasm_addex), INSTRUCTION(0x38000000, "addi" , kD , kI, kGeneral, "Add Immediate" , (PPCOpcodeField::kRA0,PPCOpcodeField::kSIMM), (PPCOpcodeField::kRD), PrintDisasm_addi), @@ -5414,7 +5414,7 @@ const PPCOpcodeDisasmInfo& GetOpcodeDisasmInfo(PPCOpcode opcode) { } void RegisterOpcodeDisasm(PPCOpcode opcode, InstrDisasmFn fn) { assert_null(ppc_opcode_disasm_table[static_cast(opcode)].disasm); - ppc_opcode_disasm_table[static_cast(opcode)].disasm = fn; + const_cast( &ppc_opcode_disasm_table[static_cast(opcode)])->disasm = fn; } } // namespace ppc diff --git a/src/xenia/cpu/ppc/ppc_opcode_info.h b/src/xenia/cpu/ppc/ppc_opcode_info.h index f61f1a443..3a98210a8 100644 --- a/src/xenia/cpu/ppc/ppc_opcode_info.h +++ b/src/xenia/cpu/ppc/ppc_opcode_info.h @@ -133,18 +133,18 @@ enum class PPCOpcodeField : uint32_t { kTO, kLEV, }; - +#pragma pack(push, 1) struct PPCOpcodeDisasmInfo { PPCOpcodeGroup group; PPCOpcodeFormat format; uint32_t opcode; const char* name; const char* description; - std::vector reads; - std::vector writes; + // std::vector reads; + // std::vector writes; InstrDisasmFn disasm; }; - +#pragma pack(pop) PPCOpcode LookupOpcode(uint32_t code); const PPCOpcodeInfo& GetOpcodeInfo(PPCOpcode opcode); diff --git a/src/xenia/cpu/xex_module.cc b/src/xenia/cpu/xex_module.cc index 6e0d13178..d7325ea91 100644 --- a/src/xenia/cpu/xex_module.cc +++ b/src/xenia/cpu/xex_module.cc @@ -1194,11 +1194,11 @@ bool XexModule::SetupLibraryImports(const std::string_view name, } if (kernel_export) { - if (kernel_export->type == Export::Type::kFunction) { + if (kernel_export->get_type() == Export::Type::kFunction) { // Not exactly sure what this should be... // Appears to be ignored. *record_slot = 0xDEADC0DE; - } else if (kernel_export->type == Export::Type::kVariable) { + } else if (kernel_export->get_type() == Export::Type::kVariable) { // Kernel import variable if (kernel_export->is_implemented()) { // Implemented - replace with pointer. @@ -1287,8 +1287,9 @@ bool XexModule::SetupLibraryImports(const std::string_view name, handler = (GuestFunction::ExternHandler) kernel_export->function_data.trampoline; } else { - handler = - (GuestFunction::ExternHandler)kernel_export->function_data.shim; + //__debugbreak(); + // handler = + // (GuestFunction::ExternHandler)kernel_export->function_data.shim; } } else { XELOGW("WARNING: Imported kernel function {} is unimplemented!", diff --git a/src/xenia/debug/ui/debug_window.cc b/src/xenia/debug/ui/debug_window.cc index 0c03b7ebb..9b067b571 100644 --- a/src/xenia/debug/ui/debug_window.cc +++ b/src/xenia/debug/ui/debug_window.cc @@ -258,7 +258,6 @@ void DebugWindow::DrawFrame(ImGuiIO& io) { ImGui::PopStyleVar(); if (cvars::imgui_debug) { - ImGui::ShowDemoWindow(); ImGui::ShowMetricsWindow(); } } @@ -1254,7 +1253,7 @@ void DebugWindow::DrawBreakpointsPane() { continue; } auto export_entry = all_exports[call_rankings[i].first]; - if (export_entry->type != cpu::Export::Type::kFunction || + if (export_entry->get_type() != cpu::Export::Type::kFunction || !export_entry->is_implemented()) { continue; } diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index 66da46546..5e0ea6c1a 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -527,56 +527,48 @@ void CommandProcessor::WriteFetchRangeFromRing(xe::RingBuffer* ring, WriteRegisterRangeFromRing(ring, base + 0x4800, num_times); } -XE_FORCEINLINE void CommandProcessor::WriteBoolRangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times) { WriteRegisterRangeFromRing(ring, base + 0x4900, num_times); } -XE_FORCEINLINE void CommandProcessor::WriteLoopRangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times) { WriteRegisterRangeFromRing(ring, base + 0x4908, num_times); } -XE_FORCEINLINE void CommandProcessor::WriteREGISTERSRangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times) { WriteRegisterRangeFromRing(ring, base + 0x2000, num_times); } -XE_FORCEINLINE void CommandProcessor::WriteALURangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers) { WriteRegistersFromMem(start_index + 0x4000, base, num_registers); } -XE_FORCEINLINE void CommandProcessor::WriteFetchRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers) { WriteRegistersFromMem(start_index + 0x4800, base, num_registers); } -XE_FORCEINLINE void CommandProcessor::WriteBoolRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers) { WriteRegistersFromMem(start_index + 0x4900, base, num_registers); } -XE_FORCEINLINE void CommandProcessor::WriteLoopRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers) { WriteRegistersFromMem(start_index + 0x4908, base, num_registers); } -XE_FORCEINLINE void CommandProcessor::WriteREGISTERSRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers) { diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index 72c3258f1..4e96391b8 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -178,43 +178,33 @@ class CommandProcessor { num_times); // repeatedly write a value to one register, presumably a // register with special handling for writes - XE_FORCEINLINE void WriteALURangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times); - XE_FORCEINLINE void WriteFetchRangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times); - XE_FORCEINLINE void WriteBoolRangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times); - XE_FORCEINLINE void WriteLoopRangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times); - XE_FORCEINLINE void WriteREGISTERSRangeFromRing(xe::RingBuffer* ring, uint32_t base, uint32_t num_times); - XE_FORCEINLINE void WriteALURangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers); - XE_FORCEINLINE void WriteFetchRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers); - XE_FORCEINLINE void WriteBoolRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers); - XE_FORCEINLINE void WriteLoopRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers); - XE_FORCEINLINE void WriteREGISTERSRangeFromMem(uint32_t start_index, uint32_t* base, uint32_t num_registers); diff --git a/src/xenia/gpu/pm4_command_processor_implement.h b/src/xenia/gpu/pm4_command_processor_implement.h index 1c877a9ab..48795abba 100644 --- a/src/xenia/gpu/pm4_command_processor_implement.h +++ b/src/xenia/gpu/pm4_command_processor_implement.h @@ -9,8 +9,6 @@ void COMMAND_PROCESSOR::ExecuteIndirectBuffer(uint32_t ptr, RingBuffer old_reader = reader_; // Execute commands! - // RingBuffer reader(memory_->TranslatePhysical(ptr), count * - // sizeof(uint32_t)); reader.set_write_offset(count * sizeof(uint32_t)); new (&reader_) RingBuffer(memory_->TranslatePhysical(ptr), count * sizeof(uint32_t)); reader_.set_write_offset(count * sizeof(uint32_t)); @@ -429,6 +427,38 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_INDIRECT_BUFFER( COMMAND_PROCESSOR::ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); return true; } + +XE_NOINLINE +static bool MatchValueAndRef(uint32_t value, uint32_t ref, uint32_t wait_info) { + bool matched = false; + switch (wait_info & 0x7) { + case 0x0: // Never. + matched = false; + break; + case 0x1: // Less than reference. + matched = value < ref; + break; + case 0x2: // Less than or equal to reference. + matched = value <= ref; + break; + case 0x3: // Equal to reference. + matched = value == ref; + break; + case 0x4: // Not equal to reference. + matched = value != ref; + break; + case 0x5: // Greater than or equal to reference. + matched = value >= ref; + break; + case 0x6: // Greater than reference. + matched = value > ref; + break; + case 0x7: // Always + matched = true; + break; + } + return matched; +} XE_NOINLINE bool COMMAND_PROCESSOR::ExecutePacketType3_WAIT_REG_MEM( uint32_t packet, uint32_t count) XE_RESTRICT { @@ -459,32 +489,8 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_WAIT_REG_MEM( value = register_file_->values[poll_reg_addr].u32; } } - switch (wait_info & 0x7) { - case 0x0: // Never. - matched = false; - break; - case 0x1: // Less than reference. - matched = (value & mask) < ref; - break; - case 0x2: // Less than or equal to reference. - matched = (value & mask) <= ref; - break; - case 0x3: // Equal to reference. - matched = (value & mask) == ref; - break; - case 0x4: // Not equal to reference. - matched = (value & mask) != ref; - break; - case 0x5: // Greater than or equal to reference. - matched = (value & mask) >= ref; - break; - case 0x6: // Greater than reference. - matched = (value & mask) > ref; - break; - case 0x7: // Always - matched = true; - break; - } + matched = MatchValueAndRef(value & mask, ref, wait_info); + if (!matched) { // Wait. if (wait >= 0x100) { @@ -598,34 +604,8 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_COND_WRITE( assert_true(poll_reg_addr < RegisterFile::kRegisterCount); value = register_file_->values[poll_reg_addr].u32; } - bool matched = false; - value &= mask; - switch (wait_info & 0x7) { - case 0x0: // Never. - matched = false; - break; - case 0x1: // Less than reference. - matched = value < ref; - break; - case 0x2: // Less than or equal to reference. - matched = value <= ref; - break; - case 0x3: // Equal to reference. - matched = value == ref; - break; - case 0x4: // Not equal to reference. - matched = value != ref; - break; - case 0x5: // Greater than or equal to reference. - matched = value >= ref; - break; - case 0x6: // Greater than reference. - matched = value > ref; - break; - case 0x7: // Always - matched = true; - break; - } + bool matched = MatchValueAndRef(value & mask, ref, wait_info); + if (matched) { // Write. if (wait_info & 0x100) { @@ -718,9 +698,6 @@ bool COMMAND_PROCESSOR::ExecutePacketType3_EVENT_WRITE_EXT( for (unsigned i = 0; i < 6; ++i) { destination[i] = extents[i]; } - // xe::copy_and_swap_16_unaligned(memory_->TranslatePhysical(address), - // extents, - // xe::countof(extents)); trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents)); return true; diff --git a/src/xenia/kernel/kernel_module.cc b/src/xenia/kernel/kernel_module.cc index 1b79b73fb..ebf0d64ba 100644 --- a/src/xenia/kernel/kernel_module.cc +++ b/src/xenia/kernel/kernel_module.cc @@ -94,7 +94,7 @@ uint32_t KernelModule::GetProcAddressByOrdinal(uint16_t ordinal) { // Export (or its parent library) not found. return 0; } - if (export_entry->type == cpu::Export::Type::kVariable) { + if (export_entry->get_type() == cpu::Export::Type::kVariable) { if (export_entry->variable_ptr) { return export_entry->variable_ptr; } else { @@ -105,8 +105,7 @@ uint32_t KernelModule::GetProcAddressByOrdinal(uint16_t ordinal) { return 0; } } else { - if (export_entry->function_data.trampoline || - export_entry->function_data.shim) { + if (export_entry->function_data.trampoline) { auto global_lock = global_critical_region_.Acquire(); // See if the function has been generated already. @@ -119,9 +118,6 @@ uint32_t KernelModule::GetProcAddressByOrdinal(uint16_t ordinal) { if (export_entry->function_data.trampoline) { handler = (cpu::GuestFunction::ExternHandler) export_entry->function_data.trampoline; - } else { - handler = - (cpu::GuestFunction::ExternHandler)export_entry->function_data.shim; } uint32_t guest_addr = diff --git a/src/xenia/kernel/user_module.cc b/src/xenia/kernel/user_module.cc index f2dc5b1d8..1e26664c6 100644 --- a/src/xenia/kernel/user_module.cc +++ b/src/xenia/kernel/user_module.cc @@ -783,7 +783,7 @@ void UserModule::Dump() { } } if (kernel_export && - kernel_export->type == cpu::Export::Type::kVariable) { + kernel_export->get_type() == cpu::Export::Type::kVariable) { sb.AppendFormat(" V {:08X} {:03X} ({:4}) {} {}\n", info->value_address, info->ordinal, info->ordinal, implemented ? " " : "!!", name); diff --git a/src/xenia/kernel/util/shim_utils.h b/src/xenia/kernel/util/shim_utils.h index b1b85e5a8..7ead28998 100644 --- a/src/xenia/kernel/util/shim_utils.h +++ b/src/xenia/kernel/util/shim_utils.h @@ -554,7 +554,6 @@ struct ExportRegistrerHelper { new cpu::Export(ORDINAL, xe::cpu::Export::Type::kFunction, name, TAGS); struct X { static void Trampoline(PPCContext* ppc_context) { - ++export_entry->function_data.call_count; Param::Init init = { ppc_context, 0, diff --git a/src/xenia/kernel/xam/xam_input.cc b/src/xenia/kernel/xam/xam_input.cc index 789c276ea..242ee1cfa 100644 --- a/src/xenia/kernel/xam/xam_input.cc +++ b/src/xenia/kernel/xam/xam_input.cc @@ -91,7 +91,7 @@ DECLARE_XAM_EXPORT1(XamInputGetCapabilitiesEx, kInput, kSketchy); dword_result_t XamInputGetState_entry(dword_t user_index, dword_t flags, pointer_t input_state) { if (input_state) { - memset((void*)input_state.host_address(), 0, sizeof X_INPUT_STATE); + memset((void*)input_state.host_address(), 0, sizeof(X_INPUT_STATE)); } if (user_index >= 4) { return X_ERROR_DEVICE_NOT_CONNECTED; diff --git a/src/xenia/kernel/xam/xam_module.cc b/src/xenia/kernel/xam/xam_module.cc index a94b1d6ef..c32e702bd 100644 --- a/src/xenia/kernel/xam/xam_module.cc +++ b/src/xenia/kernel/xam/xam_module.cc @@ -41,21 +41,21 @@ xe::cpu::Export* RegisterExport_xam(xe::cpu::Export* export_entry) { xam_exports[export_entry->ordinal] = export_entry; return export_entry; } - +// Build the export table used for resolution. +#include "xenia/kernel/util/export_table_pre.inc" +static constexpr xe::cpu::Export xam_export_table[] = { +#include "xenia/kernel/xam/xam_table.inc" +}; +#include "xenia/kernel/util/export_table_post.inc" void XamModule::RegisterExportTable(xe::cpu::ExportResolver* export_resolver) { assert_not_null(export_resolver); -// Build the export table used for resolution. -#include "xenia/kernel/util/export_table_pre.inc" - static xe::cpu::Export xam_export_table[] = { -#include "xenia/kernel/xam/xam_table.inc" - }; -#include "xenia/kernel/util/export_table_post.inc" for (size_t i = 0; i < xe::countof(xam_export_table); ++i) { auto& export_entry = xam_export_table[i]; assert_true(export_entry.ordinal < xam_exports.size()); if (!xam_exports[export_entry.ordinal]) { - xam_exports[export_entry.ordinal] = &export_entry; + xam_exports[export_entry.ordinal] = + const_cast(&export_entry); } } export_resolver->RegisterTable("xam.xex", &xam_exports); diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc index 3ba806c89..6da1b5380 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_strings.cc @@ -824,7 +824,7 @@ class WideCountFormatData : public FormatData { int32_t count_; }; -SHIM_CALL DbgPrint_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL DbgPrint_entry(PPCContext* ppc_context) { uint32_t format_ptr = SHIM_GET_ARG_32(0); if (!format_ptr) { SHIM_SET_RETURN_32(X_STATUS_INVALID_PARAMETER); @@ -854,7 +854,7 @@ SHIM_CALL DbgPrint_entry(PPCContext* ppc_context, KernelState* kernel_state) { } // https://msdn.microsoft.com/en-us/library/2ts7cx93.aspx -SHIM_CALL _snprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL _snprintf_entry(PPCContext* ppc_context) { uint32_t buffer_ptr = SHIM_GET_ARG_32(0); int32_t buffer_count = SHIM_GET_ARG_32(1); uint32_t format_ptr = SHIM_GET_ARG_32(2); @@ -894,7 +894,7 @@ SHIM_CALL _snprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { } // https://msdn.microsoft.com/en-us/library/ybk95axf.aspx -SHIM_CALL sprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL sprintf_entry(PPCContext* ppc_context) { uint32_t buffer_ptr = SHIM_GET_ARG_32(0); uint32_t format_ptr = SHIM_GET_ARG_32(1); @@ -925,7 +925,7 @@ SHIM_CALL sprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { } // https://msdn.microsoft.com/en-us/library/2ts7cx93.aspx -SHIM_CALL _snwprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL _snwprintf_entry(PPCContext* ppc_context) { uint32_t buffer_ptr = SHIM_GET_ARG_32(0); int32_t buffer_count = SHIM_GET_ARG_32(1); uint32_t format_ptr = SHIM_GET_ARG_32(2); @@ -966,7 +966,7 @@ SHIM_CALL _snwprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { } // https://msdn.microsoft.com/en-us/library/ybk95axf.aspx -SHIM_CALL swprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL swprintf_entry(PPCContext* ppc_context) { uint32_t buffer_ptr = SHIM_GET_ARG_32(0); uint32_t format_ptr = SHIM_GET_ARG_32(1); @@ -998,7 +998,7 @@ SHIM_CALL swprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { } // https://msdn.microsoft.com/en-us/library/1kt27hek.aspx -SHIM_CALL _vsnprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL _vsnprintf_entry(PPCContext* ppc_context) { uint32_t buffer_ptr = SHIM_GET_ARG_32(0); int32_t buffer_count = SHIM_GET_ARG_32(1); uint32_t format_ptr = SHIM_GET_ARG_32(2); @@ -1087,7 +1087,7 @@ SHIM_CALL _vsnwprintf_entry(PPCContext* ppc_context, } // https://msdn.microsoft.com/en-us/library/28d5ce15.aspx -SHIM_CALL vsprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL vsprintf_entry(PPCContext* ppc_context) { uint32_t buffer_ptr = SHIM_GET_ARG_32(0); uint32_t format_ptr = SHIM_GET_ARG_32(1); uint32_t arg_ptr = SHIM_GET_ARG_32(2); @@ -1147,7 +1147,7 @@ SHIM_CALL _vscwprintf_entry(PPCContext* ppc_context, } // https://msdn.microsoft.com/en-us/library/28d5ce15.aspx -SHIM_CALL vswprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { +SHIM_CALL vswprintf_entry(PPCContext* ppc_context) { uint32_t buffer_ptr = SHIM_GET_ARG_32(0); uint32_t format_ptr = SHIM_GET_ARG_32(1); uint32_t arg_ptr = SHIM_GET_ARG_32(2); @@ -1179,7 +1179,7 @@ SHIM_CALL vswprintf_entry(PPCContext* ppc_context, KernelState* kernel_state) { } SHIM_SET_RETURN_32(count); } - +#if 1 void RegisterStringExports(xe::cpu::ExportResolver* export_resolver, KernelState* state) { SHIM_SET_MAPPING("xboxkrnl.exe", DbgPrint, state); @@ -1193,7 +1193,7 @@ void RegisterStringExports(xe::cpu::ExportResolver* export_resolver, SHIM_SET_MAPPING("xboxkrnl.exe", vswprintf, state); SHIM_SET_MAPPING("xboxkrnl.exe", _vsnwprintf, state); } - +#endif } // namespace xboxkrnl } // namespace kernel } // namespace xe From b4c175d8a3fb4d44a1a4b2238dcf538c7cc4e9b6 Mon Sep 17 00:00:00 2001 From: "chss95cs@gmail.com" Date: Thu, 29 Sep 2022 07:26:38 -0700 Subject: [PATCH 2/3] Enable SDL_LEAN_AND_MEAN, SDL_RENDER_DISABLED, saves about 500kb in final exe Build several projects that arent performance critical with /Os and /O1 under msvc windows --- src/xenia/app/premake5.lua | 5 +++++ src/xenia/cpu/premake5.lua | 5 +++++ src/xenia/debug/ui/premake5.lua | 5 +++++ src/xenia/hid/premake5.lua | 6 +++++- src/xenia/ui/d3d12/premake5.lua | 5 +++++ src/xenia/ui/premake5.lua | 6 +++++- src/xenia/ui/vulkan/premake5.lua | 5 +++++ src/xenia/vfs/premake5.lua | 5 +++++ third_party/SDL2-static.lua | 2 ++ third_party/capstone.lua | 5 +++++ third_party/fmt.lua | 5 +++++ third_party/glslang-spirv.lua | 5 +++++ third_party/imgui.lua | 5 +++++ 13 files changed, 62 insertions(+), 2 deletions(-) diff --git a/src/xenia/app/premake5.lua b/src/xenia/app/premake5.lua index 011dd225f..c6499d3a4 100644 --- a/src/xenia/app/premake5.lua +++ b/src/xenia/app/premake5.lua @@ -119,6 +119,11 @@ project("xenia-app") "xenia-gpu-d3d12-trace-viewer", "xenia-ui-window-d3d12-demo", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) filter("platforms:Windows") -- Only create the .user file if it doesn't already exist. diff --git a/src/xenia/cpu/premake5.lua b/src/xenia/cpu/premake5.lua index 748602659..d1291d198 100644 --- a/src/xenia/cpu/premake5.lua +++ b/src/xenia/cpu/premake5.lua @@ -10,6 +10,11 @@ project("xenia-cpu") "xenia-base", "mspack", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) includedirs({ project_root.."/third_party/llvm/include", }) diff --git a/src/xenia/debug/ui/premake5.lua b/src/xenia/debug/ui/premake5.lua index 7503528b3..d85334a5b 100644 --- a/src/xenia/debug/ui/premake5.lua +++ b/src/xenia/debug/ui/premake5.lua @@ -12,6 +12,11 @@ project("xenia-debug-ui") "xenia-cpu", "xenia-ui", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) defines({ }) includedirs({ diff --git a/src/xenia/hid/premake5.lua b/src/xenia/hid/premake5.lua index 4e961f623..ccbedefb9 100644 --- a/src/xenia/hid/premake5.lua +++ b/src/xenia/hid/premake5.lua @@ -38,7 +38,11 @@ project("xenia-hid-demo") resincludedirs({ project_root, }) - + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) filter("platforms:not Android-*") links({ "xenia-helper-sdl", diff --git a/src/xenia/ui/d3d12/premake5.lua b/src/xenia/ui/d3d12/premake5.lua index c0d10096b..ecde1c1f3 100644 --- a/src/xenia/ui/d3d12/premake5.lua +++ b/src/xenia/ui/d3d12/premake5.lua @@ -10,6 +10,11 @@ project("xenia-ui-d3d12") "xenia-base", "xenia-ui", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) local_platform_files() files({ "../shaders/bytecode/d3d12_5_1/*.h", diff --git a/src/xenia/ui/premake5.lua b/src/xenia/ui/premake5.lua index 6aff82bec..518bfd358 100644 --- a/src/xenia/ui/premake5.lua +++ b/src/xenia/ui/premake5.lua @@ -14,7 +14,11 @@ project("xenia-ui") local_platform_files() removefiles({"*_demo.cc"}) removefiles({"windowed_app_main_*.cc"}) - + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) filter("platforms:Android-*") -- Exports JNI functions. wholelib("On") diff --git a/src/xenia/ui/vulkan/premake5.lua b/src/xenia/ui/vulkan/premake5.lua index bdc6b7458..cbe4e26f0 100644 --- a/src/xenia/ui/vulkan/premake5.lua +++ b/src/xenia/ui/vulkan/premake5.lua @@ -10,6 +10,11 @@ project("xenia-ui-vulkan") "xenia-base", "xenia-ui", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) includedirs({ project_root.."/third_party/Vulkan-Headers/include", }) diff --git a/src/xenia/vfs/premake5.lua b/src/xenia/vfs/premake5.lua index f312d93c6..9cd71842d 100644 --- a/src/xenia/vfs/premake5.lua +++ b/src/xenia/vfs/premake5.lua @@ -11,6 +11,11 @@ project("xenia-vfs") }) defines({ }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) recursive_platform_files() removefiles({"vfs_dump.cc"}) diff --git a/third_party/SDL2-static.lua b/third_party/SDL2-static.lua index d03a607a5..e38878690 100644 --- a/third_party/SDL2-static.lua +++ b/third_party/SDL2-static.lua @@ -7,6 +7,8 @@ project("SDL2") defines({ "HAVE_LIBC", + "SDL_LEAN_AND_MEAN=1", + "SDL_RENDER_DISABLED=1" }) links({ "setupapi.lib", diff --git a/third_party/capstone.lua b/third_party/capstone.lua index 6dc415974..b5c4d0107 100644 --- a/third_party/capstone.lua +++ b/third_party/capstone.lua @@ -11,6 +11,11 @@ project("capstone") "CAPSTONE_USE_SYS_DYN_MEM", "_LIB", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) includedirs({ "capstone", "capstone/include", diff --git a/third_party/fmt.lua b/third_party/fmt.lua index ae72071b9..394e34800 100644 --- a/third_party/fmt.lua +++ b/third_party/fmt.lua @@ -8,6 +8,11 @@ project("fmt") defines({ "_LIB", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) includedirs({ "fmt/include", }) diff --git a/third_party/glslang-spirv.lua b/third_party/glslang-spirv.lua index 19a04c71e..c5f00a584 100644 --- a/third_party/glslang-spirv.lua +++ b/third_party/glslang-spirv.lua @@ -10,6 +10,11 @@ project("glslang-spirv") }) includedirs({ }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) files({ "glslang/SPIRV/bitutils.h", "glslang/SPIRV/disassemble.cpp", diff --git a/third_party/imgui.lua b/third_party/imgui.lua index 083947c14..99c14b9a5 100644 --- a/third_party/imgui.lua +++ b/third_party/imgui.lua @@ -11,6 +11,11 @@ project("imgui") includedirs({ "imgui", }) + filter({"configurations:Release", "platforms:Windows"}) + buildoptions({ + "/Os", + "/O1" + }) files({ "imgui/imconfig.h", "imgui/imgui.cpp", From bae63b95c534e46a5ceebaadacd98812d5c12673 Mon Sep 17 00:00:00 2001 From: "chss95cs@gmail.com" Date: Fri, 30 Sep 2022 06:51:25 -0700 Subject: [PATCH 3/3] Update to latest version of cxxopts --- src/xenia/base/cvar.cc | 4 ++-- third_party/cxxopts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/xenia/base/cvar.cc b/src/xenia/base/cvar.cc index 219f033ae..c22fecbb6 100644 --- a/src/xenia/base/cvar.cc +++ b/src/xenia/base/cvar.cc @@ -8,7 +8,7 @@ */ #include "xenia/base/cvar.h" - +#include #define UTF_CPP_CPLUSPLUS 201703L #include "third_party/utfcpp/source/utf8.h" @@ -87,7 +87,7 @@ void ParseLaunchArguments(int& argc, char**& argv, configVar->LoadFromLaunchOptions(&result); } } - } catch (const cxxopts::OptionException& e) { + } catch (const cxxopts::exceptions::exception& e) { xe::AttachConsole(); if (xe::has_console_attached()) { std::cout << e.what() << std::endl; diff --git a/third_party/cxxopts b/third_party/cxxopts index 48e265dc4..2e3c6991d 160000 --- a/third_party/cxxopts +++ b/third_party/cxxopts @@ -1 +1 @@ -Subproject commit 48e265dc4b347d3178e4458de81704e0d88047b2 +Subproject commit 2e3c6991d33811878ebcc0839d3815850d129b3a