From db234309bfc5205d1fd691cebcb07d7f4675a788 Mon Sep 17 00:00:00 2001 From: gibbed Date: Thu, 22 Nov 2018 16:36:19 -0600 Subject: [PATCH 1/9] [Kernel] Fix logging of broken XamEnumerate usage. --- src/xenia/kernel/xam/xam_info.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/xenia/kernel/xam/xam_info.cc b/src/xenia/kernel/xam/xam_info.cc index ac7b33009..9996a17ee 100644 --- a/src/xenia/kernel/xam/xam_info.cc +++ b/src/xenia/kernel/xam/xam_info.cc @@ -202,8 +202,11 @@ dword_result_t XamEnumerate(dword_t handle, dword_t flags, lpvoid_t buffer, if (actual_buffer_length != buffer_length) { // Known culprits: // Final Fight: Double Impact - XELOGW("Broken usage of XamEnumerate! %.X vs %.X", buffer_length, - actual_buffer_length); + XELOGW( + "Broken usage of XamEnumerate! buffer length=%.X vs actual length=%.X " + "(item size=%.X, items per enumerate=%u)", + (uint32_t)buffer_length, actual_buffer_length, e->item_size(), + e->items_per_enumerate()); } buffer.Zero(actual_buffer_length); From 69463033b1d8dc24b92026b9b91ce36bbb129420 Mon Sep 17 00:00:00 2001 From: gibbed Date: Thu, 22 Nov 2018 16:49:01 -0600 Subject: [PATCH 2/9] [Kernel] Don't trust buffer length in XamEnumerate when it matches items per enumerate. --- src/xenia/kernel/xam/xam_info.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/xenia/kernel/xam/xam_info.cc b/src/xenia/kernel/xam/xam_info.cc index 9996a17ee..38fdc04d0 100644 --- a/src/xenia/kernel/xam/xam_info.cc +++ b/src/xenia/kernel/xam/xam_info.cc @@ -198,10 +198,11 @@ dword_result_t XamEnumerate(dword_t handle, dword_t flags, lpvoid_t buffer, } } - size_t actual_buffer_length = e->item_size() * e->items_per_enumerate(); - if (actual_buffer_length != buffer_length) { + size_t actual_buffer_length = (uint32_t)buffer_length; + if (buffer_length == e->items_per_enumerate()) { + actual_buffer_length = e->item_size() * e->items_per_enumerate(); // Known culprits: - // Final Fight: Double Impact + // Final Fight: Double Impact (saves) XELOGW( "Broken usage of XamEnumerate! buffer length=%.X vs actual length=%.X " "(item size=%.X, items per enumerate=%u)", From 00261b9ca0389b27fc619735a2f6ae9c16b37ecb Mon Sep 17 00:00:00 2001 From: gibbed Date: Thu, 22 Nov 2018 17:40:22 -0600 Subject: [PATCH 3/9] [App] Get absolute path of content root in all cases. --- src/xenia/app/xenia_main.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index 827584af0..ead3e286a 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -161,9 +161,8 @@ int xenia_main(const std::vector& args) { #endif content_root = xe::join_paths(content_root, L"content"); } - - content_root = xe::to_absolute_path(content_root); } + content_root = xe::to_absolute_path(content_root); // Create the emulator but don't initialize so we can setup the window. auto emulator = std::make_unique(L"", content_root); From d04c7487739c9cf074907410808f2a780bd38c30 Mon Sep 17 00:00:00 2001 From: gibbed Date: Thu, 22 Nov 2018 17:40:38 -0600 Subject: [PATCH 4/9] [App] Log content root. --- src/xenia/app/xenia_main.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index ead3e286a..2597fc02f 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -163,6 +163,7 @@ int xenia_main(const std::vector& args) { } } content_root = xe::to_absolute_path(content_root); + XELOGI("Content root: %S", content_root.c_str()); // Create the emulator but don't initialize so we can setup the window. auto emulator = std::make_unique(L"", content_root); From 94284ea9aa328f469a3b3ea5974f0e5a291ae7f5 Mon Sep 17 00:00:00 2001 From: gibbed Date: Fri, 23 Nov 2018 07:50:42 -0600 Subject: [PATCH 5/9] [CPU] Fix null termination in HIRBuilder::CommentFormat. --- src/xenia/cpu/hir/hir_builder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xenia/cpu/hir/hir_builder.cc b/src/xenia/cpu/hir/hir_builder.cc index af954ca6c..e461996b5 100644 --- a/src/xenia/cpu/hir/hir_builder.cc +++ b/src/xenia/cpu/hir/hir_builder.cc @@ -764,7 +764,7 @@ void HIRBuilder::CommentFormat(const char* format, ...) { va_start(args, format); size_t chars_written = vsnprintf(p, kMaxCommentSize - 1, format, args); va_end(args); - size_t rewind = kMaxCommentSize - chars_written; + size_t rewind = kMaxCommentSize - chars_written - 1; arena_->Rewind(rewind); Instr* i = AppendInstr(OPCODE_COMMENT_info, 0); i->src1.offset = (uint64_t)p; From 4c04a9383a2815c71c9dcd92838acdc1f97ecaa1 Mon Sep 17 00:00:00 2001 From: gibbed Date: Fri, 23 Nov 2018 07:52:14 -0600 Subject: [PATCH 6/9] [JIT] Fix constant propagation for OPCODE_SELECT. --- .../passes/constant_propagation_pass.cc | 10 +++-- src/xenia/cpu/ppc/ppc_translator.cc | 13 ++++-- src/xenia/cpu/ppc/testing/instr_slw.s | 41 +++++++++++++++++++ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc index 44e1e37c1..e44fce425 100644 --- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc +++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc @@ -278,11 +278,13 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { if (i->src1.value->IsConstant()) { if (i->src1.value->type != VEC128_TYPE) { if (i->src1.value->IsConstantTrue()) { - v->set_from(i->src2.value); - i->Remove(); + auto src2 = i->src2.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src2); } else if (i->src1.value->IsConstantFalse()) { - v->set_from(i->src3.value); - i->Remove(); + auto src3 = i->src3.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src3); } else if (i->src2.value->IsConstant() && i->src3.value->IsConstant()) { // TODO: Select diff --git a/src/xenia/cpu/ppc/ppc_translator.cc b/src/xenia/cpu/ppc/ppc_translator.cc index 84aa47f99..ec1768163 100644 --- a/src/xenia/cpu/ppc/ppc_translator.cc +++ b/src/xenia/cpu/ppc/ppc_translator.cc @@ -53,10 +53,15 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : frontend_(frontend) { if (validate) compiler_->AddPass(std::make_unique()); compiler_->AddPass(std::make_unique()); if (validate) compiler_->AddPass(std::make_unique()); - compiler_->AddPass(std::make_unique()); - if (validate) compiler_->AddPass(std::make_unique()); - compiler_->AddPass(std::make_unique()); - if (validate) compiler_->AddPass(std::make_unique()); + // TODO(gibbed): loop until these passes stop making changes? + for (int i = 0; i < 5; ++i) { + compiler_->AddPass(std::make_unique()); + if (validate) + compiler_->AddPass(std::make_unique()); + compiler_->AddPass(std::make_unique()); + if (validate) + compiler_->AddPass(std::make_unique()); + } if (backend->machine_info()->supports_extended_load_store) { // Backend supports the advanced LOAD/STORE instructions. // These will save us a lot of HIR opcodes. diff --git a/src/xenia/cpu/ppc/testing/instr_slw.s b/src/xenia/cpu/ppc/testing/instr_slw.s index b1f4af3d1..e6548766a 100644 --- a/src/xenia/cpu/ppc/testing/instr_slw.s +++ b/src/xenia/cpu/ppc/testing/instr_slw.s @@ -159,3 +159,44 @@ test_slw_9_constant: #_ REGISTER_OUT r3 0 #_ REGISTER_OUT r4 0xFFFFFFFFFFFFFFFF #_ REGISTER_OUT r5 32 + +test_slw_10: + #_ REGISTER_IN r4 99 + #_ REGISTER_IN r5 1 + cntlzw r5, r5 + subi r5, r5, 28 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 + +test_slw_10_constant: + #_ REGISTER_IN r4 99 + li r5, 1 + cntlzw r5, r5 + subi r5, r5, 28 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 + +test_slw_11: + #_ REGISTER_IN r4 99 + #_ REGISTER_IN r5 3 + li r5, 3 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 + +test_slw_11_constant: + #_ REGISTER_IN r4 99 + li r5, 3 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 From 7d07720de13ca0463377d8549181db438ae65c65 Mon Sep 17 00:00:00 2001 From: gibbed Date: Fri, 23 Nov 2018 08:02:09 -0600 Subject: [PATCH 7/9] [JIT] Zero constant propagation for OPCODE_SHL/OPCODE_SHR. --- .../cpu/compiler/passes/constant_propagation_pass.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc index e44fce425..3db8e99d6 100644 --- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc +++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc @@ -618,6 +618,10 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { v->set_from(i->src1.value); v->Shl(i->src2.value); i->Remove(); + } else if (i->src2.value->IsConstantZero()) { + auto src1 = i->src1.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src1); } break; case OPCODE_SHR: @@ -625,6 +629,10 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { v->set_from(i->src1.value); v->Shr(i->src2.value); i->Remove(); + } else if (i->src2.value->IsConstantZero()) { + auto src1 = i->src1.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src1); } break; case OPCODE_SHA: From b24936d248b0530d556a5b8fa7328af832b306e8 Mon Sep 17 00:00:00 2001 From: gibbed Date: Fri, 23 Nov 2018 09:02:49 -0600 Subject: [PATCH 8/9] [Kernel] Tag XamUserGetSigninState as high frequency. --- src/xenia/kernel/xam/xam_user.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xenia/kernel/xam/xam_user.cc b/src/xenia/kernel/xam/xam_user.cc index ef25087ad..20e64ab5b 100644 --- a/src/xenia/kernel/xam/xam_user.cc +++ b/src/xenia/kernel/xam/xam_user.cc @@ -49,7 +49,8 @@ dword_result_t XamUserGetSigninState(dword_t user_index) { return 0; } } -DECLARE_XAM_EXPORT1(XamUserGetSigninState, kUserProfiles, kImplemented); +DECLARE_XAM_EXPORT2(XamUserGetSigninState, kUserProfiles, kImplemented, + kHighFrequency); typedef struct { xe::be xuid; From 996093e4996ecc3ae296dc9d28f87d80c2960a59 Mon Sep 17 00:00:00 2001 From: gibbed Date: Fri, 23 Nov 2018 09:33:30 -0600 Subject: [PATCH 9/9] [x64] Support constant src1 for Int32 VECTOR_SHL_V128/VECTOR_SHR_V128. --- src/xenia/cpu/backend/x64/x64_sequences.cc | 39 ++++++++++++++++------ 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 0441c6886..5c2118fc7 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -5691,6 +5691,15 @@ struct VECTOR_SHL_V128 return _mm_load_si128(reinterpret_cast<__m128i*>(value)); } static void EmitInt32(X64Emitter& e, const EmitArgType& i) { + Xmm src1; + if (i.src1.is_constant) { + src1 = e.xmm2; + e.LoadConstantXmm(src1, i.src1.constant()); + } + else { + src1 = i.src1; + } + if (i.src2.is_constant) { const auto& shamt = i.src2.constant(); bool all_same = true; @@ -5702,7 +5711,7 @@ struct VECTOR_SHL_V128 } if (all_same) { // Every count is the same, so we can use vpslld. - e.vpslld(i.dest, i.src1, shamt.u8[0] & 0x1F); + e.vpslld(i.dest, src1, shamt.u8[0] & 0x1F); return; } } @@ -5716,13 +5725,13 @@ struct VECTOR_SHL_V128 masked.u32[n] &= 0x1F; } e.LoadConstantXmm(e.xmm0, masked); - e.vpsllvd(i.dest, i.src1, e.xmm0); + e.vpsllvd(i.dest, src1, e.xmm0); } else { // Fully variable shift. // src shift mask may have values >31, and x86 sets to zero when // that happens so we mask. e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS)); - e.vpsllvd(i.dest, i.src1, e.xmm0); + e.vpsllvd(i.dest, src1, e.xmm0); } } else { // Shift 4 words in src1 by amount specified in src2. @@ -5740,7 +5749,8 @@ struct VECTOR_SHL_V128 e.mov(e.rax, 0x1F); e.vmovq(e.xmm1, e.rax); e.vpand(e.xmm0, e.xmm0, e.xmm1); - e.vpslld(i.dest, i.src1, e.xmm0); + + e.vpslld(i.dest, src1, e.xmm0); e.jmp(end); } @@ -5752,7 +5762,7 @@ struct VECTOR_SHL_V128 } else { e.lea(e.r9, e.StashXmm(1, i.src2)); } - e.lea(e.r8, e.StashXmm(0, i.src1)); + e.lea(e.r8, e.StashXmm(0, src1)); e.CallNativeSafe(reinterpret_cast(EmulateVectorShlI32)); e.vmovaps(i.dest, e.xmm0); @@ -5877,6 +5887,15 @@ struct VECTOR_SHR_V128 return _mm_load_si128(reinterpret_cast<__m128i*>(value)); } static void EmitInt32(X64Emitter& e, const EmitArgType& i) { + Xmm src1; + if (i.src1.is_constant) { + src1 = e.xmm2; + e.LoadConstantXmm(src1, i.src1.constant()); + } + else { + src1 = i.src1; + } + if (i.src2.is_constant) { const auto& shamt = i.src2.constant(); bool all_same = true; @@ -5888,7 +5907,7 @@ struct VECTOR_SHR_V128 } if (all_same) { // Every count is the same, so we can use vpsrld. - e.vpsrld(i.dest, i.src1, shamt.u8[0] & 0x1F); + e.vpsrld(i.dest, src1, shamt.u8[0] & 0x1F); return; } else { if (e.IsFeatureEnabled(kX64EmitAVX2)) { @@ -5898,7 +5917,7 @@ struct VECTOR_SHR_V128 masked.u32[n] &= 0x1F; } e.LoadConstantXmm(e.xmm0, masked); - e.vpsrlvd(i.dest, i.src1, e.xmm0); + e.vpsrlvd(i.dest, src1, e.xmm0); return; } } @@ -5909,7 +5928,7 @@ struct VECTOR_SHR_V128 // src shift mask may have values >31, and x86 sets to zero when // that happens so we mask. e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS)); - e.vpsrlvd(i.dest, i.src1, e.xmm0); + e.vpsrlvd(i.dest, src1, e.xmm0); } else { // Shift 4 words in src1 by amount specified in src2. Xbyak::Label emu, end; @@ -5926,7 +5945,7 @@ struct VECTOR_SHR_V128 e.mov(e.rax, 0x1F); e.vmovq(e.xmm1, e.rax); e.vpand(e.xmm0, e.xmm0, e.xmm1); - e.vpsrld(i.dest, i.src1, e.xmm0); + e.vpsrld(i.dest, src1, e.xmm0); e.jmp(end); } @@ -5938,7 +5957,7 @@ struct VECTOR_SHR_V128 } else { e.lea(e.r9, e.StashXmm(1, i.src2)); } - e.lea(e.r8, e.StashXmm(0, i.src1)); + e.lea(e.r8, e.StashXmm(0, src1)); e.CallNativeSafe(reinterpret_cast(EmulateVectorShrI32)); e.vmovaps(i.dest, e.xmm0);