diff --git a/src/xenia/app/xenia_main.cc b/src/xenia/app/xenia_main.cc index a69fb1439..b4337c70e 100644 --- a/src/xenia/app/xenia_main.cc +++ b/src/xenia/app/xenia_main.cc @@ -176,9 +176,9 @@ int xenia_main(const std::vector& args) { #endif content_root = xe::join_paths(content_root, L"content"); } - - content_root = xe::to_absolute_path(content_root); } + content_root = xe::to_absolute_path(content_root); + XELOGI("Content root: %S", content_root.c_str()); // Create the emulator but don't initialize so we can setup the window. auto emulator = std::make_unique(L"", content_root); diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 0441c6886..5c2118fc7 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -5691,6 +5691,15 @@ struct VECTOR_SHL_V128 return _mm_load_si128(reinterpret_cast<__m128i*>(value)); } static void EmitInt32(X64Emitter& e, const EmitArgType& i) { + Xmm src1; + if (i.src1.is_constant) { + src1 = e.xmm2; + e.LoadConstantXmm(src1, i.src1.constant()); + } + else { + src1 = i.src1; + } + if (i.src2.is_constant) { const auto& shamt = i.src2.constant(); bool all_same = true; @@ -5702,7 +5711,7 @@ struct VECTOR_SHL_V128 } if (all_same) { // Every count is the same, so we can use vpslld. - e.vpslld(i.dest, i.src1, shamt.u8[0] & 0x1F); + e.vpslld(i.dest, src1, shamt.u8[0] & 0x1F); return; } } @@ -5716,13 +5725,13 @@ struct VECTOR_SHL_V128 masked.u32[n] &= 0x1F; } e.LoadConstantXmm(e.xmm0, masked); - e.vpsllvd(i.dest, i.src1, e.xmm0); + e.vpsllvd(i.dest, src1, e.xmm0); } else { // Fully variable shift. // src shift mask may have values >31, and x86 sets to zero when // that happens so we mask. e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS)); - e.vpsllvd(i.dest, i.src1, e.xmm0); + e.vpsllvd(i.dest, src1, e.xmm0); } } else { // Shift 4 words in src1 by amount specified in src2. @@ -5740,7 +5749,8 @@ struct VECTOR_SHL_V128 e.mov(e.rax, 0x1F); e.vmovq(e.xmm1, e.rax); e.vpand(e.xmm0, e.xmm0, e.xmm1); - e.vpslld(i.dest, i.src1, e.xmm0); + + e.vpslld(i.dest, src1, e.xmm0); e.jmp(end); } @@ -5752,7 +5762,7 @@ struct VECTOR_SHL_V128 } else { e.lea(e.r9, e.StashXmm(1, i.src2)); } - e.lea(e.r8, e.StashXmm(0, i.src1)); + e.lea(e.r8, e.StashXmm(0, src1)); e.CallNativeSafe(reinterpret_cast(EmulateVectorShlI32)); e.vmovaps(i.dest, e.xmm0); @@ -5877,6 +5887,15 @@ struct VECTOR_SHR_V128 return _mm_load_si128(reinterpret_cast<__m128i*>(value)); } static void EmitInt32(X64Emitter& e, const EmitArgType& i) { + Xmm src1; + if (i.src1.is_constant) { + src1 = e.xmm2; + e.LoadConstantXmm(src1, i.src1.constant()); + } + else { + src1 = i.src1; + } + if (i.src2.is_constant) { const auto& shamt = i.src2.constant(); bool all_same = true; @@ -5888,7 +5907,7 @@ struct VECTOR_SHR_V128 } if (all_same) { // Every count is the same, so we can use vpsrld. - e.vpsrld(i.dest, i.src1, shamt.u8[0] & 0x1F); + e.vpsrld(i.dest, src1, shamt.u8[0] & 0x1F); return; } else { if (e.IsFeatureEnabled(kX64EmitAVX2)) { @@ -5898,7 +5917,7 @@ struct VECTOR_SHR_V128 masked.u32[n] &= 0x1F; } e.LoadConstantXmm(e.xmm0, masked); - e.vpsrlvd(i.dest, i.src1, e.xmm0); + e.vpsrlvd(i.dest, src1, e.xmm0); return; } } @@ -5909,7 +5928,7 @@ struct VECTOR_SHR_V128 // src shift mask may have values >31, and x86 sets to zero when // that happens so we mask. e.vandps(e.xmm0, i.src2, e.GetXmmConstPtr(XMMShiftMaskPS)); - e.vpsrlvd(i.dest, i.src1, e.xmm0); + e.vpsrlvd(i.dest, src1, e.xmm0); } else { // Shift 4 words in src1 by amount specified in src2. Xbyak::Label emu, end; @@ -5926,7 +5945,7 @@ struct VECTOR_SHR_V128 e.mov(e.rax, 0x1F); e.vmovq(e.xmm1, e.rax); e.vpand(e.xmm0, e.xmm0, e.xmm1); - e.vpsrld(i.dest, i.src1, e.xmm0); + e.vpsrld(i.dest, src1, e.xmm0); e.jmp(end); } @@ -5938,7 +5957,7 @@ struct VECTOR_SHR_V128 } else { e.lea(e.r9, e.StashXmm(1, i.src2)); } - e.lea(e.r8, e.StashXmm(0, i.src1)); + e.lea(e.r8, e.StashXmm(0, src1)); e.CallNativeSafe(reinterpret_cast(EmulateVectorShrI32)); e.vmovaps(i.dest, e.xmm0); diff --git a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc index 44e1e37c1..3db8e99d6 100644 --- a/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc +++ b/src/xenia/cpu/compiler/passes/constant_propagation_pass.cc @@ -278,11 +278,13 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { if (i->src1.value->IsConstant()) { if (i->src1.value->type != VEC128_TYPE) { if (i->src1.value->IsConstantTrue()) { - v->set_from(i->src2.value); - i->Remove(); + auto src2 = i->src2.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src2); } else if (i->src1.value->IsConstantFalse()) { - v->set_from(i->src3.value); - i->Remove(); + auto src3 = i->src3.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src3); } else if (i->src2.value->IsConstant() && i->src3.value->IsConstant()) { // TODO: Select @@ -616,6 +618,10 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { v->set_from(i->src1.value); v->Shl(i->src2.value); i->Remove(); + } else if (i->src2.value->IsConstantZero()) { + auto src1 = i->src1.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src1); } break; case OPCODE_SHR: @@ -623,6 +629,10 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder) { v->set_from(i->src1.value); v->Shr(i->src2.value); i->Remove(); + } else if (i->src2.value->IsConstantZero()) { + auto src1 = i->src1.value; + i->Replace(&OPCODE_ASSIGN_info, 0); + i->set_src1(src1); } break; case OPCODE_SHA: diff --git a/src/xenia/cpu/hir/hir_builder.cc b/src/xenia/cpu/hir/hir_builder.cc index af954ca6c..e461996b5 100644 --- a/src/xenia/cpu/hir/hir_builder.cc +++ b/src/xenia/cpu/hir/hir_builder.cc @@ -764,7 +764,7 @@ void HIRBuilder::CommentFormat(const char* format, ...) { va_start(args, format); size_t chars_written = vsnprintf(p, kMaxCommentSize - 1, format, args); va_end(args); - size_t rewind = kMaxCommentSize - chars_written; + size_t rewind = kMaxCommentSize - chars_written - 1; arena_->Rewind(rewind); Instr* i = AppendInstr(OPCODE_COMMENT_info, 0); i->src1.offset = (uint64_t)p; diff --git a/src/xenia/cpu/ppc/ppc_translator.cc b/src/xenia/cpu/ppc/ppc_translator.cc index 84aa47f99..ec1768163 100644 --- a/src/xenia/cpu/ppc/ppc_translator.cc +++ b/src/xenia/cpu/ppc/ppc_translator.cc @@ -53,10 +53,15 @@ PPCTranslator::PPCTranslator(PPCFrontend* frontend) : frontend_(frontend) { if (validate) compiler_->AddPass(std::make_unique()); compiler_->AddPass(std::make_unique()); if (validate) compiler_->AddPass(std::make_unique()); - compiler_->AddPass(std::make_unique()); - if (validate) compiler_->AddPass(std::make_unique()); - compiler_->AddPass(std::make_unique()); - if (validate) compiler_->AddPass(std::make_unique()); + // TODO(gibbed): loop until these passes stop making changes? + for (int i = 0; i < 5; ++i) { + compiler_->AddPass(std::make_unique()); + if (validate) + compiler_->AddPass(std::make_unique()); + compiler_->AddPass(std::make_unique()); + if (validate) + compiler_->AddPass(std::make_unique()); + } if (backend->machine_info()->supports_extended_load_store) { // Backend supports the advanced LOAD/STORE instructions. // These will save us a lot of HIR opcodes. diff --git a/src/xenia/cpu/ppc/testing/instr_slw.s b/src/xenia/cpu/ppc/testing/instr_slw.s index b1f4af3d1..e6548766a 100644 --- a/src/xenia/cpu/ppc/testing/instr_slw.s +++ b/src/xenia/cpu/ppc/testing/instr_slw.s @@ -159,3 +159,44 @@ test_slw_9_constant: #_ REGISTER_OUT r3 0 #_ REGISTER_OUT r4 0xFFFFFFFFFFFFFFFF #_ REGISTER_OUT r5 32 + +test_slw_10: + #_ REGISTER_IN r4 99 + #_ REGISTER_IN r5 1 + cntlzw r5, r5 + subi r5, r5, 28 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 + +test_slw_10_constant: + #_ REGISTER_IN r4 99 + li r5, 1 + cntlzw r5, r5 + subi r5, r5, 28 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 + +test_slw_11: + #_ REGISTER_IN r4 99 + #_ REGISTER_IN r5 3 + li r5, 3 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 + +test_slw_11_constant: + #_ REGISTER_IN r4 99 + li r5, 3 + slw r3, r4, r5 + blr + #_ REGISTER_OUT r3 792 + #_ REGISTER_OUT r4 99 + #_ REGISTER_OUT r5 3 diff --git a/src/xenia/kernel/xam/xam_info.cc b/src/xenia/kernel/xam/xam_info.cc index ac7b33009..38fdc04d0 100644 --- a/src/xenia/kernel/xam/xam_info.cc +++ b/src/xenia/kernel/xam/xam_info.cc @@ -198,12 +198,16 @@ dword_result_t XamEnumerate(dword_t handle, dword_t flags, lpvoid_t buffer, } } - size_t actual_buffer_length = e->item_size() * e->items_per_enumerate(); - if (actual_buffer_length != buffer_length) { + size_t actual_buffer_length = (uint32_t)buffer_length; + if (buffer_length == e->items_per_enumerate()) { + actual_buffer_length = e->item_size() * e->items_per_enumerate(); // Known culprits: - // Final Fight: Double Impact - XELOGW("Broken usage of XamEnumerate! %.X vs %.X", buffer_length, - actual_buffer_length); + // Final Fight: Double Impact (saves) + XELOGW( + "Broken usage of XamEnumerate! buffer length=%.X vs actual length=%.X " + "(item size=%.X, items per enumerate=%u)", + (uint32_t)buffer_length, actual_buffer_length, e->item_size(), + e->items_per_enumerate()); } buffer.Zero(actual_buffer_length); diff --git a/src/xenia/kernel/xam/xam_user.cc b/src/xenia/kernel/xam/xam_user.cc index ef25087ad..20e64ab5b 100644 --- a/src/xenia/kernel/xam/xam_user.cc +++ b/src/xenia/kernel/xam/xam_user.cc @@ -49,7 +49,8 @@ dword_result_t XamUserGetSigninState(dword_t user_index) { return 0; } } -DECLARE_XAM_EXPORT1(XamUserGetSigninState, kUserProfiles, kImplemented); +DECLARE_XAM_EXPORT2(XamUserGetSigninState, kUserProfiles, kImplemented, + kHighFrequency); typedef struct { xe::be xuid;