diff --git a/src/xenia/cpu/ppc/ppc_emit_alu.cc b/src/xenia/cpu/ppc/ppc_emit_alu.cc index f3e966ec8..b5f1579bd 100644 --- a/src/xenia/cpu/ppc/ppc_emit_alu.cc +++ b/src/xenia/cpu/ppc/ppc_emit_alu.cc @@ -958,18 +958,19 @@ int InstrEmit_rlwimix(PPCHIRBuilder& f, const InstrData& i) { // r <- ROTL32((RS)[32:63], n) // m <- MASK(MB+32, ME+32) // RA <- r&m | (RA)&¬m - Value* v = f.Truncate(f.LoadGPR(i.M.RT), INT32_TYPE); + Value* v = f.LoadGPR(i.M.RT); + // (x||x) + v = f.Or(f.Shl(v, 32), f.And(v, f.LoadConstantUint64(0xFFFFFFFF))); if (i.M.SH) { - v = f.RotateLeft(v, f.LoadConstantUint32(i.M.SH)); + v = f.RotateLeft(v, f.LoadConstantInt8(i.M.SH)); } // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here // as our truncation/zero-extend does it for us. - uint32_t m = (uint32_t)XEMASK(i.M.MB + 32, i.M.ME + 32); - if (!(i.M.MB == 0 && i.M.ME == 31)) { - v = f.And(v, f.LoadConstantUint32(m)); + uint64_t m = XEMASK(i.M.MB + 32, i.M.ME + 32); + if (m != 0xFFFFFFFFFFFFFFFFull) { + v = f.And(v, f.LoadConstantUint64(m)); } - v = f.ZeroExtend(v, INT64_TYPE); - v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstantUint64(~(uint64_t)m))); + v = f.Or(v, f.And(f.LoadGPR(i.M.RA), f.LoadConstantUint64(~m))); f.StoreGPR(i.M.RA, v); if (i.M.Rc) { f.UpdateCR(0, v); @@ -982,22 +983,23 @@ int InstrEmit_rlwinmx(PPCHIRBuilder& f, const InstrData& i) { // r <- ROTL32((RS)[32:63], n) // m <- MASK(MB+32, ME+32) // RA <- r & m - Value* v = f.Truncate(f.LoadGPR(i.M.RT), INT32_TYPE); + Value* v = f.LoadGPR(i.M.RT); + // (x||x) + v = f.Or(f.Shl(v, 32), f.And(v, f.LoadConstantUint64(0xFFFFFFFF))); // TODO(benvanik): optimize srwi // TODO(benvanik): optimize slwi // The compiler will generate a bunch of these for the special case of SH=0. // Which seems to just select some bits and set cr0 for use with a branch. // We can detect this and do less work. if (i.M.SH) { - v = f.RotateLeft(v, f.LoadConstantUint32(i.M.SH)); + v = f.RotateLeft(v, f.LoadConstantInt8(i.M.SH)); } // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here // as our truncation/zero-extend does it for us. - if (!(i.M.MB == 0 && i.M.ME == 31)) { - v = f.And(v, - f.LoadConstantUint32(uint32_t(XEMASK(i.M.MB + 32, i.M.ME + 32)))); + uint64_t m = XEMASK(i.M.MB + 32, i.M.ME + 32); + if (m != 0xFFFFFFFFFFFFFFFFull) { + v = f.And(v, f.LoadConstantUint64(m)); } - v = f.ZeroExtend(v, INT64_TYPE); f.StoreGPR(i.M.RA, v); if (i.M.Rc) { f.UpdateCR(0, v); @@ -1010,17 +1012,13 @@ int InstrEmit_rlwnmx(PPCHIRBuilder& f, const InstrData& i) { // r <- ROTL32((RS)[32:63], n) // m <- MASK(MB+32, ME+32) // RA <- r & m - Value* v = f.Truncate(f.LoadGPR(i.M.RT), INT32_TYPE); - Value* sh = f.And(f.Truncate(f.LoadGPR(i.M.SH), INT32_TYPE), - f.LoadConstantUint32(0x1F)); + Value* sh = + f.And(f.Truncate(f.LoadGPR(i.M.SH), INT8_TYPE), f.LoadConstantInt8(0x1F)); + Value* v = f.LoadGPR(i.M.RT); + // (x||x) + v = f.Or(f.Shl(v, 32), f.And(v, f.LoadConstantUint64(0xFFFFFFFF))); v = f.RotateLeft(v, sh); - // Compiler sometimes masks with 0xFFFFFFFF (identity) - avoid the work here - // as our truncation/zero-extend does it for us. - if (!(i.M.MB == 0 && i.M.ME == 31)) { - v = f.And(v, - f.LoadConstantUint32(uint32_t(XEMASK(i.M.MB + 32, i.M.ME + 32)))); - } - v = f.ZeroExtend(v, INT64_TYPE); + v = f.And(v, f.LoadConstantUint64(XEMASK(i.M.MB + 32, i.M.ME + 32))); f.StoreGPR(i.M.RA, v); if (i.M.Rc) { f.UpdateCR(0, v); diff --git a/src/xenia/cpu/ppc/testing/instr_rlwinm.s b/src/xenia/cpu/ppc/testing/instr_rlwinm.s index 2e4dbdb05..b1376b1e0 100644 --- a/src/xenia/cpu/ppc/testing/instr_rlwinm.s +++ b/src/xenia/cpu/ppc/testing/instr_rlwinm.s @@ -253,7 +253,7 @@ test_rlwinm_11: rlwinm r6, r8, 8, 2, 0 blr #_ REGISTER_OUT r6 0xABCDEF89ABCDEF89 - #_ REGISTER_OUT r8 0x1 + #_ REGISTER_OUT r8 0x0123456789ABCDEF test_rlwinm_12: #_ REGISTER_IN r4 0xFFFFFFFFFFFFFFFF diff --git a/src/xenia/cpu/ppc/testing/instr_rlwnm.s b/src/xenia/cpu/ppc/testing/instr_rlwnm.s index dccae1c50..f41c78d6a 100644 --- a/src/xenia/cpu/ppc/testing/instr_rlwnm.s +++ b/src/xenia/cpu/ppc/testing/instr_rlwnm.s @@ -202,8 +202,8 @@ test_rlwnm_12: #_ REGISTER_IN r3 0xFFFFFFFF rlwnm r0, r3, r3, 30, 1 blr - #_ REGISTER_OUT r0 0x1 - #_ REGISTER_OUT r3 0xFFFFFFFFC0000003 + #_ REGISTER_OUT r0 0xFFFFFFFFC0000003 + #_ REGISTER_OUT r3 0xFFFFFFFF test_rlwnm_13: #_ REGISTER_IN r7 0x01234567