From 3fe8134f3bb1883296ca12771f83e8712ae5a6c6 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Tue, 24 Sep 2013 01:17:24 +0000
Subject: [PATCH] [ARM] lfsux/lfdx/lfdux/stfsx/stfsux/stfdx/stfdux implementations.

---
 Source/Core/Core/Src/PowerPC/JitArm32/Jit.h   |   7 +
 .../JitArm32/JitArm_LoadStoreFloating.cpp     | 257 ++++++++++++++++++
 .../Src/PowerPC/JitArm32/JitArm_Tables.cpp    |  14 +-
 3 files changed, 271 insertions(+), 7 deletions(-)

diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
index e4bf7d6941..2287025205 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h
@@ -216,13 +216,20 @@ public:
 	// Floating point loadStore
 	void lfs(UGeckoInstruction _inst);
 	void lfsu(UGeckoInstruction _inst);
+	void lfsux(UGeckoInstruction _inst);
 	void lfsx(UGeckoInstruction _inst);
 	void lfd(UGeckoInstruction _inst);
+	void lfdx(UGeckoInstruction _inst);
 	void lfdu(UGeckoInstruction _inst);
+	void lfdux(UGeckoInstruction _inst);
 	void stfs(UGeckoInstruction _inst);
+	void stfsx(UGeckoInstruction _inst);
 	void stfsu(UGeckoInstruction _inst);
+	void stfsux(UGeckoInstruction _inst);
 	void stfd(UGeckoInstruction _inst);
+	void stfdx(UGeckoInstruction _inst);
 	void stfdu(UGeckoInstruction _inst);
+	void stfdux(UGeckoInstruction _inst);
 
 	// Paired Singles
 	void ps_add(UGeckoInstruction _inst);
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp
index c9f42ebd36..97e6bfbaf8 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp
@@ -108,6 +108,44 @@ void JitArm::lfsu(UGeckoInstruction inst)
 	SetJumpTarget(DoNotLoad);
 }
 
+void JitArm::lfsux(UGeckoInstruction inst) // JIT emitter for lfsux: 32-bit load from RA+RB into FD, address written back to RA
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff)
+
+	ARMReg RA = gpr.R(inst.RA); // registers the cache hands back for the RA / RB fields
+	ARMReg RB = gpr.R(inst.RB);
+
+	ARMReg rA = gpr.GetReg(); // scratch: Exceptions value, then the callee address
+	ARMReg rB = gpr.GetReg(); // scratch: effective address
+
+	ARMReg v0 = fpr.R0(inst.FD); // both destinations are written by the VCVTs below
+	ARMReg v1 = fpr.R1(inst.FD);
+
+	ADD(rB, RB, RA); // rB = RB + RA
+
+	LDR(rA, R9, PPCSTATE_OFF(Exceptions)); // presumably R9 is the ppcState base pointer -- confirm
+	CMP(rA, EXCEPTION_DSI); // NOTE(review): equality compare, not a bit test -- confirm no other bits can be set here
+	FixupBranch DoNotLoad = B_CC(CC_EQ); // on equality skip the RA update, the call and the FD write
+
+	MOVI2R(rA, (u32)&Memory::Read_U32); // rA is reused for the callee address
+
+	MOV(RA, rB); // update form: RA receives the effective address
+	PUSH(4, R0, R1, R2, R3); // R0-R3 are saved around the call and restored by the POP below
+	MOV(R0, rB); // call argument: the address
+	BL(rA);
+
+	VMOV(S0, R0); // move the value returned in R0 into S0
+
+	VCVT(v0, S0, 0); // the same S0 value is converted into both v0 and v1
+	VCVT(v1, S0, 0);
+	POP(4, R0, R1, R2, R3);
+
+
+	gpr.Unlock(rA, rB); // release the two scratch registers
+	SetJumpTarget(DoNotLoad); // the skip branch above lands here
+}
+
 void JitArm::lfsx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -183,6 +221,47 @@ void JitArm::lfd(UGeckoInstruction inst)
 	SetJumpTarget(DoNotLoad);
 }
 
+void JitArm::lfdx(UGeckoInstruction inst) // JIT emitter for lfdx: 64-bit load via Memory::Read_F64 into FD, no RA write-back
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff)
+
+	ARMReg RB = gpr.R(inst.RB);
+
+	ARMReg rA = gpr.GetReg(); // scratch: Exceptions value, then the callee address
+	ARMReg rB = gpr.GetReg(); // scratch: effective address
+
+	ARMReg v0 = fpr.R0(inst.FD); // only fpr.R0 of FD is written by this function
+
+	if (inst.RA) // an RA field of 0 means no base register: the address is RB alone
+	{
+		ARMReg RA = gpr.R(inst.RA);
+		ADD(rB, RB, RA); // rB = RB + RA
+	}
+	else
+		MOV(rB, RB);
+
+	LDR(rA, R9, PPCSTATE_OFF(Exceptions)); // presumably R9 is the ppcState base pointer -- confirm
+	CMP(rA, EXCEPTION_DSI); // NOTE(review): equality compare, not a bit test -- confirm
+	FixupBranch DoNotLoad = B_CC(CC_EQ); // on equality skip the call and the FD write
+
+	MOVI2R(rA, (u32)&Memory::Read_F64); // rA is reused for the callee address
+	PUSH(4, R0, R1, R2, R3); // R0-R3 are saved around the call and restored by the POP below
+	MOV(R0, rB); // call argument: the address
+	BL(rA);
+
+#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
+	VMOV(v0, R0); // NOTE(review): only R0 is named; presumably the emitter pairs it with R1 for a double -- confirm
+#else
+	VMOV(v0, D0); // hard-float build: the result is taken from D0
+#endif
+
+	POP(4, R0, R1, R2, R3);
+
+	gpr.Unlock(rA, rB); // release the two scratch registers
+	SetJumpTarget(DoNotLoad); // the skip branch above lands here
+}
+
 void JitArm::lfdu(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -220,6 +299,44 @@ void JitArm::lfdu(UGeckoInstruction inst)
 	SetJumpTarget(DoNotLoad);
 }
 
+void JitArm::lfdux(UGeckoInstruction inst) // JIT emitter for lfdux: 64-bit load from RA+RB into FD, address written back to RA
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff)
+
+	ARMReg RA = gpr.R(inst.RA); // registers the cache hands back for the RA / RB fields
+	ARMReg RB = gpr.R(inst.RB);
+
+	ARMReg rA = gpr.GetReg(); // scratch: Exceptions value, then the callee address
+	ARMReg rB = gpr.GetReg(); // scratch: effective address
+
+	ARMReg v0 = fpr.R0(inst.FD); // only fpr.R0 of FD is written by this function
+
+	ADD(rB, RB, RA); // rB = RB + RA
+
+	LDR(rA, R9, PPCSTATE_OFF(Exceptions)); // presumably R9 is the ppcState base pointer -- confirm
+	CMP(rA, EXCEPTION_DSI); // NOTE(review): equality compare, not a bit test -- confirm
+	FixupBranch DoNotLoad = B_CC(CC_EQ); // on equality skip the RA update, the call and the FD write
+
+	MOVI2R(rA, (u32)&Memory::Read_F64); // rA is reused for the callee address
+	MOV(RA, rB); // update form: RA receives the effective address
+
+	PUSH(4, R0, R1, R2, R3); // R0-R3 are saved around the call and restored by the POP below
+	MOV(R0, rB); // call argument: the address
+	BL(rA);
+
+#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1
+	VMOV(v0, R0); // NOTE(review): only R0 is named; presumably the emitter pairs it with R1 for a double -- confirm
+#else
+	VMOV(v0, D0); // hard-float build: the result is taken from D0
+#endif
+
+	POP(4, R0, R1, R2, R3);
+
+	gpr.Unlock(rA, rB); // release the two scratch registers
+	SetJumpTarget(DoNotLoad); // the skip branch above lands here
+}
+
 void JitArm::stfs(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -252,6 +369,38 @@ void JitArm::stfs(UGeckoInstruction inst)
 	gpr.Unlock(rA, rB);
 }
 
+void JitArm::stfsx(UGeckoInstruction inst) // JIT emitter for stfsx: 32-bit store of FS via Memory::Write_U32, no RA write-back
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff)
+
+	ARMReg RB = gpr.R(inst.RB);
+	ARMReg rA = gpr.GetReg(); // scratch: callee address
+	ARMReg rB = gpr.GetReg(); // scratch: effective address
+	ARMReg v0 = fpr.R0(inst.FS);
+	VCVT(S0, v0, 0); // convert the source register into S0 ahead of the call setup
+
+	if (inst.RA) // an RA field of 0 means no base register: the address is RB alone
+	{
+		ARMReg RA = gpr.R(inst.RA);
+		ADD(rB, RB, RA); // rB = RB + RA
+	}
+	else
+		MOV(rB, RB);
+
+	MOVI2R(rA, (u32)&Memory::Write_U32);
+	PUSH(4, R0, R1, R2, R3); // R0-R3 are saved around the call and restored by the POP below
+	VMOV(R0, S0); // first argument: the 32-bit value held in S0
+	MOV(R1, rB); // second argument: the address
+
+	BL(rA);
+
+	POP(4, R0, R1, R2, R3);
+
+	gpr.Unlock(rA, rB); // release the two scratch registers
+}
+
+
 void JitArm::stfsu(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -286,6 +435,40 @@ void JitArm::stfsu(UGeckoInstruction inst)
 	gpr.Unlock(rA, rB);
 }
 
+void JitArm::stfsux(UGeckoInstruction inst) // JIT emitter for stfsux: 32-bit store of FS to RA+RB, address written back to RA
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff)
+
+	ARMReg RA = gpr.R(inst.RA); // registers the cache hands back for the RA / RB fields
+	ARMReg RB = gpr.R(inst.RB);
+
+	ARMReg rA = gpr.GetReg(); // scratch: Exceptions value, then the callee address
+	ARMReg rB = gpr.GetReg(); // scratch: effective address
+	ARMReg v0 = fpr.R0(inst.FS);
+	VCVT(S0, v0, 0); // convert the source register into S0 ahead of the call setup
+
+	ADD(rB, RB, RA); // rB = RB + RA
+
+	LDR(rA, R9, PPCSTATE_OFF(Exceptions)); // presumably R9 is the ppcState base pointer -- confirm
+	CMP(rA, EXCEPTION_DSI); // NOTE(review): equality compare, not a bit test -- confirm
+
+	SetCC(CC_NEQ); // presumably predicates the emitted MOV on "not equal" -- confirm against the emitter
+	MOV(RA, rB); // update form: RA receives the effective address
+	SetCC(); // reset to the emitter's default condition
+
+	MOVI2R(rA, (u32)&Memory::Write_U32); // NOTE(review): no branch guards the call below; only the RA update is conditional
+	PUSH(4, R0, R1, R2, R3); // R0-R3 are saved around the call and restored by the POP below
+	VMOV(R0, S0); // first argument: the 32-bit value held in S0
+	MOV(R1, rB); // second argument: the address
+
+	BL(rA);
+
+	POP(4, R0, R1, R2, R3);
+
+	gpr.Unlock(rA, rB); // release the two scratch registers
+}
+
 void JitArm::stfd(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -322,6 +505,43 @@ void JitArm::stfd(UGeckoInstruction inst)
 	gpr.Unlock(rA, rB);
 }
 
+void JitArm::stfdx(UGeckoInstruction inst) // JIT emitter for stfdx: 64-bit store of FS via Memory::Write_F64, no RA write-back
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff)
+
+	ARMReg RB = gpr.R(inst.RB);
+
+	ARMReg rA = gpr.GetReg(); // scratch: callee address
+	ARMReg rB = gpr.GetReg(); // scratch: effective address
+	ARMReg v0 = fpr.R0(inst.FS);
+
+	if (inst.RA) // an RA field of 0 means no base register: the address is RB alone
+	{
+		ARMReg RA = gpr.R(inst.RA);
+		ADD(rB, RB, RA); // rB = RB + RA
+	}
+	else
+		MOV(rB, RB);
+
+
+	MOVI2R(rA, (u32)&Memory::Write_F64);
+	PUSH(4, R0, R1, R2, R3); // R0-R3 are saved around the call and restored by the POP below
+#if !defined(__ARM_PCS_VFP) // SoftFP: the double is passed (not returned) in R0 and R1, so the address goes in R2
+	VMOV(R0, v0); // NOTE(review): only R0 is named; presumably the emitter pairs it with R1 for a double -- confirm
+	MOV(R2, rB);
+#else
+	VMOV(D0, v0); // hard-float build: value in D0, address in R0
+	MOV(R0, rB);
+#endif
+
+	BL(rA);
+
+	POP(4, R0, R1, R2, R3);
+
+	gpr.Unlock(rA, rB); // release the two scratch registers
+}
+
 void JitArm::stfdu(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
@@ -358,3 +578,40 @@ void JitArm::stfdu(UGeckoInstruction inst)
 	gpr.Unlock(rA, rB);
 }
 
+void JitArm::stfdux(UGeckoInstruction inst) // JIT emitter for stfdux: 64-bit store of FS to RA+RB, address written back to RA
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITLoadStoreFloatingOff)
+	ARMReg RA = gpr.R(inst.RA); // registers the cache hands back for the RA / RB fields
+	ARMReg RB = gpr.R(inst.RB);
+
+	ARMReg rA = gpr.GetReg(); // scratch: Exceptions value, then the callee address
+	ARMReg rB = gpr.GetReg(); // scratch: effective address
+	ARMReg v0 = fpr.R0(inst.FS);
+
+	ADD(rB, RB, RA); // rB = RB + RA
+
+	LDR(rA, R9, PPCSTATE_OFF(Exceptions)); // presumably R9 is the ppcState base pointer -- confirm
+	CMP(rA, EXCEPTION_DSI); // NOTE(review): equality compare, not a bit test -- confirm
+
+	SetCC(CC_NEQ); // presumably predicates the emitted MOV on "not equal" -- confirm against the emitter
+	MOV(RA, rB); // update form: RA receives the effective address
+	SetCC(); // reset to the emitter's default condition
+
+	MOVI2R(rA, (u32)&Memory::Write_F64); // NOTE(review): no branch guards the call below; only the RA update is conditional
+	PUSH(4, R0, R1, R2, R3); // R0-R3 are saved around the call and restored by the POP below
+#if !defined(__ARM_PCS_VFP) // SoftFP: the double is passed (not returned) in R0 and R1, so the address goes in R2
+	VMOV(R0, v0); // NOTE(review): only R0 is named; presumably the emitter pairs it with R1 for a double -- confirm
+	MOV(R2, rB);
+#else
+	VMOV(D0, v0); // hard-float build: value in D0, address in R0
+	MOV(R0, rB);
+#endif
+
+	BL(rA);
+
+	POP(4, R0, R1, R2, R3);
+
+	gpr.Unlock(rA, rB); // release the two scratch registers
+}
+
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
index 675907d199..4b212b6246 100644
--- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
+++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp
@@ -269,14 +269,14 @@ static GekkoOPTemplate table31[] =
 
 	// fp load/store
 	{535, &JitArm::lfsx}, //"lfsx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
-	{567, &JitArm::Default}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
-	{599, &JitArm::Default}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
-	{631, &JitArm::Default}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
+	{567, &JitArm::lfsux}, //"lfsux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
+	{599, &JitArm::lfdx}, //"lfdx", OPTYPE_LOADFP, FL_IN_A0 | FL_IN_B}},
+	{631, &JitArm::lfdux}, //"lfdux", OPTYPE_LOADFP, FL_IN_A | FL_IN_B}},
 
-	{663, &JitArm::Default}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
-	{695, &JitArm::Default}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
-	{727, &JitArm::Default}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
-	{759, &JitArm::Default}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
+	{663, &JitArm::stfsx}, //"stfsx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
+	{695, &JitArm::stfsux}, //"stfsux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
+	{727, &JitArm::stfdx}, //"stfdx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
+	{759, &JitArm::stfdux}, //"stfdux", OPTYPE_STOREFP, FL_IN_A | FL_IN_B}},
 	{983, &JitArm::Default}, //"stfiwx", OPTYPE_STOREFP, FL_IN_A0 | FL_IN_B}},
 
 	{19, &JitArm::mfcr}, //"mfcr", OPTYPE_SYSTEM, FL_OUT_D}},