From 7dd8deecece2055500a207ea4fa0e573158dede5 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Tue, 8 Oct 2013 10:16:42 +0000 Subject: [PATCH] [ARM] Update the ArmEmitter with a bunch of NEON emitters. This adds around 47 instruction emitters if I counted correctly. None are well tested at this point. Work to add all the NEON emitters is ongoing. --- Source/Core/Common/Src/ArmEmitter.cpp | 583 ++++++++++++++++++++++---- Source/Core/Common/Src/ArmEmitter.h | 59 ++- 2 files changed, 557 insertions(+), 85 deletions(-) diff --git a/Source/Core/Common/Src/ArmEmitter.cpp b/Source/Core/Common/Src/ArmEmitter.cpp index f7864c4978..c579306217 100644 --- a/Source/Core/Common/Src/ArmEmitter.cpp +++ b/Source/Core/Common/Src/ArmEmitter.cpp @@ -883,7 +883,7 @@ void ARMXEmitter::LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...) WriteRegStoreOp(0x89, dest, WriteBack, RegList); } -ARMReg ARMXEmitter::SubBase(ARMReg Reg) +ARMReg SubBase(ARMReg Reg) { if (Reg >= S0) { @@ -898,6 +898,51 @@ ARMReg ARMXEmitter::SubBase(ARMReg Reg) return Reg; } +u32 EncodeVd(ARMReg Vd) +{ + bool quad_reg = Vd >= Q0; + bool double_reg = Vd >= D0; + + ARMReg Reg = SubBase(Vd); + + if (quad_reg) + return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12); + else + if (double_reg) + return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12); + else + return ((Reg & 0x1) << 22) | ((Reg & 0x1E) << 11); +} +u32 EncodeVn(ARMReg Vn) +{ + bool quad_reg = Vn >= Q0; + bool double_reg = Vn >= D0; + + ARMReg Reg = SubBase(Vn); + if (quad_reg) + return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3); + else + if (double_reg) + return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3); + else + return ((Reg & 0x1E) << 15) | ((Reg & 0x1) << 7); +} +u32 EncodeVm(ARMReg Vm) +{ + bool quad_reg = Vm >= Q0; + bool double_reg = Vm >= D0; + + ARMReg Reg = SubBase(Vm); + + if (quad_reg) + return ((Reg & 0x10) << 1) | (Reg & 0xF); + else + if (double_reg) + return ((Reg & 0x10) << 1) | (Reg & 0xF); + else + return ((Reg & 0x1) << 5) | (Reg >> 1); +} + // 
Double/single, Neon extern const VFPEnc VFPOps[16][2] = { {{0xE0, 0xA0}, {0x20, 0xD1}}, // 0: VMLA @@ -935,51 +980,6 @@ const char *VFPOpNames[16] = { "VABSi", }; -u32 ARMXEmitter::EncodeVd(ARMReg Vd) -{ - bool quad_reg = Vd >= Q0; - bool double_reg = Vd >= D0; - - ARMReg Reg = SubBase(Vd); - - if (quad_reg) - return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12); - else - if (double_reg) - return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12); - else - return ((Reg & 0x1) << 22) | ((Reg & 0x1E) << 11); -} -u32 ARMXEmitter::EncodeVn(ARMReg Vn) -{ - bool quad_reg = Vn >= Q0; - bool double_reg = Vn >= D0; - - ARMReg Reg = SubBase(Vn); - if (quad_reg) - return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3); - else - if (double_reg) - return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3); - else - return ((Reg & 0x1E) << 15) | ((Reg & 0x1) << 7); -} -u32 ARMXEmitter::EncodeVm(ARMReg Vm) -{ - bool quad_reg = Vm >= Q0; - bool double_reg = Vm >= D0; - - ARMReg Reg = SubBase(Vm); - - if (quad_reg) - return ((Reg & 0x10) << 1) | (Reg & 0xF); - else - if (double_reg) - return ((Reg & 0x10) << 1) | (Reg & 0xF); - else - return ((Reg & 0x1) << 5) | (Reg >> 1); -} - void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm) { bool quad_reg = Vd >= Q0; @@ -1244,38 +1244,480 @@ void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags) } } +void NEONXEmitter::VABA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 
1 : 0) << 24) | EncodeVn(Vn) \ + | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x71 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} + +void NEONXEmitter::VABAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \ + | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x50 << 4) | EncodeVm(Vm)); +} + void NEONXEmitter::VABD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { - _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VABD(float)"); - _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VABD(float) when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); bool register_quad = Vd >= Q0; - // Gets encoded as a double register - Vd = SubBase(Vd); - Vn = SubBase(Vn); - Vm = SubBase(Vm); - - Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (encodedSize(Size) << 20) | ((Vn & 0xF) << 16) \ - | ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \ - | ((Vm & 0x10) << 1) | (Vm & 0xF)); + if (Size & F_32) + Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 
1 : 0) << 24) | EncodeVn(Vn) \ + | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm)); } + +void NEONXEmitter::VABDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \ + | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | EncodeVm(Vm)); +} + +void NEONXEmitter::VABS(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + Write32((0xF3 << 24) | (0xB1 << 16) | (encodedSize(Size) << 18) | EncodeVd(Vd) \ + | ((Size & F_32 ? 
1 : 0) << 10) | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} + +void NEONXEmitter::VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + // Only Float + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) \ + | (0xE1 << 4) | (register_quad << 6) | EncodeVm(Vm)); /* absolute compare: bits 11:8 = 0b1110, bit 4 set; bit 21 clear selects GE */ +} + +void NEONXEmitter::VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + // Only Float + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) \ + | (0xE1 << 4) | (register_quad << 6) | EncodeVm(Vm)); /* same opcode as VACGE; bit 21 set selects GT */ +} + +void NEONXEmitter::VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + VACGE(Vd, Vm, Vn); /* pseudo-instruction: |Vn| <= |Vm| is emitted as VACGE with the sources swapped */ +} + +void NEONXEmitter::VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + VACGT(Vd, Vm, Vn); /* pseudo-instruction: |Vn| < |Vm| is emitted as VACGT with the sources swapped */ +} + void NEONXEmitter::VADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { - _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VADD(integer)"); - _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VADD(integer) when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); bool register_quad = Vd >= Q0; - // Gets encoded as a double register - Vd = SubBase(Vd); - Vn = SubBase(Vn); - Vm = SubBase(Vm); + if (Size & F_32) + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ + | (0x8 << 8) | (register_quad << 6) | EncodeVm(Vm)); +} - Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | 
(encodedSize(Size) << 20) | ((Vn & 0xF) << 16) \ - | ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \ - | ((Vm & 0x10) << 1) | (Vm & 0xF)); +void NEONXEmitter::VADDHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vm >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \ + | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm)); +} + +void NEONXEmitter::VADDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 
1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \ + | EncodeVd(Vd) | EncodeVm(Vm)); +} +void NEONXEmitter::VADDW(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \ + | EncodeVd(Vd) | (1 << 8) | EncodeVm(Vm)); +} +void NEONXEmitter::VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU 
doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + bool register_quad = Vd >= Q0; + + Write32((0xF3 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + bool register_quad = Vd >= Q0; + + Write32((0xF3 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + bool register_quad = Vd >= Q0; + + Write32((0xF3 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + if (Size & F_32) + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ + | (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm)); } +void NEONXEmitter::VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + 
_dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \ + | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + if (Size & F_32) + Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ + | (0x31 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \ + | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x8 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + if (Size & F_32) + Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 
1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ + | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ + | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + VCGE(Size, Vd, Vm, Vn); +} +void NEONXEmitter::VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ + | EncodeVd(Vd) | ((Size & F_32 ? 
1 : 0) << 10) | (3 << 7) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCLS(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + bool register_quad = Vd >= Q0; + Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ + | EncodeVd(Vd) | (1 << 10) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + VCGT(Size, Vd, Vm, Vn); +} +void NEONXEmitter::VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ + | EncodeVd(Vd) | ((Size & F_32 ? 
1 : 0) << 10) | (0x20 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCLZ(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ + | EncodeVd(Vd) | (0x48 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VCNT(NEONElementType Size, ARMReg Vd, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, Size & I_8, "Can only use I_8 with " __FUNCTION__); + + bool register_quad = Vd >= Q0; + Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ + | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VDUP(NEONElementType Size, ARMReg Vd, ARMReg Vm, u8 index) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + u32 sizeEncoded = 0, indexEncoded = 0; + if (Size & I_8) + sizeEncoded = 1; + else if (Size & I_16) + sizeEncoded = 2; + else if (Size & I_32) + sizeEncoded = 4; + if (Size & I_8) + indexEncoded <<= 1; + else if (Size & I_16) + indexEncoded <<= 2; + else if (Size & I_32) + indexEncoded <<= 3; + Write32((0xF3 << 24) | (0xD << 20) | (sizeEncoded << 16) | (indexEncoded << 16) \ + | EncodeVd(Vd) | (0xC0 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VDUP(NEONElementType Size, ARMReg Vd, ARMReg Rt) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Rt < D0, "Pass invalid register to " 
__FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + Vd = SubBase(Vd); + u8 sizeEncoded = 0; + if (Size & I_8) + sizeEncoded = 2; + else if (Size & I_16) + sizeEncoded = 1; + else if (Size & I_32) + sizeEncoded = 0; + + Write32((0xEE << 24) | (0x8 << 20) | ((sizeEncoded & 2) << 21) | (register_quad << 21) \ + | ((Vd & 0xF) << 16) | (Rt << 12) | (0xD1 << 4) | ((Vd & 0x10) << 3) | (1 << 4)); +} +void NEONXEmitter::VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | (0xB << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (index & 0xF) \ + | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, cpu_info.bVFPv4, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, 
"Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, cpu_info.bVFPv4, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VHADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ + | EncodeVn(Vn) | EncodeVd(Vd) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VHSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + bool register_quad = Vd >= Q0; + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ + | EncodeVn(Vn) | EncodeVd(Vd) | (1 << 9) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + + if (Size & F_32) + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 
1 : 0) << 23) | (encodedSize(Size) << 20) \ + | EncodeVn(Vn) | EncodeVd(Vd) | (0x60 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + + if (Size & F_32) + Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ + | EncodeVn(Vn) | EncodeVd(Vd) | (0x61 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VMLA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + + if (Size & F_32) + Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VMLS(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + + bool register_quad = Vd >= Q0; + + if (Size & F_32) + Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm)); + else + Write32((0xF2 << 24) | (1 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); +} +void NEONXEmitter::VMLAL(NEONElementType Size, ARMReg 
Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \ + | EncodeVn(Vn) | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm)); +} +void NEONXEmitter::VMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) +{ + _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to " __FUNCTION__); + _dbg_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use " __FUNCTION__ " when CPU doesn't support it"); + _dbg_assert_msg_(DYNA_REC, !(Size & F_32), __FUNCTION__ " doesn't support float."); + + Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 
1 : 0) << 24) | (encodedSize(Size) << 20) \ + | EncodeVn(Vn) | EncodeVd(Vd) | (0xA0 << 4) | EncodeVm(Vm)); +} + void NEONXEmitter::VSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) { _dbg_assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to VSUB(integer)"); @@ -1362,17 +1804,6 @@ void NEONXEmitter::VRSQRTE(NEONElementType Size, ARMReg Vd, ARMReg Vm) | ((Vm & 0x10) << 1) | (Vm & 0xF)); } -void NEONXEmitter::VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - bool register_quad = Vd >= Q0; - Vd = SubBase(Vd); - Vn = SubBase(Vn); - Vm = SubBase(Vm); - - Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) - | ((Vd & 0xF) << 12) | (1 << 8) | ((Vn & 0x10) << 3) - | (register_quad << 6) | ((Vm & 0x10) << 1) | (1 << 4) | (Vm & 0xF)); -} void NEONXEmitter::VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm) { bool register_quad = Vd >= Q0; diff --git a/Source/Core/Common/Src/ArmEmitter.h b/Source/Core/Common/Src/ArmEmitter.h index 71597bbfd6..2d21743a10 100644 --- a/Source/Core/Common/Src/ArmEmitter.h +++ b/Source/Core/Common/Src/ArmEmitter.h @@ -339,6 +339,12 @@ struct LiteralPool typedef const u8* JumpTarget; +u32 EncodeVd(ARMReg Vd); +u32 EncodeVn(ARMReg Vn); +u32 EncodeVm(ARMReg Vm); +// Subtracts the base from the register to give us the real one +ARMReg SubBase(ARMReg Reg); + class ARMXEmitter { friend struct OpArg; // for Write8 etc @@ -355,9 +361,6 @@ private: void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2); void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2); - u32 EncodeVd(ARMReg Vd); - u32 EncodeVn(ARMReg Vn); - u32 EncodeVm(ARMReg Vm); void WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm); void WriteVFPDataOp6bit(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm, u32 bit6); @@ -530,9 +533,6 @@ public: // None of these will be created with conditional since ARM // is deprecating conditional execution of ASIMD instructions. // ASIMD instructions don't even have a conditional encoding. 
- - // Subtracts the base from the register to give us the real one - ARMReg SubBase(ARMReg Reg); // VFP Only void VLDR(ARMReg Dest, ARMReg Base, s16 offset); @@ -603,7 +603,6 @@ class NEONXEmitter { private: ARMXEmitter *_emit; - ARMReg SubBase(ARMReg Reg) { return _emit->SubBase(Reg); } inline void Write32(u32 value) { _emit->Write32(value); } inline u32 encodedSize(u32 value) @@ -612,7 +611,7 @@ private: return 0; else if (value & I_16) return 1; - else if (value & I_32) + else if ((value & I_32) || (value & F_32)) return 2; else if (value & I_64) return 3; @@ -628,8 +627,51 @@ public: : _emit(emit) {} + void VABA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VABAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); void VABD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VABDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VABS(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VADDHN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VADDL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VADDW(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCEQ(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCGE(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VCGT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCGT(NEONElementType Size, ARMReg Vd, 
ARMReg Vm); + void VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCLE(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VCLS(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VCLT(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VCLZ(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VCNT(NEONElementType Size, ARMReg Vd, ARMReg Vm); + void VDUP(NEONElementType Size, ARMReg Vd, ARMReg Vm, u8 index); + void VDUP(NEONElementType Size, ARMReg Vd, ARMReg Rt); + void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index); + void VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VHADD(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VHSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMAX(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMIN(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLA(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLS(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLAL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLSL(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); void VSUB(NEONElementType Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); void VREV64(NEONElementType Size, ARMReg Vd, ARMReg Vm); void VREV32(NEONElementType Size, ARMReg Vd, ARMReg Vm); @@ -637,7 +679,6 @@ public: void VRSQRTE(NEONElementType Size, ARMReg Vd, ARMReg Vm); - void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VLD1(NEONElementType Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC);