From 025b447a9cc4d4611bfe997c8ea53814e63b2d8e Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 15 May 2021 11:41:00 +0200 Subject: [PATCH] arm32: replace old arm emitter with vixl --- CMakeLists.txt | 71 +- core/arm_emitter/E_Branches.h | 83 - core/arm_emitter/E_DataOp.h | 1420 ----------- core/arm_emitter/E_Extend.h | 151 -- core/arm_emitter/E_LoadStore.h | 312 --- core/arm_emitter/E_Misc.h | 208 -- core/arm_emitter/E_Multiply.h | 99 - core/arm_emitter/E_Parallel.h | 112 - core/arm_emitter/E_Special.h | 74 - core/arm_emitter/E_Status.h | 99 - core/arm_emitter/E_VDataOp.h | 667 ----- core/arm_emitter/E_VLoadStore.h | 210 -- core/arm_emitter/E_VRegXfer.h | 344 --- core/arm_emitter/H_Branches.h | 85 - core/arm_emitter/H_LoadStore.h | 150 -- core/arm_emitter/H_fp.h | 102 - core/arm_emitter/H_psuedo.h | 29 - core/arm_emitter/H_state.h | 64 - core/arm_emitter/arm_coding.h | 254 -- core/arm_emitter/arm_disasm.h | 242 -- core/arm_emitter/arm_emitter.h | 128 - core/arm_emitter/arm_registers.h | 487 ---- core/core.mk | 54 +- core/deps/vixl/code-generation-scopes-vixl.h | 2 +- core/deps/vixl/platform-vixl.h | 1 + core/hw/arm7/arm7_rec_arm32.cpp | 446 ++-- core/hw/arm7/arm7_rec_arm64.cpp | 4 +- core/hw/sh4/dyna/blockmanager.h | 9 +- core/hw/sh4/dyna/driver.cpp | 2 +- core/hw/sh4/dyna/ngen.h | 14 +- core/hw/sh4/interpr/sh4_fpu.cpp | 1 - core/hw/sh4/interpr/sh4_opcodes.cpp | 2 - core/hw/sh4/sh4_core_regs.cpp | 1 - core/hw/sh4/sh4_if.h | 4 - core/hw/sh4/sh4_interpreter.h | 8 +- core/rec-ARM/ngen_arm.S | 174 -- core/rec-ARM/rec_arm.cpp | 2268 ++++++++---------- core/rec-ARM64/rec_arm64.cpp | 20 +- 38 files changed, 1288 insertions(+), 7113 deletions(-) delete mode 100644 core/arm_emitter/E_Branches.h delete mode 100755 core/arm_emitter/E_DataOp.h delete mode 100644 core/arm_emitter/E_Extend.h delete mode 100644 core/arm_emitter/E_LoadStore.h delete mode 100644 core/arm_emitter/E_Misc.h delete mode 100755 core/arm_emitter/E_Multiply.h delete mode 100644 core/arm_emitter/E_Parallel.h delete mode 100644 core/arm_emitter/E_Special.h delete mode 100644 core/arm_emitter/E_Status.h delete mode 100755 core/arm_emitter/E_VDataOp.h delete mode 100644 core/arm_emitter/E_VLoadStore.h delete mode 100644 core/arm_emitter/E_VRegXfer.h delete mode 100644 core/arm_emitter/H_Branches.h delete mode 100644 core/arm_emitter/H_LoadStore.h delete mode 100644 core/arm_emitter/H_fp.h delete mode 100644 core/arm_emitter/H_psuedo.h delete mode 100644 core/arm_emitter/H_state.h delete mode 100644 core/arm_emitter/arm_coding.h delete mode 100755 core/arm_emitter/arm_disasm.h delete mode 100644 core/arm_emitter/arm_emitter.h delete mode 100644 core/arm_emitter/arm_registers.h delete mode 100644 core/rec-ARM/ngen_arm.S diff --git a/CMakeLists.txt b/CMakeLists.txt index ad0a7a96b..7dd9811f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,12 +29,6 @@ if(ENABLE_CTEST) include(CTest) endif() -if(MSVC) - enable_language(ASM_MASM) -else() - enable_language(ASM) -endif() - if(APPLE) set(CMAKE_Swift_LANGUAGE_VERSION 5.0) enable_language(Swift) @@ -305,14 +299,6 @@ target_sources(${PROJECT_NAME} PRIVATE core/deps/chdr/huffman.c core/deps/chdr/huffman.h) -target_sources(${PROJECT_NAME} PRIVATE - core/deps/crypto/md5.cpp - core/deps/crypto/md5.h - core/deps/crypto/sha1.cpp - core/deps/crypto/sha1.h - core/deps/crypto/sha256.cpp - core/deps/crypto/sha256.h) - target_include_directories(${PROJECT_NAME} PRIVATE core/deps/nowide/include) if(NOT FLAC_FOUND) @@ -454,29 +440,6 @@ target_sources(${PROJECT_NAME} PRIVATE 
core/archive/ZipArchive.cpp core/archive/ZipArchive.h) -target_sources(${PROJECT_NAME} PRIVATE - core/arm_emitter/arm_coding.h - core/arm_emitter/arm_disasm.h - core/arm_emitter/arm_emitter.h - core/arm_emitter/arm_registers.h - core/arm_emitter/E_Branches.h - core/arm_emitter/E_DataOp.h - core/arm_emitter/E_Extend.h - core/arm_emitter/E_LoadStore.h - core/arm_emitter/E_Misc.h - core/arm_emitter/E_Multiply.h - core/arm_emitter/E_Parallel.h - core/arm_emitter/E_Special.h - core/arm_emitter/E_Status.h - core/arm_emitter/E_VDataOp.h - core/arm_emitter/E_VLoadStore.h - core/arm_emitter/E_VRegXfer.h - core/arm_emitter/H_Branches.h - core/arm_emitter/H_fp.h - core/arm_emitter/H_LoadStore.h - core/arm_emitter/H_psuedo.h - core/arm_emitter/H_state.h) - target_sources(${PROJECT_NAME} PRIVATE core/cfg/cfg.cpp core/cfg/cfg.h @@ -872,7 +835,39 @@ if(USE_VULKAN AND NOT APPLE) endif() if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") - target_sources(${PROJECT_NAME} PRIVATE core/rec-ARM/ngen_arm.S core/rec-ARM/rec_arm.cpp) + target_include_directories(${PROJECT_NAME} PRIVATE core/deps/vixl) + target_sources(${PROJECT_NAME} PRIVATE + core/rec-ARM/rec_arm.cpp + core/deps/vixl/aarch32/location-aarch32.cc + core/deps/vixl/aarch32/location-aarch32.h + core/deps/vixl/aarch32/assembler-aarch32.cc + core/deps/vixl/aarch32/assembler-aarch32.h + core/deps/vixl/aarch32/instructions-aarch32.cc + core/deps/vixl/aarch32/instructions-aarch32.h + core/deps/vixl/aarch32/constants-aarch32.cc + core/deps/vixl/aarch32/constants-aarch32.h + core/deps/vixl/aarch32/macro-assembler-aarch32.cc + core/deps/vixl/aarch32/macro-assembler-aarch32.h + core/deps/vixl/aarch32/operands-aarch32.cc + core/deps/vixl/aarch32/operands-aarch32.h + core/deps/vixl/aarch32/disasm-aarch32.cc + core/deps/vixl/aarch32/disasm-aarch32.h + core/deps/vixl/assembler-base-vixl.h + core/deps/vixl/code-buffer-vixl.cc + core/deps/vixl/code-buffer-vixl.h + core/deps/vixl/code-generation-scopes-vixl.h + core/deps/vixl/compiler-intrinsics-vixl.cc + core/deps/vixl/compiler-intrinsics-vixl.h + core/deps/vixl/cpu-features.cc + core/deps/vixl/cpu-features.h + core/deps/vixl/globals-vixl.h + core/deps/vixl/invalset-vixl.h + core/deps/vixl/macro-assembler-interface.h + core/deps/vixl/platform-vixl.h + core/deps/vixl/pool-manager.h + core/deps/vixl/pool-manager-impl.h + core/deps/vixl/utils-vixl.cc + core/deps/vixl/utils-vixl.h) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") target_include_directories(${PROJECT_NAME} PRIVATE core/deps/vixl) target_sources(${PROJECT_NAME} PRIVATE diff --git a/core/arm_emitter/E_Branches.h b/core/arm_emitter/E_Branches.h deleted file mode 100644 index 71e545d87..000000000 --- a/core/arm_emitter/E_Branches.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * E_Branches.h - * - */ -#pragma once - - - -namespace ARM -{ - - EAPI B(u32 sImm24, ConditionCode CC=AL) - { - DECL_Id(0x0A000000); - - SET_CC; - I |= ((sImm24>>2)&0xFFFFFF); - EMIT_I; - } - - EAPI BL(u32 sImm24, ConditionCode CC=AL) - { - DECL_Id(0x0B000000); - - SET_CC; - I |= ((sImm24>>2)&0xFFFFFF); - EMIT_I; - } - - - // Note: Either X variant will switch to THUMB* if bit0 of addr is 1 - // - - EAPI BX(eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x012FFF10); - - SET_CC; - I |= (Rm&15); - EMIT_I; - } - - EAPI BLX(eReg Rm, ConditionCode CC=AL) // Form II - { - DECL_Id(0x012FFF30); - - SET_CC; - I |= (Rm&15); - EMIT_I; - } - - EAPI BXJ(eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x012FFF20); - - SET_CC; - I |= (Rm&15); - EMIT_I; - } - - - - - // This encoding looks correct, 
but segfaults, the pc val is align(pc,4) but this should be right in ARM - // -#if defined(_DEVEL) - EAPI BLX(u32 sImm24, bool toThumb) // Form I * H is derived so not needed, fixup sImm24 so one can just pass a real addr - { - DECL_Id(0xFA000000); - - if(toThumb) - I |= 1<<24; // SET_H - - I |= ((sImm24>>2)&0xFFFFFF); - EMIT_I; - } -#endif - - - - - -}; \ No newline at end of file diff --git a/core/arm_emitter/E_DataOp.h b/core/arm_emitter/E_DataOp.h deleted file mode 100755 index e0e8a3660..000000000 --- a/core/arm_emitter/E_DataOp.h +++ /dev/null @@ -1,1420 +0,0 @@ -/* - * - * - * {}{S} , - * := MOV | MVN - * - * {} , - * := CMP | CMN | TST | TEQ - * - * {}{S} , , - * := ADD | SUB | RSB | ADC | SBC | RSC | AND | BIC | EOR | ORR - * - * ?/> - */ -#pragma once - - - -namespace ARM -{ - - - - - - - - /* - * imm Rd,[Rn,] imm - * reg Rd,Rn,Rm shift - * rsr Rd,Rn,Rm type Rs - * - * sp.imm Rd {SP} imm - * sp.reg Rd {SP} Rm shift - * - */ - - -#if 0 -#define dpInstr(iName, iId) \ - EAPI iName (eReg Rd, eReg Rn, u32 Imm) ; \ - EAPI iName (eReg Rd, eReg Rn, eReg Rm, eShiftOp type=S_LSL, u32 Imm=0) ; \ - EAPI iName (eReg Rd, eReg Rn, eReg Rm, eShiftOp type, eReg Rs) ; -#endif - - /* - - ADC IMM 0x02A00000 - ADC REG 0x00A00000 - ADC RSR 0x00A00010 - - ADD IMM 0x02800000 - ADD REG 0x00800000 - ADD RSR 0x00800010 -ADD.SP.IMM 0x028D0000 -ADD.SP.REG 0x008D0000 - - AND IMM 0x02000000 - AND REG 0x00000000 - AND RSR 0x00000010 - - -// ASR's do not fit this pattern moved elsewhere // - - - - BIC IMM 0x03C00000 - BIC REG 0x01C00000 - BIC RSR 0x01C00010 - - CMN IMM 0x03700000 // N imm - CMN REG 0x01700000 // NMshift - CMN RSR 0x01700010 // NMtypeS - - CMP IMM 0x03500000 // N imm - CMP REG 0x01500000 - CMP RSR 0x01500010 - - - EOR IMM 0x02200000 - EOR REG 0x00200000 - EOR REG 0x00200010 - - - */ - -#define DP_PARAMS (eReg Rd, eReg Rn, ShiftOp Shift, ConditionCode CC=AL) -#define DP_RPARAMS (eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - -#define DP_COMMON \ - DECL_I; \ -\ - SET_CC; \ - I |= (Rn&15)<<16; \ - I |= (Rd&15)<<12; \ - I |= (Shift&0xFFF) - -#define DP_RCOMMON \ - DECL_I; \ -\ - SET_CC; \ - I |= (Rn&15)<<16; \ - I |= (Rd&15)<<12; \ - I |= (Rm&15) - -#define DP_OPCODE(opcode) \ - I |= (opcode)<<21 - - - - EAPI AND DP_PARAMS { DP_COMMON; DP_OPCODE(DP_AND); EMIT_I; } - EAPI EOR DP_PARAMS { DP_COMMON; DP_OPCODE(DP_EOR); EMIT_I; } - EAPI SUB DP_PARAMS { DP_COMMON; DP_OPCODE(DP_SUB); EMIT_I; } - EAPI RSB DP_PARAMS { DP_COMMON; DP_OPCODE(DP_RSB); EMIT_I; } - EAPI ADD DP_PARAMS { DP_COMMON; DP_OPCODE(DP_ADD); EMIT_I; } - EAPI ADC DP_PARAMS { DP_COMMON; DP_OPCODE(DP_ADC); EMIT_I; } - EAPI SBC DP_PARAMS { DP_COMMON; DP_OPCODE(DP_SBC); EMIT_I; } - EAPI RSC DP_PARAMS { DP_COMMON; DP_OPCODE(DP_RSC); EMIT_I; } - EAPI TST DP_PARAMS { DP_COMMON; DP_OPCODE(DP_TST); EMIT_I; } - EAPI TEQ DP_PARAMS { DP_COMMON; DP_OPCODE(DP_TEQ); EMIT_I; } -// EAPI CMP DP_PARAMS { DP_COMMON; DP_OPCODE(DP_CMP); EMIT_I; } - EAPI CMN DP_PARAMS { DP_COMMON; DP_OPCODE(DP_CMN); EMIT_I; } - EAPI ORR DP_PARAMS { DP_COMMON; DP_OPCODE(DP_ORR); EMIT_I; } - EAPI MOV DP_PARAMS { DP_COMMON; DP_OPCODE(DP_MOV); EMIT_I; } - EAPI BIC DP_PARAMS { DP_COMMON; DP_OPCODE(DP_BIC); EMIT_I; } - EAPI MVN DP_PARAMS { DP_COMMON; DP_OPCODE(DP_MVN); EMIT_I; } - -#if defined(_DEVEL) && defined(_NODEF_) // These require testing -> CMP/MOV Shifter(Reg)? fmt broken? 
// Simple third reg type w/ no shifter - EAPI AND DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_AND); EMIT_I; } - EAPI EOR DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_EOR); EMIT_I; } - EAPI SUB DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_SUB); EMIT_I; } - EAPI RSB DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_RSB); EMIT_I; } - EAPI ADD DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_ADD); EMIT_I; } - EAPI ADC DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_ADC); EMIT_I; } - EAPI SBC DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_SBC); EMIT_I; } - EAPI RSC DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_RSC); EMIT_I; } - EAPI TST DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_TST); EMIT_I; } - EAPI TEQ DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_TEQ); EMIT_I; } - EAPI CMP DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_CMP); EMIT_I; } - EAPI CMN DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_CMN); EMIT_I; } - EAPI ORR DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_ORR); EMIT_I; } - EAPI MOV DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_MOV); EMIT_I; } - EAPI BIC DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_BIC); EMIT_I; } - EAPI MVN DP_PARAMS { DP_RCOMMON; DP_OPCODE(DP_MVN); EMIT_I; } -#endif - - - - static u32 ARMImmid8r4_enc(u32 imm32) - { - for (int i=0;i<=30;i+=2) - { - u32 immv=(imm32<>(32-i)); - if (i == 0) - immv = imm32; - if (immv<256) - { - return ((i/2)<<8) | immv; - } - } - - return -1; - } - - static u32 ARMImmid8r4(u32 imm8r4) - { - u32 rv = ARMImmid8r4_enc(imm8r4); - - verify(rv!=-1); - return rv; - } - - static bool is_i8r4(u32 i32) { return ARMImmid8r4_enc(i32) != -1; } - - - - EAPI ADD(eReg Rd, eReg Rn, eReg Rm, u32 RmLSL, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00800000); - - if (S) - I |= 1<<20; - - I |= (RmLSL&31)<<7; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI ADD(eReg Rd, eReg Rn, eReg Rm, bool S, ConditionCode CC=AL) - { - ADD(Rd,Rn,Rm,0,S,CC); - } - - EAPI ADD(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x00800000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI ADD(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - ADD(Rd,Rn,Rm,false,CC); - } - - - EAPI ADD(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x02800000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI ADD(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02800000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI ADD(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00800000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI ADD(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00800010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI ADC(eReg Rd, eReg Rn, eReg Rm, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00A00000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI ADC(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x02A00000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. 
spec, add rot support! - EMIT_I; - } - - EAPI ADC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00A00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI ADC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00A00010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI ADC(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02A00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI ADR(eReg Rd, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x028F0000); - - SET_CC; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI ADR_Zero(eReg Rd, s32 Imm8, ConditionCode CC=AL) // Special case for subtraction of 0 - { - DECL_Id(0x024F0000); - - SET_CC; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI ORR(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01800000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI ORR(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x03800000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI ORR(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01800000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI ORR(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rs, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01800000); - - SET_CC; - I |= S << 20; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift<<5; - I |= (Rs&15)<<8; - I |= 1<<4; - EMIT_I; - } - - EAPI ORR(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03800000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI ORR(eReg Rd, eReg Rn, eReg Rm, bool S, ShiftOp Shift, u32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x01800000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI AND(eReg Rd, eReg Rn, eReg Rm, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00000000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI AND(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x00000000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI AND(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x02000000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI AND(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02000000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. 
spec, add rot support! - EMIT_I; - } - - EAPI AND(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00000000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI AND(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00000010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI EOR(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x00200000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI EOR(eReg Rd, eReg Rn, eReg Rm, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00200000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI EOR(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x02200000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI EOR(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02200000); - - SET_CC; - I |= S << 20; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI EOR(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00200000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI EOR(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00200010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - - EAPI SUB(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x00400000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI SUB(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02400000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! 
- if (S) - I |= 1<<20; - EMIT_I; - } - EAPI SUB(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) { SUB(Rd,Rn,Imm8,false,CC); } - - EAPI SUB(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00400000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI SUB(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00400010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI SBC(eReg Rd, eReg Rn, eReg Rm, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00C00000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI SBC(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - SBC(Rd,Rn,Rm,false,CC); - } - - EAPI SBC(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02C00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI SBC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00C00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI SBC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00C00010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI RSB(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x00600000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - EAPI RSB(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x02600000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI RSB(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02600000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! 
- EMIT_I; - } - - EAPI RSB(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00600000); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI RSB(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00600010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI RSC(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x02E00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI RSC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00E00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI RSC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x00E00010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI MVN(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01E00000); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - EAPI MVN(eReg Rd, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x03E00000); - - SET_CC; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI MVN(eReg Rd, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03E00000); - - SET_CC; - I |= S << 20; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI MVN(eReg Rd, eReg Rm, ShiftOp Shift, u32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01E00000); - - SET_CC; - I |= S << 20; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI MVN(eReg Rd, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01E00010); - - SET_CC; - I |= S << 20; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI TST(eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01100000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rm&15); - EMIT_I; - } - - - EAPI TST(eReg Rn, u32 Imm12, ConditionCode CC=AL) - { - DECL_Id(0x03100000); - - SET_CC; - I |= (Rn&15)<<16; - I |= ARMImmid8r4(Imm12); - EMIT_I; - } - - EAPI TST(eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03100000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI TST(eReg Rn, eReg Rm, ShiftOp Shift, u32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01100000); - - SET_CC; - I |= S << 20; - I |= (Rn&15)<<16; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI TST(eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01100010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI TEQ(eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03300000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI TEQ(eReg Rn, eReg Rm, ShiftOp Shift, u32 Imm8, bool S, ConditionCode 
CC=AL) - { - DECL_Id(0x01300000); - - SET_CC; - I |= S << 20; - I |= (Rn&15)<<16; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI TEQ(eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01300010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI BIC(eReg Rd, eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x03C00000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI BIC(eReg Rd, eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03C00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI BIC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, u32 ImmShift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01C00000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= Shift << 5; - I |= (ImmShift & 31) << 7; - EMIT_I; - } - - EAPI BIC(eReg Rd, eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01C00010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI BFC(eReg Rd, u8 lsb, u8 width, ConditionCode CC=AL) - { - DECL_Id(0x07C0001F); - - SET_CC; - I |= (Rd & 15) << 12; - I |= (lsb & 31) << 7; - I |= ((lsb + width - 1) & 31) << 16; - EMIT_I; - } - - EAPI BFI(eReg Rd, eReg Rn, u8 lsb, u8 width, ConditionCode CC=AL) - { - DECL_Id(0x07C00010); - - SET_CC; - I |= Rn & 15; - I |= (Rd & 15) << 12; - I |= (lsb & 31) << 7; - I |= ((lsb + width - 1) & 31) << 16; - EMIT_I; - } - - - /* - * - */ - - EAPI UBFX(eReg Rd, eReg Rm, u8 lsb, u8 width, ConditionCode CC=AL) - { - DECL_Id(0x07E00050); - verify(lsb+width<=32); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= (lsb&31)<<7; - I |= ((width-1)&31)<<16; - EMIT_I; - } - - EAPI SBFX(eReg Rd, eReg Rm, u8 lsb, u8 width, ConditionCode CC=AL) - { - DECL_Id(0x07A00050); - verify(lsb+width<=32); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= (lsb&31)<<7; - I |= ((width-1)&31)<<16; - EMIT_I; - } - - EAPI MOV(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01A00000); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI MOV(eReg Rd, eReg Rm, ShiftOp Shift, u32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x01A00000); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI MOV(eReg Rd, eReg Rm, ShiftOp Shift, eReg Rs, ConditionCode CC=AL) - { - DECL_Id(0x01A00000); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift<<5; - I |= (Rs&15)<<8; - I |= 1<<4; - EMIT_I; - } - - EAPI MOV(eReg Rd, eReg Rm, ShiftOp Shift, u32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01A00000); - - SET_CC; - I |= S << 20; - I |= (Rd&15)<<12; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI MOV(eReg Rd, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01A00010); - - SET_CC; - I |= S << 20; - I |= (Rd & 15) << 12; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI MOVW(eReg Rd, u32 Imm16, ConditionCode CC=AL) - { - DECL_Id(0x03000000); - - SET_CC; - I |= (Imm16&0xF000)<<4; - I |= (Rd&15)<<12; - I |= (Imm16&0x0FFF); - EMIT_I; - } - - EAPI MOVT(eReg Rd, u32 Imm16, 
ConditionCode CC=AL) - { - DECL_Id(0x03400000); - - SET_CC; - I |= (Imm16&0xF000)<<4; - I |= (Rd&15)<<12; - I |= (Imm16&0x0FFF); - EMIT_I; - } - - EAPI MOV(eReg Rd, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x03A00000); - - SET_CC; - I |= (Rd&15)<<12; - I |= ARMImmid8r4(Imm8); // * 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI MOV(eReg Rd, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03A00000); - - SET_CC; - I |= S << 20; - I |= (Rd & 15) << 12; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - - EAPI CMP(eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01500000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rm&15); - EMIT_I; - } - - - EAPI CMP(eReg Rn, s32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x03500000); - - SET_CC; - I |= (Rn&15)<<16; - I |= ARMImmid8r4(Imm8); // *FIXME* 12b imm is 8b imm 4b rot. spec, add rot support! - EMIT_I; - } - - EAPI CMP(eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03500000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI CMP(eReg Rn, eReg Rm, ShiftOp Shift, eReg Rs, ConditionCode CC=AL) - { - DECL_Id(0x01500000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rm&15); - I |= Shift<<5; - I |= (Rs&15)<<8; - I |= 1<<4; - EMIT_I; - } - - EAPI CMP(eReg Rn, eReg Rm, ShiftOp Shift, u32 Imm8, ConditionCode CC=AL) - { - DECL_Id(0x01500000); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI CMP(eReg Rn, eReg Rm, ShiftOp Shift, u32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01500000); - - SET_CC; - I |= S << 20; - I |= (Rn&15)<<16; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI CMP(eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01500010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI CMN(eReg Rn, s32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x03600000); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= ARMImmid8r4(Imm8); - EMIT_I; - } - - EAPI CMN(eReg Rn, eReg Rm, ShiftOp Shift, u32 Imm8, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01600000); - - SET_CC; - I |= S << 20; - I |= (Rn&15)<<16; - I |= (Rm&15); - I |= Shift<<5; - I |= (Imm8&31)<<7; - EMIT_I; - } - - EAPI CMN(eReg Rn, eReg Rm, ShiftOp Shift, eReg Rshift, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01600010); - - SET_CC; - I |= S << 20; - I |= (Rn & 15) << 16; - I |= (Rm & 15); - I |= (Rshift & 15) << 8; - I |= Shift << 5; - EMIT_I; - } - - EAPI LSL(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01A00010); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<8; - I |= (Rn&15)<<0; - EMIT_I; - } - EAPI LSL(eReg Rd, eReg Rm, s32 imm5, ConditionCode CC=AL) - { - DECL_Id(0x01A00000); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<0; - I |= (imm5&31)<<7; - EMIT_I; - } - - EAPI LSR(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01A00030); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<8; - I |= (Rn&15)<<0; - EMIT_I; - } - - EAPI RRX(eReg Rd, eReg Rm,bool S=false, ConditionCode CC=AL) - { - DECL_Id(0x01A00060); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<0; - EMIT_I; - } - - - EAPI LSR(eReg Rd, eReg Rm, s32 imm5, bool S, ConditionCode CC=AL) - { - DECL_Id(0x01A00020); - - if (S) - I |= 1<<20; - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<0; - I |= (imm5&31)<<7; - EMIT_I; - } - - EAPI LSR(eReg Rd, eReg Rm, s32 
imm5, ConditionCode CC=AL) - { - LSR(Rd,Rm,imm5,false,CC); - } - - - EAPI ASR(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01A00050); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<8; - I |= (Rn&15)<<0; - EMIT_I; - } - - EAPI ASR(eReg Rd, eReg Rm, s32 imm5, ConditionCode CC=AL) - { - DECL_Id(0x01A00040); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<0; - I |= (imm5&31)<<7; - EMIT_I; - } - - EAPI ROR(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01A00070); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<8; - I |= (Rn&15)<<0; - EMIT_I; - } - - EAPI ROR(eReg Rd, eReg Rm, s32 imm5, ConditionCode CC=AL) - { - DECL_Id(0x01A00060); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15)<<0; - I |= (imm5&31)<<7; - EMIT_I; - } - - - - -#undef DP_PARAMS -#undef DP_RPARAMS - -#undef DP_COMMON -#undef DP_RCOMMON - -#undef DP_OPCODE - - - -}; diff --git a/core/arm_emitter/E_Extend.h b/core/arm_emitter/E_Extend.h deleted file mode 100644 index b4d5f8c86..000000000 --- a/core/arm_emitter/E_Extend.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * E_Extend.h - * - -There are six basic instructions: - XTAB16 Extend bits[23:16] and bits[7:0] of one register to 16 bits, and add corresponding halfwordsto the values in another register. - XTAB Extend bits[ 7: 0] of one register to 32 bits, and add to the value in another register. - XTAH Extend bits[15: 0] of one register to 32 bits, and add to the value in another register. - XTB16 Extend bits[23:16] and bits[7:0] to 16 bits each. - XTB Extend bits[ 7: 0] to 32 bits. - XTH Extend bits[15: 0] to 32 bits. - -Each of the six instructions is available in the following variations, indicated by the prefixes shown: - S Sign extension, with or without addition modulo 216 or 232. - U Zero (unsigned) extension, with or without addition modulo 216 or 232. 
- - */ -#pragma once - - - -namespace ARM -{ - - EAPI SXTAB16(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06800070); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI SXTAB(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06A00070); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI SXTAH(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06B00070); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI SXTB16(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x068F0070); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI SXTB(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06AF0070); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI SXTH(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06BF0070); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI UXTAB16(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06C00070); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI UXTAB(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06E00070); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI UXTAH(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06F00070); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI UXTB16(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06CF0070); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI UXTB(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06EF0070); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI UXTH(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06FF0070); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - -}; \ No newline at end of file diff --git a/core/arm_emitter/E_LoadStore.h b/core/arm_emitter/E_LoadStore.h deleted file mode 100644 index f01ada76d..000000000 --- a/core/arm_emitter/E_LoadStore.h +++ /dev/null @@ -1,312 +0,0 @@ -/* - * E_LoadStore.h - * - * LDR|STR{}{B}{T} Rd, - * - */ -#pragma once - - - -namespace ARM -{ - - - enum AddrMode { - - Offset, // [Rn, offset] - Base:Offset used for access, no writeback - PostIndexed, // [Rn, offset] ! - Base:Offset used for access, written back to Base reg. - PreIndexed // [Rn],offset - Base used then Base:Offset written back to Base reg. - - // offset: imm{8,12}, Rm, Rm #Shift - }; - - -#define SET_Rn I |= (Rn&15)<<16 -#define SET_Rt I |= (Rt&15)<<12 -#define SET_Rm I |= (Rm&15) - - -#define SET_Rtn SET_Rt; SET_Rn -#define SET_Rtnm SET_Rt; SET_Rn; SET_Rm - - - -#define SET_I I |= (1<<25) // Sets Register (Rather than Immediate) Addressing -#define SET_P I |= (1<<24) // If NOT Set: post-indexed, else offset||pre-indexed (W. determines which) -#define SET_U I |= (1<<23) // If SET: Offset is added to base, else its subtracted (Sign of sImm12) -#define SET_B I |= (1<<22) // If SET: Access is a byte access, else its a word access. -#define SET_W I |= (1<<21) // (P==0) [ W==0: reg. 
post indexed, W==1: Unprivileged T variant ] (P==1) [ W==0: Offset , W==1: Pre-indexed ] -#define SET_L I |= (1<<20) // (L==1) Load / Store - - -#define SET_AddrMode \ - if(mode==Offset) { SET_P; } \ - else if(mode==PreIndexed) { SET_P; SET_W; } - -/* -#define SET_AddrMode \ - \ - switch(mode) { \ - case PostIndexed: break; \ - case Offset: SET_P; break; \ - case PreIndexed: SET_P; SET_W; break; \ - } -*/ - - -#define SET_sImm12 \ - \ - if (0 != sImm12) { \ - if (sImm12 > 0) { SET_U; } \ - I |= (abs(sImm12) &0xFFF); \ - } - -#define SET_sImm8 \ - \ - if(0 != sImm8) { \ - verify(is_s8(sImm8)); \ - if(sImm8 > 0) { SET_U; } \ - sImm8 = abs(sImm8); \ - I |= ((u32)sImm8 &0x0F); \ - I |= ((u32)sImm8 &0xF0)<<4; \ - } - - -#define SET_Imm5 I |= ((Imm5&0x1F)<<7) -#define SET_type I |= ((type&3)<<5) - - - - - - -/* - Load/Store Encoding - - -------------------------------------------------------------------------------------- - - cond 01-IPUBWL Rn Rt addr_mode -- Load/Store Word || Unsigned Byte - - I,P,U,W - Specify type of addressing mode - L - Specify Load (L==1), else Store - B - Specify Byte (B==1), else Word - - -------------------------------------------------------------------------------------- - - cond 000-PUIWL Rn Rt addr_mode 1SH1 addr_mode -- Load/Store Halfword || Signed byte - - I,P,U,W - Specify type of addressing mode - L - Specify Load (L==1), else Store - S - Specify signed (S==1), else unsigned access - H - Specify Halfword access, else signed byte - - -------------------------------------------------------------------------------------- - - - cond 010-PUBWL Rn Rt sImm12-iiii-iiii Imm Offset - cond 011-PUBWL Rn Rt #sft sft_op 0 Rm Reg Offset - - cond 000 PU1WL Rn Rt imHI 1011 imLO Imm Offset - halfword - cond 000 PU0WL Rn Rt SBZ 1011 Rm Reg Offset - halfword - cond 000 PU1W1 Rn Rt imHI 11H1 imLO Imm Offset - signed halfword/byte - cond 000 PU0W1 Rn Rt SBZ 11H1 Rm Reg Offset - signed halfword/byte - cond 000 PU1W0 Rn Rt imHI 11S1 imLO Imm Offset - two words - cond 000 PU0W0 Rn Rt SBZ 11S1 Rm Reg Offset - two words - - cond 100-PUSWL Rn Register-list Multiple - cond 110-PUNWL Rn CRd cp_num offs8 COP & double reg xfer - - cond 0001 0B00 Rn Rt SBZ 1001 Rm Swap [byte] - - -------------------------------------------------------------------------------------- - - LDR L 010 1U001 11 Rt imm12 - *no literal stores* - LDR I 010 PU0W1 Rn Rt imm12 - STR I 010 PU0W0 Rn Rt imm12 - LDR R 011 PU0W1 Rn Rt imm5 type 0 Rm - STR R 011 PU0W0 Rn Rt imm5 type 0 Rm - - LDRT I 010 0U011 Rn Rt imm12 - STRT I 010 0U010 Rn Rt imm12 - LDRT R 011 0U011 Rn Rt imm5 type 0 Rm - STRT R 011 0U010 Rn Rt imm5 type 0 Rm - - LDRB L 010 1U101 11 Rt imm12 - *^^ - LDRB I 010 PU1W1 Rn Rt imm12 - STRB I 010 PU1W0 Rn Rt imm12 - LDRB R 011 PU1W1 Rn Rt imm5 type 0 Rm - STRB R 011 PU1W0 Rn Rt imm5 type 0 Rm - - LDRBT I - LDRBT R - - -------------------------------------------------------------------------------------- - - LDRH L 000 1U101 11 Rt imm4H 1011 imm4L - *^^ - LDRH I 000 PU1W1 Rn Rt imm4H 1011 imm4L - STRH I 000 PU1W0 Rn Rt imm4H 1011 imm4L - LDRH R 000 PU0W1 Rn Rt 0000 1011 Rm - STRH R 000 PU0W0 Rn Rt 0000 1011 Rm - - LDRD L 000 1U100 11 Rt imm4H 1101 imm4L - *^^ - LDRD I 000 PU1W0 Rn Rt imm4H 1101 imm4L - STRD I 000 PU1W0 Rn Rt imm4H 1111 imm4L - LDRD R 000 PU0W0 Rn Rt 0000 1101 Rm - STRD R 000 PU0W0 Rn Rt 0000 1111 Rm - - -------------------------------------------------------------------------------------- - - LDRHT/STRHT - - LDREX/STREX - LDREXB/STREXB - LDREXH/STREXH - LDREXD/STREXD - 
-------------------------------------------------------------------------------------- - - - EAPI LDR 0x04100000 // These were all the A1 Versions for Imm IIRC - EAPI LDRB 0x04500000 - EAPI LDRBT 0x06700000 - EAPI LDRD 0x00000000 - EAPI LDREX 0x01900090 - EAPI LDRH 0x00100090 - EAPI LDRSB 0x00100000 - EAPI LDRSH 0x00100000 - EAPI LDRT 0x04300000 - - EAPI STR 0x04000000 - EAPI STRB 0x04400000 - EAPI STRBT 0x06600000 - EAPI STRD 0x00000000 - EAPI STREX 0x01800090 - EAPI STRH 0x00000090 - EAPI STRT 0x04200000 - -*/ - - -extern u8* emit_opt; -extern eReg reg_addr; -extern eReg reg_dst; -extern s32 imma; - -EAPI LDR (eReg Rt, eReg Rn, s32 sImm12=0, AddrMode mode=Offset, ConditionCode CC=AL) -{ - if (emit_opt+4==(u8*)EMIT_GET_PTR() && reg_addr==Rn && imma==sImm12) - { - if (reg_dst!=Rt) - MOV(Rt,reg_dst); - } - else - { - DECL_Id(0x04100000); SET_CC; SET_Rtn; SET_AddrMode; SET_sImm12; EMIT_I; - } -} -EAPI STR (eReg Rt, eReg Rn, s32 sImm12=0, AddrMode mode=Offset, ConditionCode CC=AL) -{ - emit_opt=0;//(u8*)EMIT_GET_PTR(); - reg_addr=Rn; - reg_dst=Rt; - imma=sImm12; - DECL_Id(0x04000000); SET_CC; SET_Rtn; SET_AddrMode; SET_sImm12; EMIT_I; -} -EAPI LDRB(eReg Rt, eReg Rn, s32 sImm12=0, AddrMode mode=Offset, ConditionCode CC=AL) { DECL_Id(0x04500000); SET_CC; SET_Rtn; SET_AddrMode; SET_sImm12; SET_B; EMIT_I; } // Prob don't need SET_B, in iID -EAPI STRB(eReg Rt, eReg Rn, s32 sImm12=0, AddrMode mode=Offset, ConditionCode CC=AL) { DECL_Id(0x04400000); SET_CC; SET_Rtn; SET_AddrMode; SET_sImm12; SET_B; EMIT_I; } // Prob don't need SET_B, in iID - -EAPI LDRT (eReg Rt, eReg Rn, s32 sImm12=0, ConditionCode CC=AL) { DECL_Id(0x04300000); SET_CC; SET_Rtn; SET_sImm12; EMIT_I; } -EAPI STRT (eReg Rt, eReg Rn, s32 sImm12=0, ConditionCode CC=AL) { DECL_Id(0x04200000); SET_CC; SET_Rtn; SET_sImm12; EMIT_I; } -EAPI LDRBT(eReg Rt, eReg Rn, s32 sImm12=0, ConditionCode CC=AL) { DECL_Id(0x04700000); SET_CC; SET_Rtn; SET_sImm12; SET_B; EMIT_I; } // Prob don't need SET_B, in iID -EAPI STRBT(eReg Rt, eReg Rn, s32 sImm12=0, ConditionCode CC=AL) { DECL_Id(0x04600000); SET_CC; SET_Rtn; SET_sImm12; SET_B; EMIT_I; } // Prob don't need SET_B, in iID - - // LDR(r1,r2,r3, Offset, true, L_LSL, 5, EQ); ... 
LDR r1, r2, +r3 LSL #5 -EAPI LDR (eReg Rt, eReg Rn, eReg Rm, AddrMode mode=Offset, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06100000); SET_CC; SET_Rtnm; SET_AddrMode; SET_Imm5; SET_type; if(Add){ SET_U; } EMIT_I; } -EAPI STR (eReg Rt, eReg Rn, eReg Rm, AddrMode mode=Offset, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06000000); SET_CC; SET_Rtnm; SET_AddrMode; SET_Imm5; SET_type; if(Add){ SET_U; } EMIT_I; } -EAPI LDRB(eReg Rt, eReg Rn, eReg Rm, AddrMode mode=Offset, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06500000); SET_CC; SET_Rtnm; SET_AddrMode; SET_Imm5; SET_type; if(Add){ SET_U; } SET_B; EMIT_I; } // Prob don't need SET_B, in iID -EAPI STRB(eReg Rt, eReg Rn, eReg Rm, AddrMode mode=Offset, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06400000); SET_CC; SET_Rtnm; SET_AddrMode; SET_Imm5; SET_type; if(Add){ SET_U; } SET_B; EMIT_I; } // Prob don't need SET_B, in iID - -EAPI LDRT (eReg Rt, eReg Rn, eReg Rm, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06300000); SET_CC; SET_Rtnm; SET_Imm5; SET_type; if(Add){ SET_U; } EMIT_I; } -EAPI STRT (eReg Rt, eReg Rn, eReg Rm, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06200000); SET_CC; SET_Rtnm; SET_Imm5; SET_type; if(Add){ SET_U; } EMIT_I; } -EAPI LDRBT(eReg Rt, eReg Rn, eReg Rm, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06700000); SET_CC; SET_Rtnm; SET_Imm5; SET_type; if(Add){ SET_U; } SET_B; EMIT_I; } // Prob don't need SET_B, in iID -EAPI STRBT(eReg Rt, eReg Rn, eReg Rm, bool Add=false, ShiftOp type=S_LSL, u32 Imm5=0, ConditionCode CC=AL) { DECL_Id(0x06600000); SET_CC; SET_Rtnm; SET_Imm5; SET_type; if(Add){ SET_U; } SET_B; EMIT_I; } // Prob don't need SET_B, in iID - - - -// LDR Rt,[PC, #(s:+/-)Imm12] *Special Case - Literal / PC Relative -EAPI LDR(eReg Rt, s32 sImm12, ConditionCode CC=AL) { - DECL_Id(0x051F0000); SET_CC; SET_Rt; SET_sImm12; EMIT_I; -} - -// LDRB Rt,[PC, #(s:+/-)Imm12] *Special Case - Literal / PC Relative -EAPI LDRB(eReg Rt, s32 sImm12, ConditionCode CC=AL) { - DECL_Id(0x055F0000); SET_CC; SET_Rt; SET_sImm12; EMIT_I; -} - - - - -// Note: Following support Post-Indexed addressing only // - - -EAPI LDRH(eReg Rt, eReg Rn, s32 sImm8, ConditionCode CC=AL) { DECL_Id(0x005000B0); SET_CC; SET_Rtn; SET_P; SET_sImm8; EMIT_I; } -EAPI STRH(eReg Rt, eReg Rn, s32 sImm8, ConditionCode CC=AL) { DECL_Id(0x004000B0); SET_CC; SET_Rtn; SET_P; SET_sImm8; EMIT_I; } -EAPI LDRD(eReg Rt, eReg Rn, s32 sImm8, ConditionCode CC=AL) { DECL_Id(0x004000D0); SET_CC; SET_Rtn; SET_P; SET_sImm8; EMIT_I; } -EAPI STRD(eReg Rt, eReg Rn, s32 sImm8, ConditionCode CC=AL) { DECL_Id(0x004000F0); SET_CC; SET_Rtn; SET_P; SET_sImm8; EMIT_I; } -EAPI LDRSB(eReg Rt, eReg Rn, s32 sImm8 = 0, ConditionCode CC = AL) { DECL_Id(0x005000D0); SET_CC; SET_Rtn; SET_P; SET_sImm8; EMIT_I; } -EAPI LDRSH(eReg Rt, eReg Rn, s32 sImm8 = 0, ConditionCode CC = AL) { DECL_Id(0x005000F0); SET_CC; SET_Rtn; SET_P; SET_sImm8; EMIT_I; } - -EAPI LDRH(eReg Rt, eReg Rn, eReg Rm, bool Add=true,ConditionCode CC=AL) { DECL_Id(0x001000B0); SET_CC; SET_Rtnm; SET_P; if (Add) {SET_U;} EMIT_I; } -EAPI STRH(eReg Rt, eReg Rn, eReg Rm, bool Add=true,ConditionCode CC=AL) { DECL_Id(0x000000B0); SET_CC; SET_Rtnm; SET_P; if (Add) {SET_U;} EMIT_I; } -EAPI LDRD(eReg Rt, eReg Rn, eReg Rm, bool Add=true, ConditionCode CC=AL) { DECL_Id(0x000000D0); SET_CC; 
SET_Rtnm; SET_P; if (Add) {SET_U;} EMIT_I; } -EAPI STRD(eReg Rt, eReg Rn, eReg Rm, bool Add=true, ConditionCode CC=AL) { DECL_Id(0x000000F0); SET_CC; SET_Rtnm; SET_P; if (Add) {SET_U;} EMIT_I; } - -EAPI LDRSB(eReg Rt, eReg Rn, eReg Rm, bool Add=true,ConditionCode CC=AL) { DECL_Id(0x001000D0); SET_CC; SET_Rtnm; SET_P; if (Add) {SET_U;} EMIT_I; } -EAPI LDRSH(eReg Rt, eReg Rn, eReg Rm, bool Add=true,ConditionCode CC=AL) { DECL_Id(0x001000F0); SET_CC; SET_Rtnm; SET_P; if (Add) {SET_U;} EMIT_I; } - - -EAPI LDRH(eReg Rt, s32 sImm8, ConditionCode CC=AL) { DECL_Id(0x015F00B0); SET_CC; SET_Rt; SET_sImm8; EMIT_I; } // *Special Case - Literal / PC Relative -EAPI STRH(eReg Rt, s32 sImm8, ConditionCode CC=AL) { DECL_Id(0x014F00D0); SET_CC; SET_Rt; SET_sImm8; EMIT_I; } // *Special Case - Literal / PC Relative - - - -// TODO: {LD,ST}R{SB,EX*} && friends (If required). - - -// Must use _Reg format -EAPI PUSH(u32 RegList, ConditionCode CC=AL) -{ - DECL_Id(0x092D0000); - - SET_CC; - I |= (RegList&0xFFFF); - EMIT_I; -} - - -EAPI POP(u32 RegList, ConditionCode CC=AL) -{ - DECL_Id(0x08BD0000); - - SET_CC; - I |= (RegList&0xFFFF); - EMIT_I; -} - - -#undef SET_Rtn -#undef SET_Rtnm - -#undef SET_Rn -#undef SET_Rt -#undef SET_Rm - -#undef SET_I -#undef SET_P -#undef SET_U -#undef SET_B -#undef SET_W -#undef SET_L - -#undef SET_AddrMode - -#undef SET_sImm12 -#undef SET_sImm8 -#undef SET_Imm5 -#undef SET_type - - - - - - -}; diff --git a/core/arm_emitter/E_Misc.h b/core/arm_emitter/E_Misc.h deleted file mode 100644 index 01e8c60a9..000000000 --- a/core/arm_emitter/E_Misc.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * E_Misc.h - * - */ -#pragma once - - - -namespace ARM -{ - - /* - * Misc. Arithmetic Instructions - */ - - // Count Leading Zero's - // - EAPI CLZ(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x016F0F10); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - // Unsigned sum of absolute differences - // - EAPI USAD8(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) - { - DECL_Id(0x0780F010); - - SET_CC; - I |= (Rd&15)<<16; - I |= (Rs&15)<<8; - I |= (Rm&15); - EMIT_I; - } - - EAPI USADA8(eReg Rd, eReg Rm, eReg Rs, eReg Rn, ConditionCode CC=AL) - { - DECL_Id(0x07800010); - - SET_CC; - I |= (Rd&15)<<16; - I |= (Rn&15)<<12; - I |= (Rs&15)<<8; - I |= (Rm&15); - EMIT_I; - } - - - - - /* - * Packing Instructions - */ - - - EAPI PKHBT(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) // * shift_imm - { - DECL_Id(0x06800010); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - EAPI PKHTB(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) // * shift_imm - { - DECL_Id(0x06800050); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - - /* - * Swapping Instructions - */ - - EAPI REV(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06BF0F30); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - EAPI REV16(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06BF0FB0); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - EAPI REVSH(eReg Rd, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06FF0F30); - - SET_CC; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - EAPI SEL(eReg Rd, eReg Rn, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06800FB0); - - SET_CC; - I |= (Rn&15)<<16; - I |= (Rd&15)<<12; - I |= (Rm&15); - EMIT_I; - } - - - - - - /* - * Saturate Instructions - */ - - - - // SSAT{} , #, {, } - // - EAPI SSAT(eReg Rd, u32 sat_imm, eReg Rm, u32 sft_imm, ConditionCode CC=AL) // sh&1 << 6 - { - 
DECL_Id(0x06A00010); - - SET_CC; - I |= (sat_imm&31)<<16; - I |= (Rd&15)<<12; - I |= (sft_imm&31)<<7; - I |= (Rm&15); - EMIT_I; - } - - // SSAT16{} , #, - // - EAPI SSAT16(eReg Rd, u32 sat_imm, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06A00030); - - SET_CC; - I |= (sat_imm&15)<<16; - I |= (Rd&15)<<12; - I |= 15<<8; // * SBO - I |= (Rm&15); - EMIT_I; - } - - // USAT{} , #, {, } - // - EAPI USAT(eReg Rd, u32 sat_imm, eReg Rm, u32 sft_imm, ConditionCode CC=AL) - { - DECL_Id(0x06E00010); - - SET_CC; - I |= (sat_imm&31)<<16; - I |= (Rd&15)<<12; - I |= (sft_imm&31)<<7; - I |= (Rm&15); - EMIT_I; - } - - // USAT16{} , #, - // - EAPI USAT16(eReg Rd, u32 sat_imm, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x06E00030); - - SET_CC; - I |= (sat_imm&15)<<16; - I |= (Rd&15)<<12; - I |= 15<<8; // * SBO - I |= (Rm&15); - EMIT_I; - } - - - - - - - - - -}; \ No newline at end of file diff --git a/core/arm_emitter/E_Multiply.h b/core/arm_emitter/E_Multiply.h deleted file mode 100755 index e50a38366..000000000 --- a/core/arm_emitter/E_Multiply.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * E_Multiply.h - * - */ -#pragma once - - - -namespace ARM -{ - - EAPI MLA(eReg Rd, eReg Rn, eReg Rs, eReg Rm, ConditionCode CC=AL) // *FIXME* S - { - DECL_Id(0x00200090); - - SET_CC; - I |= (Rd&15)<<16; - I |= (Rn&15)<<12; - I |= (Rs&15)<<8; - I |= (Rm&15); - EMIT_I; - } - - EAPI MUL(eReg Rd, eReg Rs, eReg Rm, ConditionCode CC=AL) // *FIXME* S - { - DECL_Id(0x00000090); - - SET_CC; - I |= (Rd&15)<<16; - I |= (Rs&15)<<8; - I |= (Rm&15); - EMIT_I; - } - - EAPI UMULL(eReg Rdhi, eReg Rdlo, eReg Rs, eReg Rm, ConditionCode CC=AL) // *FIXME* S - { - DECL_Id(0x00800090); - - SET_CC; - I |= (Rdhi&15)<<16; - I |= (Rdlo&15)<<12; - I |= (Rs&15)<<8; - I |= (Rm&15); - EMIT_I; - } - - EAPI SMULL(eReg Rdhi, eReg Rdlo, eReg Rs, eReg Rm, ConditionCode CC=AL) // *FIXME* S - { - DECL_Id(0x00C00090); - - SET_CC; - I |= (Rdhi&15)<<16; - I |= (Rdlo&15)<<12; - I |= (Rs&15)<<8; - I |= (Rm&15); - EMIT_I; - } - - - - - - -#if 0 -SMLA Signed halfword Multiply Accumulate. -SMLAD Signed halfword Multiply Accumulate, Dual. -SMLAL Signed Multiply Accumulate Long. -SMLAL Signed halfword Multiply Accumulate Long. -SMLALD Signed halfword Multiply Accumulate Long, Dual. -SMLAW Signed halfword by word Multiply Accumulate. -SMLSD Signed halfword Multiply Subtract, Dual. -SMLSLD Signed halfword Multiply Subtract Long Dual. -SMMLA Signed Most significant word Multiply Accumulate. -SMMLS Signed Most significant word Multiply Subtract. -SMMUL Signed Most significant word Multiply. -SMUAD Signed halfword Multiply, Add, Dual. -SMUL Signed halfword Multiply. -SMULL Signed Multiply Long. -SMULW Signed halfword by word Multiply. -SMUSD Signed halfword Multiply, Subtract, Dual. -UMAAL Unsigned Multiply Accumulate significant Long. -UMLAL Unsigned Multiply Accumulate Long. -UMULL Unsigned Multiply Long. -#endif - - - - - - - - - - - - - - -}; \ No newline at end of file diff --git a/core/arm_emitter/E_Parallel.h b/core/arm_emitter/E_Parallel.h deleted file mode 100644 index ed0913f4a..000000000 --- a/core/arm_emitter/E_Parallel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * E_Parallel.h - * - -ADD8 Adds each byte of the second operand register to the corresponding byte of the first operand - register to form the corresponding byte of the result. - -ADD16 Adds the top halfwords of two registers to form the top halfword of the result. - Adds the bottom halfwords of the same two registers to form the bottom halfword of the result. 
- -SUB8 Subtracts each byte of the second operand register from the corresponding byte of the first - operand register to form the corresponding byte of the result. - -SUB16 Subtracts the top halfword of the first operand register from the top halfword of the second operand register to form the top halfword of the result. - Subtracts the bottom halfword of the second operand registers from the bottom halfword of - the first operand register to form the bottom halfword of the result. - -ADDSUBX Does the following: - 1. Exchanges halfwords of the second operand register. - 2. Adds top halfwords and subtracts bottom halfwords. - -SUBADDX Does the following: - 1. Exchanges halfwords of the second operand register. - 2. Subtracts top halfwords and adds bottom halfwords. - - -Each of the six instructions is available in the following variations, indicated by the prefixes shown: - - S Signed arithmetic modulo 28 or 216. Sets the CPSR GE bits (see The GE[3:0] bits on page A2-13). - Q Signed saturating arithmetic. - SH Signed arithmetic, halving the results to avoid overflow. - U Unsigned arithmetic modulo 28 or 216. Sets the CPSR GE bits (see The GE[3:0] bits on page A2-13). - UQ Unsigned saturating arithmetic. - UH Unsigned arithmetic, halving the results to avoid overflow. - - -Status: - These routines require implementation if needed. - - */ -#pragma once - - -namespace ARM -{ - -#if defined(_DEVEL) && 0 - - // S - // - EAPI SADD8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SADD16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SSUB8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SSUB16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SADDSUBX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SSUBADDX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - - - // Q - // - EAPI QADD8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI QADD16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI QSUB8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI QSUB16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI QADDSUBX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI QSUBADDX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - - - // SH - // - EAPI SHADD8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SHADD16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SHSUB8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SHSUB16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SHADDSUBX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI SHSUBADDX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - - - // U - // - EAPI UADD8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UADD16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI USUB8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI USUB16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UADDSUBX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI USUBADDX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - - - // UQ - // - EAPI UQADD8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UQADD16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UQSUB8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UQSUB16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UQADDSUBX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UQSUBADDX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - - - // UH - // - EAPI UHADD8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI 
UHADD16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UHSUB8 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UHSUB16 (eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UHADDSUBX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - EAPI UHSUBADDX(eReg Rd, eReg Rm, eReg Rs, ConditionCode CC=AL) ; - -#endif - - - -}; \ No newline at end of file diff --git a/core/arm_emitter/E_Special.h b/core/arm_emitter/E_Special.h deleted file mode 100644 index 7a856240e..000000000 --- a/core/arm_emitter/E_Special.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * E_Special.h - * - */ -#pragma once - - -namespace ARM -{ - - - - - - EAPI ARMERROR() - { - DECL_Id(0xFFFFFFFF); - EMIT_I; - } - - - EAPI NOP() - { - DECL_Id(0xE320F000); - EMIT_I; - } - - EAPI SVC(u32 code) - { - DECL_Id(0x0F000000); - I |= code&0xFFFFFF; - EMIT_I; - } - - EAPI BKPT() - { - DECL_Id(0x01200070); - EMIT_I; - } - -#define SWI SVC - - - - - - /* - * Synchronization & Barrier Instructions. - * - */ - - - EAPI DSB() - { - DECL_Id(0xF57FF04F); - EMIT_I; - } - - EAPI DMB() - { - DECL_Id(0xF57FF05F); - EMIT_I; - } - - EAPI ISB() - { - DECL_Id(0xF57FF06F); - EMIT_I; - } - - - -}; - diff --git a/core/arm_emitter/E_Status.h b/core/arm_emitter/E_Status.h deleted file mode 100644 index 7eb7f97a6..000000000 --- a/core/arm_emitter/E_Status.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * E_Status.h - * - */ -#pragma once - - - -namespace ARM -{ - -#if defined(_DEVEL) - - // MRS Move PSR to General-purpose Register. - // - EAPI MRS(eReg Rd, u32 R, ConditionCode CC=AL) - { - DECL_Id(0x01000000); - - SET_CC; - I |= (R&1) << 22; - I |= 15<<16; // * SBO - I |= (Rd &15)<<12; - EMIT_I; - } - - - /* MSR Move General-purpose Register to PSR. - - MSR{} CPSR_, # - MSR{} CPSR_, - MSR{} SPSR_, # - MSR{} SPSR_, - */ - - - // MSR: Immediate operand - // - EAPI MSR(u32 R, u32 fmask, u32 rot_imm, u32 imm8, ConditionCode CC=AL) - { - DECL_Id(0x03200000); - - SET_CC; - I |= (R&1) << 22; - I |= (fmask &15)<<16; - I |= 15<<12; // * SBO - I |= (rot_imm &15)<<8; - I |= (imm8 &255); - EMIT_I; - } - - - // MSR: Register operand - // - EAPI MSR(u32 R, u32 fmask, eReg Rm, ConditionCode CC=AL) - { - DECL_Id(0x01200000); - - SET_CC; - I |= (R&1) << 22; - I |= (fmask &15)<<16; - I |= 15<<12; // * SBO - I |= (Rm&15); - EMIT_I; - } - - - - // CPS Change Processor State. - // - EAPI CPS(u32 imod, u32 mmod, u32 mode) // ** [A|I|F] - { - DECL_Id(0xF1000000); - - // Note: UNconditional instruction! - I |= (imod&3)<<18; - I |= (mmod&1)<<17; - I |= (mode&15); - EMIT_I; - } - - - - - - // SETEND Modifies the CPSR endianness, E, bit, without changing any other bits in the CPSR. - // - EAPI SETEND(u32 E) - { - DECL_Id(0xF1010000); - - // Note: UNconditional instruction! - I |= (E &1) << 9; - EMIT_I; - } - -#endif - -}; diff --git a/core/arm_emitter/E_VDataOp.h b/core/arm_emitter/E_VDataOp.h deleted file mode 100755 index 81869fff1..000000000 --- a/core/arm_emitter/E_VDataOp.h +++ /dev/null @@ -1,667 +0,0 @@ -/* - * E_VDataOp.h * VFP/A.SIMD Data Processing Instruction Set Encoding - * - * V{}{}{.
} {,} , - * - * Meaning - * Q The operation uses saturating arithmetic. - * R The operation performs rounding. - * D The operation doubles the result (before accumulation, if any). - * H The operation halves the result. - * - * Meaning Typical register shape - * (none) The operands and result are all the same width. Dd, Dn, Dm - Qd, Qn, Qm - * L Long operation - result is 2 x width of both operands Qd, Dn, Dm - * N Narrow operation - result is width/2 both operands Dd, Qn, Qm - * W Wide operation - result and oper[1] are 2x width oper[2] Qd, Qn, Dm - */ -#pragma once - - -namespace ARM -{ - - - - -#define SET_Qd \ - I |= (((Qd<<1)&0x0E)<<12); \ - I |= (((Qd<<1)&0x10)<<18) - -#define SET_Qn \ - I |= (((Qn<<1)&0x0E)<<16); \ - I |= (((Qn<<1)&0x10)<<3) - -#define SET_Qm \ - I |= (((Qm<<1)&0x0E)); \ - I |= (((Qm<<1)&0x10)<<1) - - - -#define SET_Dd \ - I |= ((Dd&0x0F)<<12); \ - I |= ((Dd&0x10)<<18) - -#define SET_Dn \ - I |= ((Dn&0x0F)<<16); \ - I |= ((Dn&0x10)<<3) - -#define SET_Dm \ - I |= ((Dm&0x0F)); \ - I |= ((Dm&0x10)<<1) - - - - -#define SET_Q I |= 0x40 -#define SET_U I |= ((U&1)<<24); -#define SET_Size I |= (((Size>>4)&3)<<20) // 8,16,32 -> 0,1,2 - - -#define SET_Qdnm \ - SET_Qd; SET_Qn; SET_Qm; SET_Q - -#define SET_Ddnm \ - SET_Dd; SET_Dn; SET_Dm - -#define DECL_Qdnm - - - - - -#define VdpInstrF(viName, viId) \ - EAPI V##viName##_F32 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { DECL_Id(viId); SET_Qdnm; EMIT_I; } \ - EAPI V##viName##_F32 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { DECL_Id(viId); SET_Ddnm; EMIT_I; } - - -#define VdpInstrI_EOR(viName, viId) \ - EAPI V##viName (eFQReg Qd, eFQReg Qn, eFQReg Qm) { DECL_Id(viId); SET_Qdnm; EMIT_I; } \ - EAPI V##viName (eFDReg Dd, eFDReg Dn, eFDReg Dm) { DECL_Id(viId); SET_Ddnm; EMIT_I; } \ - -#define VdpInstrI(viName, viId) \ - EAPI V##viName (eFQReg Qd, eFQReg Qn, eFQReg Qm, u32 Size=32) { DECL_Id(viId); SET_Qdnm; SET_Size; EMIT_I; } \ - EAPI V##viName (eFDReg Dd, eFDReg Dn, eFDReg Dm, u32 Size=32) { DECL_Id(viId); SET_Ddnm; SET_Size; EMIT_I; } \ - \ - EAPI V##viName##_I8 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 8); } \ - EAPI V##viName##_I8 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 8); } \ - EAPI V##viName##_I16 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 16); } \ - EAPI V##viName##_I16 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 16); } \ - EAPI V##viName##_I32 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 32); } \ - EAPI V##viName##_I32 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 32); } - -#define VdpInstrU(viName, viId) \ - EAPI V##viName (eFQReg Qd, eFQReg Qn, eFQReg Qm, u32 Size=32, u32 U=0) { DECL_Id(viId); SET_Qdnm; SET_U; SET_Size; EMIT_I; } \ - EAPI V##viName (eFDReg Dd, eFDReg Dn, eFDReg Dm, u32 Size=32, u32 U=0) { DECL_Id(viId); SET_Ddnm; SET_U; SET_Size; EMIT_I; } \ - \ - EAPI V##viName##_U8 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 8, 1); } \ - EAPI V##viName##_U8 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 8, 1); } \ - EAPI V##viName##_S8 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 8, 0); } \ - EAPI V##viName##_S8 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 8, 0); } \ - EAPI V##viName##_U16 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 16, 1); } \ - EAPI V##viName##_U16 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 16, 1); } \ - EAPI V##viName##_S16 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 16, 0); } \ - EAPI V##viName##_S16 (eFDReg Dd, eFDReg Dn, 
eFDReg Dm) { V##viName (Dd, Dn, Dm, 16, 0); } \ - EAPI V##viName##_U32 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 32, 1); } \ - EAPI V##viName##_U32 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 32, 1); } \ - EAPI V##viName##_S32 (eFQReg Qd, eFQReg Qn, eFQReg Qm) { V##viName (Qd, Qn, Qm, 32, 0); } \ - EAPI V##viName##_S32 (eFDReg Dd, eFDReg Dn, eFDReg Dm) { V##viName (Dd, Dn, Dm, 32, 0); } - -//# define VdpInstrImm (viName, viId) - - - // Another three or four like the above .. immN, above w/ size, F32 w/ sz - - -/* - * A.SIMD Parallel Add/Sub - * - Vector Add VADD (integer), VADD (floating-point) - Vector Add and Narrow, returning High Half VADDHN - Vector Add Long, Vector Add Wide VADDL, VADDW - Vector Halving Add, Vector Halving Subtract VHADD, VHSUB - Vector Pairwise Add and Accumulate Long VPADAL - Vector Pairwise Add VPADD (integer) , VPADD (floating-point) - Vector Pairwise Add Long VPADDL - Vector Rounding Add & Narrow, returning High Half VRADDHN - Vector Rounding Halving Add VRHADD - Vector Rounding Subtract & Narrow, ret. High Half VRSUBHN - Vector Saturating Add VQADD - Vector Saturating Subtract VQSUB - Vector Subtract VSUB (integer), VSUB (floating-point) - Vector Subtract and Narrow, returning High Half VSUBHN - Vector Subtract Long, Vector Subtract Wide VSUBL, VSUBW -*/ - - VdpInstrI(ADD, 0xF2000800) - VdpInstrF(ADD, 0xF2000D00) - - - VdpInstrI(HADD, 0xF2000800) - VdpInstrI(HSUB, 0xF2000A00) - - VdpInstrI(PADD, 0xF2000B10) - VdpInstrF(PADD, 0xF3000D00) - - - VdpInstrU(RHADD, 0xF3000100) - VdpInstrU(QADD, 0xF2000010) - VdpInstrU(QSUB, 0xF3000210) - - VdpInstrI(SUB, 0xF3000800) - VdpInstrF(SUB, 0xF2200D00) - - - // VADD I { DECL_Id(0xF2000800); SET_Qdnm; EMIT_I; } - // VADD F { DECL_Id(0xF2000D00); SET_Qdnm; EMIT_I; } - // VADDHN { DECL_Id(0xF2800400); SET_Qdnm; EMIT_I; } // DQQ - // VADD{L,W} { DECL_Id(0xF2800000); SET_Qdnm; EMIT_I; } // QDD || QQD - // VH{ADD,SUB} { DECL_Id(0xF2000000); SET_Qdnm; EMIT_I; } - // VPADAL { DECL_Id(0xF3B00600); SET_Qdnm; EMIT_I; } // QdQm || DdDm - // VPADD I { DECL_Id(0xF2000B10); SET_Qdnm; EMIT_I; } // DDD only - // VPADD F { DECL_Id(0xF3000D00); SET_Qdnm; EMIT_I; } // DDD only - // VPADDL { DECL_Id(0xF3B00200); SET_Qdnm; EMIT_I; } // QdQm || DdDm - // VRADDHN { DECL_Id(0xF3800400); SET_Qdnm; EMIT_I; } // DQQ - // VRHADD { DECL_Id(0xF3000100); SET_Qdnm; EMIT_I; } - // VRSUBHN { DECL_Id(0xF3800600); SET_Qdnm; EMIT_I; } // DQQ - // VQADD { DECL_Id(0xF2000010); SET_Qdnm; EMIT_I; } - // VQSUB { DECL_Id(0xF3000210); SET_Qdnm; EMIT_I; } - // VSUB I { DECL_Id(0xF3000800); SET_Qdnm; EMIT_I; } - // VSUB F { DECL_Id(0xF2200D00); SET_Qdnm; EMIT_I; } - // VSUBHN { DECL_Id(0xF2800600); SET_Qdnm; EMIT_I; } // DQQ - // VSUB{L,W} { DECL_Id(0xF2800200); SET_Qdnm; EMIT_I; } // QDD || QQD - - - - - - -/* - * A.SIMD Bitwise - * - Vector Bitwise AND VAND (register) - Vector Bitwise Bit Clear (AND complement) VBIC (immediate), VBIC (register) - Vector Bitwise Exclusive OR VEOR - Vector Bitwise Move VMOV (immediate), VMOV (register) - Vector Bitwise NOT VMVN (immediate), VMVN (register) - Vector Bitwise OR VORR (immediate), VORR (register) - Vector Bitwise OR NOT VORN (register) - Vector Bitwise Insert if False VBIF - Vector Bitwise Insert if True VBIT - Vector Bitwise Select VBSL -*/ - - VdpInstrI(AND, 0xF2000110) - VdpInstrI(BIC, 0xF2100110) - VdpInstrI_EOR(EOR, 0xF3000110) - VdpInstrI_EOR(BSL, 0xF3100110) - VdpInstrI_EOR(BIT, 0xF3200110) - VdpInstrI_EOR(BIF, 0xF3300110) - VdpInstrI(ORN, 0xF2300110) - // VAND R { DECL_Id(0xF2000110); 
SET_Qdnm; EMIT_I; } - // VBIC R { DECL_Id(0xF2100110); SET_Qdnm; EMIT_I; } - // VEOR { DECL_Id(0xF3000110); SET_Qdnm; EMIT_I; } - // VBSL { DECL_Id(0xF3100110); SET_Qdnm; EMIT_I; } - // VBIT { DECL_Id(0xF3200110); SET_Qdnm; EMIT_I; } - // VBIF { DECL_Id(0xF3300110); SET_Qdnm; EMIT_I; } - // VORN R { DECL_Id(0xF2300110); SET_Qdnm; EMIT_I; } - - // VBIC I { DECL_Id(0xF2800030); SET_Qd; SET_IMM??; EMIT_I; } - // VMOV I { DECL_Id(0xF2800010); SET_Qd; SET_IMM??; EMIT_I; } - // VMVN I { DECL_Id(0xF2800030); SET_Qd; SET_IMM??; EMIT_I; } - // VORR I { DECL_Id(0xF2800010); SET_Qd; SET_IMM??; EMIT_I; } - // VAND I VBIC I - // VORN I VORR I - - - - - -/* - * A.SIMD comparison - * - Vector Absolute Compare VACGE, VACGT, VACLE,VACLT - Vector Compare Equal VCEQ (register) - Vector Compare Equal to Zer VCEQ (immediate #0) - Vector Compare Greater Than or Equal VCGE (register) - Vector Compare Greater Than or Equal to Zero VCGE (immediate #0) - Vector Compare Greater Than VCGT (register) - Vector Compare Greater Than Zero VCGT (immediate #0) - Vector Compare Less Than or Equal to Zero VCLE (immediate #0) - Vector Compare Less Than Zero VCLT (immediate #0) - Vector Test Bits VTST -*/ - // VAC{COND} { DECL_Id(0xF3000E10); SET_Vdnm; EMIT_I; } - - // VCEQ { DECL_Id(0xF3000810); SET_Vdnm; EMIT_I; } .F32 { DECL_Id(0xF2000E00); SET_Vdnm; EMIT_I; } - VdpInstrI(CEQ, 0xF3200810) -// VdpInstrF(CEQ, 0xF2000e00) - // VCGE { DECL_Id(0xF2000310); SET_Vdnm; EMIT_I; } .F32 { DECL_Id(0xF3000E00); SET_Vdnm; EMIT_I; } - VdpInstrI(CGE, 0xF2200310) -// VdpInstrF(CGE, 0xF3000e00) - // VCGT { DECL_Id(0xF2000300); SET_Vdnm; EMIT_I; } .F32 { DECL_Id(0xF3200E00); SET_Vdnm; EMIT_I; } - //*SEB* 0xF220030 for S32, 0xF3200300 for U32, 0xF2000300 is for S8. - VdpInstrI(CGT, 0xF2200300) - //VdpInstrF(CGT, 0xF3200e00) - // VCLE { DECL_Id(0xF3B10180); SET_Vdnm; EMIT_I; } // R is VCGE w/ operands reversed - // VCLT { DECL_Id(0xF3B10200); SET_Vdnm; EMIT_I; } // R is VCGT w/ operands reversed - // VTST { DECL_Id(0xF2000810); SET_Vdnm; EMIT_I; } - - // VCEQZ { DECL_Id(0xF3B10100); SET_Vd; SET_Vm; EMIT_I; } - // VCGEZ { DECL_Id(0xF3B10080); SET_Vd; SET_Vm; EMIT_I; } - // VCGTZ { DECL_Id(0xF3B10000); SET_Vd; SET_Vm; EMIT_I; } - - - - - - -/* - * A.SIMD shift ** SET_imm6; needed for non Vdnm - * - Vector Saturating Rounding Shift Left VQRSHL - Vector Saturating Rounding Shift Right and Narrow VQRSHRN, VQRSHRUN - Vector Saturating Shift Left VQSHL (register), VQSHL, VQSHLU (immediate) - Vector Saturating Shift Right and Narrow VQSHRN, VQSHRUN - Vector Rounding Shift Left VRSHL - Vector Rounding Shift Right VRSHR - Vector Rounding Shift Right and Accumulate VRSRA - Vector Rounding Shift Right and Narrow VRSHRN - Vector Shift Left VSHL (immediate) on page A8-750 VSHL (register) - Vector Shift Left Long VSHLL - Vector Shift Right VSHR - Vector Shift Right and Narrow VSHRN - Vector Shift Left and Insert VSLI - Vector Shift Right and Accumulate VSRA - Vector Shift Right and Insert VSRI -*/ - // VQRSHL { DECL_Id(0xF SET_Vdnm; EMIT_I; } // * TODO - // VQRSHRN { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VQRSHRUN { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VQSHL R { DECL_Id(0xF SET_Vdnm; EMIT_I; } - // VQSHL I { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VQSHLU { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VQSHRN { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VQSHRUN { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VRSHL { DECL_Id(0xF SET_Vdnm; EMIT_I; } - // VRSHR { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VRSRA { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // 
VRSHRN { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VSHL I { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VSHL R { DECL_Id(0xF SET_Vdnm; EMIT_I; } - // VSHLL { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VSHR { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VSHRN { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VSLI { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VSRA { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - // VSRI { DECL_Id(0xF SET_Vd; SET_Vm; EMIT_I; } - - - - -/* - * A.SIMD multiply - * - Vector Multiply Accumulate VMLA, VMLAL, VMLS, VMLSL (integer) , VMLA, VMLS (floating-point), VMLA, VMLAL, VMLS, VMLSL (by scalar) - Vector Multiply Accumulate Long - Vector Multiply Subtract - Vector Multiply Subtract Long - Vector Multiply VMUL, VMULL (integer and polynomial) - Vector Multiply Long VMUL (floating-point) on page A8-664 VMUL, VMULL (by scalar) - Vector Saturating Doubling Multiply Accumulate Long VQDMLAL, VQDMLSL - Vector Saturating Doubling Multiply Subtract Long - Vector Saturating Doubling Multiply Returning High Half VQDMULH - Vector Saturating Rounding Doubling Multiply Ret. High Half VQRDMULH - Vector Saturating Doubling Multiply Long VQDMULL -*/ - - VdpInstrI(MLA, 0xF2000900) - VdpInstrI(MLS, 0xF3000900) - - VdpInstrF(MLA, 0xF2000D10) - VdpInstrF(MLS, 0xF2200D10) - - //by scalar - //this should be really qd,dn,sm not dm - EAPI VMUL_F32(eFQReg Qd,eFQReg Qn, eFDReg Dm, int idx) - { - DECL_Id(0xF2800840); - - SET_Qd; - SET_Qn; - I |= 1<<8; //SET_F - - SET_Dm; - I |= 0x1<<24; //SET_Q not compatible - - I |= 2<<20; //size to 32 - - I |= Dm&15; //only lower 15 regs are avail - - - //set register sub index - if (idx) - I |= 1<<5; - - EMIT_I; - } - - EAPI VMLA_F32(eFQReg Qd,eFQReg Qn, eFDReg Dm, int idx) - { - DECL_Id(0xF2800040); - - SET_Qd; - SET_Qn; - I |= 1<<8; //SET_F - - SET_Dm; - I |= 0x1<<24; //SET_Q not compatible - - I |= 2<<20; //size to 32 - - I |= Dm&15; //only lower 15 regs are avail - - - //set register sub index - if (idx) - I |= 1<<5; - - EMIT_I; - } -// VdpInstrU(MLAL, 0xF2000D10) *QDD -// VdpInstrU(MLSL, 0xF2200D10) *QDD - - - VdpInstrI(MUL, 0xF2000910) - - VdpInstrF(MUL, 0xF3000D10) - -// VdpInstrU(MULL, 0xF3000D10) *QDD - - // VMLA I { DECL_Id(0xF2000900); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VMLS I { DECL_Id(0xF3000900); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VMLAL I { DECL_Id(0xF2800800); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) * I |= ((U&1)<<24) - // VMLSL I { DECL_Id(0xF2800A00); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) * I |= ((U&1)<<24) - - // VMLA F { DECL_Id(0xF2000D10); SET_Vdnm; EMIT_I; } // * I |= ((sz&1)<<20) - // VMLS F { DECL_Id(0xF2200D10); SET_Vdnm; EMIT_I; } // * I |= ((sz&1)<<20) - - // VMLA S { DECL_Id(0xF2800040); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VMLS S { DECL_Id(0xF2800440); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // CMLAL S { DECL_Id(0xF2800240); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) * I |= ((U&1)<<24) - // VMLSL S { DECL_Id(0xF2800640); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) * I |= ((U&1)<<24) - - // VMUL IP { DECL_Id(0xF2000910); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) * I |= 1<<24 for polynomial - // VMULL IP { DECL_Id(0xF2800C00); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) * I |= 1<<9 for polynomial * I |= ((U&1)<<24) - - // VMUL F { DECL_Id(0xF3000D10); SET_Vdnm; EMIT_I; } // * I |= ((sz&1)<<20) - - // VMUL S { DECL_Id(0xF2800840); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VMULL S { DECL_Id(0xF2800A40); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) * I |= ((U&1)<<24) - 
// VQDMLAL { DECL_Id(0xF2800900); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VQDMLSL { DECL_Id(0xF2800B00); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VQDMULH { DECL_Id(0xF2000B00); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VQRDMULH { DECL_Id(0xF3000B00); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - // VQDMULL { DECL_Id(0xF2800D00); SET_Vdnm; EMIT_I; } // * I |= ((size&3)<<20) - - - - - - - -/* - * A.SIMD misc - * - Vector Absolute Difference and Accumulate VABA, VABAL - Vector Absolute Difference VABD, VABDL (integer) , VABD (floating-point) - Vector Absolute VABS - Vector Convert between floating-point and fixed point VCVT (between floating-point and fixed-point, Advanced SIMD) - Vector Convert between floating-point and integer VCVT (between floating-point and integer, Advanced SIMD) - Vector Convert between half-precision and single-precision VCVT (between half-precision and single-precision, Advanced SIMD) - Vector Count Leading Sign Bits VCLS - Vector Count Leading Zeros VCLZ - Vector Count Set Bits VCNT - Vector Duplicate scalar VDUP (scalar) - Vector Extract VEXT - Vector Move and Narrow VMOVN - Vector Move Long VMOVL - Vector Maximum, Minimum VMAX, VMIN (integer) , VMAX, VMIN (floating-point) - Vector Negate VNEG - Vector Pairwise Maximum, Minimum VPMAX, VPMIN (integer) , VPMAX, VPMIN (floating-point) - Vector Reciprocal Estimate VRECPE - Vector Reciprocal Step VRECPS - Vector Reciprocal Square Root Estimate VRSQRTE - Vector Reciprocal Square Root Step VRSQRTS - Vector Reverse VREV16, VREV32, VREV64 - Vector Saturating Absolute VQABS - Vector Saturating Move and Narrow VQMOVN, VQMOVUN - Vector Saturating Negate VQNEG - Vector Swap VSWP - Vector Table Lookup VTBL, VTBX - Vector Transpose VTRN - Vector Unzip VUZP - Vector Zip VZIP -*/ - // VABA { DECL_Id(0xF2000710); // WIP - // VABAL { DECL_Id(0xF2800500); - - // VABD I { DECL_Id(0xF); - // VABDL I { DECL_Id(0xF); - - // VABD F { DECL_Id(0xF); - - // VABS { DECL_Id(0xF); SET_Vd; SET_Vm; EMIT_I; } - - EAPI VSQRT_F32(eFSReg Sd, eFSReg Sm, ConditionCode CC=AL) - { - DECL_Id(0x0EB10AC0); SET_CC; - - I |= ((Sd&0x1E)<<11) | ((Sd&1)<<22); - I |= ((Sm&0x1E)>>1) | ((Sm&1)<<5); - EMIT_I; - } - - EAPI VABS_F32(eFSReg Sd, eFSReg Sm, ConditionCode CC=AL) - { - DECL_Id(0x0EB00AC0); SET_CC; - - I |= ((Sd&0x1E)<<11) | ((Sd&1)<<22); - I |= ((Sm&0x1E)>>1) | ((Sm&1)<<5); - EMIT_I; - } - EAPI VNEG_F32(eFSReg Sd, eFSReg Sm, ConditionCode CC=AL) - { - DECL_Id(0x0EB10A40); SET_CC; - - I |= ((Sd&0x1E)<<11) | ((Sd&1)<<22); - I |= ((Sm&0x1E)>>1) | ((Sm&1)<<5); - EMIT_I; - } - - //imm move, fpu - EAPI VMOV(eFSReg Sd, u32 imm8_fpu, ConditionCode CC=AL) - { - DECL_Id(0x0EB10A00); SET_CC; - - I |= (imm8_fpu&0x0F); //bits 3:0 - I |= (imm8_fpu&0xF0)<<12; //bits 19:16 - - I |= ((Sd&0x1E)<<11) | ((Sd&1)<<22); - EMIT_I; - } - - const u32 fpu_imm_1=0x70;//01110000 - - EAPI VCMP_F32(eFSReg Sd, eFSReg Sm, ConditionCode CC=AL) - { - DECL_Id(0x0EB40A40); SET_CC; - - I |= ((Sd&0x1E)<<11) | ((Sd&1)<<22); - I |= ((Sm&0x1E)>>1) | ((Sm&1)<<5); - EMIT_I; - } - - VdpInstrF(CEQ, 0xF2000E00) - VdpInstrF(CGE, 0xF3000E00) - VdpInstrF(CGT, 0xF3200E00) - - // VCVT FFP { DECL_Id(0xF); SET_Vd; SET_Vm; EMIT_I; } - // VCVT FI { DECL_Id(0xF); SET_Vd; SET_Vm; EMIT_I; } - // VCVT HS { DECL_Id(0xF); SET_Vd; SET_Vm; EMIT_I; } - - // VCLS { DECL_Id(0xF); SET_Vd; SET_Vm; EMIT_I; } - // VCLZ { DECL_Id(0xF); SET_Vd; SET_Vm; EMIT_I; } - // VCNT { DECL_Id(0xF); SET_Vd; SET_Vm; EMIT_I; } - - - // VEXT { DECL_Id(0xF); SET_Vdnm; EMIT_I; } - - // VMAX I { DECL_Id(0xF); - // VMIN 
I { DECL_Id(0xF); - // VMAX F { DECL_Id(0xF); - // VMIN F { DECL_Id(0xF); - // VNEG { DECL_Id(0xF); - - // VPMAX I { DECL_Id(0xF); - // VPMIN I { DECL_Id(0xF); - // VPMAX F { DECL_Id(0xF); - // VPMIN F { DECL_Id(0xF); - - // VRECPE { DECL_Id(0xF3B30400); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - // VRECPS { DECL_Id(0xF2000F10); SET_Vdnm; EMIT_I; } // sz&1<<20 (.F32) - // VRSQRTE { DECL_Id(0xF3B30480); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 F&1<<8 *** - // VRSQRTS { DECL_Id(0xF2200F10); SET_Vdnm; EMIT_I; } // sz&1<<20 (.F32) - // VREVsz { DECL_Id(0xF3B00000); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 op&3<<7 *** - - // VQABS { DECL_Id(0xF3B00700); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - // VQMOVN { DECL_Id(0xF3B20200); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 op&3<<6 op:00=MOVN op11=srcUnsigned op:x1=dstUnsigned - // VQMOVUN { DECL_Id(0xF3B20200); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - // VQNEG { DECL_Id(0xF3B00780); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - - // VSWP { DECL_Id(0xF3B20000); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - // VTBL { DECL_Id(0xF3B00800); SET_Vdnm; EMIT_I; } // len&3<<8 - // VTBX { DECL_Id(0xF3B00840); SET_Vdnm; EMIT_I; } // len&3<<8 - // VTRN { DECL_Id(0xF3B20080); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - // VUZP { DECL_Id(0xF3B20100); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - // VZIP { DECL_Id(0xF3B20180); SET_Vd; SET_Vm; EMIT_I; } // size&3<<18 - // - - -// VDUP & VMOVN & VMOVL are implemented in VRegXfer.h ... - - - - - - - /* - * VFPv3 Instructions - * - */ - - - - -#define SET_Sd \ - I |= ((Sd&0x1E)<<11); \ - I |= ((Sd&0x01)<<22) - -#define SET_Sn \ - I |= ((Sn&0x1E)<<15); \ - I |= ((Sn&0x01)<<7) - -#define SET_Sm \ - I |= ((Sm&0x1E)>>1); \ - I |= ((Sm&0x01)<<5) - - - -#define SET_Sdnm SET_Sd; SET_Sn; SET_Sm - - -#define VfpInstrS(viName, viId) EAPI V##viName##_VFP (eFSReg Sd, eFSReg Sn, eFSReg Sm, ConditionCode CC=CC_AL) { DECL_Id(viId); SET_CC; SET_Sdnm; EMIT_I; } - - VfpInstrS(MLA, 0x0E000A00) - VfpInstrS(MLS, 0x0E000A40) - - VfpInstrS(NMLA, 0x0E100A40) - VfpInstrS(NMLS, 0x0E100A00) - VfpInstrS(NMUL, 0x0E200A40) - - - VfpInstrS(MUL, 0x0E200A00) - - VfpInstrS(ADD, 0x0E300A00) - VfpInstrS(SUB, 0x0E300A40) - - VfpInstrS(DIV, 0x0E800A00) - - - EAPI VCVT_to_S32_VFP (eFSReg Sd, eFSReg Sm, ConditionCode CC=CC_AL) { DECL_Id(0x0EBD0AC0); SET_CC; SET_Sd; SET_Sm; EMIT_I; } // VfpInstrS(ABS, 0x0EB00AC0) ** {D,S}dm - // 0x0EB80A40 is to_U32. 
to_S32 is 0x0EB80AC0 - EAPI VCVT_from_S32_VFP (eFSReg Sd, eFSReg Sm, ConditionCode CC=CC_AL) { DECL_Id(0x0EB80AC0); SET_CC; SET_Sd; SET_Sm; EMIT_I; } // VfpInstrS(ABS, 0x0EB00AC0) ** {D,S}dm - - EAPI VABS_VFP (eFSReg Sd, eFSReg Sm, ConditionCode CC=CC_AL) { DECL_Id(0x0EB00AC0); SET_CC; SET_Sd; SET_Sm; EMIT_I; } // VfpInstrS(ABS, 0x0EB00AC0) ** {D,S}dm - EAPI VNEG_VFP (eFSReg Sd, eFSReg Sm, ConditionCode CC=CC_AL) { DECL_Id(0x0EB10A40); SET_CC; SET_Sd; SET_Sm; EMIT_I; } // VfpInstrS(NEG, 0x0EB10A40) ** {D,S}dm - EAPI VSQRT_VFP(eFSReg Sd, eFSReg Sm, ConditionCode CC=CC_AL) { DECL_Id(0x0EB10AC0); SET_CC; SET_Sd; SET_Sm; EMIT_I; } // VfpInstrS(SQRT, 0x0EB10AC0) ** {D,S}dm - -// - x0 Vector Move VMOV (immediate) on page A8-640 -// 0000 01 Vector Move VMOV (register) on page A8-642 - -// 001x x1 Vector Convert VCVTB, VCVTT (between half-precision and single-precision, VFP) on page A8-588 -// 010x x1 Vector Compare VCMP, VCMPE on page A8-572 -// 0111 11 Vector Convert VCVT (between double-precision and single-precision) on page A8-584 -// 1000 x1 Vector Convert VCVT, VCVTR (between floating-point and integer, VFP) on page A8-578 -// 101x x1 Vector Convert VCVT (between floating-point and fixed-point, VFP) on page A8-582 -// 110x x1 Vector Convert VCVT, VCVTR (between floating-point and integer, VFP) on page A8-578 -// 111x x1 Vector Convert VCVT (between floating-point and fixed-point, VFP) on page A8-582 - - ////// hack - - EAPI VDIV_HACKF32(eFDReg Sd, eFDReg Sn, eFDReg Sm) - { - ConditionCode CC=AL; - - eFSReg SdS=(eFSReg)(Sd*2); - eFSReg SnS=(eFSReg)(Sn*2); - eFSReg SmS=(eFSReg)(Sm*2); - - verify((int)Sd < 32 && (int)Sn < 32 && (int)Sm < 32); - - VDIV_VFP(SdS,SnS,SmS); - } - - - - - - - - - - - - - - - - - -#undef SET_Qd -#undef SET_Dd - -#undef SET_Qn -#undef SET_Dn - -#undef SET_Qm -#undef SET_Dm - -#undef SET_Q - -#undef SET_Qdnm -#undef SET_Ddnm - - - -}; diff --git a/core/arm_emitter/E_VLoadStore.h b/core/arm_emitter/E_VLoadStore.h deleted file mode 100644 index 8bb040a76..000000000 --- a/core/arm_emitter/E_VLoadStore.h +++ /dev/null @@ -1,210 +0,0 @@ -/* - * E_VLoadStore.h * VFP/A.SIMD Load/Store Instruction Set Encoding - * - */ -#pragma once - - -namespace ARM -{ - - // [cond][110][Opcode][-Rn-][----][101][---------] - // - // cond != 0b1111 | LDC && STC consume these - - - -#define SET_Dd \ - I |= ((Dd&0x0F)<<12); \ - I |= ((Dd&0x10)<<18) - -#define SET_Sd \ - I |= ((Sd&0x1E)<<11); \ - I |= ((Sd&0x01)<<22) - - -#define SET_Rn \ - I |= ((Rn&15)<<16) - -#define SET_uImm8 \ - I |= (uImm8 & 255) - -#define SET_sImm8 \ - if (sImm8 > 0) { \ - I |= (1<<23); \ - } \ - I |= (abs(sImm8) & 255) - - -#define SET_PUW(_P,_U,_W) \ - I |= ( ((_P&1)<<24) | ((_U&1)<<23) | ((_W&1)<<21) ) - - - - - /* - * V{LD,ST}R: Vector Load/Store Register - * - * V{LD,ST}R.64 // VFP && A.SIMD - * V{LD,ST}R.32 // VFP - */ - - EAPI VLDR(eFDReg Dd, eReg Rn, s32 sImm8, ConditionCode CC=AL) // VLDR.64 - { - DECL_Id(0x0D100B00); SET_CC; - SET_Dd; SET_Rn; SET_sImm8; - EMIT_I; - } - - EAPI VLDR(eFSReg Sd, eReg Rn, s32 sImm8, ConditionCode CC=AL) // VLDR.32 - { - DECL_Id(0x0D100A00); SET_CC; - SET_Sd; SET_Rn; SET_sImm8; - EMIT_I; - } - - - EAPI VSTR(eFDReg Dd, eReg Rn, s32 sImm8, ConditionCode CC=AL) // VSTR.64 - { - DECL_Id(0x0D000B00); SET_CC; - SET_Dd; SET_Rn; SET_sImm8; - EMIT_I; - } - - EAPI VSTR(eFSReg Sd, eReg Rn, s32 sImm8, ConditionCode CC=AL) // VSTR.32 - { - DECL_Id(0x0D000A00); SET_CC; - SET_Sd; SET_Rn; SET_sImm8; - EMIT_I; - } - - - - /* - * V{LD,ST}M: Vector Load/Store Multiple - * - * V{LD,ST}R.64 // VFP && 
A.SIMD - * V{LD,ST}R.32 // VFP - * - * uImm8: directional count, abs(sImm8) is the reg. count - * Dd: Register to start sequential operation from.. - * - * suffix DB: P=1 U=0 W=1, Decrement Before, Addresses end just before the address in Rn. - * suffix IA: P=0 U=1 W=?, Increment After, Addresses start at address in Rn - * - * ** These are very complicated encoding and require a lot of error checking and even more thought when used. ** - * ** Simply using, MOV32(R4, &double_array[0]); VLDM(D0, R4, 4); should however work to load an array of 4 doubles ** - */ - - EAPI VLDM(eFDReg Dd, eReg Rn, u32 uImm8, u32 WB=0, ConditionCode CC=AL) // VLDM.64 - { - // ASSERT( (uImm8>0) && (uImm8<=16) && ((Dd+uImm8) <= 32) ) - uImm8<<=1; - DECL_Id(0x0C100B00); SET_CC; - SET_Dd; SET_Rn; - SET_uImm8; SET_PUW(0,1,WB); // Defaulting to IA w/o ! (Write-back) - EMIT_I; - } - - EAPI VLDM(eFSReg Sd, eReg Rn, u32 uImm8, ConditionCode CC=AL) // VLDM.32 - { - // ASSERT( (uImm8>0) && ((Dd+uImm8) <= 32) ) - DECL_Id(0x0C100A00); SET_CC; - SET_Sd; SET_Rn; - SET_uImm8; SET_PUW(0,1,0); // Defaulting to IA w/o ! (Write-back) - EMIT_I; - } - - - EAPI VSTM(eFDReg Dd, eReg Rn, u32 uImm8, ConditionCode CC=AL) // VSTM.64 - { - // ASSERT( (uImm8>0) && (uImm8<=16) && ((Dd+uImm8) <= 32) ) - uImm8<<=1; - DECL_Id(0x0C000B00); SET_CC; - SET_Dd; SET_Rn; - SET_uImm8; SET_PUW(0,1,0); // Defaulting to IA w/o ! (Write-back) - EMIT_I; - } - - EAPI VSTM(eFSReg Sd, eReg Rn, u32 uImm8, ConditionCode CC=AL) // VSTM.32 - { - // ASSERT( (uImm8>0) && ((Dd+uImm8) <= 32) ) - DECL_Id(0x0C000A00); SET_CC; - SET_Sd; SET_Rn; - SET_uImm8; SET_PUW(0,1,0); // Defaulting to IA w/o ! (Write-back) - EMIT_I; - } - - - - - /* - * V{LD,ST}n: Various extra load/store multiple. - * - * Not Implemented. - * - */ - - - - - - - /* - * VPUSH/VPOP: Vector Load/Store multiple consecutive vector registers to the stack. - * - * V{PUSH,POP} .64: A.SIMD .32 VFP - */ - - EAPI VPUSH(eFDReg Dd, eReg Rn, u32 uImm8, ConditionCode CC=AL) // VPUSH.64 - { - uImm8<<=1; - DECL_Id(0x0D2D0B00); SET_CC; - SET_Dd; SET_uImm8; - EMIT_I; - } - - EAPI VPUSH(eFSReg Sd, eReg Rn, u32 uImm8, ConditionCode CC=AL) // VPUSH.32 - { - DECL_Id(0x0D2D0A00); SET_CC; - SET_Sd; SET_uImm8; - EMIT_I; - } - - - EAPI VPOP(eFDReg Dd, eReg Rn, u32 uImm8, ConditionCode CC=AL) // VPOP.64 - { - uImm8<<=1; - DECL_Id(0x0CBD0B00); SET_CC; - SET_Dd; SET_uImm8; - EMIT_I; - } - - EAPI VPOP(eFSReg Sd, eReg Rn, u32 uImm8, ConditionCode CC=AL) // VPOP.32 - { - DECL_Id(0x0CBD0A00); SET_CC; - SET_Sd; SET_uImm8; - EMIT_I; - } - - - - - /* - * Best practice is to remove macro definitions, - * this way they can't affect subsequent headers. - */ - -#undef SET_Dd -#undef SET_Sd - -#undef SET_Rn - -#undef SET_uImm8 -#undef SET_sImm8 - -#undef SET_PUW - - - -}; \ No newline at end of file diff --git a/core/arm_emitter/E_VRegXfer.h b/core/arm_emitter/E_VRegXfer.h deleted file mode 100644 index 58c331a5f..000000000 --- a/core/arm_emitter/E_VRegXfer.h +++ /dev/null @@ -1,344 +0,0 @@ -/* - * E_VRegXfer.h * VFP/A.SIMD Register Transfer Instruction Set Encoding - * - */ -#pragma once - - -namespace ARM -{ - - - - /////// REPLACE THIS MESS W. 
SET_Rto16 / SET_Rto16_H22 , SET_Rto12 , SET_Rto0_H5 OR just leave it all in the fn's ////// - - -#define SET_Qd \ - I |= (((Qd<<1)&0x0E)<<16); \ - I |= (((Qd<<1)&0x10)<<3) - -#define SET_Qm \ - I |= (((Qm<<1)&0x0E)); \ - I |= (((Qm<<1)&0x10)<<1) - - -#define SET_Dd \ - I |= ((Dd&0x0F)<<16); \ - I |= ((Dd&0x10)<<3) - -#define SET_Dm \ - I |= ((Dm&0x0F)); \ - I |= ((Dm&0x10)<<1) - - -#define SET_Sn \ - I |= ((Sn&0x1E)<<15); \ - I |= ((Sn&0x01)<<7) - -#define SET_Sm \ - I |= ((Sm&0x1E)>>1); \ - I |= ((Sm&0x01)<<5) - - -#define SET_Rn \ - I |= ((Rn&15)<<16) - -#define SET_Rt \ - I |= ((Rt&15)<<12) - -#define SET_Rtx2 \ - I |= ((Rt&15)<<12) | ((Rt2&15)<<16) - - // VDUP VMOV VMRS VMSR - - /* - * VDUP.SZ: Duplicates an element from ARM reg Rt into every element of {Q,D}d A.SIMD - * - */ - EAPI VDUP(eFQReg Qd, eReg Rt, u32 Size=32, ConditionCode CC=AL) - { - DECL_Id(0x0E800B10); SET_CC; - SET_Qd; SET_Rt; I |= (1<<21); // Q - if (Size==16) { I |= (1<<5); } // e - if (Size==8) { I |= (1<<22); } // b - EMIT_I; - } - - EAPI VDUP(eFDReg Dd, eReg Rt, u32 Size=32, ConditionCode CC=AL) - { - DECL_Id(0x0E800B10); SET_CC; - SET_Dd; SET_Rt; // No Q - if (Size==16) { I |= (1<<5); } // e - if (Size==8) { I |= (1<<22); } // b - EMIT_I; - } - - EAPI VDUP8 (eFQReg Qd, eReg Rt, ConditionCode=AL) { VDUP(Qd,Rt,8, CC); } - EAPI VDUP8 (eFDReg Dd, eReg Rt, ConditionCode=AL) { VDUP(Dd,Rt,8, CC); } - EAPI VDUP16(eFQReg Qd, eReg Rt, ConditionCode=AL) { VDUP(Qd,Rt,16,CC); } - EAPI VDUP16(eFDReg Dd, eReg Rt, ConditionCode=AL) { VDUP(Dd,Rt,16,CC); } - EAPI VDUP32(eFQReg Qd, eReg Rt, ConditionCode=AL) { VDUP(Qd,Rt,32,CC); } - EAPI VDUP32(eFDReg Dd, eReg Rt, ConditionCode=AL) { VDUP(Dd,Rt,32,CC); } - - EAPI VDUP32(eFQReg Qd, eFDReg Dm, int idx) - { - DECL_Id(0xF3B00C00); - - //Set_Qd seems to be incompitable here ? 
- I |= (((Qd<<1)&0x0E)<<12); \ - I |= (((Qd<<1)&0x10)<<18); - SET_Dm; - I |= 0x40; //SET_Q - I |= 0x4 << 16; // 32 bits 4=0100 - I |= (idx&1) << 19; // set idx - - EMIT_I; - } - - - - - /* - * VMOV: (register) - * - * VMOV { , } A.SIMD - * VMOV { , } VFP sz1 UNDEFINED in single only VFP - */ - - EAPI VMOV(eFQReg Qd, eFQReg Qm) // UNCONDITIONAL - { - DECL_Id(0xF2200110); - - I |= ((Qd&0x0F)<<12) | ((Qd&0x10)<<18); - I |= ((Qm&0x0F)<<16) | ((Qm&0x10)<<1); // If !Consistent(M:Qm) then its VORR - I |= ((Qm&0x0F)) | ((Qm&0x10)<<3); // If !Consistent(M:Qm) then its VORR - EMIT_I; - } - - EAPI VMOV(eFDReg Dd, eFDReg Dm) // UNCONDITIONAL - { - DECL_Id(0xF2200110); - - I |= ((Dd&0x0F)<<12) | ((Dd&0x10)<<18); - I |= ((Dm&0x0F)<<16) | ((Dm&0x10)<<3); // If !Consistent(M:Dm) then its VORR - I |= ((Dm&0x0F)) | ((Dm&0x10)<<1); // If !Consistent(M:Dm) then its VORR - EMIT_I; - } - - -// EAPI VMOV(eFDReg Dd, eFDReg Dm, ConditionCode CC=AL) {} VFP Double Version Not Implemented here for obvious reasons : same as below except would set SZ @bit8 : 0x0EB00B40 - - EAPI VMOV(eFSReg Sd, eFSReg Sm, ConditionCode CC=AL) - { - DECL_Id(0x0EB00A40); SET_CC; - - I |= ((Sd&0x1E)<<11) | ((Sd&1)<<22); - I |= ((Sm&0x1E)>>1) | ((Sm&1)<<5); - EMIT_I; - } - - - - /* - * VMOV: (Immediate) A.SIMD / VFP - * - */ - - //// TO BE IMPLEMENTED //// - - - - - /* - * VMOV: (ARM to scalar) A.SIMD / VFP IF Size=32 - * - */ - EAPI VMOV(eFDReg Dd, u32 Index, eReg Rt, u32 Size=32, ConditionCode CC=AL) - { - DECL_Id(0x0E000B10); SET_CC; - SET_Dd; SET_Rt; - // Dd[x] where x==Index Dd is 64b, 2x32[0,1](1bit) 4x16[0-3](2bits) 8x8[0-7](3bits) - if (Size== 8) { I |= (1<<22) | ((Index&4)<<18) | ((Index&3)<<5) ; } // x -> opc1:0, opc2 (3bits) | opc1:1 SET - if (Size==16) { I |= (1<<5) | ((Index&2)<<20) | ((Index&1<<6)) ; } // x -> opc1:0, opc2:1 (2bits) | opc2:0 SET - if (Size==32) { I |= ((Index&1)<<21) ; } // x -> opc1:0 (1bit) - EMIT_I; - } - - - - /* - * VMOV: (scalar to ARM) A.SIMD / VFP IF Size=32 - * - * Note: U (bit32) is unsigned bit, invalid for 32b and we do not handle it at all.. might want to set it for byte,short - */ - EAPI VMOV(eReg Rt, eFDReg Dd, u32 Index, u32 Size=32, ConditionCode CC=AL) // This is really Vn, but we'll use the same macros.. 
- { - DECL_Id(0x0E100B10); SET_CC; - SET_Dd; SET_Rt; - // Dd[x] where x==Index Dd is 64b, 2x32[0,1](1bit) 4x16[0-3](2bits) 8x8[0-7](3bits) - if (Size== 8) { I |= (1<<22) | ((Index&4)<<18) | ((Index&3)<<5) ; } // x -> opc1:0, opc2 (3bits) | opc1:1 SET - if (Size==16) { I |= (1<<5) | ((Index&2)<<20) | ((Index&1<<6)) ; } // x -> opc1:0, opc2:1 (2bits) | opc2:0 SET - if (Size==32) { I |= ((Index&1)<<21) ; } // x -> opc1:0 (1bit) - EMIT_I; - } - - - - - /* - * VMOV: (between ARM and single either direction) VFP - * - */ - EAPI VMOV(eReg Rt, eFSReg Sn, ConditionCode CC=AL) // Sn !d - { - DECL_Id(0x0E000A10); SET_CC; - SET_Sn; SET_Rt; I |= (1<<20); // op set = TO ARM reg - EMIT_I; - } - EAPI VMOV(eFSReg Sn, eReg Rt, ConditionCode CC=AL) // Sn !d - { - DECL_Id(0x0E000A10); SET_CC; - SET_Sn; SET_Rt; // op NOT set = TO FP Single reg - EMIT_I; - } - - - - - - /* - * VMOV: (between two ARM regs and two contiguous singles either direction) VFP - * - */ - EAPI VMOV(eReg Rt, eReg Rt2, eFSReg Sm, ConditionCode CC=AL) // Sn !d - { - DECL_Id(0x0E000A10); SET_CC; - SET_Sm; SET_Rtx2; I |= (1<<20); // op set = TO ARM regs - EMIT_I; - } - EAPI VMOV(eFSReg Sm, eReg Rt, eReg Rt2, ConditionCode CC=AL) // Sn !d - { - DECL_Id(0x0E000A10); SET_CC; - SET_Sm; SET_Rtx2; // op NOT set = TO FP Single(s) - EMIT_I; - } - - - - - - /* - * VMOV: (between two ARM regs and a Double) Dm <-> Rt2:Rt A.SIMD/VFP - * - */ - EAPI VMOV(eReg Rt, eReg Rt2, eFDReg Dm, ConditionCode CC=AL) // Sn !d - { - DECL_Id(0x0C400B10); SET_CC; - SET_Dm; SET_Rtx2; I |= (1<<20); // op set = TO ARM regs - EMIT_I; - } - EAPI VMOV(eFDReg Dm, eReg Rt, eReg Rt2, ConditionCode CC=AL) // Sn !d - { - DECL_Id(0x0C400B10); SET_CC; - SET_Dm; SET_Rtx2; // op NOT set = TO FP Single(s) - EMIT_I; - } - - - - - /* - * VMOVL: Takes each element in a VDouble && Sign or Zero extends into a VQuad A.SIMD - * - */ - EAPI VMOVL(eFQReg Qd, eFDReg Dm, u32 Size=32, u32 Sign=0) // UNCONDITIONAL Q & ~1 - { - Size >>= 3; // Sz/8 = 1,2 or 4 else if >0 its VSHLL - Sign = (Sign>0)?0:1; // Invert to Unsigned - DECL_Id(0xF2800A10); - - SET_Dm; - I |= ((Qd&0x0F)<<12); - I |= ((Qd&0x10)<<18); - I |= ((Size &7)<<19); // imm3 - I |= ((Sign &1)<<24); // U - EMIT_I; - } - - EAPI VMOVL_S8 (eFQReg Qd, eFDReg Dm) { VMOVL(Qd,Dm,8,1); } - EAPI VMOVL_U8 (eFQReg Qd, eFDReg Dm) { VMOVL(Qd,Dm,8,0); } - EAPI VMOVL_S16(eFQReg Qd, eFDReg Dm) { VMOVL(Qd,Dm,16,1); } - EAPI VMOVL_U16(eFQReg Qd, eFDReg Dm) { VMOVL(Qd,Dm,16,0); } - EAPI VMOVL_S32(eFQReg Qd, eFDReg Dm) { VMOVL(Qd,Dm,32,1); } - EAPI VMOVL_U32(eFQReg Qd, eFDReg Dm) { VMOVL(Qd,Dm,32,0); } - - - - - - /* - * VMOVN: Copies least significant half of each element of a VQuad into the elements of a VDouble A.SIMD - * - */ - EAPI VMOVN(eFDReg Dd, eFQReg Qm, u32 Size=32) // UNCONDITIONAL Q & ~1 - { - Size >>= 4; // Sz/32 = 0,1 or 2 - DECL_Id(0xF3B20200); - - SET_Qm; - I |= ((Dd&0x0F)<<12); - I |= ((Dd&0x10)<<18); - I |= ((Size &3)<<18); // size - EMIT_I; - } - - EAPI VMOVN16(eFDReg Dd, eFQReg Qm) { VMOVN(Dd,Qm,16); } - EAPI VMOVN32(eFDReg Dd, eFQReg Qm) { VMOVN(Dd,Qm,32); } - EAPI VMOVN64(eFDReg Dd, eFQReg Qm) { VMOVN(Dd,Qm,64); } - - - - - - - - /* - * VM{RS,SR} Move ARM reg To/From FPSCR A.SIMD/VFP - */ - EAPI VMRS(eReg Rt, ConditionCode CC=AL) - { - DECL_Id(0x0EF10A10); - SET_CC; SET_Rt; - EMIT_I; - } - EAPI VMSR(eReg Rt, ConditionCode CC=AL) - { - DECL_Id(0x0EE10A10); - SET_CC; SET_Rt; - EMIT_I; - } - - - - - - - - - - -#undef SET_Qd -#undef SET_Qm - -#undef SET_Dd -#undef SET_Dm - -#undef SET_Sn -#undef SET_Sm - -#undef SET_Rn -#undef SET_Rt 
-#undef SET_Rtx2 - - -}; \ No newline at end of file diff --git a/core/arm_emitter/H_Branches.h b/core/arm_emitter/H_Branches.h deleted file mode 100644 index 9a32385fd..000000000 --- a/core/arm_emitter/H_Branches.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * H_Branches.h - * - * - */ -#pragma once - - - - -namespace ARM -{ - - - - inline static ptrdiff_t Literal(unat FnAddr) - { - u8* pc_addr = (u8*)EMIT_GET_PTR(); - return (ptrdiff_t)((ptrdiff_t)FnAddr - ((ptrdiff_t)pc_addr+8)); - //return -(ptrdiff_t)((pc_addr+8)-(ptrdiff_t)FnAddr); - } - - EAPI CALL(unat FnAddr, ConditionCode CC=AL) - { - bool isThumb = FnAddr & 1; - FnAddr &= ~1; - ptrdiff_t lit = Literal(FnAddr); - - if(0==lit) { - printf("Error, Compiler caught NULL literal, CALL(%08zX)\n", FnAddr); - verify(false); - return; - } - if( (lit<-33554432) || (lit>33554428) ) // ..28 for BL ..30 for BLX - { - printf("Warning, CALL(%08zX) is out of range for literal(%08zX)\n", FnAddr, lit); - // verify(false); - - MOV32(IP, FnAddr, CC); - BLX(IP, CC); - return; - } - - if (isThumb) { - verify (CC==CC_EQ); - BLX(lit, isThumb); - } else { - BL(lit,CC); - } - } - - - - EAPI JUMP(unat FnAddr, ConditionCode CC=AL) - { - bool isThumb = FnAddr & 1; - FnAddr &= ~1; - - verify(!isThumb); - ptrdiff_t lit = Literal(FnAddr); - - /*if(0==lit) { - printf("Error, Compiler caught NULL literal, JUMP(%08X)\n", FnAddr); - verify(false); - return; - }*/ - if( (lit<-33554432) || (lit>33554428) ) // ..28 for BL ..30 for BLX - { - printf("Warning, %zX is out of range for imm jump! \n", FnAddr); - //verify(false); - - MOV32(IP, FnAddr, CC); - BX(IP, CC); - return; - } - B(lit,CC); // Note, wont work for THUMB*, have to use bx which is reg only ! - } - - - -} - - - - diff --git a/core/arm_emitter/H_LoadStore.h b/core/arm_emitter/H_LoadStore.h deleted file mode 100644 index f70baa807..000000000 --- a/core/arm_emitter/H_LoadStore.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * H_LoadStore.h - * - * - */ -#pragma once - - - -namespace ARM -{ - - /* - * Load Helpers - */ - - EAPI LoadImmBase(eReg Rt, u32 Base, ConditionCode CC=AL) - { - MOV32(Rt, Base, CC); - -#if defined(_DEVEL) - LDR(Rt,Rt,0, Offset, CC); -#else - LDR(Rt,Rt,0, CC); -#endif - } - - EAPI LoadImmBase(eReg Rt, eReg Rn, u32 Base, ConditionCode CC=AL) - { - MOV32(Rn, Base, CC); - -#if defined(_DEVEL) - LDR(Rt,Rn,0, Offset, CC); -#else - LDR(Rt,Rn,0, CC); -#endif - } - - EAPI LoadImmBase16(eReg Rt, u32 Base, bool Extend=false, ConditionCode CC=AL) - { - MOV32(Rt, Base, CC); - LDRH(Rt,Rt,0, CC); - - if(Extend) - SXTH(Rt,Rt); - } - - EAPI LoadImmBase16(eReg Rt, eReg Rn, u32 Base, bool Extend=false, ConditionCode CC=AL) - { - MOV32(Rn, Base, CC); - LDRH(Rt,Rn,0, CC); - - if(Extend) - SXTH(Rt,Rt); - } - - - - /* - * Store Helpers - */ - - // you pick regs, loads Base with reg addr, you supply data in Rt - EAPI StoreImmBase(eReg Rt, eReg Rn, u32 Base, ConditionCode CC=AL) - { - MOV32(Rn, Base, CC); - -#if defined(_DEVEL) - STR(Rt,Rn,0, Offset, CC); -#else - STR(Rt,Rn,0, CC); -#endif - } - - // you pick regs, loads Rt with const val, you supply base for Rn - EAPI StoreImmVal(eReg Rt, eReg Rn, u32 Val, ConditionCode CC=AL) - { - MOV32(Rt, Val, CC); - -#if defined(_DEVEL) - STR(Rt,Rn,0, Offset, CC); -#else - STR(Rt,Rn,0, CC); -#endif - } - - // you pick regs, loads Base with reg addr, loads Rt with const val - EAPI StoreImms(eReg Rt, eReg Rn, u32 Base, u32 Val, ConditionCode CC=AL) - { - MOV32(Rn, Base, CC); - MOV32(Rt, Val, CC); - -#if defined(_DEVEL) - STR(Rt,Rn,0, Offset, CC); -#else - STR(Rt,Rn,0, CC); -#endif - } - - - 
-#if defined(_DEVEL) && 0 // These require testing // - - EAPI LoadImmBase8(eReg Rt, u32 Base, bool Extend=false, ConditionCode CC=AL) - { - MOV32(Rt, Base, CC); - LDRB(Rt,Rt,0, CC); - - if(Extend) - SXTB(Rt,Rt); - } - - EAPI LoadImmBase8(eReg Rt, eReg Rn, u32 Base, bool Extend=false, ConditionCode CC=AL) - { - MOV32(Rn, Base, CC); - LDRB(Rt,Rn,0, CC); - - if(Extend) - SXTB(Rt,Rt); - } - -#endif // defined(_DEVEL) - - - -} - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/core/arm_emitter/H_fp.h b/core/arm_emitter/H_fp.h deleted file mode 100644 index e76339627..000000000 --- a/core/arm_emitter/H_fp.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * H_fp.h - * - * ARMv7 floating point help routines. - */ -#pragma once - - - /// WIP /// - - - -namespace ARM -{ - -#if defined(_DEVEL) - - - /* - * vfpVersion(): Returns VFP Arch. Version, or -1 if not supported - */ - int vfpVersion() - { - // FPSID bits [22:16] contain version - - return 0; - } - - int neonVersion() - { - // ?? - - return 0; - } - - -#define VFP_SINGLE (1<<0) -#define VFP_DOUBLE (1<<1) -#define NEON_INTEGER (1<<2) -#define NEON_SINGLE (1<<3) -#define VFP_TRAPS (1<<8) // VFPv3U -#define FPEXT_HALF (1<<16) // Half precision extension - - u32 fpFeatures() - { - - - return 0; - } - - -#endif // _DEVEL - - - - - -#if 0 - -Version 1 of the Common VFP subarchitecture has special behavior when the FPSCR.IXE bit is set to 1. - The Common VFP subarchitecture version can be identified by checking FPSID bits [22:16]. This field is - 0b0000001 for version 1. In version 1 of the Common VFP subarchitecture the FPEXC.DEX bit is RAZ/WI. - - -Detecting which VFP Common subarchitecture registers are implemented - An implementation can choose not to implement FPINST and FPINST2, if these registers are not required. - -Set FPEXC.EX=1 and FPEXC.FP2V=1 -Read back the FPEXC register -if FPEXC.EX == 0 then - Neither FPINST nor FPINST2 are implemented -else - if FPEXC.FP2V == 0 then - FPINST is implemented, FPINST2 is not implemented. - else - Both FPINST and FPINST2 are implemented. 
-Clean up - -#endif - - - - -} - - - - - - - - - - - - - - - - - - diff --git a/core/arm_emitter/H_psuedo.h b/core/arm_emitter/H_psuedo.h deleted file mode 100644 index 7ee53ecf7..000000000 --- a/core/arm_emitter/H_psuedo.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * H_psuedo.h - * - */ - -#pragma once - -namespace ARM -{ - - EAPI MOV32(eReg Rd, u32 Imm32, ConditionCode CC=AL) - { - MOVW(Rd,((Imm32)&0xFFFF),CC); - if (Imm32>>16) - MOVT(Rd,((Imm32>>16)&0xFFFF),CC); - } -#if 0 - EAPI NEG(eReg Rd,eReg Rs) - { - RSB(Rd,Rs,0); - } -#endif - EAPI NOT(eReg Rd,eReg Rs) - { - MVN(Rd,Rs); - } - - -} \ No newline at end of file diff --git a/core/arm_emitter/H_state.h b/core/arm_emitter/H_state.h deleted file mode 100644 index 4195baaf1..000000000 --- a/core/arm_emitter/H_state.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * H_state.h - * - */ -#pragma once - - -namespace ARM -{ - - enum InstructionSet { - IS_ARM, - IS_Thumb, - IS_Jazelle, - IS_ThumbEE - }; - - enum Endian { - E_Little, - E_Big - }; - - - - - - - - - - - - - - - -} - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/core/arm_emitter/arm_coding.h b/core/arm_emitter/arm_coding.h deleted file mode 100644 index f0cad69a3..000000000 --- a/core/arm_emitter/arm_coding.h +++ /dev/null @@ -1,254 +0,0 @@ -/* - * coding.h, arm binary instruction coding - * - */ -#pragma once - -namespace ARM -{ - - - /* - * Encoding Hi-Order 4bits, ConditionCode - * - */ - - enum ConditionCode - { - EQ = 0x00, Equal = EQ, // 0000 Equal Z set - NE = 0x01, NotEqual = NE, // 0001 Not equal Z clear - CS = 0x02, CarrySet = CS, // 0010 Carry set/unsigned higher or same C set - CC = 0x03, CarryClr = CC, // 0011 Carry clear/unsigned lowe C clear - MI = 0x04, Minus = MI, // 0100 Minus/negative N set - PL = 0x05, Plus = PL, // 0101 Plus/positive or zero N clear - VS = 0x06, Overflow = VS, // 0110 Overflow V set - VC = 0x07, NoOverflow = VC, // 0111 No overflow V clear - - HI = 0x08, UnHigher = HI, // 1000 Unsigned higher C set and Z clear - LS = 0x09, UnLower = LS, // 1001 Unsigned lower or same C clear or Z set - - GE = 0x0A, GrOrEqual = GE, // 1010 Signed greater than or equal N set and V set, or N clear and V clear (N == V) - LT = 0x0B, Less = LT, // 1011 Signed less than N set and V clear, or N clear and V set (N != V) - GT = 0x0C, Greater = GT, // 1100 Signed greater than Z clear, and either N set and V set, or N clear and V clear (Z == 0,N == V) - LE = 0x0D, LessOrEqual = LE, // 1101 Signed less than or equal Z set, or N set and V clear, or N clear and V set (Z == 1 or N != V) - - AL = 0x0E, Always = AL, // 1110 Always (unconditional) - - - - UC = 0x0F, Unconditional= UC, // 1111 Unconditional Special Instruction for ARMv5? 
and above - -#define _ARM_COMPAT -#if defined(_ARM_COMPAT) - CC_EQ=EQ, CC_NE=NE, CC_CS=CS, CC_CC=CC, CC_MI=MI, CC_PL=PL, CC_VS=VS, CC_VC=VC, - CC_HI=HI, CC_LS=LS, CC_GE=GE, CC_LT=LT, CC_GT=GT, CC_LE=LE, CC_AL=AL, CC_UC=UC, - - CC_HS=CS, CC_LO=CC, -#endif - - ConditionCode_Size - }; - - - - /* - * Data-processing OPCODE 4bits, DPOP - * - */ - - enum DPOP - { - DP_AND, // 0000 Logical AND Rd := Rn AND shifter_operand - DP_EOR, // 0001 Logical Exclusive OR Rd := Rn EOR shifter_operand - DP_SUB, // 0010 Subtract Rd := Rn - shifter_operand - DP_RSB, // 0011 Reverse Subtract Rd := shifter_operand - Rn - DP_ADD, // 0100 Add Rd := Rn + shifter_operand - DP_ADC, // 0101 Add with Carry Rd := Rn + shifter_operand + Carry Flag - DP_SBC, // 0110 Subtract with Carry Rd := Rn - shifter_operand - NOT(Carry Flag) - DP_RSC, // 0111 Reverse Subtract with Carry Rd := shifter_operand - Rn - NOT(Carry Flag) - DP_TST, // 1000 Test Update flags after Rn AND shifter_operand - DP_TEQ, // 1001 Test Equivalence Update flags after Rn EOR shifter_operand - DP_CMP, // 1010 Compare Update flags after Rn - shifter_operand - DP_CMN, // 1011 Compare Negated Update flags after Rn + shifter_operand - DP_ORR, // 1100 Logical (inclusive) OR Rd := Rn OR shifter_operand - DP_MOV, // 1101 Move Rd := shifter_operand (no first operand) - DP_BIC, // 1110 Bit Clear Rd := Rn AND NOT(shifter_operand) - DP_MVN // 1111 Move Not Rd := NOT shifter_operand (no first operand) - }; - - - - enum ShiftOp { - S_LSL, - S_LSR, - S_ASR, - S_ROR, - S_RRX=S_ROR - }; - - - /* - * eReg: ARM Register ID - * - */ - enum eReg - { - r0=0,r1, r2, r3, - r4, r5, r6, r7, - r8, r9, r10, r11, - r12, r13, r14, r15, - - R0=0,R1, R2, R3, - R4, R5, R6, R7, - R8, R9, R10, R11, - R12, R13, R14, R15, - - // Aliases - - a1 = r0, a2 = r1, a3 = r2, a4 = r3, - A1 = R0, A2 = R1, A3 = R2, A4 = R3, - - v1 = r4, v2 = r5, v3 = r6, v4 = r7, v5 = r8, v6 = r9, - V1 = R4, V2 = R5, V3 = R6, V4 = R7, V5 = R8, V6 = R9, - - rfp = r9, sl = r10, fp = r11, ip = r12, sp = r13, lr = r14, pc = r15, - RFP = R9, SL = R10, FP = R11, IP = R12, SP = R13, LR = R14, PC = R15, - }; - - - - /* - * eFQReg: Float [Quad] Register ID (A.SIMD) - * - */ - enum eFQReg - { - q0=0,q1, q2, q3, - q4, q5, q6, q7, - q8, q9, q10, q11, - q12, q13, q14, q15, - - Q0=0,Q1, Q2, Q3, - Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, - Q12, Q13, Q14, Q15 - }; - - - /* - * eFDReg: Float [Double] Register ID (VFP / A.SIMD) - * - */ - enum eFDReg - { - d0=0,d1, d2, d3, - d4, d5, d6, d7, - d8, d9, d10, d11, - d12, d13, d14, d15, - d16, d17, d18, d19, - d20, d21, d22, d23, - d24, d25, d26, d27, - d28, d29, d30, d31, - - D0=0,D1, D2, D3, - D4, D5, D6, D7, - D8, D9, D10, D11, - D12, D13, D14, D15, - D16, D17, D18, D19, - D20, D21, D22, D23, - D24, D25, D26, D27, - D28, D29, D30, D31 - }; - - - /* - * eFSReg: Float [Single] Register ID (VFP) - * - * Note: Using [f,F]regN syntax to avoid clash with s{8,16,32} types. 
- */ - enum eFSReg - { - f0=0,f1, f2, f3, - f4, f5, f6, f7, - f8, f9, f10, f11, - f12, f13, f14, f15, - f16, f17, f18, f19, - f20, f21, f22, f23, - f24, f25, f26, f27, - f28, f29, f30, f31, - - F0=0,F1, F2, F3, - F4, F5, F6, F7, - F8, F9, F10, F11, - F12, F13, F14, F15, - F16, F17, F18, F19, - F20, F21, F22, F23, - F24, F25, F26, F27, - F28, F29, F30, F31 - }; - - - - enum eFSpecialReg - { - - // VM** - - FPINST = 9, - FPINST2=10, - - FP_R_ERROR=0xFF - }; - - - enum ePushPopReg - { - _r0 =0x0001, _r1 =0x0002, _r2 =0x0004, _r3 =0x0008, - _r4 =0x0010, _r5 =0x0020, _r6 =0x0040, _r7 =0x0080, - _r8 =0x0100, _r9 =0x0200, _r10=0x0400, _r11=0x0800, - _r12=0x1000, _r13=0x2000, _r14=0x4000, _r15=0x8000, - - _a1 = _r0, _a2 = _r1, _a3 = _r2, _a4 = _r3, - _v1 = _r4, _v2 = _r5, _v3 = _r6, _v4 = _r7, - _v5 = _r8, _v6 = _r9, _rfp = _r9, _sl = _r10, - _fp = _r11, _ip = _r12, _sp = _r13, _lr = _r14, - _pc = _r15, - - _push_all = 0xFFFF, // Save All 15 - _push_call = _lr|_rfp, // Save lr && _rfb(cycle count) - - _push_eabi = _lr|_v1|_v2|_v3|_v4|_v5|_v6|_sl|_fp|_ip, // this is guesswork .. - }; - - - - /// WIP /// - - struct FPReg - { - union - { - u8 vQ8[16]; - u8 vD8[8]; - u8 vS8[4]; - - u16 vQ16[8]; // *VFP: If half-word extensions are enabled - u16 vD16[4]; // *VFP: If half-word extensions are enabled - u16 vS16[2]; // *VFP: If half-word extensions are enabled - - u32 vQ32[4]; - u32 vD32[2]; - u32 S; - - // u64 vQ64[2]; - // u64 D; - }; - - }; - - - - - - - -}; diff --git a/core/arm_emitter/arm_disasm.h b/core/arm_emitter/arm_disasm.h deleted file mode 100755 index e34dbda64..000000000 --- a/core/arm_emitter/arm_disasm.h +++ /dev/null @@ -1,242 +0,0 @@ -/* - * disasm.h, a horribly static yet (hopefully) correct disassembler - * - */ -#pragma once - -namespace ARM -{ - - - - inline static void armdis_cc(u32 cond, char *ccbuff) // Length is always 8 for our static'{n,m}ess - { - switch(cond) - { - case EQ: sprintf(ccbuff, "EQ"); return; - case NE: sprintf(ccbuff, "NE"); return; - case CS: sprintf(ccbuff, "CS"); return; - case CC: sprintf(ccbuff, "CC"); return; - case MI: sprintf(ccbuff, "MI"); return; - case PL: sprintf(ccbuff, "PL"); return; - case VS: sprintf(ccbuff, "VS"); return; - case VC: sprintf(ccbuff, "VC"); return; - - case HI: sprintf(ccbuff, "HI"); return; - case LS: sprintf(ccbuff, "LS"); return; - - case GE: sprintf(ccbuff, "GE"); return; - case LT: sprintf(ccbuff, "LT"); return; - case GT: sprintf(ccbuff, "GT"); return; - case LE: sprintf(ccbuff, "LE"); return; - - case AL: return; // sprintf(ccbuff, "AL"); -- ALways doesn't need to be specified - - case UC: // - default: return; // DIE - } - } - - - inline static void armdis_dp(u32 dpop, char *dpbuff) // Length is always 8 ... 
- { - switch(dpop) - { - case DP_AND: sprintf(dpbuff, "AND"); return; - case DP_EOR: sprintf(dpbuff, "EOR"); return; - case DP_SUB: sprintf(dpbuff, "SUB"); return; - case DP_RSB: sprintf(dpbuff, "RSB"); return; - case DP_ADD: sprintf(dpbuff, "ADD"); return; - case DP_ADC: sprintf(dpbuff, "ADC"); return; - case DP_SBC: sprintf(dpbuff, "SBC"); return; - case DP_RSC: sprintf(dpbuff, "RSC"); return; - case DP_TST: sprintf(dpbuff, "TST"); return; - case DP_TEQ: sprintf(dpbuff, "TEQ"); return; - case DP_CMP: sprintf(dpbuff, "CMP"); return; - case DP_CMN: sprintf(dpbuff, "CMN"); return; - case DP_ORR: sprintf(dpbuff, "ORR"); return; - case DP_MOV: sprintf(dpbuff, "MOV"); return; - case DP_BIC: sprintf(dpbuff, "BIC"); return; - case DP_MVN: sprintf(dpbuff, "MVN"); return; - } - } - - - - inline static void armdis(u32 op, char *disbuf, u32 len=512) - { - char ipref[8]={0}, isuff[8]={0}, icond[8]={0} ; - - - // u32 uOP = ((op>>12)&0xFF00) | ((op>>4)&255) ; - - u32 uCC = ((op>>28) & 0x0F) ; // - - u32 uO1 = ((op>>25) & 0x07) ; // - u32 uO2 = ((op>> 4) & 0x01) ; // - u32 uC1 = ((op>>21) & 0x0F) ; // - u32 uC2 = ((op>> 5) & 0x07) ; // - u32 uSB = ((op>>20) & 0x01) ; // Sign Change Bit - - - /* - if (uCC == UC) { - - printf ("DBG armdis has UC instruction %X\n", op); - sprintf (disbuf, "UNCONDITIONAL / UNHANDLED INSTRUCTION"); - return; - - } - - - if (uCC != AL) { - armdis_cc(uCC,isuff); - } - - - if (uO1 == 0) - { - - if (uO2 == 0) { - - if ((uC1 & 0xC) == 8) { - printf ("DBG armdis 0:0 10xx misc instruction \n", uCC); - sprintf (disbuf, "UNHANDLED INSTRUCTION 0:"); - return; - } - - // DP imm.shift - - - - } - - else if (uO2 == 1) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 0:"); - } - - } - else if (uO1 == 1) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 1:"); - } - else if (uO1 == 2) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 2:"); - } - else if (uO1 == 3) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 3:"); - } - else if (uO1 == 4) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 4:"); - } - else if (uO1 == 5) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 5:"); - } - else if (uO1 == 6) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 6:"); - } - else if (uO1 == 7) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 7:"); - } - else if (uO1 == 8) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 8:"); - } - else if (uO1 == 9) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 9:"); - } - else if (uO1 == 10) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 10:"); - } - else if (uO1 == 11) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 11:"); - } - else if (uO1 == 12) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 12:"); - } - else if (uO1 == 13) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 13:"); - } - else if (uO1 == 14) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 14:"); - } - else if (uO1 == 15) { - - sprintf (disbuf, "UNHANDLED INSTRUCTION 15:"); - } - else { - - sprintf (disbuf, "INVALID INSTRUCTION"); - } - */ - if (!uC1 && uO1==5) { - //B - char tmp[20]; - tmp[0]='\0'; - armdis_cc(uCC, tmp); - sprintf(disbuf, "B%s %08X", tmp, (op&0xffffff)<<2); - } else { - armdis_dp(uC1, disbuf); - char tmp[20]; - tmp[0]='\0'; - armdis_cc(uCC, tmp); - if (tmp[0]) { - strcat(disbuf, ".\0"); - strcat(disbuf, tmp); - } - if (uSB) strcat(disbuf, ".S\0"); - bool shifter=false; - switch (uO1) { - case 0: - // reg_reg - sprintf(tmp,"\tr%d, r%d", (op>>12)&0x0f, (op)&0x0f); - shifter=true; - break; - case 1: - // reg_imm - sprintf(tmp,"\tr%d, %04X", (op>>16)&0x0f, (op)&0xffff); - break; - default: - shifter=true; - sprintf(tmp, " 
0x%0X", uO1); - } - strcat(disbuf, tmp); - char* ShiftOpStr[]={"LSL","LSR","ASR","ROR"}; - u32 shiftop=(op>>5)&0x3; - u32 shiftoptype=(op>>4)&0x1; - u32 shiftopreg=(op>>8)&0xf; - u32 shiftopimm=(op>>7)&0x1f; - if (shifter) { - if (!shiftop && !shiftoptype && !shiftopimm) - { - //nothing - } else { - if ((shiftop==1) || (shiftop==2)) if (!shiftoptype) if (!shiftopimm) shiftopimm=32; - sprintf(tmp, " ,%s %s%d", ShiftOpStr[shiftop], (shiftoptype)?" r":" #", (shiftoptype)?shiftopreg:shiftopimm); - strcat(disbuf, tmp); - } - } - } - } - - - - - - - -}; - diff --git a/core/arm_emitter/arm_emitter.h b/core/arm_emitter/arm_emitter.h deleted file mode 100644 index 636cedf3d..000000000 --- a/core/arm_emitter/arm_emitter.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Emitter.h - * - * ARMv7 ISA Emitter for code generation. - * - * David Miller, 2011. - */ -#pragma once - - -#include "arm_coding.h" -#include "arm_registers.h" -//#include "arm_disasm.h" - - -namespace ARM -{ - - /* - * Emitter - Reserved for use w/ static members .. - * - * - */ - - class Emitter - { - - }; - - - - - - -#if defined(_DEBUG) || defined(DEBUG) - - #define EAPI static void - - #define DECL_I \ - u32 Instruction=0 - - #define DECL_Id(d) \ - u32 Instruction=(d) - -#else - -// #define _inlineExSVoidA __extension__ static __inline void __attribute__ ((__always_inline__)) - - #define EAPI \ - inline static void - - #define DECL_I \ - static u32 Instruction; \ - Instruction=0 - - #define DECL_Id(d) \ - static u32 Instruction; \ - Instruction=(d) - -#endif - - - /* - * TEMP - */ - - -#define I (Instruction) - -#define SET_CC I |= (((u32)CC&15)<<28) - -#ifndef EMIT_I -#define EMIT_I emit_Write32((I)); -#endif - -#ifndef EMIT_GET_PTR -#define EMIT_GET_PTR() emit_GetCCPtr() -#endif - -}; - - - - - - -/* - * ARM Core Instructions - */ - -#include "E_Branches.h" -#include "E_DataOp.h" -#include "E_Multiply.h" -#include "E_Parallel.h" -#include "E_Extend.h" -#include "E_Misc.h" -#include "E_Status.h" -#include "E_LoadStore.h" -#include "E_Special.h" - - -/* - * ARM VFP/A.SIMD Extension Instructions - */ - -#include "E_VLoadStore.h" -#include "E_VRegXfer.h" -#include "E_VDataOp.h" - - -/* - * Helper Routines & Psuedo-Instructions - */ - -#include "H_psuedo.h" - -#include "H_Branches.h" -#include "H_LoadStore.h" - -//#include "H_state.h" -//#include "H_fp.h" - - - - - - - diff --git a/core/arm_emitter/arm_registers.h b/core/arm_emitter/arm_registers.h deleted file mode 100644 index 5dc993a5d..000000000 --- a/core/arm_emitter/arm_registers.h +++ /dev/null @@ -1,487 +0,0 @@ -/* - * registers.h - * - * ARMv7-A system register(s). - */ -#pragma once - -namespace ARM -{ - -/************************************************************************************************* - * CP15 Registers for VMSA Implementation. 
[ref.DDI0406B B3.12] - *************************************************************************************************/ - /* - [NAME,] Description [Flags] - c0 { - 0 c0 { - 0 MIDR, Main ID RO - 1 CTR, Cache Type RO - 2 TCMTR, TCM Type RO, IMPL.DEFINED - 3 TLBTR, TLB Type RO, IMPL.DEFINED - 5 MPIDR, Multiprocessor Affinity RO - {4,6,7} MIDR$, Main ID Aliases RO - } - - 0 c[1-7] [0-7] CPUID ID_{PFRn,DFRn,AFR0,MMFRn,ISARn} RO - - 1 c0 0 CCSIDR, Cache Size ID RO - 1 c0 1 CLIDR, Cache Level ID RO - 1 c0 7 AIDR, Aux ID RO, IMPL.DEFINED - - 2 c0 0 CSSELR, Cache Size Selection RW - } - - c1 0 c0 [0-2] System Control RW - c1 0 c1 [0-2] Security Extension RW, IMPL.OPTIONAL - - c2 0 c0 [0-2] Translation Table Base RW - - c3 0 c0 0 DACR, Domain Access Control RW - - c5 0 c{0,1} {0,1} Fault Status RW - - c6 0 c0 {0,2] Fault Address RW - - c7 0 { - c0 4 NOP WO - c1 {0,6} Cache Maintenance operations, Multiprocessing Extensions WO - c4 0 PAR, Physical Address RW - c5 {0,1,6,7} Cache and branch predictor maintenance operations WO - c5 4 CP15ISB, Instruction barrier operation WO, USER - c6 {1,2} Cache Maintenance operations WO - c8 [0-7] VA to PA translation ops. WO - c10 {1,2} Cache management ops. WO - c10 {4,5} Data barrier ops. WO, USER - c11 1 DCCMVAU, Cache barrier ops. WO - c13 1 NOP WO - c14 {1,2} Cache management ops. WO - } - - c8 0 c{3,5,6,7} [0-3] TLB maintenance ops. * WO - - c9 [0-7] c{0,2,5,8} [0-7] Reserved for Branch Predictor, Cache and TCM ops. RSVD, OP.ACCESS - c9 [0-7] c[12-15] [0-7] Reserved for Performance monitors. RSVD, OP.ACCESS - - c10 0 c{0,1,4,8} [0-7] Reserved for TLB lockdown ops. RSVD, OP.ACCESS - c10 0 c2 {0,1} PRRR, NMRRR, TEX Remap RW - - c11 [0-7] c{0,8,15} [0-7] Reserved for DMA ops. TCM access. RSVD, OP.ACCESS - - c12 0 c0 {0,1} Security Extensions RW, IMPL.OPTIONAL - c12 0 c1 0 ISR, Security Extensions RO, IMPL.OPTIONAL - - c13 0 c0 0 FCSEIDR, FCSE PID RO-if-FCSE-!IMPL / RW? - c13 0 c0 [1-4] Software Thread and Context ID RW - - c15 * * * IMPLEMENTATION DEFINED IMPL.DEFINED - */ - - - -/************************************************************************************************* - * CP15 c0: ID codes registers - *************************************************************************************************/ - - - /* - * MIDR: Main ID Register - */ - struct MIDR - { - u32 Revision : 4; - u32 PriPartNum : 12; // IF Impl:ARM && PriPartNo top 4bits are 0 || 7: arch&variant encoding differs - u32 Architecture : 4; - u32 Variant : 4; - u32 Implementer : 8; - }; - - enum MIDR_Implementer - { - ARM_Ltd = 0x41, // 'A' - DigitalEquipment_Corp = 0x44, // 'D' - Motorola_FreescaleSemi_Inc = 0x4D, // 'M' - QUALCOMM_Inc = 0x51, // 'Q' - MarvellSemi_Inc = 0x56, // 'V' - Intel_Corp = 0x69, // 'i' - - TexasInstruments_Inc = 0xFF // 'T' ??? - }; - - enum MIDR_Arch - { - ARMv4 = 1, - ARMv4T = 2, - ARMv5 = 3, // obselete - ARMv5T = 4, - ARMv5TE = 5, - ARMv5TEJ = 6, - ARMv6 = 7, - CPUID_Defined = 15 - }; - - - - - /* - * CTR, Cache Type Register - */ - struct CTR - { - u32 IminLine : 4; - u32 SBZ : 10; - u32 L1Ip : 2; - u32 DminLine : 4; // - u32 ERG : 4; // Exclusives Reservation Granule. - u32 CWG : 4; // Cache Writeback Granule. 
- u32 RAZ : 1; - u32 REGFMT : 3; // Set to 0b100 for ARMv7 register format, or 0b000 for <=ARMv6 format - }; - - - - - /* - * TCMTR, TCM Type Register - */ - // High 3 bits is 0b100, the rest is IMPL.DEFINED - typedef u32 TCMTR; - - - /* - * TLBTR, TLB Type Register - */ - // Low bit is nU : SET:1: Not unified ( separate instruction and data TLBs ) - typedef u32 TLBTR; - - - /* - * MPIDR, Multiprocessor Affinity Register - */ - - struct MPIDR - { - u32 AffinityLevel0 : 8; - u32 AffinityLevel1 : 8; - u32 AffinityLevel2 : 8; - u32 MT : 1; - u32 RAZ : 5; // Reserved As Zero - u32 U : 1; // Set: Processor is part of a Uniprocessor system. - u32 MP_Impl : 1; // RAO if MP Extensions are implemented. - }; - - - /* - * CCSIDR, Cache Size ID Registers - */ - struct CCSIDR - { - u32 LineSize : 3; - u32 Associativity : 10; - u32 NumSets : 15; - u32 WA : 1; - u32 RA : 1; - u32 WB : 1; - u32 WT : 1; - }; - - - - /* - * CLIDR, Cache Level ID Register - */ - struct CLIDR - { - u32 Ctype1 : 3; - u32 Ctype2 : 3; - u32 Ctype3 : 3; - u32 Ctype4 : 3; - u32 Ctype5 : 3; - u32 Ctype6 : 3; - u32 Ctype7 : 3; - u32 LoUIS : 3; - u32 LoC : 3; - u32 LoUU : 3; - u32 RAZ : 2; // RAZ - }; - - - /* - * AIDR, Auxiliary ID Register. - */ - typedef u32 AIDR; // IMPLEMENTATION DEFINED - - - - /* - * CSSELR, Cache Size Selection Register - */ - struct CSSELR - { - u32 InD : 1; - u32 Level : 3; - u32 SBZP : 28; - }; - - - - - - - - - - - -/************************************************************************************************* - * CP15 c1: System control registers - *************************************************************************************************/ - - - - - // SCTRL, ACTLR ////////////////////// TODO /////////////////////// - - - - - /* - * CPACR: Coprocessor Access Control Register. - * - * Controls access to all coprocessors other than CP14 & CP15. - * It may be used to check for their presence by testing modification to cpN bits. - * - * Notes: - * - * D32DIS:1 && ASEDIS:0 is INVALID - * ASEDIS on hw { w. VFP & w.o A.SIMD } is RAO/WI, if bit is not supported it is RAZ/WI. - * - * When Security Extensions are enabled, NSACR controls CP access from non-secure state. - * - * VFP uses CP10 && CP11, the values of .cp10 && .cp11 should be the same. - */ - union CPACR - { - struct { - u32 cp0 : 2; // cpN [0-13]: - u32 cp1 : 2; // Defines access rights for individual coprocessors. - u32 cp2 : 2; // See CP_Access enum below for possible values; - u32 cp3 : 2; // - u32 cp4 : 2; // To test - u32 cp5 : 2; - u32 cp6 : 2; - u32 cp7 : 2; - u32 cp8 : 2; - u32 cp9 : 2; - u32 cp10 : 2; - u32 cp11 : 2; - u32 cp12 : 2; - u32 cp13 : 2; - u32 rsvcd: 2; // SBZP - u32 D32DIS:1; // SET: Disables use of D16-D32 of the VFP register file. - u32 ASEDIS:1; // SET: Disables all A.SIMD Instructions, VFPv3 shall remain valid. - }; - - u32 R; - }; - - /* - * CP_Access: Enumerates access rights for CPACR.cpN - * - */ - enum CP_Access - { - A_Deny, // Deny Access, Attempts to access cause Exception::Undefined_Instruction - A_Privileged, // Privileged Access, Attempts to access cause Exception::Undefined_Instruction in User mode. - A_Reserved, // Reserved Value, Use of this value is UNPREDICTABLE. - A_Full // Full Access, Access is defined by coprocessor. - }; - - - - - - - /* - * SCR: Secure Configuration Register. - * - * Requires: Security Extension. 
- */ - union SCR - { - struct { - u32 NS : 1; // - u32 IRQ : 1; // - u32 FIQ : 1; // - u32 EA : 1; // - u32 FW : 1; // - u32 AW : 1; // - u32 nET : 1; // - u32 SBZP:25; - }; - - u32 R; - }; - - - - - // SDER, Secure Debug Enable Register - - // NSACR, Non-Secure Access Control Register - - - - - - - - - - - - -/************************************************************************************************* - * CP15 c{2,3}: Memory protection and control registers - *************************************************************************************************/ - - - // TTBR0 TTVR1 TTVCR - // DACR, Domain Access Control Register - - - -/************************************************************************************************* - * CP15 c4: Not used - *************************************************************************************************/ - - - -/************************************************************************************************* - * CP15 c{5,6}: Memory system fault registers - *************************************************************************************************/ - - // DFSR, Data Fault Status Register - // IFSR, Instruction Fault Status Register - // ADFSR, Aux. DFSR - // AIFSR, Aux. IFSR - // DFAR, Data Fault Address Register - // IFAR, Instruction Fault Address Register - - - - -/************************************************************************************************* - * CP15 c7: Cache maintenance / misc - *************************************************************************************************/ - - - - - - -/************************************************************************************************* - ************************************************************************************************* - * A.SIMD and VFP extension system registers - ************************************************************************************************* - *************************************************************************************************/ - - enum FP_SysRegs - { - R_FPSID = 0, // 0b0000 - R_MVFR1 = 6, // 0b0110 - R_MVFR0 = 7, // 0b0111 - }; - - - // FPSID Floating Point System ID Register - // MVFR1 Media and VFP Feature Register 1 - // MVFR0 Media and VFP Feature Register 0 - - struct FPSID - { - u32 Revision : 4; // IMPL.DEFINED - u32 Variant : 4; // IMPL.DEFINED - u32 PartNumber : 8; // IMPL.DEFINED - u32 SubArch : 7; // MSB:1 when designed by ARM - u32 SW : 1; // Is a software impl. if set - u32 Implementer : 8; // Same as MIDR.Implementer - }; - - enum FP_SubArch - { - VFPv1 = 0, // Not Permitted in ARMv7 - VFPv2_Cv1, // Not Permitted in ARMv7 - VFPv3_Cv2, // - VFPv3_Null, // Full hardware, no trap - VFPv3_Cv3, // - }; - - - - // Floating-point status and control register - // - struct FPSCR - { - u32 IOC : 1; // * All * bits are cumulative exception bits - u32 DZC : 1; // * - u32 OFC : 1; // * - u32 UFC : 1; // * - u32 IXC : 1; // * - u32 SBZP1 : 2; // - u32 IDC : 1; // * - u32 IOE : 1; // ** All ** bits are FP trap enable bits - u32 DZE : 1; // ** only supported in VFPv2 && VFPv3U - u32 OFE : 1; // ** - RAZ elsewhere - - u32 UFE : 1; // ** - u32 IXE : 1; // ** - u32 SBZP2 : 2; // - u32 IDE : 1; // ** - u32 Len : 3; // SBZ for ARMv7 VFP, ignored for A.SIMD - u32 SBZP3 : 1; // - u32 Stride : 2; // SBZ for ARMv7 VFP, ignored for A.SIMD - u32 RMode : 2; // Rounding Mode - u32 FZ : 1; // Flush-to-Zero - u32 DN : 1; // Default NaN mode control - u32 AHP : 1; // Alt. 
Half-precision - u32 QC : 1; // Cumulative saturation, A.SIMD - u32 V : 1; // CC Overflow - u32 C : 1; // CC Carry - u32 Z : 1; // CC Zero - u32 N : 1; // CC Negative - }; - - enum FP_RoundingMode // A.SIMD Always uses RN ! - { - RN, // Round to Nearest - RP, // Round towards Plus Infinity - RM, // Round towards Minus Infinity - RZ // Round towards Zero - }; - - - struct MVFR0 - { - u32 A_SIMD : 4; - u32 Single : 4; - u32 Double : 4; - u32 Trap : 4; - u32 Divide : 4; - u32 Sqrt : 4; - u32 ShortVec: 4; - u32 Rounding: 4; - }; - - struct MVFR1 - { - u32 FtZ_mode : 4; - u32 D_NaN_mode : 4; - u32 NFP_LdStr : 4; - u32 NFP_int : 4; - u32 NFP_SPFP : 4; - u32 NFP_HPFP : 4; - u32 VFP_HPFP : 4; - u32 RAZ : 4; - - }; -}; \ No newline at end of file diff --git a/core/core.mk b/core/core.mk index 0f21dc347..b446023fa 100755 --- a/core/core.mk +++ b/core/core.mk @@ -4,29 +4,32 @@ RZDCY_SRC_DIR ?= $(call my-dir) VERSION_HEADER := $(RZDCY_SRC_DIR)/version.h RZDCY_MODULES := cfg/ hw/arm7/ hw/aica/ hw/holly/ hw/ hw/gdrom/ hw/maple/ \ - hw/mem/ hw/pvr/ hw/sh4/ hw/sh4/interpr/ hw/sh4/modules/ plugins/ profiler/ oslib/ \ - hw/extdev/ hw/arm/ hw/naomi/ imgread/ ./ deps/zlib/ deps/chdr/ deps/crypto/ \ - deps/libelf/ deps/chdpsr/ arm_emitter/ rend/ reios/ deps/xbrz/ \ - deps/imgui/ archive/ input/ log/ wsi/ network/ hw/bba/ debug/ + hw/mem/ hw/pvr/ hw/sh4/ hw/sh4/interpr/ hw/sh4/modules/ profiler/ oslib/ \ + hw/naomi/ imgread/ ./ deps/zlib/ deps/chdr/ \ + deps/libelf/ deps/chdpsr/ rend/ reios/ deps/xbrz/ \ + deps/imgui/ archive/ input/ log/ wsi/ network/ hw/bba/ debug/ \ + hw/modem/ deps/picotcp/modules/ deps/picotcp/stack/ -ifndef NOT_ARM - RZDCY_MODULES += rec-ARM/ -endif - -ifdef X86_REC - RZDCY_MODULES += rec-x86/ -endif - -ifdef X64_REC - RZDCY_MODULES += rec-x64/ -endif - -ifdef CPP_REC - RZDCY_MODULES += rec-cpp/ -endif - -ifdef ARM64_REC - RZDCY_MODULES += rec-ARM64/ deps/vixl/ deps/vixl/aarch64/ +ifndef NO_REC + ifndef NOT_ARM + RZDCY_MODULES += rec-ARM/ deps/vixl/ deps/vixl/aarch32/ + endif + + ifdef X86_REC + RZDCY_MODULES += rec-x86/ + endif + + ifdef X64_REC + RZDCY_MODULES += rec-x64/ + endif + + ifdef CPP_REC + RZDCY_MODULES += rec-cpp/ + endif + + ifdef ARM64_REC + RZDCY_MODULES += rec-ARM64/ deps/vixl/ deps/vixl/aarch64/ + endif endif ifndef NO_REND @@ -83,10 +86,9 @@ endif RZDCY_CFLAGS += -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/rend/gles -I$(RZDCY_SRC_DIR)/deps \ -I$(RZDCY_SRC_DIR)/deps/vixl -I$(RZDCY_SRC_DIR)/khronos -I$(RZDCY_SRC_DIR)/deps/glslang \ - -I$(RZDCY_SRC_DIR)/deps/glm -I$(RZDCY_SRC_DIR)/deps/xbyak -I$(RZDCY_SRC_DIR)/deps/nowide/include + -I$(RZDCY_SRC_DIR)/deps/glm -I$(RZDCY_SRC_DIR)/deps/xbyak -I$(RZDCY_SRC_DIR)/deps/nowide/include \ + -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules -RZDCY_CFLAGS += -I$(RZDCY_SRC_DIR)/deps/picotcp/include -I$(RZDCY_SRC_DIR)/deps/picotcp/modules -RZDCY_MODULES += hw/modem/ deps/picotcp/modules/ deps/picotcp/stack/ ifdef USE_SYSTEM_MINIUPNPC RZDCY_CFLAGS += -I/usr/include/miniupnpc else @@ -118,8 +120,6 @@ endif RZDCY_CFLAGS += -DZ_HAVE_UNISTD_H -I$(RZDCY_SRC_DIR)/deps/zlib RZDCY_CFLAGS += -DXXH_INLINE_ALL -I$(RZDCY_SRC_DIR)/deps/xxHash -I$(RZDCY_SRC_DIR)/deps/stb -RZDCY_CXXFLAGS := $(RZDCY_CFLAGS) -fno-rtti -std=c++11 - RZDCY_FILES += $(foreach dir,$(addprefix $(RZDCY_SRC_DIR)/,$(RZDCY_MODULES)),$(wildcard $(dir)*.cpp)) RZDCY_FILES += $(foreach dir,$(addprefix $(RZDCY_SRC_DIR)/,$(RZDCY_MODULES)),$(wildcard $(dir)*.cc)) RZDCY_FILES += $(foreach dir,$(addprefix $(RZDCY_SRC_DIR)/,$(RZDCY_MODULES)),$(wildcard $(dir)*.c)) 
diff --git a/core/deps/vixl/code-generation-scopes-vixl.h b/core/deps/vixl/code-generation-scopes-vixl.h index b7ea2d92b..5920e240f 100644 --- a/core/deps/vixl/code-generation-scopes-vixl.h +++ b/core/deps/vixl/code-generation-scopes-vixl.h @@ -89,7 +89,7 @@ class CodeBufferCheckScope { VIXL_ASSERT(!initialised_); VIXL_ASSERT(assembler != NULL); assembler_ = assembler; - if (check_policy == kReserveBufferSpace) { + if (check_policy == kReserveBufferSpace && assembler->GetBuffer()->IsManaged()) { assembler->GetBuffer()->EnsureSpaceFor(size); } #ifdef VIXL_DEBUG diff --git a/core/deps/vixl/platform-vixl.h b/core/deps/vixl/platform-vixl.h index 65f90ee3c..ca0560ee5 100644 --- a/core/deps/vixl/platform-vixl.h +++ b/core/deps/vixl/platform-vixl.h @@ -32,6 +32,7 @@ extern "C" { #include } +#define VIXL_INCLUDE_TARGET_A32 #define VIXL_CODE_BUFFER_MALLOC //#define VIXL_DEBUG diff --git a/core/hw/arm7/arm7_rec_arm32.cpp b/core/hw/arm7/arm7_rec_arm32.cpp index 52d23729e..90dcc17b9 100644 --- a/core/hw/arm7/arm7_rec_arm32.cpp +++ b/core/hw/arm7/arm7_rec_arm32.cpp @@ -23,31 +23,39 @@ #include "arm7_rec.h" #include "hw/mem/_vmem.h" -#define _DEVEL 1 -#define EMIT_I aicaarm::armEmit32(I) -#define EMIT_GET_PTR() aicaarm::recompiler::currentCode() -namespace aicaarm { - static void armEmit32(u32 emit32); -} -#include "arm_emitter/arm_emitter.h" -#undef I -using namespace ARM; +#include +using namespace vixl::aarch32; namespace aicaarm { +class Arm32Assembler : public MacroAssembler +{ +public: + Arm32Assembler() = default; + Arm32Assembler(u8 *buffer, size_t size) : MacroAssembler(buffer, size, A32) {} + + void Finalize() { + FinalizeCode(); + vmem_platform_flush_cache(GetBuffer()->GetStartAddress(), GetCursorAddress() - 1, + GetBuffer()->GetStartAddress(), GetCursorAddress() - 1); + } +}; + +static Arm32Assembler ass; + static void (*arm_dispatch)(); -static void loadReg(eReg host_reg, Arm7Reg guest_reg, ArmOp::Condition cc = ArmOp::AL) +static void loadReg(Register host_reg, Arm7Reg guest_reg, ConditionType cc = al) { - LDR(host_reg, r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I, ARM::Offset, (ARM::ConditionCode)cc); + ass.Ldr(cc, host_reg, MemOperand(r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I)); } -static void storeReg(eReg host_reg, Arm7Reg guest_reg, ArmOp::Condition cc = ArmOp::AL) +static void storeReg(Register host_reg, Arm7Reg guest_reg, ConditionType cc = al) { - STR(host_reg, r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I, ARM::Offset, (ARM::ConditionCode)cc); + ass.Str(cc, host_reg, MemOperand(r8, (u8*)&arm_Reg[guest_reg].I - (u8*)&arm_Reg[0].I)); } -static const std::array alloc_regs{ +const std::array alloc_regs{ r5, r6, r7, r9, r10, r11 }; @@ -57,19 +65,16 @@ class Arm32ArmRegAlloc : public ArmRegAlloc void LoadReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL) { - // printf("LoadReg R%d <- r%d\n", host_reg, armreg); - loadReg(getReg(host_reg), armreg, cc); + loadReg(getReg(host_reg), armreg, (ConditionType)cc); } void StoreReg(int host_reg, Arm7Reg armreg, ArmOp::Condition cc = ArmOp::AL) { - // printf("StoreReg R%d -> r%d\n", host_reg, armreg); - storeReg(getReg(host_reg), armreg, cc); + storeReg(getReg(host_reg), armreg, (ConditionType)cc); } - static eReg getReg(int i) + static Register getReg(int i) { - verify(i >= 0 && (u32)i < alloc_regs.size()); return alloc_regs[i]; } @@ -77,7 +82,7 @@ public: Arm32ArmRegAlloc(const std::vector& block_ops) : super(block_ops) {} - eReg map(Arm7Reg r) + Register map(Arm7Reg r) { int i = super::map(r); return 
getReg(i); @@ -86,18 +91,6 @@ public: friend super; }; -static void armEmit32(u32 emit32) -{ - if (recompiler::spaceLeft() <= 1024) - { - ERROR_LOG(AICA_ARM, "JIT buffer full: %d bytes free", recompiler::spaceLeft()); - die("AICA ARM code buffer full"); - } - - *(u32 *)recompiler::currentCode() = emit32; - recompiler::advance(4); -} - static Arm32ArmRegAlloc *regalloc; static void loadFlags() @@ -105,158 +98,117 @@ static void loadFlags() //Load flags loadReg(r3, RN_PSR_FLAGS); //move them to flags register - MSR(0, 8, r3); + ass.Msr(APSR_nzcvq, r3); } static void storeFlags() { //get results from flags register - MRS(r3, 0); + ass.Mrs(r3, APSR); //Store flags storeReg(r3, RN_PSR_FLAGS); } -static u32 *startConditional(ArmOp::Condition cc) +static Label *startConditional(ArmOp::Condition cc) { if (cc == ArmOp::AL) return nullptr; - verify(cc <= ArmOp::LE); - ARM::ConditionCode condition = (ARM::ConditionCode)((u32)cc ^ 1); - u32 *code = (u32 *)recompiler::currentCode(); - JUMP((u32)code, condition); + ConditionType condition = (ConditionType)((u32)cc ^ 1); + Label *label = new Label(); + ass.B(condition, label); - return code; + return label; } -static void endConditional(u32 *pos) +static void endConditional(Label *label) { - if (pos != nullptr) + if (label != nullptr) { - u32 *curpos = (u32 *)recompiler::currentCode(); - ARM::ConditionCode condition = (ARM::ConditionCode)(*pos >> 28); - recompiler::icPtr = (u8 *)pos; - JUMP((u32)curpos, condition); - recompiler::icPtr = (u8 *)curpos; + ass.Bind(label); + delete label; } } -static eReg getOperand(ArmOp::Operand arg, eReg scratch_reg) +static Operand getOperand(const ArmOp::Operand& arg) { + Register reg; if (arg.isNone()) - return (eReg)-1; - else if (arg.isImmediate()) - { - if (is_i8r4(arg.getImmediate())) - MOV(scratch_reg, arg.getImmediate()); - else - MOV32(scratch_reg, arg.getImmediate()); - } - else if (arg.isReg()) + return reg; + if (arg.isImmediate()) { if (!arg.isShifted()) - return regalloc->map(arg.getReg().armreg); - MOV(scratch_reg, regalloc->map(arg.getReg().armreg)); + return Operand(arg.getImmediate()); + // Used by pc-rel ops: pc is immediate but can be shifted by reg (or even imm if op sets flags) + ass.Mov(r1, arg.getImmediate()); + reg = r1; + } + else if (arg.isReg()) + reg = regalloc->map(arg.getReg().armreg); + + if (arg.isShifted()) + { + if (!arg.shift_imm) + { + // Shift by register + Register shift_reg = regalloc->map(arg.shift_reg.armreg); + return Operand(reg, (ShiftType)arg.shift_type, shift_reg); + } + else + { + // Shift by immediate + if (arg.shift_value != 0 || arg.shift_type != ArmOp::LSL) // LSL 0 is a no-op + { + if (arg.shift_value == 0 && arg.shift_type == ArmOp::ROR) + return Operand(reg, RRX); + else + { + u32 shiftValue = arg.shift_value; + if (shiftValue == 0 && (arg.shift_type == ArmOp::LSR || arg.shift_type == ArmOp::ASR)) + shiftValue = 32; + return Operand(reg, (ShiftType)arg.shift_type, shiftValue); + } + } + } } - if (!arg.shift_imm) - { - // Shift by register - eReg shift_reg = regalloc->map(arg.shift_reg.armreg); - MOV(scratch_reg, scratch_reg, (ARM::ShiftOp)arg.shift_type, shift_reg); - } - else - { - // Shift by immediate - if (arg.shift_value != 0 || arg.shift_type != ArmOp::LSL) // LSL 0 is a no-op - MOV(scratch_reg, scratch_reg, (ARM::ShiftOp)arg.shift_type, arg.shift_value); - } + return reg; +} + +static Register loadOperand(const ArmOp::Operand& arg, Register scratch_reg) +{ + Operand operand = getOperand(arg); + if (operand.IsPlainRegister()) + return operand.GetBaseRegister(); 
+ ass.Mov(scratch_reg, operand); return scratch_reg; } -template +template void emit3ArgOp(const ArmOp& op) { - eReg rn; - const ArmOp::Operand *op2; - if (op.op_type != ArmOp::MOV && op.op_type != ArmOp::MVN) - { - rn = getOperand(op.arg[0], r2); - op2 = &op.arg[1]; - } - else - op2 = &op.arg[0]; - - eReg rd = regalloc->map(op.rd.getReg().armreg); - bool set_flags = op.flags & ArmOp::OP_SETS_FLAGS; - eReg rm; - if (op2->isImmediate()) - { - if (is_i8r4(op2->getImmediate()) && op2->shift_imm) - { - OpImmediate(rd, rn, op2->getImmediate(), set_flags, CC_AL); - return; - } - MOV32(r0, op2->getImmediate()); - rm = r0; - } - else if (op2->isReg()) - rm = regalloc->map(op2->getReg().armreg); - - if (op2->shift_imm) - OpShiftImm(rd, rn, rm, (ShiftOp)op2->shift_type, op2->shift_value, set_flags, CC_AL); - else - { - // Shift by reg - eReg shift_reg = regalloc->map(op2->shift_reg.armreg); - OpShiftReg(rd, rn, rm, (ShiftOp)op2->shift_type, shift_reg, set_flags, CC_AL); - } + Register rd = regalloc->map(op.rd.getReg().armreg); + Register rn = loadOperand(op.arg[0], r2); + Operand operand = getOperand(op.arg[1]); + (ass.*Op)((FlagsUpdate)set_flags, al, rd, rn, operand); } -template +template void emit2ArgOp(const ArmOp& op) { - // Used for rd (MOV, MVN) and rn (CMP, TST, ...) - eReg rd; - const ArmOp::Operand *op2; - if (op.op_type != ArmOp::MOV && op.op_type != ArmOp::MVN) - { - rd = getOperand(op.arg[0], r2); - op2 = &op.arg[1]; - } - else { - op2 = &op.arg[0]; - rd = regalloc->map(op.rd.getReg().armreg); - } - bool set_flags = op.flags & ArmOp::OP_SETS_FLAGS; - eReg rm; - if (op2->isImmediate()) - { - if (is_i8r4(op2->getImmediate()) && op2->shift_imm) - { - OpImmediate(rd, op2->getImmediate(), set_flags, CC_AL); - return; - } - MOV32(r0, op2->getImmediate()); - rm = r0; - } - else if (op2->isReg()) - rm = regalloc->map(op2->getReg().armreg); + Register rd = regalloc->map(op.rd.getReg().armreg); + Operand operand = getOperand(op.arg[0]); + (ass.*Op)((FlagsUpdate)set_flags, al, rd, operand); +} - if (op2->shift_imm) - OpShiftImm(rd, rm, (ShiftOp)op2->shift_type, op2->shift_value, set_flags, CC_AL); - else - { - // Shift by reg - eReg shift_reg = regalloc->map(op2->shift_reg.armreg); - OpShiftReg(rd, rm, (ShiftOp)op2->shift_type, shift_reg, set_flags, CC_AL); - } +template +void emitTestOp(const ArmOp& op) +{ + Register rn = loadOperand(op.arg[0], r2); + Operand operand = getOperand(op.arg[1]); + (ass.*Op)(al, rn, operand); } static void emitDataProcOp(const ArmOp& op) @@ -264,52 +216,52 @@ static void emitDataProcOp(const ArmOp& op) switch (op.op_type) { case ArmOp::AND: - emit3ArgOp<&AND, &AND, &AND>(op); + emit3ArgOp<&MacroAssembler::And>(op); break; case ArmOp::EOR: - emit3ArgOp<&EOR, &EOR, &EOR>(op); + emit3ArgOp<&MacroAssembler::Eor>(op); break; case ArmOp::SUB: - emit3ArgOp<&SUB, &SUB, &SUB>(op); + emit3ArgOp<&MacroAssembler::Sub>(op); break; case ArmOp::RSB: - emit3ArgOp<&RSB, &RSB, &RSB>(op); + emit3ArgOp<&MacroAssembler::Rsb>(op); break; case ArmOp::ADD: - emit3ArgOp<&ADD, &ADD, &ADD>(op); + emit3ArgOp<&MacroAssembler::Add>(op); break; case ArmOp::ORR: - emit3ArgOp<&ORR, &ORR, &ORR>(op); + emit3ArgOp<&MacroAssembler::Orr>(op); break; case ArmOp::BIC: - emit3ArgOp<&BIC, &BIC, &BIC>(op); + emit3ArgOp<&MacroAssembler::Bic>(op); break; case ArmOp::ADC: - emit3ArgOp<&ADC, &ADC, &ADC>(op); + emit3ArgOp<&MacroAssembler::Adc>(op); break; case ArmOp::SBC: - emit3ArgOp<&SBC, &SBC, &SBC>(op); + emit3ArgOp<&MacroAssembler::Sbc>(op); break; case ArmOp::RSC: - emit3ArgOp<&RSC, &RSC, &RSC>(op); + 
emit3ArgOp<&MacroAssembler::Rsc>(op); break; case ArmOp::TST: - emit2ArgOp<&TST, &TST, &TST>(op); + emitTestOp<&MacroAssembler::Tst>(op); break; case ArmOp::TEQ: - emit2ArgOp<&TEQ, &TEQ, &TEQ>(op); + emitTestOp<&MacroAssembler::Teq>(op); break; case ArmOp::CMP: - emit2ArgOp<&CMP, &CMP, &CMP>(op); + emitTestOp<&MacroAssembler::Cmp>(op); break; case ArmOp::CMN: - emit2ArgOp<&CMN, &CMN, &CMN>(op); + emitTestOp<&MacroAssembler::Cmn>(op); break; case ArmOp::MOV: - emit2ArgOp<&MOV, &MOV, &MOV>(op); + emit2ArgOp<&MacroAssembler::Mov>(op); break; case ArmOp::MVN: - emit2ArgOp<&MVN, &MVN, &MVN>(op); + emit2ArgOp<&MacroAssembler::Mvn>(op); break; default: die("invalid op"); @@ -317,85 +269,91 @@ static void emitDataProcOp(const ArmOp& op) } } -static void call(u32 addr, ARM::ConditionCode cc = ARM::CC_AL) +static void jump(const void *code) { - storeFlags(); - CALL(addr, cc); - loadFlags(); + ptrdiff_t offset = reinterpret_cast(code) - ass.GetBuffer()->GetStartAddress(); + Label code_label(offset); + ass.B(&code_label); +} + +static void call(const void *code, bool saveFlags = true) +{ + if (saveFlags) + storeFlags(); + ptrdiff_t offset = reinterpret_cast(code) - ass.GetBuffer()->GetStartAddress(); + Label code_label(offset); + ass.Bl(&code_label); + if (saveFlags) + loadFlags(); } static void emitMemOp(const ArmOp& op) { - eReg addr_reg = getOperand(op.arg[0], r2); + Register addr_reg = loadOperand(op.arg[0], r2); if (op.pre_index) { const ArmOp::Operand& offset = op.arg[1]; if (offset.isReg()) { - eReg offset_reg = getOperand(offset, r3); + Register offset_reg = loadOperand(offset, r3); if (op.add_offset) - ADD(r0, addr_reg, offset_reg); + ass.Add(r0, addr_reg, offset_reg); else - SUB(r0, addr_reg, offset_reg); + ass.Sub(r0, addr_reg, offset_reg); addr_reg = r0; } else if (offset.isImmediate() && offset.getImmediate() != 0) { - if (is_i8r4(offset.getImmediate())) + if (ImmediateA32::IsImmediateA32(offset.getImmediate())) { if (op.add_offset) - ADD(r0, addr_reg, offset.getImmediate()); + ass.Add(r0, addr_reg, offset.getImmediate()); else - SUB(r0, addr_reg, offset.getImmediate()); + ass.Sub(r0, addr_reg, offset.getImmediate()); } else { - MOV32(r0, offset.getImmediate()); + ass.Mov(r0, offset.getImmediate()); if (op.add_offset) - ADD(r0, addr_reg, r0); + ass.Add(r0, addr_reg, r0); else - SUB(r0, addr_reg, r0); + ass.Sub(r0, addr_reg, r0); } addr_reg = r0; } } - if (addr_reg != r0) - MOV(r0, addr_reg); + if (!addr_reg.Is(r0)) + ass.Mov(r0, addr_reg); if (op.op_type == ArmOp::STR) { if (op.arg[2].isImmediate()) - { - if (is_i8r4(op.arg[2].getImmediate())) - MOV(r1, op.arg[2].getImmediate()); - else - MOV32(r1, op.arg[2].getImmediate()); - } + ass.Mov(r1, op.arg[2].getImmediate()); else - MOV(r1, regalloc->map(op.arg[2].getReg().armreg)); + ass.Mov(r1, regalloc->map(op.arg[2].getReg().armreg)); } - call((u32)recompiler::getMemOp(op.op_type == ArmOp::LDR, op.byte_xfer)); + call(recompiler::getMemOp(op.op_type == ArmOp::LDR, op.byte_xfer)); if (op.op_type == ArmOp::LDR) - MOV(regalloc->map(op.rd.getReg().armreg), r0); + ass.Mov(regalloc->map(op.rd.getReg().armreg), r0); } static void emitBranch(const ArmOp& op) { if (op.arg[0].isImmediate()) - MOV32(r0, op.arg[0].getImmediate()); + ass.Mov(r0, op.arg[0].getImmediate()); else { - MOV(r0, regalloc->map(op.arg[0].getReg().armreg)); - BIC(r0, r0, 3); + ass.Mov(r0, regalloc->map(op.arg[0].getReg().armreg)); + ass.Bic(r0, r0, 3); } storeReg(r0, R15_ARM_NEXT); } static void emitMRS(const ArmOp& op) { - call((u32)CPUUpdateCPSR); + call((void *)CPUUpdateCPSR); 
if (op.spsr) loadReg(regalloc->map(op.rd.getReg().armreg), RN_SPSR); @@ -406,34 +364,34 @@ static void emitMRS(const ArmOp& op) static void emitMSR(const ArmOp& op) { if (op.arg[0].isImmediate()) - MOV32(r0, op.arg[0].getImmediate()); + ass.Mov(r0, op.arg[0].getImmediate()); else - MOV(r0, regalloc->map(op.arg[0].getReg().armreg)); + ass.Mov(r0, regalloc->map(op.arg[0].getReg().armreg)); if (op.spsr) - call((u32)recompiler::MSR_do<1>); + call((void *)recompiler::MSR_do<1>); else - call((u32)recompiler::MSR_do<0>); + call((void *)recompiler::MSR_do<0>); } static void emitFallback(const ArmOp& op) { //Call interpreter - MOV32(r0, op.arg[0].getImmediate()); - call((u32)recompiler::interpret); + ass.Mov(r0, op.arg[0].getImmediate()); + call((void *)recompiler::interpret); } void arm7backend_compile(const std::vector& block_ops, u32 cycles) { - void *codestart = recompiler::currentCode(); + ass = Arm32Assembler((u8 *)recompiler::currentCode(), recompiler::spaceLeft()); loadReg(r2, CYCL_CNT); - while (!is_i8r4(cycles)) + while (!ImmediateA32::IsImmediateA32(cycles)) { - SUB(r2, r2, 256); + ass.Sub(r2, r2, 256); cycles -= 256; } - SUB(r2, r2, cycles); + ass.Sub(r2, r2, cycles); storeReg(r2, CYCL_CNT); regalloc = new Arm32ArmRegAlloc(block_ops); @@ -445,10 +403,10 @@ void arm7backend_compile(const std::vector& block_ops, u32 cycles) const ArmOp& op = block_ops[i]; DEBUG_LOG(AICA_ARM, "-> %s", op.toString().c_str()); - u32 *condPos = nullptr; + Label *condLabel = nullptr; if (op.op_type != ArmOp::FALLBACK) - condPos = startConditional(op.condition); + condLabel = startConditional(op.condition); regalloc->load(i); @@ -472,14 +430,14 @@ void arm7backend_compile(const std::vector& block_ops, u32 cycles) regalloc->store(i); - endConditional(condPos); + endConditional(condLabel); } storeFlags(); - JUMP((uintptr_t)arm_dispatch); + jump((void *)arm_dispatch); - vmem_platform_flush_cache(codestart, (u8*)recompiler::currentCode() - 1, - codestart, (u8*)recompiler::currentCode() - 1); + ass.Finalize(); + recompiler::advance(ass.GetBuffer()->GetSizeInBytes()); delete regalloc; regalloc = nullptr; @@ -493,65 +451,53 @@ void arm7backend_flush() verify(arm_compilecode != nullptr); return; } - void *codestart = recompiler::currentCode(); - uintptr_t arm_exit = (uintptr_t)codestart; - uintptr_t arm_dofiq = (uintptr_t)codestart; + ass = Arm32Assembler((u8 *)recompiler::currentCode(), recompiler::spaceLeft()); + Label arm_exit; + Label arm_dofiq; // arm_mainloop: - arm_mainloop = (arm_mainloop_t)codestart; - u32 regList = (1 << r4) | (1 << r5) | (1 << r6) | (1 << r7) - | (1 << r8) | (1 << r9) | (1 << r10) | (1 << r11) | (1 << lr); - PUSH(regList); - SUB(sp, sp, 4); // 8-byte stack alignment + arm_mainloop = ass.GetCursorAddress(); + RegisterList regList = RegisterList::Union( + RegisterList(r4, r5, r6, r7), + RegisterList(r8, r9, r10, r11), + RegisterList(lr)); + ass.Push(regList); + ass.Sub(sp, sp, 4); // 8-byte stack alignment - MOV(r8, r0); // load regs - MOV(r4, r1); // load entry points + ass.Mov(r8, r0); // load regs + ass.Mov(r4, r1); // load entry points // arm_dispatch: - arm_dispatch = (void (*)())recompiler::currentCode(); + arm_dispatch = ass.GetCursorAddress(); loadReg(r3, CYCL_CNT); // load cycle counter loadReg(r0, R15_ARM_NEXT); // load Next PC loadReg(r1, INTR_PEND); // load Interrupt - CMP(r3, 0); - u8 *exit_fixup = (u8 *)recompiler::currentCode(); - JUMP(arm_exit, CC_LE); // exit if counter <= 0 - UBFX(r2, r0, 2, 21); // assuming 8 MB address space max (23 bits) - CMP(r1, 0); - u8 *dofiq_fixup = 
(u8 *)recompiler::currentCode(); - JUMP(arm_dofiq, CC_NE); // if interrupt pending, handle it + ass.Cmp(r3, 0); + ass.B(le, &arm_exit); // exit if counter <= 0 + ass.Ubfx(r2, r0, 2, 21); // assuming 8 MB address space max (23 bits) + ass.Cmp(r1, 0); + ass.B(ne, &arm_dofiq); // if interrupt pending, handle it - LDR(pc, r4, r2, AddrMode::Offset, true, ShiftOp::S_LSL, 2); + ass.Ldr(pc, MemOperand(r4, r2, LSL, 2)); // arm_dofiq: - arm_dofiq = (uintptr_t)recompiler::currentCode(); - // fix up - u8 *icptr_save = (u8 *)recompiler::currentCode(); - recompiler::icPtr = dofiq_fixup; - JUMP(arm_dofiq, CC_NE); - recompiler::icPtr = icptr_save; - // end fix up - CALL((uintptr_t)CPUFiq); - JUMP((uintptr_t)arm_dispatch); + ass.Bind(&arm_dofiq); + call((void *)CPUFiq, false); + jump((void *)arm_dispatch); // arm_exit: - arm_exit = (uintptr_t)recompiler::currentCode(); - // fix up - icptr_save = (u8 *)recompiler::currentCode(); - recompiler::icPtr = exit_fixup; - JUMP(arm_exit, CC_LE); - recompiler::icPtr = icptr_save; - // end fix up - ADD(sp, sp, 4); - POP(regList); - MOV(pc, lr); + ass.Bind(&arm_exit); + ass.Add(sp, sp, 4); + ass.Pop(regList); + ass.Mov(pc, lr); // arm_compilecode: - arm_compilecode = (void (*)())recompiler::currentCode(); - CALL((uintptr_t)recompiler::compile); - JUMP((uintptr_t)arm_dispatch); + arm_compilecode = ass.GetCursorAddress(); + call((void *)recompiler::compile, false); + jump((void *)arm_dispatch); - vmem_platform_flush_cache(codestart, (u8*)recompiler::currentCode() - 1, - codestart, (u8*)recompiler::currentCode() - 1); + ass.Finalize(); + recompiler::advance(ass.GetBuffer()->GetSizeInBytes()); } } diff --git a/core/hw/arm7/arm7_rec_arm64.cpp b/core/hw/arm7/arm7_rec_arm64.cpp index 7ac4aa0cc..f95f04b0c 100644 --- a/core/hw/arm7/arm7_rec_arm64.cpp +++ b/core/hw/arm7/arm7_rec_arm64.cpp @@ -112,7 +112,7 @@ class Arm7Compiler : public MacroAssembler Bl(&function_label); } - Operand getOperand(ArmOp::Operand arg, const Register& scratch_reg) + Operand getOperand(const ArmOp::Operand& arg, const Register& scratch_reg) { Register rm; if (arg.isNone()) @@ -691,13 +691,11 @@ public: void AArch64ArmRegAlloc::LoadReg(int host_reg, Arm7Reg armreg) { - // printf("LoadReg W%d <- r%d\n", host_reg, armreg); assembler.Ldr(getReg(host_reg), arm_reg_operand(armreg)); } void AArch64ArmRegAlloc::StoreReg(int host_reg, Arm7Reg armreg) { - // printf("StoreReg W%d -> r%d\n", host_reg, armreg); assembler.Str(getReg(host_reg), arm_reg_operand(armreg)); } diff --git a/core/hw/sh4/dyna/blockmanager.h b/core/hw/sh4/dyna/blockmanager.h index d5cd99ac0..67c75dc82 100644 --- a/core/hw/sh4/dyna/blockmanager.h +++ b/core/hw/sh4/dyna/blockmanager.h @@ -1,6 +1,3 @@ -/* - In case you wonder, the extern "C" stuff are for the assembly code on beagleboard/pandora -*/ #pragma once #include "types.h" @@ -79,11 +76,7 @@ struct RuntimeBlockInfo: RuntimeBlockInfo_Core void bm_WriteBlockMap(const std::string& file); - -extern "C" { -ATTR_USED DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr); -} - +DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr); RuntimeBlockInfoPtr bm_GetBlock(void* dynarec_code); RuntimeBlockInfoPtr bm_GetStaleBlock(void* dynarec_code); RuntimeBlockInfoPtr DYNACALL bm_GetBlock(u32 addr); diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index 27c3cf1e7..fbd301761 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -69,7 +69,7 @@ static void recSh4_Run() sh4_int_bCpuRun = true; RestoreHostRoundingMode(); - sh4_dyna_rcb=(u8*)&Sh4cntx + 
sizeof(Sh4cntx); + u8 *sh4_dyna_rcb = (u8 *)&Sh4cntx + sizeof(Sh4cntx); INFO_LOG(DYNAREC, "cntx // fpcb offset: %td // pc offset: %td // pc %08X", (u8*)&sh4rcb.fpcb - sh4_dyna_rcb, (u8*)&sh4rcb.cntx.pc - sh4_dyna_rcb, sh4rcb.cntx.pc); ngen_mainloop(sh4_dyna_rcb); diff --git a/core/hw/sh4/dyna/ngen.h b/core/hw/sh4/dyna/ngen.h index af3880652..4f3eaca20 100644 --- a/core/hw/sh4/dyna/ngen.h +++ b/core/hw/sh4/dyna/ngen.h @@ -66,10 +66,6 @@ extern u32* emit_ptr; extern u8* CodeCache; -#ifdef __cplusplus -extern "C" { -#endif - void emit_Write32(u32 data); void emit_Skip(u32 sz); u32 emit_FreeSpace(); @@ -99,12 +95,14 @@ void ngen_init(); //Called to compile a block void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool reset, bool staging, bool optimise); -//Called when blocks are reseted +//Called when blocks are reset void ngen_ResetBlocks(); //Value to be returned when the block manager failed to find a block, //should call rdv_FailedToFindBlock and then jump to the return value extern void (*ngen_FailedToFindBlock)(); -//the dynarec mainloop +// The dynarec mainloop +// cntx points right after the Sh4RCB struct, +// which corresponds to the start of the 512 MB or 4 GB virtual address space if enabled. void ngen_mainloop(void* cntx); void ngen_GetFeatures(ngen_features* dst); @@ -129,7 +127,3 @@ void ngen_CC_Call(shil_opcode* op,void* function); void ngen_CC_Finish(shil_opcode* op); RuntimeBlockInfo* ngen_AllocateBlock(); - -#ifdef __cplusplus -} -#endif diff --git a/core/hw/sh4/interpr/sh4_fpu.cpp b/core/hw/sh4/interpr/sh4_fpu.cpp index efce83d94..cede754a4 100644 --- a/core/hw/sh4/interpr/sh4_fpu.cpp +++ b/core/hw/sh4/interpr/sh4_fpu.cpp @@ -452,7 +452,6 @@ sh4op(i1111_nnn0_1111_1101) //FSRRA //1111_nnnn_0111_1101 sh4op(i1111_nnnn_0111_1101) { - // What about double precision? 
u32 n = GetN(op); if (fpscr.PR==0) { diff --git a/core/hw/sh4/interpr/sh4_opcodes.cpp b/core/hw/sh4/interpr/sh4_opcodes.cpp index 9b9c95950..bd6aaee5c 100644 --- a/core/hw/sh4/interpr/sh4_opcodes.cpp +++ b/core/hw/sh4/interpr/sh4_opcodes.cpp @@ -1201,7 +1201,6 @@ INLINE void DYNACALL do_sqw(u32 Dest) void DYNACALL do_sqw_mmu(u32 dst) { do_sqw(dst); } -#if HOST_CPU != CPU_ARM //yes, this micro optimization makes a difference void DYNACALL do_sqw_nommu_area_3(u32 dst, const SQBuffer *sqb) { @@ -1209,7 +1208,6 @@ void DYNACALL do_sqw_nommu_area_3(u32 dst, const SQBuffer *sqb) pmem += (dst & (RAM_SIZE_MAX - 1)) >> 5; *pmem = sqb[(dst >> 5) & 1]; } -#endif void DYNACALL do_sqw_nommu_area_3_nonvmem(u32 dst, const SQBuffer *sqb) { diff --git a/core/hw/sh4/sh4_core_regs.cpp b/core/hw/sh4/sh4_core_regs.cpp index adfa0e8c6..c488f5e67 100644 --- a/core/hw/sh4/sh4_core_regs.cpp +++ b/core/hw/sh4/sh4_core_regs.cpp @@ -9,7 +9,6 @@ Sh4RCB* p_sh4rcb; sh4_if sh4_cpu; -u8* sh4_dyna_rcb; static INLINE void ChangeGPR() { diff --git a/core/hw/sh4/sh4_if.h b/core/hw/sh4/sh4_if.h index 3c467ae6a..f89ddd033 100644 --- a/core/hw/sh4/sh4_if.h +++ b/core/hw/sh4/sh4_if.h @@ -280,9 +280,6 @@ struct alignas(32) SQBuffer { }; void DYNACALL do_sqw_mmu(u32 dst); -#if HOST_CPU == CPU_ARM -extern "C" -#endif void DYNACALL do_sqw_nommu_area_3(u32 dst, const SQBuffer *sqb); void DYNACALL do_sqw_nommu_area_3_nonvmem(u32 dst, const SQBuffer *sqb); void DYNACALL do_sqw_nommu_full(u32 dst, const SQBuffer *sqb); @@ -309,7 +306,6 @@ struct alignas(PAGE_SIZE) Sh4RCB }; extern Sh4RCB* p_sh4rcb; -extern u8* sh4_dyna_rcb; INLINE u32 sh4_sr_GetFull() { diff --git a/core/hw/sh4/sh4_interpreter.h b/core/hw/sh4/sh4_interpreter.h index 1d0959c50..aa3e5104f 100644 --- a/core/hw/sh4/sh4_interpreter.h +++ b/core/hw/sh4/sh4_interpreter.h @@ -42,12 +42,6 @@ void ExecuteDelayslot(); void ExecuteDelayslot_RTE(); #define SH4_TIMESLICE 448 // at 112 Bangai-O doesn't start. 224 is ok - // at 448 Gundam Side Story hangs on Sega copyright screen, 224 ok, 672 ok(!) - -extern "C" { int UpdateSystem(); - -ATTR_USED int UpdateSystem_INTC(); - -} +int UpdateSystem_INTC(); diff --git a/core/rec-ARM/ngen_arm.S b/core/rec-ARM/ngen_arm.S deleted file mode 100644 index 7cb46c329..000000000 --- a/core/rec-ARM/ngen_arm.S +++ /dev/null @@ -1,174 +0,0 @@ -@@ - -#include "build.h" - -.arm -.align 8 - -.equ SH4_TIMESLICE, 448 - -#if defined(__APPLE__) -#define CSYM(n) _##n -#define HIDDEN(n) -#else -#define CSYM(n) n -#define HIDDEN(n) .hidden CSYM(n) -#endif - -@@@@@@@@@@ some helpers @@@@@@@@@@ - -.global CSYM(do_sqw_nommu_area_3) -HIDDEN(do_sqw_nommu_area_3) -@r0: addr -@r1: sq_both -CSYM(do_sqw_nommu_area_3): -add r3,r1,#0x0C000000 @ get ram ptr from r1, part 1 -and r2,r0,#0x20 @ SQ# selection, isolate -ubfx r0,r0,#5,#20 @ get ram offset -add r1,r2 @ SQ# selection, add to SQ ptr -add r3,#512 @ get ram ptr from r1, part 2 -add r3,r0,lsl #5 @ ram + offset -vldm r1,{d0-d3} -vstm r3,{d0-d3} -bx lr - -#if FEAT_SHREC != DYNAREC_NONE - -@@@@@@@@@@ ngen_LinkBlock_*****_stub @@@@@@@@@@ - -.global CSYM(ngen_LinkBlock_Generic_stub) -HIDDEN(ngen_LinkBlock_Generic_stub) -CSYM(ngen_LinkBlock_Generic_stub): - - mov r1,r4 @ djump/pc -> in case we need it .. 
- b CSYM(ngen_LinkBlock_Shared_stub) - - -.global CSYM(ngen_LinkBlock_cond_Branch_stub) -HIDDEN(ngen_LinkBlock_cond_Branch_stub) -CSYM(ngen_LinkBlock_cond_Branch_stub): - - mov r1,#1 - b CSYM(ngen_LinkBlock_Shared_stub) - -.global CSYM(ngen_LinkBlock_cond_Next_stub) -HIDDEN(ngen_LinkBlock_cond_Next_stub) -CSYM(ngen_LinkBlock_cond_Next_stub): - - mov r1,#0 - b CSYM(ngen_LinkBlock_Shared_stub) - - -.global CSYM(ngen_LinkBlock_Shared_stub) -HIDDEN(ngen_LinkBlock_Shared_stub) -CSYM(ngen_LinkBlock_Shared_stub): - - mov r0,lr - sub r0,#4 @go before the call - bl CSYM(rdv_LinkBlock) - bx r0 - -@@@@@@@@@@ ngen_FailedToFindBlock_ @@@@@@@@@@ - - -.global CSYM(ngen_FailedToFindBlock_) -HIDDEN(ngen_FailedToFindBlock_) -CSYM(ngen_FailedToFindBlock_): - mov r0,r4 - bl CSYM(rdv_FailedToFindBlock) - bx r0 - -@@@@@@@@@@ ngen_blockcheckfail @@@@@@@@@@ - -.global CSYM(ngen_blockcheckfail) -HIDDEN(ngen_blockcheckfail) -CSYM(ngen_blockcheckfail): - bl CSYM(rdv_BlockCheckFail) - bx r0 - - -@@@@@@@@@@ ngen_mainloop @@@@@@@@@@ - -@ you can load the address of the sh4 reg struct on the mainloop init -@ using (u8*)regptr-(u8*)Sh4cntx -@ all registers are < 1024 bytes from that -@ so you can use reg+imm forms for it - - - -.global CSYM(ngen_mainloop) -HIDDEN(ngen_mainloop) -CSYM(ngen_mainloop): - -push { r4-r12,lr } - - -#if defined(__APPLE__) - mov r11, #SH4_TIMESLICE @ load cycle counter -#else - mov r9, #SH4_TIMESLICE @ load cycle counter -#endif - - mov r8, r0 @Load context - ldr r4, [r8,#-184] @load pc - - b CSYM(no_update) @Go to mainloop ! - - - @this code is here for fall-through behavior of do_iter - .global CSYM(intc_sched) - HIDDEN(intc_sched) -CSYM(intc_sched): @ next_pc _MUST_ be on ram -#if defined(__APPLE__) - add r11,r11,#SH4_TIMESLICE -#else - add r9,r9,#SH4_TIMESLICE -#endif - mov r4,lr - bl CSYM(UpdateSystem) - mov lr,r4 - cmp r0,#0 - bne CSYM(do_iter) - ldr r0,[r8,#-156] @load CpuRunning - cmp r0,#0 - bxne lr - -HIDDEN(do_iter) -CSYM(do_iter): - mov r0,r4 - bl CSYM(rdv_DoInterrupts) - mov r4,r0 - -.global CSYM(no_update) -HIDDEN(no_update) -CSYM(no_update): @ next_pc _MUST_ be on r4 *R4 NOT R0 anymore* - - ldr r0,[r8,#-156] @load CpuRunning - cmp r0,#0 - beq CSYM(cleanup) - -#if RAM_SIZE_MAX == 33554432 - sub r2,r8,#0x4100000 - ubfx r1,r4,#1,#24 @ 24+1 bits: 32 MB - @ RAM wraps around so if actual RAM size is 16MB, we won't overflow -#elif RAM_SIZE_MAX == 16777216 - sub r2,r8,#0x2100000 - ubfx r1,r4,#1,#23 @ 23+1 bits: 16 MB -#else -#error "Define RAM_SIZE_MAX" -#endif - ldr pc,[r2,r1,lsl #2] - - @bic r1,r4,#0xFF000000 - @ldr pc,[r2,r1,lsl #1] - -HIDDEN(cleanup) -CSYM(cleanup): - -pop {r4-r12,lr} -bx lr - -end_ngen_mainloop: -@@@@@@@@@@ ngen_mainloop @@@@@@@@@@ - -#endif diff --git a/core/rec-ARM/rec_arm.cpp b/core/rec-ARM/rec_arm.cpp index 32c89b326..1ee4443b3 100644 --- a/core/rec-ARM/rec_arm.cpp +++ b/core/rec-ARM/rec_arm.cpp @@ -1,7 +1,29 @@ -#include +/* + This file is part of Flycast. + + Flycast is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + Flycast is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Flycast. If not, see . 
+*/ #include "types.h" -#if FEAT_SHREC == DYNAREC_JIT +#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_ARM +#include +#include +#include + +#include +using namespace vixl::aarch32; + #include "hw/sh4/sh4_opcode_list.h" #include "hw/sh4/sh4_mmr.h" @@ -9,12 +31,10 @@ #include "hw/sh4/sh4_interrupts.h" #include "hw/sh4/sh4_core.h" #include "hw/sh4/dyna/ngen.h" +#include "hw/sh4/dyna/ssa_regalloc.h" #include "hw/sh4/sh4_mem.h" #include "cfg/option.h" -#define _DEVEL 1 -#include "arm_emitter/arm_emitter.h" - //#define CANONICALTEST /* @@ -40,208 +60,218 @@ r8: sh4 cntx r9: cycle counter - Callstack cache - fpu reg alloc d8:d15, single storage */ -#define FPCB_OFFSET (-(int)(FPCB_SIZE * sizeof(void*) + FPCB_PAD)) - -template -s32 rcb_noffs(T* ptr) +#ifdef __clang__ +extern "C" char *stpcpy(char *dst, char const *src) { - s32 rv = (u8*)ptr - (u8*)p_sh4rcb - sizeof(Sh4RCB); - verify(rv < 0); - - return rv; + size_t src_len = strlen(src); + return (char *)memcpy(dst, src, src_len) + src_len; } +#endif +#undef do_sqw_nommu +#define rcbOffset(x) (-sizeof(Sh4RCB) + offsetof(Sh4RCB, x)) struct DynaRBI: RuntimeBlockInfo { virtual u32 Relink(); - virtual void Relocate(void* dst) - { - - } - ARM::eReg T_reg; + virtual void Relocate(void* dst) { } + Register T_reg; }; -using namespace ARM; +using FPBinOP = void (MacroAssembler::*)(DataType, SRegister, SRegister, SRegister); +using FPUnOP = void (MacroAssembler::*)(DataType, SRegister, SRegister); +using BinaryOP = void (MacroAssembler::*)(Register, Register, const Operand&); - -// These have to be declared somewhere or linker dies -u8* ARM::emit_opt=0; -eReg ARM::reg_addr; -eReg ARM::reg_dst; -s32 ARM::imma; - - -typedef ConditionCode eCC; - -#define EmitAPI \ - inline static void - -#define lr_r14 r14 -#define rfp_r9 r9 - - -typedef void FPBinOP (eFSReg Sd, eFSReg Sn, eFSReg Sm, ConditionCode CC); -typedef void FPUnOP (eFSReg Sd, eFSReg Sm, ConditionCode CC); -typedef void BinaryOP (eReg Rd, eReg Rn, eReg Rm, ConditionCode CC); -typedef void BinaryOPImm (eReg Rd, eReg Rn, s32 sImm8, ConditionCode CC); -typedef void UnaryOP (eReg Rd, eReg Rs); - -// you pick reg, loads Base with reg addr, no reg. mapping yet ! -static void LoadSh4Reg_mem(eReg Rt, u32 Sh4_Reg, eCC CC=CC_AL) +class Arm32Assembler : public MacroAssembler { - const u32 shRegOffs = (u8*)GetRegPtr(Sh4_Reg)-sh4_dyna_rcb ; +public: + Arm32Assembler() = default; + Arm32Assembler(u8 *buffer, size_t size) : MacroAssembler(buffer, size, A32) {} - LDR(Rt, r8, shRegOffs, Offset, CC); -} - - -// you pick regs, loads Base with reg addr, no reg. mapping yet ! -// data should already exist for Rt ! 
-static void StoreSh4Reg_mem(eReg Rt,u32 Sh4_Reg, eCC CC=CC_AL) -{ - const u32 shRegOffs = (u8*)GetRegPtr(Sh4_Reg)-sh4_dyna_rcb ; - - STR(Rt, r8, shRegOffs, Offset, CC); -} - -//#define OLD_REGALLOC -#ifdef OLD_REGALLOC -#include "hw/sh4/dyna/regalloc.h" -#else -#include "hw/sh4/dyna/ssa_regalloc.h" -#endif - -#if defined(__APPLE__) -eReg alloc_regs[]={r5,r6,r7,r10,(eReg)-1}; -#else -eReg alloc_regs[]={r5,r6,r7,r10,r11,(eReg)-1}; -#endif -eFSReg alloc_fpu[]={f16,f17,f18,f19,f20,f21,f22,f23, - f24,f25,f26,f27,f28,f29,f30,f31,(eFSReg)-1}; - -struct arm_reg_alloc: RegAlloc -{ - virtual void Preload(u32 reg,eReg nreg) - { - verify(reg!=reg_pc_dyn); - LoadSh4Reg_mem(nreg,reg); + void Finalize() { + FinalizeCode(); + vmem_platform_flush_cache(GetBuffer()->GetStartAddress(), GetCursorAddress() - 1, + GetBuffer()->GetStartAddress(), GetCursorAddress() - 1); } - virtual void Writeback(u32 reg,eReg nreg) +}; + +static Arm32Assembler ass; +static u8 *sh4_dyna_context; + +static void loadSh4Reg(Register Rt, u32 Sh4_Reg) +{ + const int shRegOffs = (u8*)GetRegPtr(Sh4_Reg) - sh4_dyna_context; + + ass.Ldr(Rt, MemOperand(r8, shRegOffs)); +} + +static void storeSh4Reg(Register Rt, u32 Sh4_Reg) +{ + const int shRegOffs = (u8*)GetRegPtr(Sh4_Reg) - sh4_dyna_context; + + ass.Str(Rt, MemOperand(r8, shRegOffs)); +} + +const int alloc_regs[] = { 5, 6, 7, 10, 11, -1 }; +const int alloc_fpu[] = { 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, -1 }; + +struct arm_reg_alloc: RegAlloc +{ + void Preload(u32 reg, int nreg) override { - if (reg==reg_pc_dyn) + loadSh4Reg(Register(nreg), reg); + } + void Writeback(u32 reg, int nreg) override + { + if (reg == reg_pc_dyn) // reg_pc_dyn has been stored in r4 by the jdyn op implementation // No need to write it back since it won't be used past the end of the block - ; //MOV(r4,nreg); + ; //ass.Mov(r4, Register(nreg)); else - StoreSh4Reg_mem(nreg,reg); + storeSh4Reg(Register(nreg), reg); } - virtual void Preload_FPU(u32 reg,eFSReg nreg) + void Preload_FPU(u32 reg, int nreg) override { - const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ; + const s32 shRegOffs = (u8*)GetRegPtr(reg) - sh4_dyna_context; - VLDR((nreg),r8,shRegOffs/4); + ass.Vldr(SRegister(nreg), MemOperand(r8, shRegOffs)); } - virtual void Writeback_FPU(u32 reg,eFSReg nreg) + void Writeback_FPU(u32 reg, int nreg) override { - const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ; + const s32 shRegOffs = (u8*)GetRegPtr(reg) - sh4_dyna_context; - VSTR((nreg),r8,shRegOffs/4); + ass.Vstr(SRegister(nreg), MemOperand(r8, shRegOffs)); } - eFSReg mapfs(const shil_param& prm) + + SRegister mapFReg(const shil_param& prm) { - return mapf(prm); + return SRegister(mapf(prm)); + } + Register mapReg(const shil_param& prm) + { + return Register(mapg(prm)); } }; static arm_reg_alloc reg; -static u32 blockno=0; +static const void *no_update; +static const void *intc_sched; +static const void *ngen_blockcheckfail; +static const void *ngen_LinkBlock_Generic_stub; +static const void *ngen_LinkBlock_cond_Branch_stub; +static const void *ngen_LinkBlock_cond_Next_stub; +static void (*ngen_FailedToFindBlock_)(); +static void (*mainloop)(void *); -extern "C" void no_update(); -extern "C" void intc_sched(); -extern "C" void ngen_blockcheckfail(); +static std::map ccmap; +static std::map ccnmap; - -extern "C" void ngen_LinkBlock_Generic_stub(); -extern "C" void ngen_LinkBlock_cond_Branch_stub(); -extern "C" void ngen_LinkBlock_cond_Next_stub(); - -extern "C" void ngen_FailedToFindBlock_(); - -#include - -static 
std::map ccmap; -static std::map ccnmap; - -u32 DynaRBI::Relink() +void ngen_mainloop(void* context) { - verify(emit_ptr==0); - u8* code_start=(u8*)code+relink_offset; - emit_ptr=(u32*)code_start; + sh4_dyna_context = (u8 *)context; + mainloop(context); +} - switch(BlockType) +static void jump(const void *code, ConditionType cond = al) +{ + ptrdiff_t offset = reinterpret_cast(code) - ass.GetBuffer()->GetStartAddress(); + verify((offset & 3) == 0); + if (offset < -32 * 1024 * 1024 || offset >= 32 * 1024 * 1024) + { + WARN_LOG(DYNAREC, "jump offset too large: %d", offset); + UseScratchRegisterScope scope(&ass); + Register reg = scope.Acquire(); + ass.Mov(cond, reg, (u32)code); + ass.Bx(cond, reg); + } + else + { + Label code_label(offset); + ass.B(cond, &code_label); + } +} + +static void call(const void *code, ConditionType cond = al) +{ + ptrdiff_t offset = reinterpret_cast(code) - ass.GetBuffer()->GetStartAddress(); + verify((offset & 3) == 0); + if (offset < -32 * 1024 * 1024 || offset >= 32 * 1024 * 1024) + { + WARN_LOG(DYNAREC, "call offset too large: %d", offset); + UseScratchRegisterScope scope(&ass); + Register reg = scope.Acquire(); + ass.Mov(cond, reg, (u32)code); + ass.Blx(cond, reg); + } + else + { + Label code_label(offset); + ass.Bl(cond, &code_label); + } +} + +static u32 relinkBlock(DynaRBI *block) +{ + u32 start_offset = ass.GetCursorOffset(); + switch(block->BlockType) { case BET_Cond_0: case BET_Cond_1: { //quick opt here: - //peek into reg alloc, store actuall sr_T register to relink_data + //peek into reg alloc, store actual sr_T register to relink_data #ifndef CANONICALTEST - bool last_op_sets_flags=!has_jcond && oplist.size() > 0 && - oplist[oplist.size()-1].rd._reg==reg_sr_T && ccmap.count(oplist[oplist.size()-1].op); + bool last_op_sets_flags = !block->has_jcond && !block->oplist.empty() && + block->oplist.back().rd._reg == reg_sr_T && ccmap.count(block->oplist.back().op); #else bool last_op_sets_flags = false; #endif - ConditionCode CC=CC_EQ; + ConditionType CC = eq; if (last_op_sets_flags) { - shilop op=oplist[oplist.size()-1].op; + shilop op = block->oplist.back().op; - verify(ccmap.count(op)>0); - - if ((BlockType&1)==1) - CC=ccmap[op]; + if ((block->BlockType & 1) == 1) + CC = ccmap[op]; else - CC=ccnmap[op]; + CC = ccnmap[op]; } else { - if (!has_jcond) + if (!block->has_jcond) { - if (T_reg != (eReg)-1) + if (block->T_reg.IsRegister()) { - MOV(r4, T_reg); + ass.Mov(r4, block->T_reg); } else { - INFO_LOG(DYNAREC, "SLOW COND PATH %d", oplist.empty() ? -1 : oplist[oplist.size()-1].op); - LoadSh4Reg_mem(r4, reg_sr_T); + INFO_LOG(DYNAREC, "SLOW COND PATH %x", block->oplist.empty() ? 
-1 : block->oplist.back().op); + loadSh4Reg(r4, reg_sr_T); } } - - CMP(r4,(BlockType&1)); + ass.Cmp(r4, block->BlockType & 1); } - if (pBranchBlock) - JUMP((u32)pBranchBlock->code,CC); + if (block->pBranchBlock) + jump((void *)block->pBranchBlock->code, CC); else - CALL((u32)ngen_LinkBlock_cond_Branch_stub,CC); + call(ngen_LinkBlock_cond_Branch_stub, CC); - if (pNextBlock) - JUMP((u32)pNextBlock->code); + if (block->pNextBlock) + jump((void *)block->pNextBlock->code); else - CALL((u32)ngen_LinkBlock_cond_Next_stub); + call(ngen_LinkBlock_cond_Next_stub); break; } @@ -249,173 +279,85 @@ u32 DynaRBI::Relink() case BET_DynamicRet: case BET_DynamicCall: case BET_DynamicJump: - { -#ifdef CALLSTACK -#error offset broken - SUB(r2, r8, -FPCB_OFFSET); + ass.Sub(r2, r8, -rcbOffset(fpcb)); #if RAM_SIZE_MAX == 33554432 - UBFX(r1, r4, 1, 24); + ass.Ubfx(r1, r4, 1, 24); #else - UBFX(r1, r4, 1, 23); -#endif - - if (BlockType==BET_DynamicRet) - { - LDR(r14,r2,r1,Offset,true,S_LSL,2); - BX(R14); //BX LR (ret hint) - } - else if (BlockType==BET_DynamicCall) - { - LDR(r0,r2,r1,Offset,true,S_LSL,2); - BLX(r0); //BLX r0 (call hint) - } - else - { - LDR(r15,r2,r1,Offset,true,S_LSL,2); - } -#else - if (relink_data==0) - { -#if 1 - //this is faster - //why ? (Icache ?) - SUB(r2, r8, -FPCB_OFFSET); -#if RAM_SIZE_MAX == 33554432 - UBFX(r1, r4, 1, 24); -#else - UBFX(r1, r4, 1, 23); -#endif - LDR(r15,r2,r1,Offset,true,S_LSL,2); - -#else - if (pBranchBlock) - { - MOV32(r1,pBranchBlock->addr); //2 - CMP(r4,r1); //1 - JUMP((unat)pBranchBlock->code,CC_EQ); //1 - CALL((unat)ngen_LinkBlock_Generic_stub);//1 - } - else - { - SUB(r2, r8, -FPCB_OFFSET); - -#if RAM_SIZE_MAX == 33554432 - UBFX(r1, r4, 1, 24); -#else - UBFX(r1, r4, 1, 23); -#endif - NOP();NOP(); //2 - LDR(r15,r2,r1,Offset,true,S_LSL,2); //1 - } -#endif - } - else - { - verify(pBranchBlock==0); - SUB(r2, r8, -FPCB_OFFSET); - -#if RAM_SIZE_MAX == 33554432 - UBFX(r1, r4, 1, 24); -#else - UBFX(r1, r4, 1, 23); -#endif - LDR(r15,r2,r1,Offset,true,S_LSL,2); - } + ass.Ubfx(r1, r4, 1, 23); #endif + ass.Ldr(pc, MemOperand(r2, r1, LSL, 2)); break; - } case BET_StaticCall: case BET_StaticJump: - { - if (pBranchBlock==0) - CALL((u32)ngen_LinkBlock_Generic_stub); + if (block->pBranchBlock == nullptr) + call(ngen_LinkBlock_Generic_stub); else - { -#ifdef CALLSTACK - if (BlockType==BET_StaticCall) - CALL((u32)pBranchBlock->code); - else -#endif - JUMP((u32)pBranchBlock->code); - } + call((void *)block->pBranchBlock->code); break; - } case BET_StaticIntr: case BET_DynamicIntr: - { - if (BlockType==BET_StaticIntr) - { - MOV32(r4,NextBlock); - } - //else -> already in r4 djump ! + if (block->BlockType == BET_StaticIntr) + ass.Mov(r4, block->NextBlock); + //else -> already in r4 djump ! 
- StoreSh4Reg_mem(r4,reg_nextpc); - - CALL((u32)UpdateINTC); - LoadSh4Reg_mem(r4,reg_nextpc); - JUMP((u32)no_update); - break; - } + storeSh4Reg(r4, reg_nextpc); + call((void *)UpdateINTC); + loadSh4Reg(r4, reg_nextpc); + jump(no_update); + break; default: - ERROR_LOG(DYNAREC, "Error, Relink() Block Type: %X", BlockType); + ERROR_LOG(DYNAREC, "Error, Relink() Block Type: %X", block->BlockType); verify(false); break; } - - vmem_platform_flush_cache(code_start, emit_ptr - 1, code_start, emit_ptr - 1); - - u32 sz=(u8*)emit_ptr-code_start; - - emit_ptr=0; - return sz; + return ass.GetCursorOffset() - start_offset; } -static eReg GetParam(const shil_param& param, eReg raddr = r0) +u32 DynaRBI::Relink() +{ + ass = Arm32Assembler((u8 *)code + relink_offset, host_code_size - relink_offset); + + u32 size = relinkBlock(this); + + ass.Finalize(); + + return size; +} + +static Register GetParam(const shil_param& param, Register raddr = r0) { if (param.is_imm()) { - MOV32(raddr, param._imm); + ass.Mov(raddr, param._imm); return raddr; } - else if (param.is_r32i()) - { - return reg.mapg(param); - } - else - { - die("Invalid parameter"); - return (eReg)-1; - } + if (param.is_r32i()) + return reg.mapReg(param); + + die("Invalid parameter"); + return Register(); } -static void ngen_Unary(shil_opcode* op, UnaryOP unop) +static void ngen_Binary(shil_opcode* op, BinaryOP dtop) { - unop(reg.mapg(op->rd),reg.mapg(op->rs1)); -} - -static void ngen_Binary(shil_opcode* op, BinaryOP dtop, BinaryOPImm dtopimm) -{ - eReg rs1 = GetParam(op->rs1); + Register rs1 = GetParam(op->rs1); - eReg rs2 = r1; + Register rs2 = r1; if (op->rs2.is_imm()) { - if (is_i8r4(op->rs2._imm)) + if (ImmediateA32::IsImmediateA32(op->rs2._imm)) { - dtopimm(reg.mapg(op->rd), rs1, op->rs2._imm, CC_AL); + (ass.*dtop)(reg.mapReg(op->rd), rs1, Operand(op->rs2._imm)); return; } - else - { - MOV32(rs2, op->rs2._imm); - } + ass.Mov(rs2, op->rs2._imm); } else if (op->rs2.is_r32i()) { - rs2 = reg.mapg(op->rs2); + rs2 = reg.mapReg(op->rs2); } else { @@ -423,42 +365,39 @@ static void ngen_Binary(shil_opcode* op, BinaryOP dtop, BinaryOPImm dtopimm) verify(false); } - dtop(reg.mapg(op->rd), rs1, rs2, CC_AL); + (ass.*dtop)(reg.mapReg(op->rd), rs1, rs2); } static void ngen_fp_bin(shil_opcode* op, FPBinOP fpop) { - eFSReg rs1 = f0; + SRegister rs1 = s0; if (op->rs1.is_imm()) { - MOV32(r0, op->rs1._imm); - VMOV(rs1, r0); + ass.Mov(r0, op->rs1._imm); + ass.Vmov(rs1, r0); } else { - rs1 = reg.mapf(op->rs1); + rs1 = reg.mapFReg(op->rs1); } - eFSReg rs2 = f1; + SRegister rs2 = s1; if (op->rs2.is_imm()) { - MOV32(r0, op->rs2._imm); - VMOV(rs2, r0); + ass.Mov(r0, op->rs2._imm); + ass.Vmov(rs2, r0); } else { - rs2 = reg.mapf(op->rs2); + rs2 = reg.mapFReg(op->rs2); } - fpop(reg.mapfs(op->rd), rs1, rs2, CC_AL); + (ass.*fpop)(DataType(F32), reg.mapFReg(op->rd), rs1, rs2); } static void ngen_fp_una(shil_opcode* op, FPUnOP fpop) { - verify(op->rd.is_r32f()); - verify(op->rs1.is_r32f()); - - fpop(reg.mapfs(op->rd), reg.mapfs(op->rs1), CC_AL); + (ass.*fpop)(DataType(F32), reg.mapFReg(op->rd), reg.mapFReg(op->rs1)); } struct CC_PS @@ -478,28 +417,28 @@ void ngen_CC_Param(shil_opcode* op,shil_param* par,CanonicalParamType tp) switch(tp) { case CPT_f32rv: - #ifdef __ARM_PCS_VFP - // -mfloat-abi=hard - if (reg.IsAllocg(*par)) - VMOV(reg.mapg(*par), f0); - else if (reg.IsAllocf(*par)) - VMOV(reg.mapfs(*par), f0); - break; - #endif +#ifdef __ARM_PCS_VFP + // -mfloat-abi=hard + if (reg.IsAllocg(*par)) + ass.Vmov(reg.mapReg(*par), s0); + else if (reg.IsAllocf(*par)) + 
ass.Vmov(reg.mapFReg(*par), s0); + break; +#endif case CPT_u32rv: case CPT_u64rvL: if (reg.IsAllocg(*par)) - MOV(reg.mapg(*par), r0); + ass.Mov(reg.mapReg(*par), r0); else if (reg.IsAllocf(*par)) - VMOV(reg.mapfs(*par), r0); + ass.Vmov(reg.mapFReg(*par), r0); else die("unhandled param"); break; case CPT_u64rvH: verify(reg.IsAllocg(*par)); - MOV(reg.mapg(*par), r1); + ass.Mov(reg.mapReg(*par), r1); break; case CPT_u32: @@ -519,52 +458,51 @@ void ngen_CC_Param(shil_opcode* op,shil_param* par,CanonicalParamType tp) void ngen_CC_Call(shil_opcode* op, void* function) { - u32 rd = r0; - u32 fd = f0; + Register rd = r0; + SRegister fd = s0; for (int i = CC_pars.size(); i-- > 0; ) { CC_PS& param = CC_pars[i]; if (param.type == CPT_ptr) { - MOV32((eReg)rd, (u32)param.par->reg_ptr()); + ass.Mov(rd, (u32)param.par->reg_ptr()); } else { if (param.par->is_reg()) { - #ifdef __ARM_PCS_VFP +#ifdef __ARM_PCS_VFP // -mfloat-abi=hard if (param.type == CPT_f32) { if (reg.IsAllocg(*param.par)) - VMOV((eFSReg)fd, reg.mapg(*param.par)); + ass.Vmov(fd, reg.mapReg(*param.par)); else if (reg.IsAllocf(*param.par)) - VMOV((eFSReg)fd, reg.mapfs(*param.par)); + ass.Vmov(fd, reg.mapFReg(*param.par)); else die("Must not happen!"); continue; } - #endif +#endif if (reg.IsAllocg(*param.par)) - MOV((eReg)rd, reg.mapg(*param.par)); + ass.Mov(rd, reg.mapReg(*param.par)); else if (reg.IsAllocf(*param.par)) - VMOV((eReg)rd, reg.mapfs(*param.par)); + ass.Vmov(rd, reg.mapFReg(*param.par)); else die("Must not happen!"); } else { verify(param.par->is_imm()); - MOV32((eReg)rd, param.par->_imm); + ass.Mov(rd, param.par->_imm); } } - rd++; - fd++; + rd = Register(rd.GetCode() + 1); + fd = SRegister(fd.GetCode() + 1); } - //printf("used reg r0 to r%d, %d params, calling %08X\n",rd-1,CC_pars.size(),function); - CALL((u32)function); + call(function); } void ngen_CC_Finish(shil_opcode* op) @@ -583,66 +521,29 @@ enum mem_op_type static mem_op_type memop_type(shil_opcode* op) { + int sz = op->flags & 0x7f; + bool fp32 = op->rs2.is_r32f() || op->rd.is_r32f(); - int Lsz=-1; - int sz=op->flags&0x7f; + if (sz == 1) + return SZ_8; + else if (sz == 2) + return SZ_16; + else if (sz == 4) + return fp32 ? SZ_32F : SZ_32I; + else if (sz == 8) + return SZ_64F; - bool fp32=op->rs2.is_r32f() || op->rd.is_r32f(); - - if (sz==1) Lsz=SZ_8; - if (sz==2) Lsz=SZ_16; - if (sz==4 && !fp32) Lsz=SZ_32I; - if (sz==4 && fp32) Lsz=SZ_32F; - if (sz==8) Lsz=SZ_64F; - - verify(Lsz!=-1); - - return (mem_op_type)Lsz; + die("Unknown op"); + return SZ_32I; } -static u32 memop_bytes(mem_op_type tp) +const u32 memop_bytes[] = { 1, 2, 4, 4, 8 }; +static const void *_mem_hndl_SQ32[3][14]; +static const void *_mem_hndl[2][3][14]; +const void * const _mem_func[2][2] = { - const u32 rv[] = { 1,2,4,4,8}; - - return rv[tp]; -} - -/* - 8/16/32 I R/W B - ubfx r0,raddr,.. - ldr(sh/sb)/str(h/b) rd/s,[r0+r8] - - 32/64 F R/W B - ubfx r0,raddr,.. - add r0,r0,r8 - vldr/vstr rd/s,[r0] {32 or 64 bit forms} - - - 32 I / 32/64 F W SQ - ubfx r0,raddr,.. 
- add r0,r0,r8 - str/vstr/vstr.d rs,[r0-offs] - - 8/16/32 I R/W M - mov r0,raddr - call MEMHANDLER - - 32/64 F R M - mov r0,raddr - vmov r1,rs // vmov.d r3:r2,rs - call MEMHANDER // call MEMHANDLER64 - - 32/64 F W M - mov r0,raddr - call MEMHANDER // call MEMHANDLER64 - vmov rd,r0 // vmov.d rd,r3:r2 -*/ -static unat _mem_hndl_SQ32[3][14]; -static unat _mem_hndl[2][3][14]; -const unat _mem_func[2][5]= -{ - {0,0,0,(unat)_vmem_WriteMem32,(unat)_vmem_WriteMem64}, - {0,0,0,(unat)_vmem_ReadMem32,(unat)_vmem_ReadMem64}, + { (void *)_vmem_WriteMem32, (void *)_vmem_WriteMem64 }, + { (void *)_vmem_ReadMem32, (void *)_vmem_ReadMem64 }, }; const struct @@ -653,32 +554,32 @@ const struct mem_op_type optp; u32 offs; } -op_table[]= +op_table[] = { //LDRSB - {0x0E500FF0,0x001000D0,true,SZ_8,1}, + { 0x0E500FF0, 0x001000D0, true, SZ_8, 1 }, //LDRSH - {0x0E500FF0,0x001000F0,true,SZ_16,1}, + { 0x0E500FF0, 0x001000F0, true, SZ_16, 1 }, //LDR - {0x0E500010,0x06100000,true,SZ_32I,1}, + { 0x0E500010, 0x06100000, true, SZ_32I, 1 }, //VLDR.32 - {0x0F300F00,0x0D100A00,true,SZ_32F,2}, + { 0x0F300F00, 0x0D100A00, true, SZ_32F, 2 }, //VLDR.64 - {0x0F300F00,0x0D100B00,true,SZ_64F,2}, + { 0x0F300F00, 0x0D100B00, true, SZ_64F, 2 }, // //STRB - {0x0FF00010,0x07C00000,false,SZ_8,1}, + { 0x0FF00010, 0x07C00000, false, SZ_8, 1 }, //STRH - {0x0FF00FF0,0x018000B0,false,SZ_16,1}, + { 0x0FF00FF0, 0x018000B0, false, SZ_16, 1 }, //STR - {0x0E500010,0x06000000,false,SZ_32I,1}, + { 0x0E500010, 0x06000000, false, SZ_32I, 1 }, //VSTR.32 - {0x0F300F00,0x0D000A00,false,SZ_32F,2}, + { 0x0F300F00, 0x0D000A00, false, SZ_32F, 2 }, //VSTR.64 - {0x0F300F00,0x0D000B00,false,SZ_64F,2}, + { 0x0F300F00, 0x0D000B00, false, SZ_64F, 2 }, - {0,0}, + { 0, 0 }, }; union arm_mem_op @@ -699,38 +600,39 @@ union arm_mem_op u32 full; }; -static void vmem_slowpath(eReg raddr, eReg rt, eFSReg ft, eFDReg fd, mem_op_type optp, bool read) +static void vmem_slowpath(Register raddr, Register rt, SRegister ft, DRegister fd, mem_op_type optp, bool read) { - if (raddr != r0) - MOV(r0, (eReg)raddr); + if (!raddr.Is(r0)) + ass.Mov(r0, raddr); if (!read) { - if (optp <= SZ_32I) MOV(r1, rt); - else if (optp == SZ_32F) VMOV(r1, ft); - else if (optp == SZ_64F) VMOV(r2, r3, fd); + if (optp <= SZ_32I) + ass.Mov(r1, rt); + else if (optp == SZ_32F) + ass.Vmov(r1, ft); + else if (optp == SZ_64F) + ass.Vmov(r2, r3, fd); } - if (fd != d0 && optp == SZ_64F) - { - die("BLAH"); - } - - u32 funct = 0; + const void *funct = nullptr; if (optp <= SZ_32I) - funct = _mem_hndl[read][optp][raddr]; + funct = _mem_hndl[read][optp][raddr.GetCode()]; else - funct = _mem_func[read][optp]; + funct = _mem_func[read][optp - SZ_32F]; - verify(funct != 0); - CALL(funct); + verify(funct != nullptr); + call(funct); if (read) { - if (optp <= SZ_32I) MOV(rt, r0); - else if (optp == SZ_32F) VMOV(ft, r0); - else if (optp == SZ_64F) VMOV(fd, r0, r1); + if (optp <= SZ_32I) + ass.Mov(rt, r0); + else if (optp == SZ_32F) + ass.Vmov(ft, r0); + else if (optp == SZ_64F) + ass.Vmov(fd, r0, r1); } } @@ -739,25 +641,23 @@ bool ngen_Rewrite(host_context_t &context, void *faultAddress) u32 *regs = context.reg; arm_mem_op *ptr = (arm_mem_op *)context.pc; - static_assert(sizeof(*ptr) == 4, "sizeof(arm_mem_op) == 4"); - mem_op_type optp; - u32 read=0; - s32 offs=-1; + u32 read; + s32 offs = -1; - u32 fop=ptr[0].full; + u32 fop = ptr[0].full; - for (int i=0;op_table[i].mask;i++) + for (int i = 0; op_table[i].mask; i++) { - if ((fop&op_table[i].mask)==op_table[i].key) + if ((fop & op_table[i].mask) == op_table[i].key) { - 
optp=op_table[i].optp; - read=op_table[i].read; - offs=op_table[i].offs; + optp = op_table[i].optp; + read = op_table[i].read; + offs = op_table[i].offs; } } - if (offs==-1) + if (offs == -1) { ERROR_LOG(DYNAREC, "%08X : invalid size", fop); die("can't decode opcode\n"); @@ -765,20 +665,20 @@ bool ngen_Rewrite(host_context_t &context, void *faultAddress) ptr -= offs; - eReg raddr,rt; - eFSReg ft; - eFDReg fd; + Register raddr, rt; + SRegister ft; + DRegister fd; //Get used regs from opcodes .. - if ((ptr[0].full & 0x0FE00070)==0x07E00050) + if ((ptr[0].full & 0x0FE00070) == 0x07E00050) { //from ubfx ! - raddr=(eReg)(ptr[0].Ra); + raddr = Register(ptr[0].Ra); } - else if ((ptr[0].full & 0x0FE00000)==0x03C00000) + else if ((ptr[0].full & 0x0FE00000) == 0x03C00000) { - raddr=(eReg)(ptr[0].Rn); + raddr = Register(ptr[0].Rn); } else { @@ -786,160 +686,112 @@ bool ngen_Rewrite(host_context_t &context, void *faultAddress) die("Invalid opcode: vmem fixup\n"); } //from mem op - rt=(eReg)(ptr[offs].Rt); - ft=(eFSReg)(ptr[offs].Rt*2 + ptr[offs].D); - fd=(eFDReg)(ptr[offs].D*16 + ptr[offs].Rt); + rt = Register(ptr[offs].Rt); + ft = SRegister(ptr[offs].Rt * 2 + ptr[offs].D); + fd = DRegister(ptr[offs].D * 16 + ptr[offs].Rt); //get some other relevant data - u32 sh4_addr=regs[raddr]; + u32 sh4_addr = regs[raddr.GetCode()]; u32 fault_offs = (uintptr_t)faultAddress - regs[8]; - u8* sh4_ctr=(u8*)regs[8]; - bool is_sq=(sh4_addr>>26)==0x38; + bool is_sq = (sh4_addr >> 26) == 0x38; - verify(emit_ptr==0); - emit_ptr=(u32*)ptr; + ass = Arm32Assembler((u8 *)ptr, 12); - - /* - mov r0,raddr - - 8/16/32I: - call _mem_hdlp[read][optp][rt] - 32F/64F: - 32F,r: vmov r1,ft - 64F,r: vmov [r3:r2],fd - call _mem_hdlp[read][optp][0] - 32F,w: vmov ft,r0 - 64F,w: vmov fd,[r1:r0] - */ - //fault offset must always be the addr from ubfx (sanity check) - verify((fault_offs==0) || fault_offs==(0x1FFFFFFF&sh4_addr)); + verify(fault_offs == 0 || fault_offs == (sh4_addr & 0x1FFFFFFF)); if (is_sq && !read && optp >= SZ_32I) { if (optp >= SZ_32F) { - MOV(r0, raddr); + if (!raddr.Is(r0)) + ass.Mov(r0, raddr); + else + ass.Nop(); raddr = r0; } switch (optp) { case SZ_32I: - MOV(r1, rt); + ass.Mov(r1, rt); break; case SZ_32F: - VMOV(r1, ft); + ass.Vmov(r1, ft); break; case SZ_64F: - VMOV(r2, r3, fd); + ass.Vmov(r2, r3, fd); break; default: break; } - CALL((unat)_mem_hndl_SQ32[optp - SZ_32I][raddr]); - } - else if (false && is_sq) //THPS2 uses cross area SZ_32F so this is disabled for now - { - //SQ ! - s32 sq_offs=(u8 *)sq_both-sh4_ctr; - verify(sq_offs==rcb_noffs(sq_both)); - - verify(!read && optp>=SZ_32I); - - if (optp==SZ_32I) - { - MOV(r1,rt); - - CALL((unat)_mem_hndl_SQ32[raddr]); - } - else - { - //UBFX(r1,raddr,0,6); - AND(r1,raddr,0x3F); - ADD(r1,r1,r8); - - if (optp==SZ_32I) STR(rt,r1,sq_offs); // cross writes are possible, so this can't be assumed - else if (optp==SZ_32F) VSTR(ft,r1,sq_offs/4); - else if (optp==SZ_64F) VSTR(fd,r1,sq_offs/4); - } + call(_mem_hndl_SQ32[optp - SZ_32I][raddr.GetCode()]); } else { //Fallback to function ! 
- if (offs==2) + if (optp >= SZ_32F) { - if (raddr!=r0) - MOV(r0,(eReg)raddr); + if (!raddr.Is(r0)) + ass.Mov(r0, raddr); else - NOP(); + ass.Nop(); } if (!read) { - if (optp<=SZ_32I) MOV(r1,rt); - else if (optp==SZ_32F) VMOV(r1,ft); - else if (optp==SZ_64F) VMOV(r2,r3,fd); + if (optp <= SZ_32I) + ass.Mov(r1, rt); + else if (optp == SZ_32F) + ass.Vmov(r1, ft); + else if (optp == SZ_64F) + ass.Vmov(r2, r3, fd); } - if (fd!=d0 && optp==SZ_64F) - { - die("BLAH"); - } + const void *funct = nullptr; - u32 funct=0; + if (offs == 1) + funct = _mem_hndl[read][optp][raddr.GetCode()]; + else if (optp >= SZ_32F) + funct = _mem_func[read][optp - SZ_32F]; - if (offs==1) - funct=_mem_hndl[read][optp][raddr]; - else if (offs==2) - funct=_mem_func[read][optp]; - - verify(funct!=0); - CALL(funct); + verify(funct != nullptr); + call(funct); if (read) { - if (optp<=SZ_32I) MOV(rt,r0); - else if (optp==SZ_32F) VMOV(ft,r0); - else if (optp==SZ_64F) VMOV(fd,r0,r1); + if (optp <= SZ_32I) + ass.Mov(rt, r0); + else if (optp == SZ_32F) + ass.Vmov(ft, r0); + else if (optp == SZ_64F) + ass.Vmov(fd, r0, r1); } } - vmem_platform_flush_cache((void*)ptr, (u8*)emit_ptr - 1, (void*)ptr, (u8*)emit_ptr - 1); - emit_ptr = 0; + ass.Finalize(); context.pc = (size_t)ptr; return true; } -EAPI NEG(eReg Rd, eReg Rs) -{ - RSB(Rd, Rs, 0); -} - -EAPI NEG(eReg Rd, eReg Rs, ConditionCode CC) -{ - RSB(Rd, Rs, 0, CC); -} - -static eReg GenMemAddr(shil_opcode* op, eReg raddr = r0) +static Register GenMemAddr(shil_opcode* op, Register raddr = r0) { if (op->rs3.is_imm()) { - if(is_i8r4(op->rs3._imm)) + if (ImmediateA32::IsImmediateA32(op->rs3._imm)) { - ADD(raddr,reg.mapg(op->rs1),op->rs3._imm); + ass.Add(raddr, reg.mapReg(op->rs1), op->rs3._imm); } else { - MOV32(r1,op->rs3._imm); - ADD(raddr,reg.mapg(op->rs1),r1); + ass.Mov(r1, op->rs3._imm); + ass.Add(raddr, reg.mapReg(op->rs1), r1); } } else if (op->rs3.is_r32i()) { - ADD(raddr,reg.mapg(op->rs1),reg.mapg(op->rs3)); + ass.Add(raddr, reg.mapReg(op->rs1), reg.mapReg(op->rs3)); } else if (!op->rs3.is_null()) { @@ -948,50 +800,50 @@ static eReg GenMemAddr(shil_opcode* op, eReg raddr = r0) } else if (op->rs1.is_imm()) { - MOV32(raddr, op->rs1._imm); + ass.Mov(raddr, op->rs1._imm); } else { - raddr = reg.mapg(op->rs1); + raddr = reg.mapReg(op->rs1); } return raddr; } -static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool staging, bool optimise) +static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool optimise) { if (!op->rs1.is_imm()) return false; mem_op_type optp = memop_type(op); bool isram = false; - void* ptr = _vmem_read_const(op->rs1._imm, isram, std::min(4u, memop_bytes(optp))); - eReg rd = (optp != SZ_32F && optp != SZ_64F) ? reg.mapg(op->rd) : r0; + void* ptr = _vmem_read_const(op->rs1._imm, isram, std::min(4u, memop_bytes[optp])); + Register rd = (optp != SZ_32F && optp != SZ_64F) ? 
reg.mapReg(op->rd) : r0; if (isram) { - MOV32(r0, (u32)ptr); + ass.Mov(r0, (u32)ptr); switch(optp) { case SZ_8: - LDRSB(rd, r0); + ass.Ldrsb(rd, MemOperand(r0)); break; case SZ_16: - LDRSH(rd, r0); + ass.Ldrsh(rd, MemOperand(r0)); break; case SZ_32I: - LDR(rd, r0); + ass.Ldr(rd, MemOperand(r0)); break; case SZ_32F: - VLDR(reg.mapfs(op->rd), r0, 0); + ass.Vldr(reg.mapFReg(op->rd), MemOperand(r0)); break; case SZ_64F: - VLDR(d0, r0, 0); - VSTR(d0, r8, op->rd.reg_nofs() / 4); + ass.Vldr(d0, MemOperand(r0)); + ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs())); break; } } @@ -1002,27 +854,27 @@ static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool { verify(!reg.IsAllocAny(op->rd)); // Need to call the handler twice - MOV32(r0, op->rs1._imm); - CALL((u32)ptr); - STR(r0, r8, op->rd.reg_nofs()); + ass.Mov(r0, op->rs1._imm); + call(ptr); + ass.Str(r0, MemOperand(r8, op->rd.reg_nofs())); - MOV32(r0, op->rs1._imm + 4); - CALL((u32)ptr); - STR(r0, r8, op->rd.reg_nofs() + 4); + ass.Mov(r0, op->rs1._imm + 4); + call(ptr); + ass.Str(r0, MemOperand(r8, op->rd.reg_nofs() + 4)); } else { - MOV32(r0, op->rs1._imm); - CALL((u32)ptr); + ass.Mov(r0, op->rs1._imm); + call(ptr); switch(optp) { case SZ_8: - SXTB(r0, r0); + ass.Sxtb(r0, r0); break; case SZ_16: - SXTH(r0, r0); + ass.Sxth(r0, r0); break; case SZ_32I: @@ -1035,9 +887,9 @@ static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool } if (reg.IsAllocg(op->rd)) - MOV(rd, r0); + ass.Mov(rd, r0); else if (reg.IsAllocf(op->rd)) - VMOV(reg.mapfs(op->rd), r0); + ass.Vmov(reg.mapFReg(op->rd), r0); else die("Unsupported"); } @@ -1046,48 +898,48 @@ static bool ngen_readm_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool return true; } -static bool ngen_writemem_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool staging, bool optimise) +static bool ngen_writemem_immediate(RuntimeBlockInfo* block, shil_opcode* op, bool optimise) { if (!op->rs1.is_imm()) return false; mem_op_type optp = memop_type(op); bool isram = false; - void* ptr = _vmem_write_const(op->rs1._imm, isram, std::min(4u, memop_bytes(optp))); + void* ptr = _vmem_write_const(op->rs1._imm, isram, std::min(4u, memop_bytes[optp])); - eReg rs2 = r1; - eFSReg rs2f = f0; + Register rs2 = r1; + SRegister rs2f = s0; if (op->rs2.is_imm()) - MOV32(rs2, op->rs2._imm); + ass.Mov(rs2, op->rs2._imm); else if (optp == SZ_32F) - rs2f = reg.mapf(op->rs2); + rs2f = reg.mapFReg(op->rs2); else if (optp != SZ_64F) - rs2 = reg.mapg(op->rs2); + rs2 = reg.mapReg(op->rs2); if (isram) { - MOV32(r0, (u32)ptr); + ass.Mov(r0, (u32)ptr); switch(optp) { case SZ_8: - STRB(rs2, r0); + ass.Strb(rs2, MemOperand(r0)); break; case SZ_16: - STRH(rs2, r0, 0); + ass.Strh(rs2, MemOperand(r0)); break; case SZ_32I: - STR(rs2, r0); + ass.Str(rs2, MemOperand(r0)); break; case SZ_32F: - VSTR(rs2f, r0, 0); + ass.Vstr(rs2f, MemOperand(r0)); break; case SZ_64F: - VLDR(d0, r8, op->rs2.reg_nofs() / 4); - VSTR(d0, r0, 0); + ass.Vldr(d0, MemOperand(r8, op->rs2.reg_nofs())); + ass.Vstr(d0, MemOperand(r0)); break; default: @@ -1099,154 +951,151 @@ static bool ngen_writemem_immediate(RuntimeBlockInfo* block, shil_opcode* op, bo { if (optp == SZ_64F) die("SZ_64F not supported"); - MOV32(r0, op->rs1._imm); + ass.Mov(r0, op->rs1._imm); if (optp == SZ_32F) - VMOV(r1, rs2f); - else if (r1 != rs2) - MOV(r1, rs2); + ass.Vmov(r1, rs2f); + else if (!rs2.Is(r1)) + ass.Mov(r1, rs2); - CALL((u32)ptr); + call(ptr); } return true; } -static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool 
staging, bool optimise) +static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool optimise) { switch(op->op) { case shop_readm: - { - if (!ngen_readm_immediate(block, op, staging, optimise)) + if (!ngen_readm_immediate(block, op, optimise)) { mem_op_type optp = memop_type(op); - eReg raddr=GenMemAddr(op); + Register raddr = GenMemAddr(op); if (_nvmem_enabled()) { - BIC(r1,raddr,0xE0000000); + ass.Bic(r1, raddr, 0xE0000000); switch(optp) { case SZ_8: - LDRSB(reg.mapg(op->rd),r1,r8,true); + ass.Ldrsb(reg.mapReg(op->rd), MemOperand(r1, r8)); break; case SZ_16: - LDRSH(reg.mapg(op->rd),r1,r8,true); + ass.Ldrsh(reg.mapReg(op->rd), MemOperand(r1, r8)); break; case SZ_32I: - LDR(reg.mapg(op->rd),r1,r8,Offset,true); + ass.Ldr(reg.mapReg(op->rd), MemOperand(r1, r8)); break; case SZ_32F: - ADD(r1,r1,r8); //3 opcodes, there's no [REG+REG] VLDR - VLDR(reg.mapf(op->rd),r1,0); + ass.Add(r1, r1, r8); //3 opcodes, there's no [REG+REG] VLDR + ass.Vldr(reg.mapFReg(op->rd), MemOperand(r1)); break; case SZ_64F: - ADD(r1,r1,r8); //3 opcodes, there's no [REG+REG] VLDR - VLDR(d0,r1,0); //TODO: use reg alloc + ass.Add(r1, r1, r8); //3 opcodes, there's no [REG+REG] VLDR + ass.Vldr(d0, MemOperand(r1)); //TODO: use reg alloc - VSTR(d0,r8,op->rd.reg_nofs()/4); + ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs())); break; } } else { switch(optp) { case SZ_8: - vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true); + vmem_slowpath(raddr, reg.mapReg(op->rd), s0, d0, optp, true); break; case SZ_16: - vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true); + vmem_slowpath(raddr, reg.mapReg(op->rd), s0, d0, optp, true); break; case SZ_32I: - vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true); + vmem_slowpath(raddr, reg.mapReg(op->rd), s0, d0, optp, true); break; case SZ_32F: - vmem_slowpath(raddr, r0, reg.mapf(op->rd), d0, optp, true); + vmem_slowpath(raddr, r0, reg.mapFReg(op->rd), d0, optp, true); break; case SZ_64F: - vmem_slowpath(raddr, r0, f0, d0, optp, true); - VSTR(d0,r8,op->rd.reg_nofs()/4); + vmem_slowpath(raddr, r0, s0, d0, optp, true); + ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs())); break; } } } - } - break; - + break; case shop_writem: - { - if (!ngen_writemem_immediate(block, op, staging, optimise)) + if (!ngen_writemem_immediate(block, op, optimise)) { mem_op_type optp = memop_type(op); - eReg raddr=GenMemAddr(op); + Register raddr = GenMemAddr(op); - eReg rs2 = r2; - eFSReg rs2f = f2; + Register rs2 = r2; + SRegister rs2f = s2; //TODO: use reg alloc if (optp == SZ_64F) - VLDR(d0,r8,op->rs2.reg_nofs()/4); + ass.Vldr(d0, MemOperand(r8, op->rs2.reg_nofs())); else if (op->rs2.is_imm()) { - MOV32(rs2, op->rs2._imm); + ass.Mov(rs2, op->rs2._imm); if (optp == SZ_32F) - VMOV(rs2f, rs2); + ass.Vmov(rs2f, rs2); } else { if (optp == SZ_32F) - rs2f = reg.mapf(op->rs2); + rs2f = reg.mapFReg(op->rs2); else - rs2 = reg.mapg(op->rs2); + rs2 = reg.mapReg(op->rs2); } - if (_nvmem_enabled()) { - BIC(r1,raddr,0xE0000000); + if (_nvmem_enabled()) + { + ass.Bic(r1, raddr, 0xE0000000); switch(optp) { case SZ_8: - STRB(rs2, r1, r8, Offset, true); + ass.Strb(rs2, MemOperand(r1, r8)); break; case SZ_16: - STRH(rs2, r1, r8, true); + ass.Strh(rs2, MemOperand(r1, r8)); break; case SZ_32I: - STR(rs2, r1, r8, Offset, true); + ass.Str(rs2, MemOperand(r1, r8)); break; case SZ_32F: - ADD(r1, r1, r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ - VSTR(rs2f, r1, 0); + ass.Add(r1, r1, r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ + ass.Vstr(rs2f, MemOperand(r1)); break; 
case SZ_64F: - ADD(r1, r1, r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ - VSTR(d0, r1, 0); //TODO: use reg alloc + ass.Add(r1, r1, r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ + ass.Vstr(d0, MemOperand(r1)); //TODO: use reg alloc break; } } else { switch(optp) { case SZ_8: - vmem_slowpath(raddr, rs2, f0, d0, optp, false); + vmem_slowpath(raddr, rs2, s0, d0, optp, false); break; case SZ_16: - vmem_slowpath(raddr, rs2, f0, d0, optp, false); + vmem_slowpath(raddr, rs2, s0, d0, optp, false); break; case SZ_32I: - vmem_slowpath(raddr, rs2, f0, d0, optp, false); + vmem_slowpath(raddr, rs2, s0, d0, optp, false); break; case SZ_32F: @@ -1254,289 +1103,264 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool s break; case SZ_64F: - vmem_slowpath(raddr, r0, f0, d0, optp, false); + vmem_slowpath(raddr, r0, s0, d0, optp, false); break; } } } - } - break; + break; //dynamic jump, r+imm32.This will be at the end of the block, but doesn't -have- to be the last opcode case shop_jdyn: - { - //ReadReg rs1(r4,op->rs1); - verify(op->rd.is_reg() && op->rd._reg==reg_pc_dyn); + verify(op->rd.is_reg() && op->rd._reg == reg_pc_dyn); if (op->rs2.is_imm()) { - MOV32(r2, op->rs2.imm_value()); - ADD(r4, reg.mapg(op->rs1), r2); + ass.Mov(r2, op->rs2.imm_value()); + ass.Add(r4, reg.mapReg(op->rs1), r2); } - else //if (r4!=rs1.reg) + else { - MOV(r4, reg.mapg(op->rs1)); + ass.Mov(r4, reg.mapReg(op->rs1)); } break; - } case shop_mov32: + verify(op->rd.is_r32()); + + if (op->rs1.is_imm()) { - verify(op->rd.is_r32()); - - if (op->rs1.is_imm()) + if (op->rd.is_r32i()) { - if (op->rd.is_r32i()) - { - MOV32(reg.mapg(op->rd),op->rs1._imm); - } - else - { - if (op->rs1._imm==0) - { - //VEOR(reg.mapf(op->rd),reg.mapf(op->rd),reg.mapf(op->rd)); - //hum, vmov can't do 0, but can do all kind of weird small consts ... really useful ... - //simd is slow on a9 -#if 0 - MOVW(r0,0); - VMOV(reg.mapfs(op->rd),r0); -#else - //1-1=0 ! - //should be slightly faster ... - //we could get rid of the imm mov, if not for infs & co .. - VMOV(reg.mapfs(op->rd),fpu_imm_1); - VSUB_VFP(reg.mapfs(op->rd),reg.mapfs(op->rd),reg.mapfs(op->rd)); -#endif - } - else if (op->rs1._imm == 0x3F800000) - VMOV(reg.mapfs(op->rd), fpu_imm_1); - else - { - MOV32(r0, op->rs1._imm); - VMOV(reg.mapfs(op->rd), r0); - } - } - } - else if (op->rs1.is_r32()) - { - u32 type=0; - - if (reg.IsAllocf(op->rd)) - type|=1; - - if (reg.IsAllocf(op->rs1)) - type|=2; - - switch(type) - { - case 0: //reg=reg - if (reg.mapg(op->rd)!=reg.mapg(op->rs1)) - MOV(reg.mapg(op->rd),reg.mapg(op->rs1)); - break; - - case 1: //vfp=reg - VMOV(reg.mapfs(op->rd),reg.mapg(op->rs1)); - break; - - case 2: //reg=vfp - VMOV(reg.mapg(op->rd),reg.mapfs(op->rs1)); - break; - - case 3: //vfp=vfp - VMOV(reg.mapfs(op->rd),reg.mapfs(op->rs1)); - break; - } + ass.Mov(reg.mapReg(op->rd), op->rs1._imm); } else { - die("Invalid mov32 size"); + if (op->rs1._imm==0) + { + //VEOR(reg.mapFReg(op->rd),reg.mapFReg(op->rd),reg.mapFReg(op->rd)); + //hum, vmov can't do 0, but can do all kind of weird small consts ... really useful ... + //simd is slow on a9 +#if 0 + ass.Movw(r0, 0); + ass.Vmov(reg.mapFReg(op->rd), r0); +#else + //1-1=0 ! + //should be slightly faster ... + //we could get rid of the imm mov, if not for infs & co .. 
+ ass.Vmov(reg.mapFReg(op->rd), 1.f);; + ass.Vsub(reg.mapFReg(op->rd), reg.mapFReg(op->rd), reg.mapFReg(op->rd)); +#endif + } + else if (op->rs1._imm == 0x3F800000) + ass.Vmov(reg.mapFReg(op->rd), 1.f); + else + { + ass.Mov(r0, op->rs1._imm); + ass.Vmov(reg.mapFReg(op->rd), r0); + } } - + } + else if (op->rs1.is_r32()) + { + u32 type = 0; + + if (reg.IsAllocf(op->rd)) + type |= 1; + + if (reg.IsAllocf(op->rs1)) + type |= 2; + + switch(type) + { + case 0: // reg = reg + ass.Mov(reg.mapReg(op->rd), reg.mapReg(op->rs1)); + break; + + case 1: // vfp = reg + ass.Vmov(reg.mapFReg(op->rd), reg.mapReg(op->rs1)); + break; + + case 2: // reg = vfp + ass.Vmov(reg.mapReg(op->rd), reg.mapFReg(op->rs1)); + break; + + case 3: // vfp = vfp + ass.Vmov(reg.mapFReg(op->rd), reg.mapFReg(op->rs1)); + break; + } + } + else + { + die("Invalid mov32 size"); } break; case shop_mov64: - { verify(op->rs1.is_r64() && op->rd.is_r64()); - //LoadSh4Reg64(r0,op->rs1); - //StoreSh4Reg64(r0,op->rd); - - VLDR(d0,r8,op->rs1.reg_nofs()/4); - VSTR(d0,r8,op->rd.reg_nofs()/4); + ass.Vldr(d0, MemOperand(r8, op->rs1.reg_nofs())); + ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs())); break; - } case shop_jcond: - { - verify(op->rd.is_reg() && op->rd._reg==reg_pc_dyn); - //ReadReg rs1(r4,op->rs1); - - //if (r4!=rs1.reg) - MOV(r4,reg.mapg(op->rs1)); + verify(op->rd.is_reg() && op->rd._reg == reg_pc_dyn); + ass.Mov(r4, reg.mapReg(op->rs1)); break; - } case shop_ifb: - { if (op->rs1._imm) { - MOV32(r1,op->rs2._imm); - StoreSh4Reg_mem(r1,reg_nextpc); - //StoreImms(r3,r2,(u32)&next_pc,(u32)op->rs2._imm); + ass.Mov(r1, op->rs2._imm); + storeSh4Reg(r1, reg_nextpc); } - MOV32(r0, op->rs3._imm); - CALL((u32)(OpPtr[op->rs3._imm])); + ass.Mov(r0, op->rs3._imm); + call((void *)OpPtr[op->rs3._imm]); break; - } #ifndef CANONICALTEST - case shop_neg: ngen_Unary(op,NEG); break; - case shop_not: ngen_Unary(op,NOT); break; + case shop_neg: + ass.Rsb(reg.mapReg(op->rd), reg.mapReg(op->rs1), 0); + break; + case shop_not: + ass.Mvn(reg.mapReg(op->rd), reg.mapReg(op->rs1)); + break; + case shop_shl: + ngen_Binary(op, &MacroAssembler::Lsl); + break; + case shop_shr: + ngen_Binary(op, &MacroAssembler::Lsr); + break; + case shop_sar: + ngen_Binary(op, &MacroAssembler::Asr); + break; - case shop_shl: ngen_Binary(op,LSL,LSL); break; - case shop_shr: ngen_Binary(op,LSR,LSR); break; - case shop_sar: ngen_Binary(op,ASR,ASR); break; + case shop_and: + ngen_Binary(op, &MacroAssembler::And); + break; + case shop_or: + ngen_Binary(op, &MacroAssembler::Orr); + break; + case shop_xor: + ngen_Binary(op, &MacroAssembler::Eor); + break; - case shop_and: ngen_Binary(op,AND,AND); break; - case shop_or: ngen_Binary(op,ORR,ORR); break; - case shop_xor: ngen_Binary(op,EOR,EOR); break; + case shop_add: + ngen_Binary(op, &MacroAssembler::Add); + break; + case shop_sub: + ngen_Binary(op, &MacroAssembler::Sub); + break; + case shop_ror: + ngen_Binary(op, &MacroAssembler::Ror); + break; - case shop_add: ngen_Binary(op,ADD,ADD); break; - case shop_sub: ngen_Binary(op,SUB,SUB); break; - case shop_ror: ngen_Binary(op,ROR,ROR); break; - case shop_adc: - { - //RSBS(reg.map(op.rs3),reg.map(op.rs3),0); - //ADCS(reg.map(op.rs1),reg.map(op.rs2),reg.map(op.rs3)); - //ADC(reg.map(op.rs3),reg.map(op.rs3),reg.map(op.rs3),LSL,31); + { + Register rs1 = GetParam(op->rs1, r1); + Register rs2 = GetParam(op->rs2, r2); + Register rs3 = GetParam(op->rs3, r3); - //ADD(r0,reg.map(op.rs1), - -#if 0 - MOVW(r1,0); - ADD(r0,reg.mapg(op->rs1),reg.mapg(op->rs2),true); - ADC(r1,r1,0); - 
ADD(reg.mapg(op->rd),r0,reg.mapg(op->rs3),true); - ADC(reg.mapg(op->rd2),r1,0); -#else - eReg rs1 = GetParam(op->rs1, r1); - eReg rs2 = GetParam(op->rs2, r2); - eReg rs3 = GetParam(op->rs3, r3); - - LSR(r0, rs3, 1, true); //C=rs3, r0=0 - ADC(reg.mapg(op->rd), rs1, rs2, true); //(C,rd)=rs1+rs2+rs3(C) - ADC(reg.mapg(op->rd2), r0, 0); //rd2=C, (or MOVCS rd2, 1) -#endif - } - break; + ass.Lsr(SetFlags, r0, rs3, 1); //C=rs3, r0=0 + ass.Adc(SetFlags, reg.mapReg(op->rd), rs1, rs2); //(C,rd)=rs1+rs2+rs3(C) + ass.Adc(reg.mapReg(op->rd2), r0, 0); //rd2=C, (or MOVCS rd2, 1) + } + break; case shop_rocr: { - eReg rd2 = reg.mapg(op->rd2); - eReg rs1 = GetParam(op->rs1, r1); - eReg rs2 = GetParam(op->rs2, r2); - if (rd2 != rs1) { - LSR(rd2, rs2, 1, true); //C=rs2, rd2=0 - AND(rd2, rs1, 1); //get new carry + Register rd2 = reg.mapReg(op->rd2); + Register rs1 = GetParam(op->rs1, r1); + Register rs2 = GetParam(op->rs2, r2); + if (!rd2.Is(rs1)) { + ass.Lsr(SetFlags, rd2, rs2, 1); //C=rs2, rd2=0 + ass.And(rd2, rs1, 1); //get new carry } else { - LSR(r0, rs2, 1, true); //C=rs2, rd2=0 - ADD(r0, rs1, 1); + ass.Lsr(SetFlags, r0, rs2, 1); //C=rs2, rd2=0 + ass.Add(r0, rs1, 1); } - RRX(reg.mapg(op->rd), rs1); //RRX w/ carry :) - if (rd2 == rs1) - MOV(rd2, r0); - + ass.Rrx(reg.mapReg(op->rd), rs1); //RRX w/ carry :) + if (rd2.Is(rs1)) + ass.Mov(rd2, r0); } break; case shop_rocl: { - //ADD(reg.mapg(op->rd),reg.mapg(op->rs2),reg.mapg(op->rs1),1,true); //(C,rd)= rs1<<1 + (|) rs2 - eReg rs1 = GetParam(op->rs1, r1); - eReg rs2 = GetParam(op->rs2, r2); - ORR(reg.mapg(op->rd), rs2, rs1, true, S_LSL, 1); //(C,rd)= rs1<<1 + (|) rs2 - MOVW(reg.mapg(op->rd2), 0); //clear rd2 (for ADC/MOVCS) - ADC(reg.mapg(op->rd2), reg.mapg(op->rd2), 0); //rd2=C (or MOVCS rd2, 1) + Register rs1 = GetParam(op->rs1, r1); + Register rs2 = GetParam(op->rs2, r2); + ass.Orr(SetFlags, reg.mapReg(op->rd), rs2, Operand(rs1, LSL, 1)); //(C,rd)= rs1<<1 + (|) rs2 + ass.Mov(reg.mapReg(op->rd2), 0); //clear rd2 (for ADC/MOVCS) + ass.Adc(reg.mapReg(op->rd2), reg.mapReg(op->rd2), 0); //rd2=C (or MOVCS rd2, 1) } break; case shop_sbc: - //printf("sbc: r%d r%d r%d r%d r%d\n",reg.mapg(op->rd),reg.mapg(op->rd2),reg.mapg(op->rs1),reg.mapg(op->rs2), reg.mapg(op->rs3)); { - eReg rd2 = reg.mapg(op->rd2); - eReg rs1 = GetParam(op->rs1, r1); - if (rs1 == rd2) + Register rd2 = reg.mapReg(op->rd2); + Register rs1 = GetParam(op->rs1, r1); + if (rs1.Is(rd2)) { - MOV(r1, rs1); + ass.Mov(r1, rs1); rs1 = r1; } - eReg rs2 = GetParam(op->rs2, r2); - if (rs2 == rd2) + Register rs2 = GetParam(op->rs2, r2); + if (rs2.Is(rd2)) { - MOV(r2, rs2); + ass.Mov(r2, rs2); rs2 = r2; } - eReg rs3 = GetParam(op->rs3, r3); - EOR(rd2, rs3, 1); - LSR(rd2, rd2, 1, true); //C=rs3, rd2=0 - SBC(reg.mapg(op->rd), rs1, rs2, true); - MOV(rd2, 1, CC_CC); + Register rs3 = GetParam(op->rs3, r3); + ass.Eor(rd2, rs3, 1); + ass.Lsr(SetFlags, rd2, rd2, 1); //C=rs3, rd2=0 + ass.Sbc(SetFlags, reg.mapReg(op->rd), rs1, rs2); + ass.Mov(cc, rd2, 1); } break; case shop_negc: { - eReg rd2 = reg.mapg(op->rd2); - eReg rs1 = GetParam(op->rs1, r1); - if (rs1 == rd2) + Register rd2 = reg.mapReg(op->rd2); + Register rs1 = GetParam(op->rs1, r1); + if (rs1.Is(rd2)) { - MOV(r1, rs1); + ass.Mov(r1, rs1); rs1 = r1; } - eReg rs2 = GetParam(op->rs2, r2); - EOR(rd2, rs2, 1); - LSR(rd2, rd2, 1, true); //C=rs3, rd2=0 - SBC(reg.mapg(op->rd), rd2, rs1, true); // rd2 == 0 - MOV(rd2, 1, CC_CC); + Register rs2 = GetParam(op->rs2, r2); + ass.Eor(rd2, rs2, 1); + ass.Lsr(SetFlags, rd2, rd2, 1); //C=rs3, rd2=0 + ass.Sbc(SetFlags, 
reg.mapReg(op->rd), rd2, rs1); // rd2 == 0 + ass.Mov(cc, rd2, 1); } break; case shop_shld: - //printf("shld: r%d r%d r%d\n",reg.mapg(op->rd),reg.mapg(op->rs1),reg.mapg(op->rs2)); { verify(!op->rs2.is_imm()); - AND(r0, reg.mapg(op->rs2), 0x8000001F, true); - RSB(r0, r0, 0x80000020, CC_MI); - eReg rs1 = GetParam(op->rs1, r1); - LSR(reg.mapg(op->rd), rs1, r0, CC_MI); - LSL(reg.mapg(op->rd), rs1, r0, CC_PL); - //MOV(reg.mapg(op->rd), reg.mapg(op->rs1), S_LSL, r0, CC_PL); - //MOV(reg.mapg(op->rd), reg.mapg(op->rs1), S_LSR, r0, CC_MI); + ass.And(SetFlags, r0, reg.mapReg(op->rs2), 0x8000001F); + ass.Rsb(mi, r0, r0, 0x80000020); + Register rs1 = GetParam(op->rs1, r1); + ass.Lsr(mi, reg.mapReg(op->rd), rs1, r0); + ass.Lsl(pl, reg.mapReg(op->rd), rs1, r0); } break; case shop_shad: - //printf("shad: r%d r%d r%d\n",reg.mapg(op->rd),reg.mapg(op->rs1),reg.mapg(op->rs2)); { verify(!op->rs2.is_imm()); - AND(r0, reg.mapg(op->rs2), 0x8000001F, true); - RSB(r0, r0, 0x80000020, CC_MI); - eReg rs1 = GetParam(op->rs1, r1); - ASR(reg.mapg(op->rd), rs1, r0, CC_MI); - LSL(reg.mapg(op->rd), rs1, r0, CC_PL); - //MOV(reg.mapg(op->rd), reg.mapg(op->rs1), S_LSL, r0, CC_PL); - //MOV(reg.mapg(op->rd), reg.mapg(op->rs1), S_ASR, r0, CC_MI); + ass.And(SetFlags, r0, reg.mapReg(op->rs2), 0x8000001F); + ass.Rsb(mi, r0, r0, 0x80000020); + Register rs1 = GetParam(op->rs1, r1); + ass.Asr(mi, reg.mapReg(op->rd), rs1, r0); + ass.Lsl(pl, reg.mapReg(op->rd), rs1, r0); } break; case shop_sync_sr: - { //must flush: SRS, SRT, r0-r7, r0b-r7b - CALL((u32)UpdateSR); + call((void *)UpdateSR); break; - } case shop_test: case shop_seteq: @@ -1544,64 +1368,64 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool s case shop_setgt: case shop_setae: case shop_setab: - { - eReg rd = reg.mapg(op->rd); - eReg rs1 = GetParam(op->rs1, r0); - - eReg rs2 = r1; - bool is_imm = false; - - if (op->rs2.is_imm()) { - if (!is_i8r4(op->rs2._imm)) - MOV32(rs2,(u32)op->rs2._imm); + Register rd = reg.mapReg(op->rd); + Register rs1 = GetParam(op->rs1, r0); + + Register rs2 = r1; + bool is_imm = false; + + if (op->rs2.is_imm()) + { + if (!ImmediateA32::IsImmediateA32(op->rs2._imm)) + ass.Mov(rs2, (u32)op->rs2._imm); + else + is_imm = true; + } + else if (op->rs2.is_r32i()) + { + rs2 = reg.mapReg(op->rs2); + } else - is_imm = true; - } - else if (op->rs2.is_r32i()) - { - rs2 = reg.mapg(op->rs2); - } - else - { - ERROR_LOG(DYNAREC, "ngen_Bin ??? %d", op->rs2.type); - verify(false); - } + { + ERROR_LOG(DYNAREC, "ngen_Bin ??? 
%d", op->rs2.type); + verify(false); + } - if (op->op == shop_test) - { - if (is_imm) - TST(rs1, op->rs2._imm); + if (op->op == shop_test) + { + if (is_imm) + ass.Tst(rs1, op->rs2._imm); + else + ass.Tst(rs1, rs2); + } else - TST(rs1, rs2); - } - else - { - if (is_imm) - CMP(rs1, op->rs2._imm); - else - CMP(rs1, rs2); - } + { + if (is_imm) + ass.Cmp(rs1, op->rs2._imm); + else + ass.Cmp(rs1, rs2); + } - eCC opcls2[]={CC_EQ,CC_EQ,CC_GE,CC_GT,CC_HS,CC_HI }; + static const ConditionType opcls2[] = { eq, eq, ge, gt, hs, hi }; - MOVW(rd, 0); - MOVW(rd, 1, opcls2[op->op-shop_test]); - break; - } + ass.Mov(rd, 0); + ass.Mov(opcls2[op->op-shop_test], rd, 1); + } + break; case shop_setpeq: { - eReg rs1 = GetParam(op->rs1, r1); - eReg rs2 = GetParam(op->rs2, r2); - EOR(r1, rs1, rs2); - MOVW(reg.mapg(op->rd), 0); + Register rs1 = GetParam(op->rs1, r1); + Register rs2 = GetParam(op->rs2, r2); + ass.Eor(r1, rs1, rs2); + ass.Mov(reg.mapReg(op->rd), 0); - TST(r1, 0xFF000000u); - TST(r1, 0x00FF0000u, CC_NE); - TST(r1, 0x0000FF00u, CC_NE); - TST(r1, 0x000000FFu, CC_NE); - MOVW(reg.mapg(op->rd), 1, CC_EQ); + ass.Tst(r1, 0xFF000000u); + ass.Tst(ne, r1, 0x00FF0000u); + ass.Tst(ne, r1, 0x0000FF00u); + ass.Tst(ne, r1, 0x000000FFu); + ass.Mov(eq, reg.mapReg(op->rd), 1); } break; @@ -1609,100 +1433,98 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool s case shop_mul_u16: { - eReg rs2 = GetParam(op->rs2, r2); - UXTH(r1, reg.mapg(op->rs1)); - UXTH(r2, rs2); - MUL(reg.mapg(op->rd), r1, r2); + Register rs2 = GetParam(op->rs2, r2); + ass.Uxth(r1, reg.mapReg(op->rs1)); + ass.Uxth(r2, rs2); + ass.Mul(reg.mapReg(op->rd), r1, r2); } break; case shop_mul_s16: { - eReg rs2 = GetParam(op->rs2, r2); - SXTH(r1, reg.mapg(op->rs1)); - SXTH(r2, rs2); - MUL(reg.mapg(op->rd), r1, r2); + Register rs2 = GetParam(op->rs2, r2); + ass.Sxth(r1, reg.mapReg(op->rs1)); + ass.Sxth(r2, rs2); + ass.Mul(reg.mapReg(op->rd), r1, r2); } break; case shop_mul_i32: { - eReg rs2 = GetParam(op->rs2, r2); + Register rs2 = GetParam(op->rs2, r2); //x86_opcode_class opdt[]={op_movzx16to32,op_movsx16to32,op_mov32,op_mov32,op_mov32}; //x86_opcode_class opmt[]={op_mul32,op_mul32,op_mul32,op_mul32,op_imul32}; //only the top 32 bits are different on signed vs unsigned - MUL(reg.mapg(op->rd), reg.mapg(op->rs1), rs2); + ass.Mul(reg.mapReg(op->rd), reg.mapReg(op->rs1), rs2); } break; case shop_mul_u64: { - eReg rs2 = GetParam(op->rs2, r2); - UMULL(reg.mapg(op->rd2), reg.mapg(op->rd), reg.mapg(op->rs1), rs2); + Register rs2 = GetParam(op->rs2, r2); + ass.Umull(reg.mapReg(op->rd), reg.mapReg(op->rd2), reg.mapReg(op->rs1), rs2); } break; case shop_mul_s64: { - eReg rs2 = GetParam(op->rs2, r2); - SMULL(reg.mapg(op->rd2), reg.mapg(op->rd), reg.mapg(op->rs1), rs2); + Register rs2 = GetParam(op->rs2, r2); + ass.Smull(reg.mapReg(op->rd), reg.mapReg(op->rd2), reg.mapReg(op->rs1), rs2); } break; case shop_pref: { - ConditionCode cc = CC_EQ; + ConditionType cc = eq; if (!op->rs1.is_imm()) { - LSR(r1,reg.mapg(op->rs1),26); - MOV(r0,reg.mapg(op->rs1)); - CMP(r1,0x38); + ass.Lsr(r1, reg.mapReg(op->rs1), 26); + ass.Mov(r0, reg.mapReg(op->rs1)); + ass.Cmp(r1, 0x38); } else { // The SSA pass has already checked that the // destination is a store queue so no need to check - MOV32(r0, op->rs1.imm_value()); - cc = CC_AL; + ass.Mov(r0, op->rs1.imm_value()); + cc = al; } if (CCN_MMUCR.AT) { - CALL((unat)&do_sqw_mmu, cc); + call((void *)do_sqw_mmu, cc); } else { - LDR(r2,r8,rcb_noffs(&do_sqw_nommu)); - SUB(r1,r8,-rcb_noffs(sq_both)); - BLX(r2, cc); + 
ass.Ldr(r2, MemOperand(r8, rcbOffset(do_sqw_nommu))); + ass.Sub(r1, r8, -rcbOffset(sq_buffer)); + ass.Blx(cc, r2); } } break; case shop_ext_s8: case shop_ext_s16: - { - verify(op->rd.is_r32i()); - verify(op->rs1.is_r32i()); - - (op->op==shop_ext_s8?SXTB:SXTH)(reg.mapg(op->rd),reg.mapg(op->rs1),CC_AL); - } + if (op->op == shop_ext_s8) + ass.Sxtb(reg.mapReg(op->rd), reg.mapReg(op->rs1)); + else + ass.Sxth(reg.mapReg(op->rd), reg.mapReg(op->rs1)); break; case shop_xtrct: { - eReg rd = reg.mapg(op->rd); - eReg rs1 = reg.mapg(op->rs1); - eReg rs2 = reg.mapg(op->rs2); - if (rd == rs1) + Register rd = reg.mapReg(op->rd); + Register rs1 = reg.mapReg(op->rs1); + Register rs2 = reg.mapReg(op->rs2); + if (rd.Is(rs1)) { - verify(rd != rs2); - LSR(rd, rs1, 16); - LSL(r0, rs2, 16); + verify(!rd.Is(rs2)); + ass.Lsr(rd, rs1, 16); + ass.Lsl(r0, rs2, 16); } else { - LSL(rd, rs2, 16); - LSR(r0, rs1, 16); + ass.Lsl(rd, rs2, 16); + ass.Lsr(r0, rs1, 16); } - ORR(rd, rd, r0); + ass.Orr(rd, rd, r0); } break; @@ -1714,188 +1536,160 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool s case shop_fsub: case shop_fmul: case shop_fdiv: - { - FPBinOP* opcds[] = { VADD_VFP, VSUB_VFP, VMUL_VFP, VDIV_VFP }; - ngen_fp_bin(op, opcds[op->op-shop_fadd]); - } - break; + { + static const FPBinOP opcds[] = { + &MacroAssembler::Vadd, &MacroAssembler::Vsub, &MacroAssembler::Vmul, &MacroAssembler::Vdiv + }; + ngen_fp_bin(op, opcds[op->op - shop_fadd]); + } + break; case shop_fabs: case shop_fneg: - { - FPUnOP* opcds[] = { VABS_VFP, VNEG_VFP }; - ngen_fp_una(op, opcds[op->op-shop_fabs]); - } - break; + { + static const FPUnOP opcds[] = { &MacroAssembler::Vabs, &MacroAssembler::Vneg }; + ngen_fp_una(op, opcds[op->op - shop_fabs]); + } + break; case shop_fsqrt: - { - ngen_fp_una(op, VSQRT_F32); - } - break; + ngen_fp_una(op, &MacroAssembler::Vsqrt); + break; case shop_fmac: { - eFSReg rd = reg.mapf(op->rd); - eFSReg rs1 = f1; + SRegister rd = reg.mapFReg(op->rd); + SRegister rs1 = s1; if (op->rs1.is_imm()) { - MOV32(r0, op->rs1.imm_value()); - VMOV(rs1, r0); + ass.Mov(r0, op->rs1.imm_value()); + ass.Vmov(rs1, r0); } else - rs1 = reg.mapf(op->rs1); - eFSReg rs2 = f2; + rs1 = reg.mapFReg(op->rs1); + SRegister rs2 = s2; if (op->rs2.is_imm()) { - MOV32(r1, op->rs2.imm_value()); - VMOV(rs2, r1); + ass.Mov(r1, op->rs2.imm_value()); + ass.Vmov(rs2, r1); } else { - rs2 = reg.mapf(op->rs2); - if (rs2 == rd) + rs2 = reg.mapFReg(op->rs2); + if (rs2.Is(rd)) { - VMOV(f2, rs2); - rs2 = f2; + ass.Vmov(s2, rs2); + rs2 = s2; } } - eFSReg rs3 = f3; + SRegister rs3 = s3; if (op->rs3.is_imm()) { - MOV32(r2, op->rs3.imm_value()); - VMOV(rs3, r2); + ass.Mov(r2, op->rs3.imm_value()); + ass.Vmov(rs3, r2); } else { - rs3 = reg.mapf(op->rs3); - if (rs3 == rd) + rs3 = reg.mapFReg(op->rs3); + if (rs3.Is(rd)) { - VMOV(f3, rs3); - rs3 = f3; + ass.Vmov(s3, rs3); + rs3 = s3; } } - if (rd != rs1) - VMOV(rd, rs1); - VMLA_VFP(rd, rs2, rs3); + if (!rd.Is(rs1)) + ass.Vmov(rd, rs1); + ass.Vmla(rd, rs2, rs3); } break; case shop_fsrra: - { - VMOV(f1,fpu_imm_1); - VSQRT_VFP(f0,reg.mapfs(op->rs1)); - - VDIV_VFP(reg.mapfs(op->rd),f1,f0); - } + ass.Vmov(s1, 1.f); + ass.Vsqrt(s0, reg.mapFReg(op->rs1)); + ass.Vdiv(reg.mapFReg(op->rd), s1, s0); break; case shop_fsetgt: case shop_fseteq: - { - // - #if 1 - { - //this is apparently much faster (tested on A9) - MOVW(reg.mapg(op->rd),0); - VCMP_F32(reg.mapfs(op->rs1),reg.mapfs(op->rs2)); + //this is apparently much faster (tested on A9) + ass.Mov(reg.mapReg(op->rd), 0); + ass.Vcmp(reg.mapFReg(op->rs1), 
reg.mapFReg(op->rs2)); - VMRS(R15); - if (op->op==shop_fsetgt) - { - MOVW(reg.mapg(op->rd),1,CC_GT); - } - else - { - MOVW(reg.mapg(op->rd),1,CC_EQ); - } - } + ass.Vmrs(RegisterOrAPSR_nzcv(APSR_nzcv), FPSCR); + if (op->op == shop_fsetgt) + ass.Mov(gt, reg.mapReg(op->rd), 1); + else + ass.Mov(eq, reg.mapReg(op->rd), 1); #else - { - if (op->op==shop_fsetgt) - VCGT_F32(d0,reg.mapf(op->rs1),reg.mapf(op->rs2)); - else - VCEQ_F32(d0,reg.mapf(op->rs1),reg.mapf(op->rs2)); + if (op->op == shop_fsetgt) + ass.Vcgt(d0, reg.mapFReg(op->rs1), reg.mapFReg(op->rs2)); + else + ass.Vceq(d0, reg.mapFReg(op->rs1), reg.mapFReg(op->rs2)); - VMOV(r0,f0); - - AND(reg.mapg(op->rd),r0,1); - } + ass.Vmov(r0, s0); + ass.And(reg.mapReg(op->rd), r0, 1); #endif - } - break; + break; case shop_fsca: - { - //r1: base ptr - MOVW(r1,((unat)sin_table)&0xFFFF); - UXTH(r0,reg.mapg(op->rs1)); - MOVT(r1,((u32)sin_table)>>16); - - /* - LDRD(r0,r1,r0,lsl,3); - VMOV.64 - or - ADD(r0,r1,r0,LSL,3); - VLDR(d0,r0); - */ + //r1: base ptr + ass.Mov(r1, (u32)sin_table & 0xFFFF); + ass.Uxth(r0, reg.mapReg(op->rs1)); + ass.Movt(r1, (u32)sin_table >> 16); - //LSL(r0,r0,3); - //ADD(r0,r1,r0); //EMITTER: Todo, add with shifted ! - ADD(r0,r1,r0, S_LSL, 3); - - VLDR(/*reg.mapf(op->rd,0)*/d0,r0,0); - VSTR(d0,r8,op->rd.reg_nofs()/4); - } + ass.Add(r0, r1, Operand(r0, LSL, 3)); + + ass.Vldr(d0, MemOperand(r0)); + ass.Vstr(d0, MemOperand(r8, op->rd.reg_nofs())); break; case shop_fipr: { - eFQReg _r1=q0; - eFQReg _r2=q0; + QRegister _r1 = q0; + QRegister _r2 = q0; - SUB(r0,r8,op->rs1.reg_aofs()); - if (op->rs2.reg_aofs()==op->rs1.reg_aofs()) + ass.Sub(r0, r8, op->rs1.reg_aofs()); + if (op->rs2.reg_aofs() == op->rs1.reg_aofs()) { - VLDM(d0,r0,2); + ass.Vldm(r0, NO_WRITE_BACK, DRegisterList(d0, 2)); } else { - SUB(r1,r8,op->rs2.reg_aofs()); - VLDM(d0,r0,2); - VLDM(d2,r1,2); - _r2=q1; + ass.Sub(r1, r8, op->rs2.reg_aofs()); + ass.Vldm(r0, NO_WRITE_BACK, DRegisterList(d0, 2)); + ass.Vldm(r1, NO_WRITE_BACK, DRegisterList(d2, 2)); + _r2 = q1; } #if 1 //VFP - eFSReg fs2=_r2==q0?f0:f4; + SRegister fs2 = _r2.Is(q0) ? 
s0 : s4; - VMUL_VFP(reg.mapfs(op->rd),f0,(eFSReg)(fs2+0)); - VMLA_VFP(reg.mapfs(op->rd),f1,(eFSReg)(fs2+1)); - VMLA_VFP(reg.mapfs(op->rd),f2,(eFSReg)(fs2+2)); - VMLA_VFP(reg.mapfs(op->rd),f3,(eFSReg)(fs2+3)); + ass.Vmul(reg.mapFReg(op->rd), s0, fs2); + ass.Vmla(reg.mapFReg(op->rd), s1, SRegister(fs2.GetCode() + 1)); + ass.Vmla(reg.mapFReg(op->rd), s2, SRegister(fs2.GetCode() + 2)); + ass.Vmla(reg.mapFReg(op->rd), s3, SRegister(fs2.GetCode() + 3)); #else - VMUL_F32(q0,_r1,_r2); - VPADD_F32(d0,d0,d1); - VADD_VFP(reg.mapfs(op->rd),f0,f1); + ass.Vmul(q0, _r1, _r2); + ass.Vpadd(d0, d0, d1); + ass.Vadd(reg.mapFReg(op->rd), f0, f1); #endif } break; case shop_ftrv: { - eReg rdp=r1; - SUB(r2,r8,op->rs2.reg_aofs()); - SUB(r1,r8,op->rs1.reg_aofs()); + Register rdp = r1; + ass.Sub(r2, r8, op->rs2.reg_aofs()); + ass.Sub(r1, r8, op->rs1.reg_aofs()); if (op->rs1.reg_aofs() != op->rd.reg_aofs()) { - rdp=r0; - SUB(r0,r8,op->rd.reg_aofs()); + rdp = r0; + ass.Sub(r0, r8, op->rd.reg_aofs()); } #if 1 @@ -1905,178 +1699,116 @@ static void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool s //f12,f13,f14,f15 : mtx temp //(This is actually faster than using neon) - VLDM(d4,r2,2,1); - VLDM(d0,r1,2); + ass.Vldm(r2, WRITE_BACK, DRegisterList(d4, 2)); + ass.Vldm(r1, NO_WRITE_BACK, DRegisterList(d0, 2)); - VMUL_VFP(f4,f8,f0); - VMUL_VFP(f5,f9,f0); - VMUL_VFP(f6,f10,f0); - VMUL_VFP(f7,f11,f0); + ass.Vmul(s4, vixl::aarch32::s8, s0); + ass.Vmul(s5, s9, s0); + ass.Vmul(s6, s10, s0); + ass.Vmul(s7, s11, s0); - VLDM(d6,r2,2,1); + ass.Vldm(r2, WRITE_BACK, DRegisterList(d6, 2)); - VMLA_VFP(f4,f12,f1); - VMLA_VFP(f5,f13,f1); - VMLA_VFP(f6,f14,f1); - VMLA_VFP(f7,f15,f1); + ass.Vmla(s4, s12, s1); + ass.Vmla(s5, s13, s1); + ass.Vmla(s6, s14, s1); + ass.Vmla(s7, s15, s1); - VLDM(d4,r2,2,1); + ass.Vldm(r2, WRITE_BACK, DRegisterList(d4, 2)); - VMLA_VFP(f4,f8,f2); - VMLA_VFP(f5,f9,f2); - VMLA_VFP(f6,f10,f2); - VMLA_VFP(f7,f11,f2); + ass.Vmla(s4, vixl::aarch32::s8, s2); + ass.Vmla(s5, s9, s2); + ass.Vmla(s6, s10, s2); + ass.Vmla(s7, s11, s2); - VLDM(d6,r2,2); + ass.Vldm(r2, NO_WRITE_BACK, DRegisterList(d6, 2)); - VMLA_VFP(f4,f12,f3); - VMLA_VFP(f5,f13,f3); - VMLA_VFP(f6,f14,f3); - VMLA_VFP(f7,f15,f3); + ass.Vmla(s4, s12, s3); + ass.Vmla(s5, s13, s3); + ass.Vmla(s6, s14, s3); + ass.Vmla(s7, s15, s3); - VSTM(d2,rdp,2); + ass.Vstm(rdp, NO_WRITE_BACK, DRegisterList(d2, 2)); #else //this fits really nicely to NEON ! - VLDM(d16,r2,8); - VLDM(d0,r1,2); + // TODO + ass.Vldm(d16,r2,8); + ass.Vldm(d0,r1,2); - VMUL_F32(q2,q8,d0,0); - VMLA_F32(q2,q9,d0,1); - VMLA_F32(q2,q10,d1,0); - VMLA_F32(q2,q11,d1,1); - VSTM(d4,rdp,2); - - - /* - Alternative mtrx - - 0 1 4 5 - 2 3 6 7 - 8 9 c d - a b e f - - * ABCD - - v0= A*0 + B*4 + C*8 + D*c - v1= A*1 + B*5 + C*9 + D*d - v3= A*2 + B*6 + C*a + D*e - v4= A*3 + B*7 + C*b + D*f - D0 D1 - f0 f1 f2 f3 - 0145 * AABB + 89cd*CCDD = A0+C8|A1+C9|B4+Dc|B5+Dd -> - - v01=D0+D1 = { A0+B4+C8+Dc, A1+B5+C9+Dd } - - AB, CD -> AABB CCDD - - - //in-shuffle - //4 mul - //4 mla - //1 add - */ + ass.Vmla(q2,q8,d0,0); + ass.Vmla(q2,q9,d0,1); + ass.Vmla(q2,q10,d1,0); + ass.Vmla(q2,q11,d1,1); + ass.Vstm(d4,rdp,2); #endif } break; - - case shop_frswap: - { - verify(op->rd._reg==op->rs2._reg); - verify(op->rd2._reg==op->rs1._reg); - - verify(op->rs1.count()==16 && op->rs2.count()==16); - verify(op->rd2.count()==16 && op->rd.count()==16); - - SUB(r0,r8,op->rs1.reg_aofs()); - SUB(r1,r8,op->rd.reg_aofs()); - //Assumes no FPU reg alloc here - //frswap touches all FPU regs, so all spans should be clear here .. 
- VLDM(d0,r1,8); - VLDM(d8,r0,8); - VSTM(d0,r0,8); - VSTM(d8,r1,8); - } + case shop_frswap: + ass.Sub(r0, r8, op->rs1.reg_aofs()); + ass.Sub(r1, r8, op->rd.reg_aofs()); + //Assumes no FPU reg alloc here + //frswap touches all FPU regs, so all spans should be clear here .. + ass.Vldm(r1, NO_WRITE_BACK, DRegisterList(d0, 8)); + ass.Vldm(r0, NO_WRITE_BACK, DRegisterList(d8, 8)); + ass.Vstm(r0, NO_WRITE_BACK, DRegisterList(d0, 8)); + ass.Vstm(r1, NO_WRITE_BACK, DRegisterList(d8, 8)); break; - - case shop_cvt_f2i_t: - - //printf("f2i: r%d f%d\n",reg.mapg(op->rd),reg.mapf(op->rs1)); - //BKPT(); - VCVT_to_S32_VFP(f0,reg.mapf(op->rs1)); - VMOV(reg.mapg(op->rd),f0); - //shil_chf[op->op](op); - break; - - case shop_cvt_i2f_n: // may be some difference should be made ? - case shop_cvt_i2f_z: - - //printf("i2f: f%d r%d\n",reg.mapf(op->rd),reg.mapg(op->rs1)); - //BKPT(); - VMOV(f0, reg.mapg(op->rs1)); - VCVT_from_S32_VFP(reg.mapfs(op->rd),f0); - //shil_chf[op->op](op); - break; + case shop_cvt_f2i_t: + ass.Vcvt(S32, F32, s0, reg.mapFReg(op->rs1)); + ass.Vmov(reg.mapReg(op->rd), s0); + break; + + case shop_cvt_i2f_n: // may be some difference should be made ? + case shop_cvt_i2f_z: + ass.Vmov(s0, reg.mapReg(op->rs1)); + ass.Vcvt(F32, S32, reg.mapFReg(op->rd), s0); + break; #endif default: - //printf("CFB %d\n",op->op); shil_chf[op->op](op); break; - -__default: - ERROR_LOG(DYNAREC, "@@ Error, Default case (0x%X) in ngen_CompileBlock!", op->op); - verify(false); - break; - } + } } -void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging,bool optimise) +void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool staging, bool optimise) { - //printf("Compile: %08X, %d, %d\n",block->addr,staging,optimise); - block->code=(DynarecCodeEntryPtr)EMIT_GET_PTR(); + ass = Arm32Assembler((u8 *)emit_GetCCPtr(), emit_FreeSpace()); - //StoreImms(r0,r1,(u32)&last_run_block,(u32)code); //useful when code jumps to random locations ... - ++blockno; + block->code = (DynarecCodeEntryPtr)emit_GetCCPtr(); //reg alloc - reg.DoAlloc(block,alloc_regs,alloc_fpu); + reg.DoAlloc(block, alloc_regs, alloc_fpu); - u8* blk_start=(u8*)EMIT_GET_PTR(); + u8* blk_start = ass.GetCursorAddress(); - if (staging) - { - MOV32(r0,(u32)&block->staging_runs); - LDR(r1,r0); - SUB(r1,r1,1); - STR(r1,r0); - } //pre-load the first reg alloc operations, for better efficiency .. 
if (!block->oplist.empty()) - reg.OpBegin(&block->oplist[0],0); + reg.OpBegin(&block->oplist[0], 0); //scheduler if (force_checks) { s32 sz = block->sh4_code_size; u32 addr = block->addr; - MOV32(r0,addr); + ass.Mov(r0, addr); while (sz > 0) { if (sz > 2) { - u32* ptr=(u32*)GetMemPtr(addr,4); - if (ptr != NULL) + u32* ptr = (u32*)GetMemPtr(addr, 4); + if (ptr != nullptr) { - MOV32(r2,(u32)ptr); - LDR(r2,r2,0); - MOV32(r1,*ptr); - CMP(r1,r2); + ass.Mov(r2, (u32)ptr); + ass.Ldr(r2, MemOperand(r2)); + ass.Mov(r1, *ptr); + ass.Cmp(r1, r2); - JUMP((u32)ngen_blockcheckfail, CC_NE); + jump(ngen_blockcheckfail, ne); } addr += 4; sz -= 4; @@ -2084,14 +1816,14 @@ void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool s else { u16* ptr = (u16 *)GetMemPtr(addr, 2); - if (ptr != NULL) + if (ptr != nullptr) { - MOV32(r2, (u32)ptr); - LDRH(r2, r2, 0, AL); - MOVW(r1, *ptr, AL); - CMP(r1, r2); + ass.Mov(r2, (u32)ptr); + ass.Ldrh(r2, MemOperand(r2)); + ass.Mov(r1, *ptr); + ass.Cmp(r1, r2); - JUMP((u32)ngen_blockcheckfail, CC_NE); + jump(ngen_blockcheckfail, ne); } addr += 2; sz -= 2; @@ -2099,31 +1831,25 @@ void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool s } } - u32 cyc=block->guest_cycles; - if (!is_i8r4(cyc)) - { - cyc&=~3; - } + u32 cyc = block->guest_cycles; + if (!ImmediateA32::IsImmediateA32(cyc)) + cyc &= ~3; -#if defined(__APPLE__) - SUB(r11,r11,cyc,true,CC_AL); -#else - SUB(rfp_r9,rfp_r9,cyc,true,CC_AL); -#endif - CALL((u32)intc_sched, CC_LE); + ass.Sub(SetFlags, r9, r9, cyc); + call(intc_sched, le); //compile the block's opcodes shil_opcode* op; - for (size_t i=0;ioplist.size();i++) + for (size_t i = 0; i < block->oplist.size(); i++) { - op=&block->oplist[i]; + op = &block->oplist[i]; - op->host_offs=(u8*)EMIT_GET_PTR()-blk_start; + op->host_offs = ass.GetCursorOffset(); - if (i!=0) - reg.OpBegin(op,i); + if (i != 0) + reg.OpBegin(op, i); - ngen_compile_opcode(block,op,staging,optimise); + ngen_compile_opcode(block, op, optimise); reg.OpEnd(op); } @@ -2133,65 +1859,44 @@ void ngen_Compile(RuntimeBlockInfo* block, bool force_checks, bool reset, bool s // This will be used when the block in (re)linked const shil_param param = shil_param(reg_sr_T); if (reg.IsAllocg(param)) - { - ((DynaRBI *)block)->T_reg = reg.mapg(param); - } + ((DynaRBI *)block)->T_reg = reg.mapReg(param); else - { - ((DynaRBI *)block)->T_reg = (eReg)-1; - } + ((DynaRBI *)block)->T_reg = Register(); } reg.Cleanup(); + + //try to early-lookup the blocks -- to avoid rewrites in case they exist ... + //this isn't enabled for now, as the shared_ptr to this block doesn't exist yet /* - - extern u32 ralst[4]; - - MOV32(r0,(u32)&ralst[0]); - - LDR(r1,r0,0); - ADD(r1,r1,reg.preload_gpr); - STR(r1,r0,0); - - LDR(r1,r0,4); - ADD(r1,r1,reg.preload_fpu); - STR(r1,r0,4); - - LDR(r1,r0,8); - ADD(r1,r1,reg.writeback_gpr); - STR(r1,r0,8); - - LDR(r1,r0,12); - ADD(r1,r1,reg.writeback_fpu); - STR(r1,r0,12); + if (block->BranchBlock != NullAddress) + { + block->pBranchBlock = bm_GetBlock(block->BranchBlock).get(); + if (block->pNextBlock) + block->pNextBlock->AddRef(block); + } + if (block->NextBlock != NullAddress) + { + block->pNextBlock = bm_GetBlock(block->NextBlock).get(); + if (block->pBranchBlock) + block->pBranchBlock->AddRef(block); + } */ - /* - //try to early-lookup the blocks -- to avoid rewrites in case they exist ... - //this isn't enabled for now, as I'm not quite solid on the state of block referrals .. 
- - block->pBranchBlock=bm_GetBlock(block->BranchBlock); - block->pNextBlock=bm_GetBlock(block->NextBlock); - if (block->pNextBlock) block->pNextBlock->AddRef(block); - if (block->pBranchBlock) block->pBranchBlock->AddRef(block); - */ - - //Relink written bytes must be added to the count ! - block->relink_offset=(u8*)EMIT_GET_PTR()-(u8*)block->code; - block->relink_data=0; + block->relink_offset = ass.GetCursorOffset(); + block->relink_data = 0; - emit_Skip(block->Relink()); - u8* pEnd = (u8*)EMIT_GET_PTR(); - - // Clear the area we've written to for cache - vmem_platform_flush_cache((void*)block->code, pEnd - 1, (void*)block->code, pEnd - 1); + emit_Skip(block->relink_offset); + emit_Skip(relinkBlock((DynaRBI *)block)); + ass.Finalize(); + u8* pEnd = ass.GetCursorAddress(); //blk_start might not be the same, due to profiling counters .. - block->host_opcodes=(pEnd-blk_start)/4; + block->host_opcodes = (pEnd - blk_start) / 4; //host code size needs to cover the entire range of the block - block->host_code_size=(pEnd-(u8*)block->code); + block->host_code_size = pEnd - (u8*)block->code; } void ngen_ResetBlocks() @@ -2199,26 +1904,110 @@ void ngen_ResetBlocks() INFO_LOG(DYNAREC, "@@ ngen_ResetBlocks()"); } -// FPCB_OFFSET must be i8r4 -#if RAM_SIZE_MAX == 33554432 -static_assert(FPCB_OFFSET == -0x4100000, "Invalid FPCB_OFFSET"); -#elif RAM_SIZE_MAX == 16777216 -static_assert(FPCB_OFFSET == -0x2100000, "Invalid FPCB_OFFSET"); -#endif - void ngen_init() { INFO_LOG(DYNAREC, "Initializing the ARM32 dynarec"); - verify(rcb_noffs(p_sh4rcb->fpcb) == FPCB_OFFSET); - verify(rcb_noffs(p_sh4rcb->sq_buffer) == -512); - verify(rcb_noffs(&next_pc) == -184); - verify(rcb_noffs(&p_sh4rcb->cntx.CpuRunning) == -156); - ngen_FailedToFindBlock = &ngen_FailedToFindBlock_; + ass = Arm32Assembler((u8 *)emit_GetCCPtr(), emit_FreeSpace()); + // Stubs + Label ngen_LinkBlock_Shared_stub; + // ngen_LinkBlock_Generic_stub + ngen_LinkBlock_Generic_stub = ass.GetCursorAddress(); + ass.Mov(r1,r4); // djump/pc -> in case we need it .. + ass.B(&ngen_LinkBlock_Shared_stub); + // ngen_LinkBlock_cond_Branch_stub + ngen_LinkBlock_cond_Branch_stub = ass.GetCursorAddress(); + ass.Mov(r1, 1); + ass.B(&ngen_LinkBlock_Shared_stub); + // ngen_LinkBlock_cond_Next_stub + ngen_LinkBlock_cond_Next_stub = ass.GetCursorAddress(); + ass.Mov(r1, 0); + ass.B(&ngen_LinkBlock_Shared_stub); + // ngen_LinkBlock_Shared_stub + ass.Bind(&ngen_LinkBlock_Shared_stub); + ass.Mov(r0, lr); + ass.Sub(r0, r0, 4); // go before the call + call((void *)rdv_LinkBlock); + ass.Bx(r0); + // ngen_FailedToFindBlock_ + ngen_FailedToFindBlock_ = ass.GetCursorAddress(); + ass.Mov(r0, r4); + call((void *)rdv_FailedToFindBlock); + ass.Bx(r0); + // ngen_blockcheckfail + ngen_blockcheckfail = ass.GetCursorAddress(); + call((void *)rdv_BlockCheckFail); + ass.Bx(r0); + + // Main loop + Label no_updateLabel; + // ngen_mainloop: + mainloop = ass.GetCursorAddress(); + RegisterList savedRegisters = RegisterList::Union( + RegisterList(r4, r5, r6, r7), + RegisterList(r8, r9, r10, r11), + RegisterList(r12, lr)); + { + UseScratchRegisterScope scope(&ass); + scope.ExcludeAll(); + ass.Push(savedRegisters); + } + ass.Mov(r9, SH4_TIMESLICE); // load cycle counter + ass.Mov(r8, r0); // load context + ass.Ldr(r4, MemOperand(r8, rcbOffset(cntx.pc))); // load pc + ass.B(&no_updateLabel); // Go to mainloop ! 
+ // this code is here for fall-through behavior of do_iter + Label do_iter; + Label cleanup; + // intc_sched: + intc_sched = ass.GetCursorAddress(); + ass.Add(r9, r9, SH4_TIMESLICE); + ass.Mov(r4, lr); + call((void *)UpdateSystem); + ass.Mov(lr, r4); + ass.Cmp(r0, 0); + ass.B(ne, &do_iter); + ass.Ldr(r0, MemOperand(r8, rcbOffset(cntx.CpuRunning))); + ass.Cmp(r0, 0); + ass.Bx(ne, lr); + // do_iter: + ass.Bind(&do_iter); + ass.Mov(r0, r4); + call((void *)rdv_DoInterrupts); + ass.Mov(r4, r0); + + // no_update: + no_update = ass.GetCursorAddress(); + ass.Bind(&no_updateLabel); + // next_pc _MUST_ be on r4 *R4 NOT R0 anymore* + ass.Ldr(r0, MemOperand(r8, rcbOffset(cntx.CpuRunning))); + ass.Cmp(r0, 0); + ass.B(eq, &cleanup); + + ass.Sub(r2, r8, -rcbOffset(fpcb)); +#if RAM_SIZE_MAX == 33554432 + ass.Ubfx(r1, r4, 1, 24); // 24+1 bits: 32 MB + // RAM wraps around so if actual RAM size is 16MB, we won't overflow +#elif RAM_SIZE_MAX == 16777216 + ass.Ubfx(r1, r4, 1, 23); // 23+1 bits: 16 MB +#else +#error "Define RAM_SIZE_MAX" +#endif + ass.Ldr(pc, MemOperand(r2, r1, LSL, 2)); + // cleanup: + ass.Bind(&cleanup); + { + UseScratchRegisterScope scope(&ass); + scope.ExcludeAll(); + ass.Pop(savedRegisters); + } + ass.Bx(lr); + + // Memory handlers for (int s=0;s<6;s++) { - void* fn=s==0?(void*)_vmem_ReadMem8SX32: + const void* fn=s==0?(void*)_vmem_ReadMem8SX32: s==1?(void*)_vmem_ReadMem16SX32: s==2?(void*)_vmem_ReadMem32: s==3?(void*)_vmem_WriteMem8: @@ -2234,97 +2023,82 @@ void ngen_init() if (i==1 || i ==2 || i == 3 || i == 4 || i==12 || i==13) continue; - unat v; - if (read) - { - if (i==0) - v=(unat)fn; - else - { - v=(unat)EMIT_GET_PTR(); - MOV(r0,(eReg)(i)); - JUMP((u32)fn); - } - } + const void *v; + if (i == 0) + v = fn; else { - if (i==0) - v=(unat)fn; - else - { - v=(unat)EMIT_GET_PTR(); - MOV(r0,(eReg)(i)); - JUMP((u32)fn); - } + v = ass.GetCursorAddress(); + ass.Mov(r0, Register(i)); + jump(fn); } - _mem_hndl[read][s%3][i]=v; + _mem_hndl[read][s % 3][i] = v; } } for (int optp = SZ_32I; optp <= SZ_64F; optp++) { //r0 to r13 - for (eReg reg = r0; reg <= r13; reg = (eReg)(reg + 1)) + for (int reg = 0; reg <= 13; reg++) { - if (reg == r1 || reg == r2 || reg == r3 || reg == r4 || reg == r12 || reg == r13) + if (reg == 1 || reg == 2 || reg == 3 || reg == 4 || reg == 12 || reg == 13) continue; - if (optp != SZ_32I && reg != r0) + if (optp != SZ_32I && reg != 0) continue; - _mem_hndl_SQ32[optp - SZ_32I][reg] = (unat)EMIT_GET_PTR(); + _mem_hndl_SQ32[optp - SZ_32I][reg] = ass.GetCursorAddress(); if (optp == SZ_64F) { - LSR(r1, r0, 26); - CMP(r1, 0x38); - AND(r1, r0, 0x3F); - ADD(r1, r1, r8); - JUMP((unat)&_vmem_WriteMem64, CC_NE); - STR(r2, r1, rcb_noffs(sq_both)); - STR(r3, r1, rcb_noffs(sq_both) + 4); + ass.Lsr(r1, r0, 26); + ass.Cmp(r1, 0x38); + ass.And(r1, r0, 0x3F); + ass.Add(r1, r1, r8); + jump((void *)&_vmem_WriteMem64, ne); + ass.Strd(r2, r3, MemOperand(r1, rcbOffset(sq_buffer))); } else { - AND(r3, reg, 0x3F); - LSR(r2, reg, 26); - ADD(r3, r3, r8); - CMP(r2, 0x38); - if (reg != r0) - MOV(r0, reg, CC_NE); - JUMP((unat)&_vmem_WriteMem32, CC_NE); - STR(r1, r3, rcb_noffs(sq_both)); + ass.And(r3, Register(reg), 0x3F); + ass.Lsr(r2, Register(reg), 26); + ass.Add(r3, r3, r8); + ass.Cmp(r2, 0x38); + if (reg != 0) + ass.Mov(ne, r0, Register(reg)); + jump((void *)&_vmem_WriteMem32, ne); + ass.Str(r1, MemOperand(r3, rcbOffset(sq_buffer))); } - BX(LR); + ass.Bx(lr); } } + ass.Finalize(); + emit_Skip(ass.GetBuffer()->GetSizeInBytes()); - INFO_LOG(DYNAREC, "readm helpers: up to %p", EMIT_GET_PTR()); + 
ngen_FailedToFindBlock = ngen_FailedToFindBlock_; + + INFO_LOG(DYNAREC, "readm helpers: up to %p", ass.GetCursorAddress()); emit_SetBaseAddr(); - ccmap[shop_test]=CC_EQ; - ccnmap[shop_test]=CC_NE; + ccmap[shop_test] = eq; + ccnmap[shop_test] = ne; - ccmap[shop_seteq]=CC_EQ; - ccnmap[shop_seteq]=CC_NE; + ccmap[shop_seteq] = eq; + ccnmap[shop_seteq] = ne; - ccmap[shop_setge]=CC_GE; - ccnmap[shop_setge]=CC_LT; + ccmap[shop_setge] = ge; + ccnmap[shop_setge] = lt; - ccmap[shop_setgt]=CC_GT; - ccnmap[shop_setgt]=CC_LE; + ccmap[shop_setgt] = gt; + ccnmap[shop_setgt] = le; - ccmap[shop_setae]=CC_HS; - ccnmap[shop_setae]=CC_LO; - - ccmap[shop_setab]=CC_HI; - ccnmap[shop_setab]=CC_LS; - - //ccmap[shop_fseteq]=CC_EQ; - //ccmap[shop_fsetgt]=CC_GT; + ccmap[shop_setae] = hs; + ccnmap[shop_setae] = lo; + ccmap[shop_setab] = hi; + ccnmap[shop_setab] = ls; } diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp index 9fd55da86..6c98dd0d6 100644 --- a/core/rec-ARM64/rec_arm64.cpp +++ b/core/rec-ARM64/rec_arm64.cpp @@ -1946,25 +1946,7 @@ private: else { Mov(w1, reg2); - - switch(size) - { - case 1: - GenCallRuntime((void (*)())ptr); - break; - - case 2: - GenCallRuntime((void (*)())ptr); - break; - - case 4: - GenCallRuntime((void (*)())ptr); - break; - - default: - die("Invalid size"); - break; - } + GenCallRuntime((void (*)())ptr); } }
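
For context, the new arm32 backend drives vixl's AArch32 MacroAssembler directly instead of the old hand-rolled emitter macros. Below is a minimal standalone sketch of that flow, not part of the patch itself: it leaves out the Arm32Assembler wrapper, the register allocator, and the emit_GetCCPtr()/emit_Skip() code-cache management used above, and only shows the raw vixl calls the backend is built on.

    // Sketch only: emit "int f(int x) { return x + 1; }" with vixl aarch32
    // and report how many bytes were generated. Assumes vixl is on the
    // include path, as core/deps/vixl is in this tree.
    #include <cstdio>
    #include "aarch32/macro-assembler-aarch32.h"

    using namespace vixl::aarch32;

    int main()
    {
        MacroAssembler masm;     // owns an internal, growable code buffer
        masm.UseA32();           // ARM (A32) encodings, as the dynarec uses

        masm.Add(r0, r0, 1);     // r0 holds the first argument and the result
        masm.Bx(lr);             // return to caller

        masm.FinalizeCode();     // emit pending literal pools, close the buffer

        // The emulator would flush/execute the buffer; here we only inspect it.
        printf("emitted %u bytes\n",
               (unsigned)masm.GetBuffer()->GetSizeInBytes());
        return 0;
    }

The patch follows the same pattern, but points the assembler at the shared translation cache and advances it afterwards (emit_Skip() with GetBuffer()->GetSizeInBytes(), or GetCursorOffset() for relinked tails), so later blocks are emitted after the code just generated.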