// BizHawk/libmeteor/source/interpreter_arm.cpp
//
// 1478 lines
// 32 KiB
// C++

// Meteor - A Nintendo Gameboy Advance emulator
// Copyright (C) 2009-2011 Philippe Daouadi
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef __INTERPRETER_ARM_H__
#define __INTERPRETER_ARM_H__
/*
- From GBATEK -
ARM Binary Opcode Format
|..3 ..................2 ..................1 ..................0|
|1_0_9_8_7_6_5_4_3_2_1_0_9_8_7_6_5_4_3_2_1_0_9_8_7_6_5_4_3_2_1_0|
|_Cond__|0_0_0|___Op__|S|__Rn___|__Rd___|__Shift__|Typ|0|__Rm___| DataProc
|_Cond__|0_0_0|___Op__|S|__Rn___|__Rd___|__Rs___|0|Typ|1|__Rm___| DataProc
|_Cond__|0_0_1|___Op__|S|__Rn___|__Rd___|_Shift_|___Immediate___| DataProc
|_Cond__|0_0_1_1_0|P|1|0|_Field_|__Rd___|_Shift_|___Immediate___| PSR Imm
|_Cond__|0_0_0_1_0|P|L|0|_Field_|__Rd___|0_0_0_0|0_0_0_0|__Rm___| PSR Reg
|_Cond__|0_0_0_1_0_0_1_0_1_1_1_1_1_1_1_1_1_1_1_1|0_0|L|1|__Rn___| BX,BLX
|1_1_1_0|0_0_0_1_0_0_1_0|_____immediate_________|0_1_1_1|_immed_| BKPT ARM9
|_Cond__|0_0_0_1_0_1_1_0_1_1_1_1|__Rd___|1_1_1_1|0_0_0_1|__Rm___| CLZ ARM9
|_Cond__|0_0_0_1_0|Op_|0|__Rn___|__Rd___|0_0_0_0|0_1_0_1|__Rm___| QALU ARM9
|_Cond__|0_0_0_0_0_0|A|S|__Rd___|__Rn___|__Rs___|1_0_0_1|__Rm___| Multiply
|_Cond__|0_0_0_0_1|U|A|S|_RdHi__|_RdLo__|__Rs___|1_0_0_1|__Rm___| MulLong
|_Cond__|0_0_0_1_0|Op_|0|Rd/RdHi|Rn/RdLo|__Rs___|1|y|x|0|__Rm___| MulHalf
|_Cond__|0_0_0_1_0|B|0_0|__Rn___|__Rd___|0_0_0_0|1_0_0_1|__Rm___| TransSwp12
|_Cond__|0_0_0|P|U|0|W|L|__Rn___|__Rd___|0_0_0_0|1|S|H|1|__Rm___| TransReg10
|_Cond__|0_0_0|P|U|1|W|L|__Rn___|__Rd___|OffsetH|1|S|H|1|OffsetL| TransImm10
|_Cond__|0_1_0|P|U|B|W|L|__Rn___|__Rd___|_________Offset________| TransImm9
|_Cond__|0_1_1|P|U|B|W|L|__Rn___|__Rd___|__Shift__|Typ|0|__Rm___| TransReg9
|_Cond__|0_1_1|________________xxx____________________|1|__xxx__| Undefined
|_Cond__|1_0_0|P|U|S|W|L|__Rn___|__________Register_List________| BlockTrans
|_Cond__|1_0_1|L|___________________Offset______________________| B,BL,BLX
|_Cond__|1_1_0|P|U|N|W|L|__Rn___|__CRd__|__CP#__|____Offset_____| CoDataTrans
|_Cond__|1_1_0_0_0_1_0|L|__Rn___|__Rd___|__CP#__|_CPopc_|__CRm__| CoRR ARM9
|_Cond__|1_1_1_0|_CPopc_|__CRn__|__CRd__|__CP#__|_CP__|0|__CRm__| CoDataOp
|_Cond__|1_1_1_0|CPopc|L|__CRn__|__Rd___|__CP#__|_CP__|1|__CRm__| CoRegTrans
|_Cond__|1_1_1_1|_____________Ignored_by_Processor______________| SWI
*/
#include "ameteor/interpreter.hpp"
#include "globals.hpp"
#include "cpu_globals.hpp"
#include "ameteor.hpp"
#include "debug.hpp"
// Register fields of the opcode currently held in `code`.
// Each expands to a 4-bit register index extracted from a fixed bit range:
//   Rn = bits 16-19, Rd = bits 12-15, Rs = bits 8-11, Rm = bits 0-3.
// They re-read `code` on every use, so they are only meaningful inside the
// instruction handlers below.
#define Rn ((code >> 16) & 0xF)
#define Rd ((code >> 12) & 0xF)
#define Rs ((code >> 8) & 0xF)
#define Rm (code & 0xF)
// Load/Store immediate offset
#define LSOff (code & 0xFFF)
// Sanity checks on register operands. With METDEBUG defined they call
// met_abort when the condition is violated; otherwise they expand to an
// empty block and cost nothing.
//   NOT_PC(reg)          - reg must not be 15
//   NOT_PC_ALL()         - none of Rn, Rd, Rs, Rm may be 15
//   NOT_SAME2(a, b)      - a and b must differ
//   NOT_SAME3(a, b, c)   - a, b and c must be pairwise different
#ifdef METDEBUG
# define NOT_PC(reg) \
if (reg == 15) \
met_abort("Register is PC")
# define NOT_PC_ALL() \
NOT_PC(Rn); \
NOT_PC(Rd); \
NOT_PC(Rs); \
NOT_PC(Rm)
# define NOT_SAME2(reg1, reg2) \
if (reg1 == reg2) \
met_abort("Two same registers")
# define NOT_SAME3(reg1, reg2, reg3) \
NOT_SAME2(reg1, reg2); \
NOT_SAME2(reg2, reg3); \
NOT_SAME2(reg1, reg3)
#else
# define NOT_PC(reg) {}
# define NOT_PC_ALL() {}
# define NOT_SAME2(reg1, reg2) {}
# define NOT_SAME3(reg1, reg2, reg3) {}
#endif
// Declaration helpers for the handlers: ARM(name) opens the definition of
// the inline member Interpreter::a<name>(), NIARM(name) the same without
// the inline specifier.
#define ARM(name) \
inline void Interpreter::a##name ()
#define NIARM(name) \
void Interpreter::a##name ()
namespace AMeteor
{
// Branch and Exchange (BX, BLX)
//
// Jumps to the address held in Rm. When bit 0 of that address is set,
// FLAG_T is raised and R15 becomes address + 1; otherwise the address must
// be word aligned and R15 becomes address + 4. The BLX form copies R15 to
// R14 and then reports itself as incomplete through met_abort.
ARM(BXBLX)
{
	// Fixed part of the encoding
	if ((code & 0x0FFFFF00) != 0x012FFF00)
		met_abort("Bits 8-27 must be 0001 00101111 11111111 for BX/BLX instructions");
	if (Rm == 15)
		met_abort("Branching on PC is undefined");

	const bool withLink = code & (0x1 << 5);
	if (withLink) // BLX
	{
		R(14) = R(15);
		met_abort("BLX not completly implemented");
	}

	// Read the target only now, after the link register has been updated
	const uint32_t target = R(Rm);
	if (target & 0x1)
	{
		FLAG_T = 1;
		R(15) = target + 1;
	}
	else
	{
		if (target & 0x3)
			met_abort("BX with ARM on non 32 bit aligned address");
		R(15) = target + 4;
	}
	// Both paths account for the same non-sequential fetch
	CYCLES32NSeq(R(15), 3);
}
// Branch and Branch with Link (B, BL)
//
// Adds the sign-extended 24-bit word offset (scaled to bytes) to R15.
// BL additionally stores the address of the following instruction in R14.
ARM(BBL)
{
	if (((code >> 25) & 0x7) != 0x5)
		met_abort("Bits 25-27 must be 101 for B/BL/BLX instructions");
	if (((code >> 28) & 0xF) == 0xF)
		met_abort("BLX not implemented");

	// 24-bit field: move the sign bit to bit 31 and back to extend it
	int32_t displacement = code & 0x00FFFFFF;
	displacement = (displacement << 8) >> 8;
	// the field counts words, R15 counts bytes
	displacement *= 4;

	const bool withLink = code & (0x1 << 24);
	if (withLink) // BL
	{
		// R15 is two instructions ahead of the branch, the return address
		// is only one ahead: LR = PC + 4 while R15 = PC + 8
		R(14) = R(15) - 4;
	}

	R(15) += displacement + 4;
	CYCLES32NSeq(R(15), 3);
}
// Data Processing, operand 2 = Rm shifted by a 5-bit immediate
//
// Computes the shifted operand and the shifter carry-out, then hands both
// to a_DataProcCore together with R(Rn). A shift amount of 0 encodes
// LSL #0, LSR #32, ASR #32 or RRX depending on the shift type.
NIARM(_DataProcShiftImm)
{
	if ((code >> 26) & 0x3)
		met_abort("Bits 26-27 must be 00 for DataProc instructions");

	bool carryOut = FLAG_C; // unchanged unless the shifter produces one
	uint32_t operand2 = 0; // to avoid a warning
	const uint8_t rd = Rd;
	const uint8_t rm = Rm;

	if (rm == 15 && !(code & (0x1 << 20)) && (code & (0x1 << 4)))
		met_abort("Rm = 15, not implemented");

	const uint8_t amount = (code >> 7) & 0x1F;
	switch ((code >> 5) & 0x3)
	{
		case 0: // Logical Shift Left
		{
			operand2 = R(rm);
			if (amount)
			{
				// last bit shifted out on the left
				carryOut = (operand2 >> (32-amount)) & 0x1;
				operand2 <<= amount;
			}
			// else LSL#0 : value and carry untouched
			break;
		}
		case 1: // Logical Shift Right
		{
			const uint32_t value = R(rm);
			if (!amount) // LSR#32
			{
				operand2 = 0;
				carryOut = value >> 31;
			}
			else
			{
				operand2 = value >> amount;
				carryOut = (value >> (amount-1)) & 0x1;
			}
			break;
		}
		case 2: // Arithmetic Shift Right
		{
			const int32_t value = (int32_t)R(rm);
			if (!amount) // ASR#32
			{
				operand2 = value >> 31;
				carryOut = operand2 & 0x1;
			}
			else
			{
				operand2 = value >> amount;
				carryOut = (value >> (amount-1)) & 0x1;
			}
			break;
		}
		case 3: // ROtate Right
		{
			if (!amount) // RRX#1
			{
				carryOut = R(rm) & 0x1;
				operand2 = (FLAG_C << 31) | (R(rm) >> 1);
			}
			else
			{
				operand2 = ROR(R(rm), amount);
				carryOut = operand2 >> 31;
			}
			break;
		}
	}

	a_DataProcCore(rd, R(Rn), operand2, carryOut);
}
// Data Processing, operand 2 = Rm shifted by the low byte of Rs
//
// Computes the shifted operand and the shifter carry-out, then hands both
// to a_DataProcCore. The first operand is R(Rn), increased by 4 when Rn
// is 15. A shift amount of 0 leaves Rm and the carry untouched.
NIARM(_DataProcShiftReg)
{
	if ((code >> 26) & 0x3)
		met_abort("Bits 26-27 must be 00 for DataProc instructions");
	if (code & (0x1 << 7))
		met_abort("Bit 7 must be 0 for DataProc with shift by register instructions");

	uint8_t shift;
	uint32_t op1, op2 = 0; // to avoid a warning
	bool shiftcarry = FLAG_C;
	uint8_t rd = Rd;
	uint8_t rm = Rm;

	if (rm == 15 && !(code & (0x1 << 20)) && (code & (0x1 << 4)))
		met_abort("Rm = 15, not implemented");

	op1 = R(Rn);

	NOT_PC(Rs);

	ICYCLES(1);
	if (Rn == 15)
		op1 += 4;

	shift = R(Rs) & 0xFF; // only first byte used
	if (shift)
	{
		switch ((code >> 5) & 0x3)
		{
			case 0: // Logical Shift Left
				if (shift == 32)
				{
					op2 = 0;
					shiftcarry = R(rm) & 0x1;
				}
				else if (shift < 32)
				{
					op2 = R(rm) << shift;
					shiftcarry = (R(rm) >> (32-shift)) & 0x1;
				}
				else
				{
					op2 = 0;
					shiftcarry = 0;
				}
				break;
			case 1: // Logical Shift Right
				if (shift == 32)
				{
					op2 = 0;
					shiftcarry = R(rm) >> 31;
				}
				else if (shift < 32)
				{
					op2 = R(rm) >> shift;
					shiftcarry = (R(rm) >> (shift-1)) & 0x1;
				}
				else
				{
					op2 = 0;
					shiftcarry = 0;
				}
				break;
			case 2: // Arithmetic Shift Right
				if (shift >= 32)
				{
					op2 = ((int32_t)R(rm)) >> 31;
					shiftcarry = op2 & 0x1;
				}
				else
				{
					op2 = ((int32_t)R(rm)) >> shift;
					shiftcarry = (((int32_t)R(rm)) >> (shift-1)) & 0x1;
				}
				break;
			case 3: // ROtate Right
			{
				// Only the low five bits select the rotation. A non-zero
				// multiple of 32 leaves the value as is, and the carry is
				// still taken from bit 31. ROR is never called with a
				// rotation of 0: its definition is not visible here and a
				// (32 - n) shift inside it would be undefined for n == 0.
				const uint8_t rot = shift % 32;
				if (rot)
					op2 = ROR(R(rm), rot);
				else
					op2 = R(rm);
				shiftcarry = op2 >> 31;
				break;
			}
		}
	}
	else
		op2 = R(rm);

	a_DataProcCore(rd, op1, op2, shiftcarry);
}
// Data Processing, operand 2 = 8-bit immediate rotated right by twice the
// 4-bit rotate field
//
// With a rotate field of 0 the immediate is used as is and the carry is
// left unchanged; otherwise the carry-out is bit 31 of the rotated value.
NIARM(_DataProcImm)
{
	if ((code >> 26) & 0x3)
		met_abort("Bits 26-27 must be 00 for DataProc instructions");

	const uint8_t rd = Rd;
	const uint8_t rotateField = ((code >> 8) & 0xF);

	// defaults for the unrotated case
	bool carryOut = FLAG_C;
	uint32_t operand2 = code & 0xFF;

	if (rotateField)
	{
		operand2 = ROR(code & 0xFF, rotateField * 2);
		carryOut = operand2 >> 31;
	}

	a_DataProcCore(rd, R(Rn), operand2, carryOut);
}
// Common back end of the three Data Processing decoders.
//
//   rd         - index of the destination register
//   op1        - first operand (value of Rn as prepared by the caller)
//   op2        - second operand, already shifted/rotated
//   shiftcarry - carry produced by the shifter; copied to FLAG_C by the
//                logical operations when the S bit (bit 20) is set
//
// The operation is selected by bits 21-24 of `code`. With the S bit set
// the flags are updated and, when rd is 15, CPU.SwitchModeBack() is
// called. TST/TEQ/CMP/CMN require the S bit and an rd of 0 or 15.
// After the operation the cycle count is updated; a write to R15 by a
// non-test operation also advances R15 by one instruction (2 bytes when
// FLAG_T is set, 4 otherwise).
inline void Interpreter::a_DataProcCore(uint8_t rd,
		uint32_t op1, uint32_t op2, bool shiftcarry)
{
	uint8_t opcode = (code >> 21) & 0xF;

	if (opcode < 0x8 || opcode > 0xB)
	{
		// not a test operation, nothing to validate
	}
	else if (!((code >> 20) & 0x1) || (rd != 0x0 && rd != 0xF))
		met_abort("Set condition bit not set for test operation or Rd not acceptable for a test");

	if ((opcode == 0xD || opcode == 0xF) && Rn)
		met_abort("Rn not null for MOV or MVN");

#ifndef X86_ASM
	uint32_t res;
#endif
	if (code & (0x1 << 20)) // if set condition
	{
		switch (opcode)
		{
			case 0x0 : // AND
#ifdef X86_ASM
				asm("andl %4, %3\n"
						"setzb %1\n"
						"setsb %2\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N)
						:"0"(op1), "r"(op2));
#else
				res = R(rd) = op1 & op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				FLAG_C = shiftcarry;
				break;
			case 0x1 : // EOR
#ifdef X86_ASM
				asm("xorl %4, %3\n"
						"setzb %1\n"
						"setsb %2\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N)
						:"0"(op1), "r"(op2));
#else
				res = R(rd) = op1 ^ op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				FLAG_C = shiftcarry;
				break;
			case 0x2 : // SUB
#ifdef X86_ASM
				asm("subl %6, %5\n"
						"setzb %1\n"
						"setsb %2\n"
						"setncb %3\n"
						"setob %4\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C),
						"=m"(FLAG_V)
						:"0"(op1), "r"(op2));
#else
				res = R(rd) = op1 - op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				FLAG_C = SUBCARRY(op1, op2, res);
				FLAG_V = SUBOVERFLOW(op1, op2, res);
#endif
				break;
			case 0x3 : // RSB
#ifdef X86_ASM
				asm("subl %6, %5\n"
						"setzb %1\n"
						"setsb %2\n"
						"setncb %3\n"
						"setob %4\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C),
						"=m"(FLAG_V)
						:"0"(op2), "r"(op1));
#else
				res = R(rd) = op2 - op1;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				// the subtraction is op2 - op1, so op2 is the minuend for
				// the carry/overflow computation as well
				FLAG_C = SUBCARRY(op2, op1, res);
				FLAG_V = SUBOVERFLOW(op2, op1, res);
#endif
				break;
			case 0x4 : // ADD
#ifdef X86_ASM
				asm("addl %6, %5\n"
						"setzb %1\n"
						"setsb %2\n"
						"setcb %3\n"
						"setob %4\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C),
						"=m"(FLAG_V)
						:"0"(op1), "r"(op2));
#else
				res = R(rd) = op1 + op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				FLAG_C = ADDCARRY(op1, op2, res);
				FLAG_V = ADDOVERFLOW(op1, op2, res);
#endif
				break;
			case 0x5 : // ADC
				// TODO test on hardware how overflow and carry work for this
				// instruction
#ifdef X86_ASM
				asm("addl %6, %5\n"
						"setzb %1\n"
						"setsb %2\n"
						"setcb %3\n"
						"setob %4\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C),
						"=m"(FLAG_V)
						:"0"(op1+FLAG_C), "r"(op2));
#else
				res = R(rd) = op1 + op2 + FLAG_C;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				FLAG_C = ADDCARRY(op1, op2, res);
				FLAG_V = ADDOVERFLOW(op1, op2, res);
#endif
				break;
			case 0x6 : // SBC
				// TODO test on hardware how overflow and carry work for this
				// instruction
#ifdef X86_ASM
				asm("subl %6, %5\n"
						"setzb %1\n"
						"setsb %2\n"
						"setncb %3\n"
						"setob %4\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C),
						"=m"(FLAG_V)
						:"0"(op1+FLAG_C-1), "r"(op2));
#else
				res = R(rd) = op1 - op2 + FLAG_C - 1;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				FLAG_C = SUBCARRY(op1, op2, res);
				FLAG_V = SUBOVERFLOW(op1, op2, res);
#endif
				break;
			case 0x7 : // RSC
#ifdef X86_ASM
				asm("subl %6, %5\n"
						"setzb %1\n"
						"setsb %2\n"
						"setncb %3\n"
						"setob %4\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C),
						"=m"(FLAG_V)
						:"0"(op2+FLAG_C-1), "r"(op1));
#else
				res = R(rd) = op2 - op1 + FLAG_C - 1;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				// reversed subtraction: op2 is the minuend here too
				FLAG_C = SUBCARRY(op2, op1, res);
				FLAG_V = SUBOVERFLOW(op2, op1, res);
#endif
				break;
			case 0x8 : // TST
#ifdef X86_ASM
				asm("testl %3, %2\n"
						"setzb %0\n"
						"setsb %1\n"
						:"=m"(FLAG_Z), "=m"(FLAG_N)
						:"r"(op1), "r"(op2));
#else
				res = op1 & op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				SETFB(C, shiftcarry);
				break;
			case 0x9 : // TEQ
#ifdef X86_ASM
				asm("xorl %3, %2\n"
						"setzb %0\n"
						"setsb %1\n"
						:"=m"(FLAG_Z), "=m"(FLAG_N)
						:"r"(op1), "r"(op2)
						:"2");
#else
				res = op1 ^ op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				SETFB(C, shiftcarry);
				break;
			case 0xA : // CMP
#ifdef X86_ASM
				asm("cmpl %5, %4\n"
						"setzb %0\n"
						"setsb %1\n"
						"setncb %2\n"
						"setob %3\n"
						:"=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C), "=m"(FLAG_V)
						:"r"(op1), "r"(op2));
#else
				res = op1 - op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				FLAG_C = SUBCARRY(op1, op2, res);
				FLAG_V = SUBOVERFLOW(op1, op2, res);
#endif
				break;
			case 0xB : // CMN
#ifdef X86_ASM
				asm("addl %5, %4\n"
						"setzb %0\n"
						"setsb %1\n"
						"setcb %2\n"
						"setob %3\n"
						:"=m"(FLAG_Z), "=m"(FLAG_N), "=m"(FLAG_C), "=m"(FLAG_V)
						:"r"(op1), "r"(op2)
						:"4");
#else
				res = op1 + op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
				FLAG_C = ADDCARRY(op1, op2, res);
				FLAG_V = ADDOVERFLOW(op1, op2, res);
#endif
				break;
			case 0xC : // ORR
#ifdef X86_ASM
				asm("orl %4, %3\n"
						"setzb %1\n"
						"setsb %2\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N)
						:"0"(op1), "r"(op2));
#else
				res = R(rd) = op1 | op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				FLAG_C = shiftcarry;
				break;
			case 0xD : // MOV
#ifdef X86_ASM
				R(rd) = op2;
				asm("testl %2, %2\n"
						"setzb %0\n"
						"setsb %1\n"
						:"=m"(FLAG_Z), "=m"(FLAG_N)
						:"r"(op2));
#else
				res = R(rd) = op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				FLAG_C = shiftcarry;
				break;
			case 0xE : // BIC
#ifdef X86_ASM
				asm("not %3\n"
						"andl %4, %3\n"
						"setzb %1\n"
						"setsb %2\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N)
						:"0"(op2), "r"(op1));
#else
				res = R(rd) = op1 & ~op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				FLAG_C = shiftcarry;
				break;
			case 0xF : // MVN
#ifdef X86_ASM
				asm("xorl $0xffffffff, %3\n"
						"setzb %1\n"
						"setsb %2\n"
						:"=r"(R(rd)), "=m"(FLAG_Z), "=m"(FLAG_N)
						:"0"(op2));
#else
				res = R(rd) = ~op2;
				FLAG_Z = !res;
				FLAG_N = res >> 31;
#endif
				FLAG_C = shiftcarry;
				break;
		}

		if (rd == 15)
			CPU.SwitchModeBack();
	}
	else
		switch (opcode)
		{
			case 0x0 : // AND
				R(rd) = op1 & op2;
				break;
			case 0x1 : // EOR
				R(rd) = op1 ^ op2;
				break;
			case 0x2 : // SUB
				R(rd) = op1 - op2;
				break;
			case 0x3 : // RSB
				R(rd) = op2 - op1;
				break;
			case 0x4 : // ADD
				// uses the rd parameter like every other case
				R(rd) = op1 + op2;
				break;
			case 0x5 : // ADC
				R(rd) = op1 + op2 + FLAG_C;
				break;
			case 0x6 : // SBC
				R(rd) = op1 - op2 + FLAG_C - 1;
				break;
			case 0x7 : // RSC
				R(rd) = op2 - op1 + FLAG_C - 1;
				break;
			case 0x8 : // TST
			case 0x9 : // TEQ
			case 0xA : // CMP
			case 0xB : // CMN
				met_abort("Comparison or test without set flags bit");
				break;
			case 0xC : // ORR
				R(rd) = op1 | op2;
				break;
			case 0xD : // MOV
				R(rd) = op2;
				break;
			case 0xE : // BIC
				R(rd) = op1 & ~op2;
				break;
			case 0xF : // MVN
				R(rd) = ~op2;
				break;
		}

	// A write to R15 by a non-test operation is a branch: charge a
	// non-sequential fetch and step R15 by one instruction of the current
	// instruction set.
	if (rd == 15 && (opcode < 0x8 || opcode > 0xB))
	{
		if (FLAG_T)
		{
			CYCLES16NSeq(R(15), 3);
			R(15) += 2;
		}
		else
		{
			CYCLES32NSeq(R(15), 3);
			R(15) += 4;
		}
	}
	else
		CYCLES32Seq(R(15), 1);

	if (opcode >= 0x8 && opcode <= 0xB && rd == 0xF)
		met_abort("P test instruction (not implemented)");
}
// PSR Transfer (MRS, MSR)
//
// Bit 22 selects the register: clear = CPSR, set = SPSR.
// Bit 21 selects the direction: set = MSR (write PSR), clear = MRS (read
// PSR into Rd).
// For MSR, bits 16-19 select which bytes of the PSR are replaced; bytes
// whose bit is clear keep their current value. When the low byte of the
// CPSR is written and the mode bits change, CPU.SwitchToMode is called
// before the new value is stored.
ARM(PSR)
{
	if ((code >> 26) & 0x3)
		met_abort("Bits 26-27 must be 00 for PSR instructions");
	if (((code >> 23) & 0x3) != 0x2)
		met_abort("Bits 23-24 must be 10 for PSR instructions");
	if (code & (0x1 << 20))
		met_abort("Bit 20 must be 0 for PSR instructions");

	bool oncpsr = !(code & (0x1 << 22));
	if (oncpsr)
		CPU.UpdateCpsr();
	uint32_t& psr = oncpsr ? CPSR : SPSR;

	if (code & (0x1 << 21)) // MSR
	{
		if (((code >> 12) & 0xF) != 0xF)
			met_abort("Bits 12-15 must be 0xF for MSR instruction");

		uint32_t val;
		if (code & (0x1 << 25))
		{
			// Immediate form: 8-bit immediate (bits 0-7) rotated right by
			// twice the 4-bit field in bits 8-11, the same encoding as
			// the Data Processing immediate operand.
			const uint32_t imm = code & 0xFF;
			const uint8_t rot = ((code >> 8) & 0xF) * 2;
			if (rot)
				val = ROR(imm, rot);
			else
				val = imm;
		}
		else
		{
			if ((code >> 4) & 0xFF)
				met_abort("Bits 4-11 must be 0 for MSR instruction");
			val = R(Rm);
		}

		// Keep the current contents of every byte whose field bit is clear
		if (!(code & (0x1 << 19)))
			val = (val & 0x00FFFFFF) | (psr & 0xFF000000);
		if (!(code & (0x1 << 18)))
			val = (val & 0xFF00FFFF) | (psr & 0x00FF0000);
		if (!(code & (0x1 << 17)))
			val = (val & 0xFFFF00FF) | (psr & 0x0000FF00);
		if (!(code & (0x1 << 16)))
			val = (val & 0xFFFFFF00) | (psr & 0x000000FF);
		else if (oncpsr &&
				(psr & 0x1F) != (val & 0x1F)) // have we changed mode ?
			CPU.SwitchToMode(val & 0x1F);

		psr = val;
		if (oncpsr)
		{
			CPU.UpdateICpsr();
			CPU.CheckInterrupt();
		}
	}
	else // MRS
	{
		if ((code >> 25) & 0x1)
			met_abort("Bit 25 must be 0 for MRS instruction");
		if (((code >> 16) & 0xF) != 0xF)
			met_abort("Bits 16-19 must be 0xF for MRS instruction");
		if (code & 0xFFF)
			met_abort("Bits 0-11 must be 0 for MRS instruction");

		R(Rd) = psr;
	}

	CYCLES32Seq(R(15), 1);
}
// Multiply and Multiply-Accumulate (MUL, MLA, UMULL, UMLAL, SMULL, SMLAL)
//
// The operation is selected by bits 21-24. In this encoding the
// destination (or high word of a 64-bit result) sits in the Rn field and
// the accumulator (or low word) in the Rd field. When the S bit (bit 20)
// is set, the Z and N flags are updated through FZ/FN. Halfword
// multiplies (bit 24 set) are validated but not implemented.
ARM(_Multiply)
{
	// NOTE : In this instruction Rn and Rd are inverted
	if ((code >> 25) & 0x7)
		met_abort("Bits 25-27 must be 000 for Multiply instructions");
	if (code & (0x1 << 24))
	{
		if (!(code & (0x1 << 7)))
			met_abort("Bit 7 must be 1 for halfword multiply");
		// this check is on bit 4, the message used to name bit 7
		if (code & (0x1 << 4))
			met_abort("Bit 4 must be 0 for halfword multiply");
	}
	else
	{
		if (((code >> 4) & 0xF) != 0x9)
			met_abort("Bits 4-7 must be 1001 for non halfword multiplies");
	}

	NOT_PC_ALL();

	switch ((code >> 21) & 0xF)
	{
		case 0x0 : // MUL
			if (Rd != 0)
				met_abort("Rd must be 0 for MUL instructions");
			NOT_SAME2(Rn, Rm);
			R(Rn) = R(Rm)*R(Rs);
			if (code & (0x1 << 20))
			{
				FZ(R(Rn));
				FN(R(Rn));
			}
			MULICYCLES(Rs);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x1 : // MLA
			NOT_SAME2(Rn, Rm);
			R(Rn) = R(Rm)*R(Rs)+R(Rd);
			if (code & (0x1 << 20))
			{
				FZ(R(Rn));
				FN(R(Rn));
			}
			MULICYCLES(Rs);
			ICYCLES(1);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x4 : // UMULL
			{
				NOT_SAME3(Rn, Rd, Rm);
				uint64_t out = ((uint64_t)R(Rm))*((uint64_t)R(Rs));
				// high word goes to the Rn field, low word to the Rd field
				R(Rn) = out >> 32;
				R(Rd) = out & 0xFFFFFFFF;
				if (code & (0x1 << 20))
				{
					FZ(out);
					FN(R(Rn));
				}
			}
			MULICYCLES(Rs);
			ICYCLES(1);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x5 : // UMLAL
			{
				NOT_SAME3(Rn, Rd, Rm);
				// product plus the 64-bit accumulator held in Rn:Rd
				uint64_t out =
					((uint64_t)R(Rm)) * ((uint64_t)R(Rs)) +
					((((uint64_t)R(Rn)) << 32) | ((uint64_t)R(Rd)));
				R(Rn) = out >> 32;
				R(Rd) = out & 0xFFFFFFFF;
				if (code & (0x1 << 20))
				{
					FZ(out);
					FN(R(Rn));
				}
			}
			MULICYCLES(Rs);
			ICYCLES(2);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x6 : // SMULL
			{
				NOT_SAME3(Rn, Rd, Rm);
				int64_t out = ((int64_t)(int32_t)R(Rm)) * ((int64_t)(int32_t)R(Rs));
				R(Rn) = out >> 32;
				R(Rd) = out & 0xFFFFFFFF;
				if (code & (0x1 << 20))
				{
					FZ(out);
					FN(R(Rn));
				}
			}
			MULICYCLES(Rs);
			ICYCLES(1);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x7 : // SMLAL
			{
				NOT_SAME3(Rn, Rd, Rm);
				int64_t out = ((int64_t)(int32_t)R(Rm)) * ((int64_t)(int32_t)R(Rs))
					+ ((((int64_t)R(Rn)) << 32) | ((int64_t)R(Rd)));
				R(Rn) = out >> 32;
				R(Rd) = out & 0xFFFFFFFF;
				if (code & (0x1 << 20))
				{
					FZ(out);
					FN(R(Rn));
				}
			}
			MULICYCLES(Rs);
			ICYCLES(2);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x8 : // SMLAxy
		case 0x9 : // SMLAW/SMULW
		case 0xA : // SMLALxy
		case 0xB : // SMULxy
		default :
			met_abort("Not implemented multiply instruction or unknown");
	}
}
// Single Data Transfer (LDR, STR, LDRB, STRB)
//
// Bit 25 selects the offset form (0 = 12-bit immediate, 1 = shifted Rm),
// bit 24 pre/post indexing, bit 23 add/subtract, bit 22 byte/word,
// bit 21 write-back for the pre-indexed form and bit 20 load/store.
// Post-indexed accesses always write the updated address back to Rn.
// FIXME : should this support Prepare Cache for Load instructions ?
ARM(LDRSTR)
{
	if (((code >> 28) & 0xF) == 0xF)
		met_abort("PLD instructions not implemented");
	if (((code >> 26) & 0x3) != 0x1)
		met_abort("Bits 26-27 must be 01 for LDR/STR instructions");

	uint32_t offset;
	if (!(code & (0x1 << 25))) // immediate offset
	{
		offset = LSOff;
	}
	else // register offset
	{
		if (code & (0x1 << 4))
			met_abort("Bit 4 must be 0 for LDR or STR instruction with register offset");

		const uint32_t amount = (code >> 7) & 0x1F;
		switch ((code >> 5) & 0x3)
		{
			case 0: // Logical Shift Left
				if (amount)
					offset = R(Rm) << amount;
				else
					offset = R(Rm);
				break;
			case 1: // Logical Shift Right, an amount of 0 means 32
				if (amount)
					offset = R(Rm) >> amount;
				else
					offset = 0;
				break;
			case 2: // Arithmetic Shift Right, an amount of 0 means 32
				if (amount)
					offset = ((int32_t)R(Rm)) >> amount;
				else if (R(Rm) >> 31)
					offset = 0xFFFFFFFF;
				else
					offset = 0;
				break;
			case 3: // ROtate Right, an amount of 0 means RRX
				if (amount)
					offset = ROR(R(Rm), amount);
				else
					offset = (FLAG_C << 31) | (R(Rm) >> 1);
				break;
		}
	}

	const bool preIndexed = code & (0x1 << 24);
	const bool upward = code & (0x1 << 23);
	const bool byteAccess = code & (0x1 << 22);
	const bool isLoad = code & (0x1 << 20);

	// pre-indexing applies the offset before the access
	uint32_t address = R(Rn);
	if (preIndexed)
	{
		if (upward)
			address += offset;
		else
			address -= offset;
	}

	if (byteAccess && isLoad) // LDRB
	{
		R(Rd) = MEM.Read8(address);
		if (Rd == 15)
			met_abort("LDRB to R15 !");
		CYCLES16NSeq(address, 1);
		ICYCLES(1);
		CYCLES32Seq(R(15), 1);
	}
	else if (byteAccess) // STRB
	{
		MEM.Write8(address, R(Rd));
		CYCLES16NSeq(address, 1);
		CYCLES32NSeq(R(15), 1);
	}
	else if (isLoad) // LDR
	{
		R(Rd) = MEM.Read32(address);
		CYCLES32NSeq(address, 1);
		ICYCLES(1);
		if (Rd == 15)
		{
			// loading R15 is a branch
			CYCLES32NSeq(R(15), 3);
			R(Rd) += 4;
		}
		else
			CYCLES32Seq(R(15), 1);
	}
	else // STR
	{
		MEM.Write32(address, R(Rd));
		CYCLES32NSeq(address, 1);
		CYCLES32NSeq(R(15), 1);
	}

	if (!preIndexed) // in post, writeback is always enabled
		R(Rn) = upward ? address + offset : address - offset;
	else if (code & (0x1 << 21)) // pre-indexed with write-back requested
		R(Rn) = address;
}
// Halfword, Doubleword, and Signed Data Transfer
// (STRH, LDRD, STRD, LDRH, LDRSB, LDRSH)
//
// Bit 22 selects an 8-bit immediate offset (split over bits 8-11 and 0-3)
// or a register offset, bit 24 pre/post indexing, bit 23 add/subtract and
// bit 21 write-back for the pre-indexed form. The operation is selected by
// bit 20 (load) together with bits 5-6.
ARM(STRLDR_HD)
{
	if ((code >> 25) & 0x7)
		met_abort("Bits 25-27 must be 000 for halfword transfer instructions");
	if ((code & 0x90) != 0x90)
		met_abort("Bits 4 and 7 must be 1 for halfword transfer instructions");
	if (Rd == 15)
		met_abort("operation on r15, not implemented");

	uint8_t rd = Rd;

	uint32_t offset;
	if (!(code & (0x1 << 22))) // register offset
	{
		if ((code >> 8) & 0xF)
			met_abort("Bits 8-11 must be 0 for halfword transfer with register offset instructions");
		NOT_PC(Rm);
		offset = R(Rm);
	}
	else // immediate offset
		offset = ((code >> 4) & 0xF0) | (code & 0xF);

	const bool preIndexed = code & (0x1 << 24);
	const bool upward = code & (0x1 << 23);

	uint32_t address = R(Rn);
	if (preIndexed)
	{
		if (upward)
			address += offset;
		else
			address -= offset;
	}
	else if (code & (0x1 << 21))
		met_abort("Bit 21 must be 0 for post indexed halfword transfers instructions");

	// bit 2 = load flag, bits 0-1 = S and H
	switch (((code >> 18) & 0x4) | ((code >> 5) & 0x3))
	{
		case 0x0:
			met_abort("Reserved for SWP instruction !");
			break;
		case 0x1: // STRH
			MEM.Write16(address, rd == 15 ? R(15) + 4 : R(rd));
			CYCLES16NSeq(address, 1);
			CYCLES32NSeq(R(15), 1);
			break;
		case 0x2: // LDRD
			if (rd % 2)
				met_abort("Register number not even for double word transfer");
			if (address % 8)
				met_abort("Address not double word aligned");
			if (rd == 15)
				met_abort("Rd is 15 for double word transfer !");
			R(rd) = MEM.Read32(address);
			R(rd+1) = MEM.Read32(address+4);
			CYCLES32NSeq(address, 2);
			ICYCLES(1);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x3: // STRD
			if (rd % 2)
				met_abort("Register number not even for double word transfer");
			if (address % 8)
				met_abort("Address not double word aligned");
			if (rd == 15)
				met_abort("Rd is 15 for double word transfer !");
			MEM.Write32(address, R(rd));
			MEM.Write32(address + 4, rd == 14 ? R(15) + 4 : R(rd+1));
			CYCLES32NSeq(address, 2);
			CYCLES32NSeq(R(15), 1);
			break;
		case 0x4:
			met_abort("Reserved !");
			break;
		case 0x5: // LDRH
			R(rd) = MEM.Read16(address);
			CYCLES16NSeq(address, 1);
			ICYCLES(1);
			CYCLES32Seq(R(15), 1);
			break;
		case 0x6: // LDRSB
		{
			// sign-extend from 8 bits by moving the sign to bit 31 and back
			uint32_t loaded = MEM.Read8(address);
			loaded <<= 24;
			R(rd) = ((int32_t)loaded) >> 24;
			CYCLES16NSeq(address, 1);
			ICYCLES(1);
			CYCLES32Seq(R(15), 1);
			break;
		}
		case 0x7: // LDRSH
		{
			// sign-extend from 16 bits
			uint32_t loaded = MEM.Read16(address);
			loaded <<= 16;
			R(rd) = ((int32_t)loaded) >> 16;
			CYCLES16NSeq(address, 1);
			ICYCLES(1);
			CYCLES32Seq(R(15), 1);
			break;
		}
	}

	if (!preIndexed) // in post, writeback is always enabled
		R(Rn) = upward ? address + offset : address - offset;
	else if (code & (0x1 << 21))
		R(Rn) = address;
}
// Block Data Transfer (LDM,STM)
//
// Transfers every register whose bit is set in the low 16 bits of the
// opcode, lowest register at the lowest address. Bit 24 selects
// before/after, bit 23 increment/decrement, bit 21 write-back of the
// final address to Rn and bit 20 load/store. The S bit (bit 22) is not
// implemented.
ARM(LDMSTM)
{
	if (((code >> 25) & 0x7) != 0x4)
		met_abort("Bits 25-27 must be 100 for LDM/STM instructions");
	if (code & (0x1 << 22))
		met_abort("not implemented");

	// number of set bits in a nibble
	static const uint8_t NumBits[] =
		{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};

	// Works like LDR and STR, look at above comments
	const uint8_t numregs =
		NumBits[(code >> 12) & 0xF] +
		NumBits[(code >> 8) & 0xF] +
		NumBits[(code >> 4) & 0xF] +
		NumBits[ code & 0xF];
	const uint16_t regs = code & 0xFFFF;
	const bool isLoad = code & (0x1 << 20);
	const bool upward = code & (0x1 << 23);

	const uint32_t baseadd = R(Rn);
	uint32_t add = baseadd;

	// lowest address touched by the transfer
	if (code & (0x1 << 24))
	{
		if (upward)
			add += 4; // increment before
		else
			add -= numregs * 4; // decrement before
	}
	else
	{
		if (!upward)
			add -= (numregs-1) * 4; // decrement after
	}
	add &= 0xFFFFFFFC;

	if (isLoad) // LDM
	{
		CYCLES32NSeq(add, numregs);
		ICYCLES(1);
		for (uint8_t n = 0; n < 16; ++n)
			if (regs & (0x1 << n))
			{
				R(n) = MEM.Read32 (add);
				if (n == 15)
					R(15) += 4;
				add += 4;
			}
		if (regs & (0x1 << 15))
			CYCLES32NSeq(R(15), 3);
		else
			CYCLES32Seq(R(15), 1);
	}
	else // STM
	{
		// NOTE(review): a base register present in the list is always
		// stored with its old value here; GBATEK describes the new base
		// being stored when it is not the first register - not handled.
		CYCLES32NSeq(add, numregs);
		CYCLES32NSeq(R(15), 1);
		for (uint8_t n = 0; n < 16; ++n)
			if (regs & (0x1 << n))
			{
				if (n == 15)
					MEM.Write32 (add, R(15) + 4);
				else
					MEM.Write32 (add, R(n));
				add += 4;
			}
	}

	// Write-back. An LDM that loads its own base register keeps the loaded
	// value (no write-back on ARMv4) instead of overwriting it.
	const bool baseLoaded = isLoad && (regs & (0x1 << Rn));
	if ((code & (0x1 << 21)) && !baseLoaded)
	{
		if (upward)
			R(Rn) = baseadd + numregs * 4;
		else
			R(Rn) = baseadd - numregs * 4;
	}
}
// Single Data Swap (SWP, SWPB)
//
// Loads the word (or byte when bit 22 is set) at the address in Rn into
// Rd and stores Rm at the same address. The address and the value to
// store are latched before Rd is written, so the swap is correct when Rd
// is the same register as Rm or Rn.
ARM(SWP)
{
	if (((code >> 23) & 0x1F) != 0x02)
		met_abort("Bits 23-27 must be 00010 for SWP instructions");
	if ((code >> 20) & 0x3)
		met_abort("Bits 20-21 must be 00 for SWP instructions");
	if (((code >> 4) & 0xFF) != 0x09)
		met_abort("Bits 4-11 must be 00001001 for SWP instructions");

	const uint32_t address = R(Rn);
	const uint32_t source = R(Rm);

	if (code & (0x1 << 22)) // SWPB
	{
		const uint32_t loaded = MEM.Read8(address);
		MEM.Write8(address, source);
		R(Rd) = loaded;
	}
	else // SWP
	{
		const uint32_t loaded = MEM.Read32(address);
		MEM.Write32(address, source);
		R(Rd) = loaded;
	}

	// one access for the read, one for the write
	CYCLES32NSeq(address, 1);
	CYCLES32NSeq(address, 1);
	ICYCLES(1);
	CYCLES32Seq(R(15), 1);
}
// Software Interrupt (SWI,BKPT)
//
// Forwards bits 16-23 of the opcode to CPU.SoftwareInterrupt.
ARM(SWI)
{
	const bool validEncoding = ((code >> 24) & 0xF) == 0xF;
	if (!validEncoding)
		met_abort("Bits 24-27 must be 1111 for SWI instructions");

	CPU.SoftwareInterrupt((code >> 16) & 0xFF);

	// FIXME seems wrong !
	CYCLES32NSeq(0, 3);
}
// Evaluates a 4-bit condition field against the current N/Z/C/V flags.
// Returns true when an instruction carrying that condition must execute.
// AL (0xE), the reserved value 0xF and anything outside 0x0-0xD always
// pass.
inline bool Interpreter::a_CheckCondition (uint8_t cond)
{
	switch (cond)
	{
		case 0x0 : // EQ
			return !!FLAG_Z;
		case 0x1 : // NE
			return !FLAG_Z;
		case 0x2 : // CS
			return !!FLAG_C;
		case 0x3 : // CC
			return !FLAG_C;
		case 0x4 : // MI
			return !!FLAG_N;
		case 0x5 : // PL
			return !FLAG_N;
		case 0x6 : // VS
			return !!FLAG_V;
		case 0x7 : // VC
			return !FLAG_V;
		case 0x8 : // HI
			return FLAG_C && !FLAG_Z;
		case 0x9 : // LS
			return !FLAG_C || FLAG_Z;
		case 0xA : // GE
			return FLAG_N == FLAG_V;
		case 0xB : // LT
			return FLAG_N != FLAG_V;
		case 0xC : // GT
			return !FLAG_Z && FLAG_N == FLAG_V;
		case 0xD : // LE
			return FLAG_Z || FLAG_N != FLAG_V;
		default : // AL, reserved
			return true;
	}
}
// Decodes and executes the ARM opcode held in `code`.
//
// The condition field (bits 28-31) is checked first; a failed condition
// only costs one sequential cycle. Otherwise bits 25-27 select the
// instruction group. Group 0 is further decoded with a 7-bit selector
// built from bits 23-24 (selector bits 5-6), bit 20 (selector bit 4) and
// bits 4-7 (selector bits 0-3).
NIARM(_Code)
{
	if (!a_CheckCondition(code >> 28)) // condition failed
	{
		CYCLES32Seq(R(15), 1);
		return;
	}

	switch ((code >> 25) & 0x7)
	{
		case 0x0:
		{
			const uint8_t selector =
				((code >> 18) & 0x60) |
				((code >> 16) & 0x10) |
				((code >> 4) & 0x0F);
			switch (selector)
			{
				case 0x40:
					aPSR();
					break;

				// bit 4 of the opcode clear: shift by immediate
				case 0x00: case 0x02: case 0x04: case 0x06:
				case 0x08: case 0x0A: case 0x0C: case 0x0E:
				case 0x10: case 0x12: case 0x14: case 0x16:
				case 0x18: case 0x1A: case 0x1C: case 0x1E:
				case 0x20: case 0x22: case 0x24: case 0x26:
				case 0x28: case 0x2A: case 0x2C: case 0x2E:
				case 0x30: case 0x32: case 0x34: case 0x36:
				case 0x38: case 0x3A: case 0x3C: case 0x3E:
				case 0x50: case 0x52: case 0x54: case 0x56:
				case 0x58: case 0x5A: case 0x5C: case 0x5E:
				case 0x60: case 0x62: case 0x64: case 0x66:
				case 0x68: case 0x6A: case 0x6C: case 0x6E:
				case 0x70: case 0x72: case 0x74: case 0x76:
				case 0x78: case 0x7A: case 0x7C: case 0x7E:
					a_DataProcShiftImm();
					break;

				// bit 4 set and bit 7 clear: shift by register
				case 0x01: case 0x03: case 0x05: case 0x07:
				case 0x11: case 0x13: case 0x15: case 0x17:
				case 0x21: case 0x23: case 0x25: case 0x27:
				case 0x31: case 0x33: case 0x35: case 0x37:
				case 0x51: case 0x53: case 0x55: case 0x57:
				case 0x61: case 0x63: case 0x65: case 0x67:
				case 0x71: case 0x73: case 0x75: case 0x77:
					a_DataProcShiftReg();
					break;

				case 0x09: case 0x19: case 0x29: case 0x39:
				case 0x48: case 0x4A: case 0x4C: case 0x4E:
					a_Multiply();
					break;

				case 0x0B: case 0x0D: case 0x0F:
				case 0x1B: case 0x1D: case 0x1F:
				case 0x2B: case 0x2D: case 0x2F:
				case 0x3B: case 0x3D: case 0x3F:
				case 0x4B: case 0x4D: case 0x4F:
				case 0x5B: case 0x5D: case 0x5F:
				case 0x6B: case 0x6D: case 0x6F:
				case 0x7B: case 0x7D: case 0x7F:
					aSTRLDR_HD();
					break;

				case 0x49:
					aSWP();
					break;

				case 0x41: case 0x43:
					aBXBLX();
					break;

				default:
					met_abort("unknown");
					break;
			}
			break;
		}
		case 0x1:
			// TODO PSR
			a_DataProcImm();
			break;
		case 0x2:
		case 0x3:
			aLDRSTR();
			break;
		case 0x4:
			aLDMSTM();
			break;
		case 0x5:
			aBBL();
			break;
		case 0x7:
			if (code & (0x1 << 24))
				aSWI();
			else
				met_abort("unknown");
			break;
		default:
			// dump the address and bits of the opcode before aborting
			{ std::cerr << IOS_ADD << R(15)-8 << " : " << IOS_ADD << code << " : "; debug_bits(code); met_abort("not implemented"); }
			break;
	}
}
}
// Remove the helper macros defined at the top of this file so they do not
// leak into whatever is compiled after it.
// NOTE(review): NIARM is defined above but is not undefined here - confirm
// whether that is intentional.
#undef Rn
#undef Rd
#undef Rs
#undef Rm
#undef LSOff
#undef NOT_PC
#undef NOT_PC_ALL
#undef NOT_SAME2
#undef NOT_SAME3
#undef ARM
#endif