[ARM] Merge the load instructions into one function. Also rapid-prototype 13 more load instructions. This currently disables fastmem for loads.

This commit is contained in:
Ryan Houdek 2013-09-03 09:22:43 +00:00
parent d4d6eb562e
commit 30cd436e86
3 changed files with 201 additions and 287 deletions

View File

@ -126,6 +126,8 @@ public:
// TODO: This shouldn't be here // TODO: This shouldn't be here
void UnsafeStoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset); void UnsafeStoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset);
void SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset); void SafeStoreFromReg(bool fastmem, s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset);
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse);
void LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset); void LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset);
@ -176,14 +178,10 @@ public:
// LoadStore // LoadStore
void stX(UGeckoInstruction _inst); void stX(UGeckoInstruction _inst);
void lXX(UGeckoInstruction _inst);
void icbi(UGeckoInstruction _inst); void icbi(UGeckoInstruction _inst);
void dcbst(UGeckoInstruction _inst); void dcbst(UGeckoInstruction _inst);
void lbz(UGeckoInstruction _inst);
void lhz(UGeckoInstruction _inst);
void lha(UGeckoInstruction _inst);
void lwz(UGeckoInstruction _inst);
void lwzx(UGeckoInstruction _inst);
// Floating point // Floating point
void fabsx(UGeckoInstruction _inst); void fabsx(UGeckoInstruction _inst);

View File

@ -217,6 +217,186 @@ void JitArm::stX(UGeckoInstruction inst)
} }
} }
// Emits a call to Memory::Read_U8 / Read_U16 / Read_U32 (chosen by accessSize)
// and moves the returned value into the host register mapped to guest GPR `dest`.
//
//   dest       - guest GPR index that receives the loaded value
//   addr       - guest GPR index added to the address, or -1 for none
//   offsetReg  - guest GPR index used as the displacement, or -1 to use `offset`
//   accessSize - 8, 16 or 32; any other value emits no read-function load
//   offset     - immediate displacement, used only when offsetReg == -1
//   signExtend - emit SXTH on the result (halfword sign extension)
//   reverse    - emit REV (32-bit) or REV16 (16-bit) on the result
void JitArm::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse)
{
	ARMReg destReg = gpr.R(dest);
	ARMReg addrTmp = gpr.GetReg();
	ARMReg funcTmp = gpr.GetReg();

	// Seed the address with the displacement: a register if one was given,
	// otherwise the immediate.
	if (offsetReg != -1)
		MOV(addrTmp, gpr.R(offsetReg));
	else
		MOVI2R(addrTmp, offset);

	// Add the optional second address component.
	if (addr != -1)
		ADD(addrTmp, addrTmp, gpr.R(addr));

	// Pick the read routine matching the access width.
	if (accessSize == 8)
		MOVI2R(funcTmp, (u32)&Memory::Read_U8);
	else if (accessSize == 16)
		MOVI2R(funcTmp, (u32)&Memory::Read_U16);
	else if (accessSize == 32)
		MOVI2R(funcTmp, (u32)&Memory::Read_U32);

	// R0-R3 are saved around the call; the address goes in through R0 and the
	// result is copied out of R0 before the saved values are restored.
	PUSH(4, R0, R1, R2, R3);
	MOV(R0, addrTmp);
	BL(funcTmp);
	MOV(addrTmp, R0);
	POP(4, R0, R1, R2, R3);
	MOV(destReg, addrTmp);

	// SXTH extends from the low halfword, so this is only meaningful for
	// 16-bit loads.
	if (signExtend)
		SXTH(destReg, destReg);

	// Byte-reversed loads: 8-bit accesses have nothing to swap.
	if (reverse && accessSize == 32)
		REV(destReg, destReg);
	else if (reverse && accessSize == 16)
		REV16(destReg, destReg);

	gpr.Unlock(addrTmp, funcTmp);
}
// Single JIT emitter for the PowerPC integer load family.
//
// Handled encodings:
//   D-form (immediate displacement): lwz/lwzu, lbz/lbzu, lhz/lhzu, lha/lhau
//   X-form (opcode 31, indexed):     lwzx/lwzux, lbzx/lbzux, lhzx/lhzux,
//                                    lhax/lhaux, lwbrx, lhbrx
//
// The instruction is decoded into a small set of properties (access size,
// displacement source, update, sign extension, byte reversal) which are then
// handed to SafeLoadToReg. The load is skipped at run time when the Exceptions
// field compares equal to EXCEPTION_DSI.
void JitArm::lXX(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	u32 a = inst.RA, b = inst.RB, d = inst.RD;
	s32 offset = inst.SIMM_16;
	u32 accessSize = 0;
	s32 offsetReg = -1;     // -1: use the immediate `offset` as displacement
	bool zeroA = true;      // true: rA == 0 means "no base register"
	bool update = false;    // true: write the effective address back to rA
	bool signExtend = false;
	bool reverse = false;

	switch(inst.OPCD)
	{
		case 31:
			// All opcode-31 loads are indexed: the displacement is rB.
			switch(inst.SUBOP10)
			{
				case 55: // lwzux
					zeroA = false;
					update = true;
					// fall through
				case 23: // lwzx
					accessSize = 32;
					offsetReg = b;
				break;
				case 119: // lbzux
					zeroA = false;
					update = true;
					// fall through
				case 87: // lbzx
					accessSize = 8;
					offsetReg = b;
				break;
				case 311: // lhzux
					zeroA = false;
					update = true;
					// fall through
				case 279: // lhzx
					accessSize = 16;
					offsetReg = b;
				break;
				case 375: // lhaux
					zeroA = false;
					update = true;
					// fall through
				case 343: // lhax
					accessSize = 16;
					signExtend = true;
					offsetReg = b;
				break;
				case 534: // lwbrx
					accessSize = 32;
					reverse = true;
					// Indexed like every other X-form load; without this the
					// address would be built from the meaningless SIMM_16 bits.
					offsetReg = b;
				break;
				case 790: // lhbrx
					accessSize = 16;
					reverse = true;
					offsetReg = b;
				break;
			}
		break;
		case 33: // lwzu
			zeroA = false;
			update = true;
			// fall through
		case 32: // lwz
			accessSize = 32;
		break;
		case 35: // lbzu
			zeroA = false;
			update = true;
			// fall through
		case 34: // lbz
			accessSize = 8;
		break;
		case 41: // lhzu
			zeroA = false;
			update = true;
			// fall through
		case 40: // lhz
			accessSize = 16;
		break;
		case 43: // lhau
			zeroA = false;
			update = true;
			// fall through
		case 42: // lha
			signExtend = true;
			accessSize = 16;
		break;
	}

	// Check for exception before loading
	ARMReg rA = gpr.GetReg(false);
	LDR(rA, R9, PPCSTATE_OFF(Exceptions));
	CMP(rA, EXCEPTION_DSI);
	FixupBranch DoNotLoad = B_CC(CC_EQ);

	// For the non-update forms rA == 0 means "no base register" (-1);
	// the update forms always use rA as given.
	s32 addrReg = (zeroA && a == 0) ? -1 : (s32)a;
	SafeLoadToReg(d, addrReg, offsetReg, accessSize, offset, signExtend, reverse);

	if (update)
	{
		// Write the effective address back: rA += displacement.
		rA = gpr.GetReg(false);
		ARMReg RA = gpr.R(a);
		if (offsetReg == -1)
			MOVI2R(rA, offset);
		else
			MOV(rA, gpr.R(offsetReg)); // displacement goes in the scratch register, not in RA
		ADD(RA, RA, rA);
	}

	SetJumpTarget(DoNotLoad);

	// LWZ idle skipping
	// Matches this exact lwz encoding (0x800Dxxxx) followed by the words
	// 0x28000000 (or 0x2C000000 on Wii) and 0x4182fff8 -- presumably a
	// "load, compare with zero, branch back" busy-wait loop.
	if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
		(inst.hex & 0xFFFF0000) == 0x800D0000 &&
		(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
		(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
		Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
	{
		ARMReg RD = gpr.R(d);
		gpr.Flush();
		fpr.Flush();

		// if it's still 0, we can wait until the next event
		TST(RD, RD);
		FixupBranch noIdle = B_CC(CC_NEQ);
		rA = gpr.GetReg();

		MOVI2R(rA, (u32)&PowerPC::OnIdle);
		// NOTE(review): the argument is computed from ppcState.gpr[a] while this
		// block is being compiled, not when it runs -- confirm that is intended.
		MOVI2R(R0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
		BL(rA);

		gpr.Unlock(rA);
		WriteExceptionExit();

		SetJumpTarget(noIdle);

		//js.compilerPC += 8;
		return;
	}
}
void JitArm::LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset) void JitArm::LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset)
{ {
ARMReg rA = gpr.GetReg(); ARMReg rA = gpr.GetReg();
@ -255,270 +435,6 @@ void JitArm::LoadToReg(ARMReg dest, ARMReg addr, int accessSize, s32 offset)
} }
gpr.Unlock(rA); gpr.Unlock(rA);
} }
// JIT emitter for lbz: emits code that loads one byte from
// (inst.RA ? RA + SIMM_16 : SIMM_16) into the host register mapped to inst.RD.
// The emitted load is skipped when the Exceptions field equals EXCEPTION_DSI.
void JitArm::lbz(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RD = gpr.R(inst.RD);
// Branch over the load if Exceptions == EXCEPTION_DSI.
// (R9 presumably holds the ppcState base pointer -- confirm.)
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
// NOTE: the `else` below pairs with the braced block after #endif; when
// FASTMEM is not compiled in, that block is emitted unconditionally.
#if FASTMEM
// Backpatch route
// Gets loaded in to RD
// Address is in R10
if (Core::g_CoreStartupParameter.bFastmem)
{
// The scratch registers are not needed on this path.
gpr.Unlock(rA, rB);
if (inst.RA)
{
ARMReg RA = gpr.R(inst.RA);
MOV(R10, RA); // - 4
}
else
MOV(R10, 0); // - 4
LoadToReg(RD, R10, 8, inst.SIMM_16);
}
else
#endif
{
// Slow path: build the address in rB, then call Memory::Read_U8.
if (inst.RA)
{
MOVI2R(rB, inst.SIMM_16);
ARMReg RA = gpr.R(inst.RA);
ADD(rB, rB, RA);
}
else
MOVI2R(rB, (u32)inst.SIMM_16);
MOVI2R(rA, (u32)&Memory::Read_U8);
// R0-R3 are saved around the call; the address is passed in R0 and the
// result is copied out of R0 before the saved values are restored.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
MOV(rA, R0);
POP(4, R0, R1, R2, R3);
MOV(RD, rA);
gpr.Unlock(rA, rB);
}
SetJumpTarget(DoNotLoad);
}
// JIT emitter for lhz: emits code that loads a halfword from
// (inst.RA ? RA + SIMM_16 : SIMM_16) into the host register mapped to inst.RD.
// The emitted load is skipped when the Exceptions field equals EXCEPTION_DSI.
void JitArm::lhz(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RD = gpr.R(inst.RD);
// Branch over the load if Exceptions == EXCEPTION_DSI.
// (R9 presumably holds the ppcState base pointer -- confirm.)
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
// NOTE: the `else` below pairs with the braced block after #endif; when
// FASTMEM is not compiled in, that block is emitted unconditionally.
#if FASTMEM
// Backpatch route
// Gets loaded in to RD
// Address is in R10
if (Core::g_CoreStartupParameter.bFastmem)
{
if (inst.RA)
{
ARMReg RA = gpr.R(inst.RA);
MOV(R10, RA); // - 4
}
else
MOV(R10, 0); // - 4
LoadToReg(RD, R10, 16, (u32)inst.SIMM_16);
}
else
#endif
{
// Slow path: build the address in rB, then call Memory::Read_U16.
if (inst.RA)
{
MOVI2R(rB, inst.SIMM_16);
ARMReg RA = gpr.R(inst.RA);
ADD(rB, rB, RA);
}
else
MOVI2R(rB, (u32)inst.SIMM_16);
MOVI2R(rA, (u32)&Memory::Read_U16);
// R0-R3 are saved around the call; the address is passed in R0 and the
// result is copied out of R0 before the saved values are restored.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
MOV(rA, R0);
POP(4, R0, R1, R2, R3);
MOV(RD, rA);
}
// Here the scratch registers are released once, after both paths.
gpr.Unlock(rA, rB);
SetJumpTarget(DoNotLoad);
}
// JIT emitter for lha: emits code that reads a halfword through
// Memory::Read_U16 from (inst.RA ? RA + SIMM_16 : SIMM_16), sign-extends it
// with SXTH and stores it in the host register mapped to inst.RD.
// The emitted load is skipped when the Exceptions field equals EXCEPTION_DSI.
void JitArm::lha(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(LoadStore)

	ARMReg scratch = gpr.GetReg();
	ARMReg address = gpr.GetReg();
	ARMReg destReg = gpr.R(inst.RD);

	// Branch over the whole load if Exceptions == EXCEPTION_DSI.
	LDR(scratch, R9, PPCSTATE_OFF(Exceptions));
	CMP(scratch, EXCEPTION_DSI);
	FixupBranch skipLoad = B_CC(CC_EQ);

	// Effective address: the immediate alone, or immediate + RA.
	if (!inst.RA)
	{
		MOVI2R(address, (u32)inst.SIMM_16);
	}
	else
	{
		MOVI2R(address, inst.SIMM_16);
		ARMReg baseReg = gpr.R(inst.RA);
		ADD(address, address, baseReg);
	}

	MOVI2R(scratch, (u32)&Memory::Read_U16);

	// R0-R3 are saved around the call. The result is copied out of R0 and
	// sign-extended before the saved values are restored.
	PUSH(4, R0, R1, R2, R3);
	MOV(R0, address);
	BL(scratch);
	MOV(scratch, R0);
	SXTH(scratch, scratch);
	POP(4, R0, R1, R2, R3);

	MOV(destReg, scratch);
	gpr.Unlock(scratch, address);
	SetJumpTarget(skipLoad);
}
// JIT emitter for lwz: emits code that loads a word from
// (inst.RA ? RA + SIMM_16 : SIMM_16) into the host register mapped to inst.RD.
// The emitted load is skipped when the Exceptions field equals EXCEPTION_DSI.
// When idle skipping is enabled and the following instructions match a known
// pattern, it also emits a call to PowerPC::OnIdle and an exception exit.
void JitArm::lwz(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RD = gpr.R(inst.RD);
// Branch over the load if Exceptions == EXCEPTION_DSI.
// (R9 presumably holds the ppcState base pointer -- confirm.)
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
// NOTE: the `else` below pairs with the braced block after #endif; when
// FASTMEM is not compiled in, that block is emitted unconditionally.
#if FASTMEM
// Backpatch route
// Gets loaded in to RD
// Address is in R10
if (Core::g_CoreStartupParameter.bFastmem)
{
// The scratch registers are not needed on this path.
gpr.Unlock(rA, rB);
if (inst.RA)
{
ARMReg RA = gpr.R(inst.RA);
MOV(R10, RA); // - 4
}
else
MOV(R10, 0); // - 4
LoadToReg(RD, R10, 32, (u32)inst.SIMM_16);
}
else
#endif
{
// Slow path: build the address in rB, then call Memory::Read_U32.
if (inst.RA)
{
MOVI2R(rB, inst.SIMM_16);
ARMReg RA = gpr.R(inst.RA);
ADD(rB, rB, RA);
}
else
MOVI2R(rB, (u32)inst.SIMM_16);
MOVI2R(rA, (u32)&Memory::Read_U32);
// R0-R3 are saved around the call; the address is passed in R0 and the
// result is copied out of R0 before the saved values are restored.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
MOV(rA, R0);
POP(4, R0, R1, R2, R3);
MOV(RD, rA);
gpr.Unlock(rA, rB);
}
SetJumpTarget(DoNotLoad);
// Idle-loop detection: this exact lwz encoding (0x800Dxxxx) followed by the
// words 0x28000000 (or 0x2C000000 on Wii) and 0x4182fff8 -- presumably a
// "load, compare with zero, branch back" busy-wait loop.
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
(inst.hex & 0xFFFF0000) == 0x800D0000 &&
(Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
(SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
{
gpr.Flush();
fpr.Flush();
// if it's still 0, we can wait until the next event
TST(RD, RD);
FixupBranch noIdle = B_CC(CC_NEQ);
rA = gpr.GetReg();
MOVI2R(rA, (u32)&PowerPC::OnIdle);
// NOTE(review): the argument is computed from ppcState.gpr[inst.RA] while this
// block is being compiled, not when it runs -- confirm that is intended.
MOVI2R(R0, PowerPC::ppcState.gpr[inst.RA] + (s32)(s16)inst.SIMM_16);
BL(rA);
gpr.Unlock(rA);
WriteExceptionExit();
SetJumpTarget(noIdle);
//js.compilerPC += 8;
return;
}
}
// JIT emitter for lwzx: emits code that loads a word from
// (inst.RA ? RA + RB : RB) into the host register mapped to inst.RD.
// The emitted load is skipped when the Exceptions field equals EXCEPTION_DSI.
void JitArm::lwzx(UGeckoInstruction inst)
{
INSTRUCTION_START
JITDISABLE(LoadStore)
ARMReg rA = gpr.GetReg();
ARMReg rB = gpr.GetReg();
ARMReg RB = gpr.R(inst.RB);
ARMReg RD = gpr.R(inst.RD);
// Branch over the load if Exceptions == EXCEPTION_DSI.
// (R9 presumably holds the ppcState base pointer -- confirm.)
LDR(rA, R9, PPCSTATE_OFF(Exceptions));
CMP(rA, EXCEPTION_DSI);
FixupBranch DoNotLoad = B_CC(CC_EQ);
// NOTE: the `else` below pairs with the braced block after #endif; when
// FASTMEM is not compiled in, that block is emitted unconditionally.
#if FASTMEM
// Backpatch route
// Gets loaded in to RD
// Address is in R10
if (Core::g_CoreStartupParameter.bFastmem)
{
// The scratch registers are not needed on this path.
gpr.Unlock(rA, rB);
if (inst.RA)
{
ARMReg RA = gpr.R(inst.RA);
ADD(R10, RA, RB); // - 4
}
else
MOV(R10, RB); // -4
LoadToReg(RD, R10, 32, 0);
}
else
#endif
{
// Slow path: build the address in rB, then call Memory::Read_U32.
if (inst.RA)
{
ARMReg RA = gpr.R(inst.RA);
ADD(rB, RA, RB);
}
else
MOV(rB, RB);
MOVI2R(rA, (u32)&Memory::Read_U32);
// R0-R3 are saved around the call; the address is passed in R0 and the
// result is copied out of R0 before the saved values are restored.
PUSH(4, R0, R1, R2, R3);
MOV(R0, rB);
BL(rA);
MOV(rA, R0);
POP(4, R0, R1, R2, R3);
MOV(RD, rA);
gpr.Unlock(rA, rB);
}
SetJumpTarget(DoNotLoad);
// Reference form of the same load, kept from the original source:
//// u32 temp = Memory::Read_U32(_inst.RA ? (m_GPR[_inst.RA] + m_GPR[_inst.RB]) : m_GPR[_inst.RB]);
}
void JitArm::dcbst(UGeckoInstruction inst) void JitArm::dcbst(UGeckoInstruction inst)
{ {

View File

@ -78,14 +78,14 @@ static GekkoOPTemplate primarytable[] =
{28, &JitArm::arith}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {28, &JitArm::arith}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{29, &JitArm::arith}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {29, &JitArm::arith}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{32, &JitArm::lwz}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {32, &JitArm::lXX}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{33, &JitArm::Default}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {33, &JitArm::lXX}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{34, &JitArm::lbz}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {34, &JitArm::lXX}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{35, &JitArm::Default}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {35, &JitArm::lXX}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{40, &JitArm::lhz}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {40, &JitArm::lXX}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{41, &JitArm::Default}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {41, &JitArm::lXX}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{42, &JitArm::lha}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {42, &JitArm::lXX}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{43, &JitArm::Default}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {43, &JitArm::lXX}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{44, &JitArm::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, {44, &JitArm::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{45, &JitArm::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, {45, &JitArm::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
@ -221,24 +221,24 @@ static GekkoOPTemplate table31[] =
{1014, &JitArm::Default}, //"dcbz", OPTYPE_DCACHE, 0, 4}}, {1014, &JitArm::Default}, //"dcbz", OPTYPE_DCACHE, 0, 4}},
//load word //load word
{23, &JitArm::lwzx}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {23, &JitArm::lXX}, //"lwzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{55, &JitArm::Default}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {55, &JitArm::lXX}, //"lwzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword //load halfword
{279, &JitArm::Default}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {279, &JitArm::lXX}, //"lhzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{311, &JitArm::Default}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {311, &JitArm::lXX}, //"lhzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load halfword signextend //load halfword signextend
{343, &JitArm::Default}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {343, &JitArm::lXX}, //"lhax", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{375, &JitArm::Default}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {375, &JitArm::lXX}, //"lhaux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte //load byte
{87, &JitArm::Default}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {87, &JitArm::lXX}, //"lbzx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{119, &JitArm::Default}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {119, &JitArm::lXX}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte reverse //load byte reverse
{534, &JitArm::Default}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {534, &JitArm::lXX}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{790, &JitArm::Default}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {790, &JitArm::lXX}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
// Conditional load/store (Wii SMP) // Conditional load/store (Wii SMP)
{150, &JitArm::Default}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, {150, &JitArm::Default}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}},