More 32-bit speed

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@164 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-09 20:18:50 +00:00
parent 12b37ba323
commit c305371725
9 changed files with 132 additions and 63 deletions

View File

@ -669,6 +669,10 @@ namespace Gen
void MOVSX(int dbits, int sbits, X64Reg dest, OpArg src)
{
if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVSX - Imm argument");
if (dbits == sbits) {
MOV(dbits, R(dest), src);
return;
}
src.operandReg = (u8)dest;
if (dbits == 16) Write8(0x66);
src.WriteRex(dbits == 64);
@ -696,6 +700,10 @@ namespace Gen
void MOVZX(int dbits, int sbits, X64Reg dest, OpArg src)
{
if (src.IsImm()) _assert_msg_(DYNA_REC, 0, "MOVZX - Imm argument");
if (dbits == sbits) {
MOV(dbits, R(dest), src);
return;
}
src.operandReg = (u8)dest;
if (dbits == 16) Write8(0x66);
src.WriteRex(dbits == 64);

View File

@ -346,7 +346,7 @@ void CInterpreter::srawx(UGeckoInstruction _inst)
}
else
{
int amount = rb&0x1f;
int amount = rb & 0x1f;
if (amount == 0)
{
m_GPR[_inst.RA] = m_GPR[_inst.RS];

View File

@ -115,13 +115,13 @@ void CInterpreter::lfsx(UGeckoInstruction _inst)
// lha: load a halfword from the effective address computed by Helper_Get_EA
// and store it, sign-extended to 32 bits, into GPR[RD].
void CInterpreter::lha(UGeckoInstruction _inst)
{
	// Read memory exactly once. The u16 -> s16 -> s32 chain performs the sign
	// extension; the final cast to u32 only reinterprets the bits for storage.
	m_GPR[_inst.RD] = (u32)(s32)(s16)Memory::Read_U16(Helper_Get_EA(_inst));
}
// lhau: load a halfword from the effective address computed by
// Helper_Get_EA_U, store it sign-extended into GPR[RD], then write the
// effective address back into GPR[RA].
void CInterpreter::lhau(UGeckoInstruction _inst)
{
	u32 uAddress = Helper_Get_EA_U(_inst);
	// Read memory exactly once. The u16 -> s16 -> s32 chain performs the sign
	// extension; the final cast to u32 only reinterprets the bits for storage.
	m_GPR[_inst.RD] = (u32)(s32)(s16)Memory::Read_U16(uAddress);
	// Update form: the base register receives the address that was accessed.
	m_GPR[_inst.RA] = uAddress;
}

View File

@ -188,6 +188,7 @@ detect immediates in stb stw sth
TODO
lha
srawx
addic_rc
addex
subfcx

View File

@ -86,8 +86,11 @@ namespace Jit64
void FlushRegCaches();
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset);
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
void addx(UGeckoInstruction inst);
void orx(UGeckoInstruction inst);
void andx(UGeckoInstruction inst);
@ -144,6 +147,7 @@ namespace Jit64
void fmaddXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst); //stw sth stb
void lXz(UGeckoInstruction inst);
void lha(UGeckoInstruction inst);
void rlwinmx(UGeckoInstruction inst);
void rlwimix(UGeckoInstruction inst);
void rlwnmx(UGeckoInstruction inst);
@ -153,6 +157,7 @@ namespace Jit64
void dcbz(UGeckoInstruction inst);
void lfsx(UGeckoInstruction inst);
void subfic(UGeckoInstruction inst);
void subfcx(UGeckoInstruction inst);
void subfx(UGeckoInstruction inst);
void lbzx(UGeckoInstruction inst);

View File

@ -29,45 +29,56 @@
namespace Jit64
{
// Emits code that copies the host carry flag into bit 29 of XER.
// Precondition: the host flags still hold the result of the addition whose
// carry is wanted, so nothing that alters the flags may be emitted in between.
// temp_reg is overwritten by the emitted code.
void GenerateCarry(X64Reg temp_reg)
{
	const u8 carryBit = 29; // position of the carry bit inside XER

	// Capture the carry first: the AND below changes the host flags.
	SETcc(CC_C, R(temp_reg));
	// Drop the previous carry bit from XER ...
	AND(32, M(&XER), Imm32(~(1 << carryBit)));
	// ... move the captured 0/1 up to the same bit position ...
	SHL(32, R(temp_reg), Imm8(carryBit));
	// ... and merge it in.
	OR(32, M(&XER), R(temp_reg));
}
// Host-side evaluation of a two-operand integer operation. regimmop() calls
// one of these to fold an operation at compile time when the destination
// register currently holds a known immediate.
typedef u32 (*Operation)(u32 a, u32 b);

// These stay ordinary named functions: regimmop() compares the pointer it is
// given against Add to recognise the add case.
u32 Add(u32 a, u32 b)
{
	return a + b;
}

u32 Or(u32 a, u32 b)
{
	return a | b;
}

u32 And(u32 a, u32 b)
{
	return a & b;
}

u32 Xor(u32 a, u32 b)
{
	return a ^ b;
}
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void(*op)(int, const OpArg&, const OpArg&), bool Rc = false)
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void(*op)(int, const OpArg&, const OpArg&), bool Rc = false, bool carry = false)
{
gpr.Lock(d,a);
if (a || binary)
if (a || binary || carry) // yeh nasty special case addic
{
if (a == d)
{
if (gpr.R(d).IsImm())
if (gpr.R(d).IsImm() && !carry)
{
gpr.SetImmediate32(d, doop((u32)gpr.R(d).offset, value));
}
else
{
if (gpr.R(d).IsImm())
gpr.LoadToX64(d,false);
gpr.LoadToX64(d, false);
op(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (carry)
GenerateCarry(EAX);
}
}
else
{
gpr.LoadToX64(d,false);
gpr.LoadToX64(d, false);
MOV(32, gpr.R(d), gpr.R(a));
op(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
if (carry)
GenerateCarry(EAX);
}
}
else if (doop == Add)
else if (doop == Add && !carry)
{
gpr.SetImmediate32(d, value);
}
else
{
_assert_msg_(DYNA_REC, 0, "WTF");
_assert_msg_(DYNA_REC, 0, "WTF regimmop");
}
if (Rc)
{
@ -95,8 +106,8 @@ namespace Jit64
case 29: regimmop(a, s, true, inst.UIMM << 16, And, AND, true); break;
case 26: regimmop(a, s, true, inst.UIMM, Xor, XOR, false); break; //xori
case 27: regimmop(a, s, true, inst.UIMM << 16, Xor, XOR, false); break; //xoris
case 12: //addic
case 13: //addic_rc
case 12: //regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, ADD, false, true); //addic
case 13: //regimmop(d, a, true, (u32)(s32)inst.SIMM_16, Add, ADD, true, true); //addic_rc
default:
Default(inst);
break;
@ -314,20 +325,32 @@ namespace Jit64
else
gpr.LoadToX64(a, true, true);
int imm = inst.SIMM_16;
// XOR(32, R(ECX), R(ECX));
MOV(32, R(EAX), gpr.R(a));
NOT(32, R(EAX));
ADD(32, R(EAX), Imm32(imm+1));
MOV(32, gpr.R(d), R(EAX));
SETcc(CC_C, R(ECX));
AND(32, M(&XER), Imm32(~(1 << 29)));
SHL(32, R(ECX), Imm8(29));
OR(32, M(&XER), R(ECX));
GenerateCarry(ECX);
gpr.UnlockAll();
gpr.UnlockAllX();
// This instruction has no RC flag
}
// JIT entry point for subfcx. No native code is generated here yet; the
// instruction is handed to Default() (presumably the generic fallback path -
// confirm against its definition).
void subfcx(UGeckoInstruction inst)
{
	INSTRUCTION_START;

	// Reference semantics to reproduce once this is implemented natively:
	//   u32 a = m_GPR[_inst.RA];
	//   u32 b = m_GPR[_inst.RB];
	//   m_GPR[_inst.RD] = b - a;
	//   SetCarry(a == 0 || Helper_Carry(b, 0-a));
	//   if (_inst.OE) PanicAlert("OE: subfcx");
	//   if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]);
	Default(inst);
}
void subfx(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -447,9 +470,6 @@ namespace Jit64
}
}
// __________________________________________________________________________________________________
// Helper_Mask
//
u32 Helper_Mask(u8 mb, u8 me)
{
return (((mb > me) ?
@ -459,7 +479,6 @@ namespace Jit64
);
}
void addx(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -527,10 +546,7 @@ namespace Jit64
MOV(32, R(EAX), gpr.R(a));
ADC(32, R(EAX), gpr.R(b));
MOV(32, gpr.R(d), R(EAX));
SETcc(CC_C, R(ECX)); // store away the resulting carry flag
AND(32, M(&XER), Imm32(~(1 << 29)));
SHL(32, R(ECX), Imm8(29));
OR(32, M(&XER), R(ECX));
GenerateCarry(ECX);
gpr.UnlockAll();
gpr.UnlockAllX();
if (inst.Rc)

View File

@ -49,27 +49,35 @@ namespace Jit64
static u64 GC_ALIGNED16(temp64);
static u32 GC_ALIGNED16(temp32);
void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset)
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
if (offset)
ADD(32, R(reg), Imm32((u32)offset));
TEST(32, R(reg), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
if (accessSize != 32)
XOR(32, R(EAX), R(EAX));
#ifdef _M_IX86
AND(32, R(reg), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, R(EAX), MDisp(reg, (u32)Memory::base));
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
#else
MOV(accessSize, R(EAX), MComplex(RBX, reg, SCALE_1, 0));
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
#endif
if (accessSize == 32)
{
BSWAP(32, EAX);
}
else if (accessSize == 16)
{
BSWAP(32, EAX);
SHR(32, R(EAX), Imm8(16));
}
if (signExtend && accessSize < 32) {
MOVSX(32, accessSize, EAX, R(EAX));
}
}
void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
{
if (offset)
ADD(32, R(reg), Imm32((u32)offset));
TEST(32, R(reg), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
FixupBranch arg2 = J();
SetJumpTarget(argh);
switch (accessSize)
@ -81,6 +89,34 @@ namespace Jit64
SetJumpTarget(arg2);
}
// Emits an unchecked 32-bit store of reg_value to emulated memory at
// reg_addr + offset. "Unsafe" means no address-range test is emitted; callers
// that cannot guarantee a plain memory address should use SafeWriteRegToReg.
//
// reg_value  - register holding the value; it is byte-swapped in place, so its
//              contents are destroyed.
// reg_addr   - register holding the emulated address; masked in place on
//              32-bit hosts.
// accessSize - access width in bits; only 32 is supported, anything else
//              raises a PanicAlert (the store is still emitted afterwards).
// offset     - constant displacement folded into the addressing mode.
void UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
{
	if (accessSize != 32) {
		PanicAlert("UnsafeWriteRegToReg can't handle %i byte accesses", accessSize);
	}
	// Swap the byte order of the value before it is written out.
	BSWAP(32, reg_value);
#ifdef _M_IX86
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
	// Honour the displacement the same way UnsafeLoadRegToReg does; it was
	// previously accepted but silently ignored.
	MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#else
	// NOTE(review): RBX is presumably the base of the emulated memory view on
	// 64-bit hosts - confirm.
	MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#endif
}
// Emits a checked store of reg_value to emulated memory at reg_addr + offset.
// Destroys both arg registers.
//
// The emitted code tests the final address against 0x0C000000. When none of
// those bits are set it performs the direct store via UnsafeWriteRegToReg;
// otherwise it calls Memory::Write_U32 with (value, address).
//
// accessSize is only forwarded to the fast path; the slow path always calls
// Memory::Write_U32, so only 32-bit stores are handled correctly here.
void SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
{
	// Fold the displacement into the address register up front so that both
	// paths below see the final address.
	if (offset)
		ADD(32, R(reg_addr), Imm32(offset));
	TEST(32, R(reg_addr), Imm32(0x0C000000));
	FixupBranch argh = J_CC(CC_NZ);
	UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
	FixupBranch arg2 = J();
	SetJumpTarget(argh);
	// Pass the registers the caller actually supplied rather than assuming
	// they are ABI_PARAM1/ABI_PARAM2. The branch to this path is taken before
	// the fast path's byte swap, so reg_value is still in host byte order.
	ABI_CallFunctionRR((void *)&Memory::Write_U32, reg_value, reg_addr);
	SetJumpTarget(arg2);
}
void lbzx(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -146,6 +182,7 @@ namespace Jit64
#endif
// Safe and boring
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
SafeLoadRegToEAX(ABI_PARAM1, accessSize, offset);
@ -173,6 +210,24 @@ namespace Jit64
gpr.UnlockAll();
}
// JIT handler for lha: loads a halfword from (rA|0) + SIMM_16 and writes it,
// sign-extended to 32 bits, into rD.
void lha(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	int d = inst.RD;
	int a = inst.RA;
	s32 offset = (s32)(s16)inst.SIMM_16;
	// Safe and boring
	gpr.Flush(FLUSH_VOLATILE);
	fpr.Flush(FLUSH_VOLATILE);
	gpr.Lock(d, a);
	if (a)
	{
		MOV(32, R(ABI_PARAM1), gpr.R(a));
	}
	else
	{
		// For D-form loads an RA field of 0 means a base of zero, not the
		// contents of GPR 0.
		XOR(32, R(ABI_PARAM1), R(ABI_PARAM1));
	}
	// 16-bit access with signExtend = true; the result is left in EAX.
	SafeLoadRegToEAX(ABI_PARAM1, 16, offset, true);
	gpr.LoadToX64(d, false, true);
	MOV(32, gpr.R(d), R(EAX));
	gpr.UnlockAll();
}
// Zero cache line.
void dcbz(UGeckoInstruction inst)
{

View File

@ -70,17 +70,11 @@ void lfs(UGeckoInstruction inst)
gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
#ifdef _M_X64
if (!jo.noAssumeFPLoadFromMem)
{
MOV(32, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
//#else
// MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::GetMainRAMPtr() + (u32)offset));
//#endif
BSWAP(32, EAX);
UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false);
}
else
#endif
{
SafeLoadRegToEAX(ABI_PARAM1, 32, offset);
}
@ -145,7 +139,6 @@ void stfd(UGeckoInstruction inst)
void stfs(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
bool update = inst.OPCD & 1;
int s = inst.RS;
int a = inst.RA;
@ -156,9 +149,8 @@ void stfs(UGeckoInstruction inst)
gpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a);
fpr.Lock(s);
gpr.LockX(ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM2), gpr.R(a));
if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update && offset)
{
MOV(32, gpr.R(a), R(ABI_PARAM2));
@ -167,15 +159,9 @@ void stfs(UGeckoInstruction inst)
MOVSS(M(&temp32), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp32));
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
BSWAP(32, ABI_PARAM1);
MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
FixupBranch arg2 = J();
SetJumpTarget(argh);
CALL((void *)&Memory::Write_U32);
SetJumpTarget(arg2);
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset);
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
else
@ -187,14 +173,12 @@ void stfs(UGeckoInstruction inst)
void lfsx(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
fpr.Lock(inst.RS);
fpr.LoadToX64(inst.RS, false, true);
MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA));
MOV(32, R(EAX), MComplex(RBX, EAX, SCALE_1, 0));
BSWAP(32, EAX);
UnsafeLoadRegToReg(EAX, EAX, 32, false);
MOV(32, M(&temp32), R(EAX));
CVTSS2SD(XMM0, M(&temp32));
MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));

View File

@ -144,8 +144,8 @@ GekkoOPTemplate primarytable[] =
{8, CInterpreter::subfic, Jit64::subfic, {"subfic", OPTYPE_INTEGER, FL_SET_CA}},
{10, CInterpreter::cmpli, Jit64::cmpli, {"cmpli", OPTYPE_INTEGER, FL_SET_CRn}},
{11, CInterpreter::cmpi, Jit64::cmpi, {"cmpi", OPTYPE_INTEGER, FL_SET_CRn}},
{12, CInterpreter::addic, Jit64::Default, {"addic", OPTYPE_INTEGER, FL_SET_CA}},
{13, CInterpreter::addic_rc, Jit64::Default, {"addic_rc", OPTYPE_INTEGER, FL_SET_CR0}},
{12, CInterpreter::addic, Jit64::reg_imm, {"addic", OPTYPE_INTEGER, FL_SET_CA}},
{13, CInterpreter::addic_rc, Jit64::reg_imm, {"addic_rc", OPTYPE_INTEGER, FL_SET_CR0}},
{14, CInterpreter::addi, Jit64::reg_imm, {"addi", OPTYPE_INTEGER, 0}},
{15, CInterpreter::addis, Jit64::reg_imm, {"addis", OPTYPE_INTEGER, 0}},
@ -166,7 +166,7 @@ GekkoOPTemplate primarytable[] =
{35, CInterpreter::lbzu, Jit64::Default, {"lbzu", OPTYPE_LOAD, 0}},
{40, CInterpreter::lhz, Jit64::lXz, {"lhz", OPTYPE_LOAD, 0}},
{41, CInterpreter::lhzu, Jit64::Default, {"lhzu", OPTYPE_LOAD, 0}},
{42, CInterpreter::lha, Jit64::Default, {"lha", OPTYPE_LOAD, 0}},
{42, CInterpreter::lha, Jit64::lha, {"lha", OPTYPE_LOAD, 0}},
{43, CInterpreter::lhau, Jit64::Default, {"lhau", OPTYPE_LOAD, 0}},
{48, CInterpreter::lfs, Jit64::lfs, {"lfs", OPTYPE_LOADFP, 0}},
@ -398,7 +398,7 @@ GekkoOPTemplate table31_2[] =
{235, CInterpreter::mullwx, Jit64::mullwx, {"mullwx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_RC_BIT, 4}},
{104, CInterpreter::negx, Jit64::negx, {"negx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_RC_BIT}},
{40, CInterpreter::subfx, Jit64::subfx, {"subfx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_RC_BIT}},
{8, CInterpreter::subfcx, Jit64::Default, {"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_SET_CA | FL_RC_BIT}},
{8, CInterpreter::subfcx, Jit64::subfcx, {"subfcx", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_SET_CA | FL_RC_BIT}},
{136, CInterpreter::subfex, Jit64::Default, {"subfex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{232, CInterpreter::subfmex, Jit64::Default, {"subfmex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},
{200, CInterpreter::subfzex, Jit64::Default, {"subfzex", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_IN_B | FL_READ_CA | FL_SET_CA | FL_RC_BIT}},