Reorganize carry to store flags separately instead of as part of XER
Also correct the behavior with regard to which bits in XER are treated as zero, based on a hwtest (this probably doesn't affect any real games, but it might as well be correct).
This commit is contained in:
parent
788a719718
commit
5fce109ce1
|
@ -335,16 +335,17 @@ union UFPR
|
|||
// Bit positions of the CA, OV and SO flags within the 32-bit XER value.
#define XER_CA_SHIFT 29
|
||||
#define XER_OV_SHIFT 30
|
||||
#define XER_SO_SHIFT 31
|
||||
// Mask for CA within the full 32-bit register, derived from its shift.
#define XER_CA_MASK (1U << XER_CA_SHIFT)
|
||||
// OV/SO masks expressed as positions in the full 32-bit register.
// NOTE(review): these two appear to be the pre-change definitions; the same
// names are defined again just below with small values.
#define XER_OV_MASK (1U << XER_OV_SHIFT)
|
||||
#define XER_SO_MASK (1U << XER_SO_SHIFT)
|
||||
// OV/SO masks as small constants (OV = bit 0, SO = bit 1). Elsewhere in this
// listing they are applied to the 8-bit xer_so_ov field rather than to the
// full register.
#define XER_OV_MASK 1
|
||||
#define XER_SO_MASK 2
|
||||
// XER
|
||||
union UReg_XER
|
||||
{
|
||||
struct
|
||||
{
|
||||
u32 BYTE_COUNT : 7;
|
||||
u32 : 22;
|
||||
u32 : 1;
|
||||
u32 BYTE_CMP : 8;
|
||||
u32 : 13;
|
||||
u32 CA : 1;
|
||||
u32 OV : 1;
|
||||
u32 SO : 1;
|
||||
|
|
|
@ -499,7 +499,7 @@ void Interpreter::lhzx(UGeckoInstruction _inst)
|
|||
void Interpreter::lswx(UGeckoInstruction _inst)
|
||||
{
|
||||
u32 EA = Helper_Get_EA_X(_inst);
|
||||
u32 n = rSPR(SPR_XER) & 0x7F;
|
||||
u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
|
||||
int r = _inst.RD;
|
||||
int i = 0;
|
||||
|
||||
|
@ -727,7 +727,7 @@ void Interpreter::stswi(UGeckoInstruction _inst)
|
|||
void Interpreter::stswx(UGeckoInstruction _inst)
|
||||
{
|
||||
u32 EA = Helper_Get_EA_X(_inst);
|
||||
u32 n = rSPR(SPR_XER) & 0x7F;
|
||||
u32 n = (u8)PowerPC::ppcState.xer_stringctrl;
|
||||
int r = _inst.RS;
|
||||
int i = 0;
|
||||
|
||||
|
|
|
@ -112,9 +112,9 @@ void Interpreter::mtfsfx(UGeckoInstruction _inst)
|
|||
|
||||
// mcrxr: copies the top four bits of XER into CR field CRFD, then clears them.
// NOTE(review): this listing appears to interleave both sides of the change:
// the two statements operating on spr[SPR_XER] are the earlier form, and the
// GetXER() / xer_ca / xer_so_ov statements are their replacement.
void Interpreter::mcrxr(UGeckoInstruction _inst)
|
||||
{
|
||||
// USES_XER
|
||||
// Earlier form: read and clear the bits directly in the stored XER word.
SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28);
|
||||
PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3
|
||||
// Replacement form: assemble XER via GetXER() and take its top four bits...
SetCRField(_inst.CRFD, GetXER().Hex >> 28);
|
||||
// ...then zero the separately stored carry and SO/OV fields.
PowerPC::ppcState.xer_ca = 0;
|
||||
PowerPC::ppcState.xer_so_ov = 0;
|
||||
}
|
||||
|
||||
void Interpreter::mfcr(UGeckoInstruction _inst)
|
||||
|
@ -235,6 +235,9 @@ void Interpreter::mfspr(UGeckoInstruction _inst)
|
|||
rSPR(iIndex) &= ~1;
|
||||
}
|
||||
break;
|
||||
case SPR_XER:
|
||||
rSPR(iIndex) = GetXER().Hex;
|
||||
break;
|
||||
}
|
||||
m_GPR[_inst.RD] = rSPR(iIndex);
|
||||
}
|
||||
|
@ -350,6 +353,10 @@ void Interpreter::mtspr(UGeckoInstruction _inst)
|
|||
case SPR_SDR:
|
||||
Memory::SDRUpdated();
|
||||
break;
|
||||
|
||||
case SPR_XER:
|
||||
SetXER(rSPR(iIndex));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -22,12 +22,12 @@ void Jit64::GenerateConstantOverflow(bool overflow)
|
|||
if (overflow)
|
||||
{
|
||||
//XER[OV/SO] = 1
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
|
||||
}
|
||||
else
|
||||
{
|
||||
//XER[OV] = 0
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
|
||||
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -36,11 +36,11 @@ void Jit64::GenerateOverflow()
|
|||
{
|
||||
FixupBranch jno = J_CC(CC_NO);
|
||||
//XER[OV/SO] = 1
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK));
|
||||
FixupBranch exit = J();
|
||||
SetJumpTarget(jno);
|
||||
//XER[OV] = 0
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK));
|
||||
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
|
||||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
|
@ -60,7 +60,7 @@ void Jit64::FinalizeCarry(CCFlags cond)
|
|||
{
|
||||
// convert the condition to a carry flag (is there a better way?)
|
||||
SETcc(cond, R(RSCRATCH));
|
||||
BT(8, R(RSCRATCH), Imm8(0));
|
||||
SHR(8, R(RSCRATCH), Imm8(1));
|
||||
}
|
||||
js.carryFlagSet = true;
|
||||
}
|
||||
|
@ -92,23 +92,22 @@ void Jit64::FinalizeCarry(bool ca)
|
|||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(true, false);
|
||||
JitClearCA();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assumes CA,OV are clear
|
||||
void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
|
||||
{
|
||||
// USES_XER
|
||||
if (oe)
|
||||
{
|
||||
// Make sure not to lose the carry flags (not a big deal, this path is rare).
|
||||
PUSHF();
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK)));
|
||||
//XER[OV] = 0
|
||||
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
|
||||
FixupBranch jno = J_CC(CC_NO);
|
||||
//XER[OV/SO] = 1
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK));
|
||||
MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_SO_MASK | XER_OV_MASK));
|
||||
SetJumpTarget(jno);
|
||||
POPF();
|
||||
}
|
||||
|
@ -1792,27 +1791,15 @@ void Jit64::srawix(UGeckoInstruction inst)
|
|||
if (a != s)
|
||||
MOV(32, gpr.R(a), R(RSCRATCH));
|
||||
// some optimized common cases that can be done in slightly fewer ops
|
||||
if (amount == 31)
|
||||
if (amount == 1)
|
||||
{
|
||||
JitSetCA();
|
||||
SAR(32, gpr.R(a), Imm8(31));
|
||||
NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input;
|
||||
AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 0 : 0x80000000
|
||||
SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT));
|
||||
XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN)
|
||||
}
|
||||
else if (amount == 1)
|
||||
{
|
||||
JitClearCAOV(true, false);
|
||||
SHR(32, R(RSCRATCH), Imm8(31)); // sign
|
||||
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
|
||||
SHR(32, R(RSCRATCH), Imm8(31)); // sign
|
||||
AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry)
|
||||
SAR(32, gpr.R(a), Imm8(1));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
||||
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001
|
||||
}
|
||||
else
|
||||
{
|
||||
JitClearCAOV(true, false);
|
||||
SAR(32, gpr.R(a), Imm8(amount));
|
||||
SHL(32, R(RSCRATCH), Imm8(32 - amount));
|
||||
TEST(32, R(RSCRATCH), gpr.R(a));
|
||||
|
|
|
@ -140,7 +140,6 @@ void Jit64::mtspr(UGeckoInstruction inst)
|
|||
|
||||
case SPR_LR:
|
||||
case SPR_CTR:
|
||||
case SPR_XER:
|
||||
// These are safe to do the easy way, see the bottom of this function.
|
||||
break;
|
||||
|
||||
|
@ -155,6 +154,24 @@ void Jit64::mtspr(UGeckoInstruction inst)
|
|||
// These are safe to do the easy way, see the bottom of this function.
|
||||
break;
|
||||
|
||||
case SPR_XER:
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, true, false);
|
||||
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||
AND(32, R(RSCRATCH), Imm32(0xff7f));
|
||||
MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH));
|
||||
|
||||
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||
SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
AND(8, R(RSCRATCH), Imm8(1));
|
||||
MOV(8, PPCSTATE(xer_ca), R(RSCRATCH));
|
||||
|
||||
MOV(32, R(RSCRATCH), gpr.R(d));
|
||||
SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
|
||||
MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
|
||||
gpr.UnlockAll();
|
||||
return;
|
||||
|
||||
default:
|
||||
FALLBACK_IF(true);
|
||||
}
|
||||
|
@ -238,6 +255,18 @@ void Jit64::mfspr(UGeckoInstruction inst)
|
|||
gpr.UnlockAllX();
|
||||
break;
|
||||
}
|
||||
case SPR_XER:
|
||||
gpr.Lock(d);
|
||||
gpr.BindToRegister(d, false);
|
||||
MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl));
|
||||
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
OR(32, gpr.R(d), R(RSCRATCH));
|
||||
|
||||
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT));
|
||||
OR(32, gpr.R(d), R(RSCRATCH));
|
||||
break;
|
||||
case SPR_WPAR:
|
||||
case SPR_DEC:
|
||||
case SPR_PMC1:
|
||||
|
@ -422,17 +451,20 @@ void Jit64::mcrxr(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITSystemRegistersOff);
|
||||
|
||||
// USES_CR
|
||||
|
||||
// Copy XER[0-3] into CR[inst.CRFD]
|
||||
MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER]));
|
||||
SHR(32, R(RSCRATCH), Imm8(28));
|
||||
MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca));
|
||||
MOVZX(32, 8, RSCRATCH2, PPCSTATE(xer_so_ov));
|
||||
// [0 SO OV CA]
|
||||
LEA(32, RSCRATCH, MComplex(RSCRATCH, RSCRATCH2, SCALE_2, 0));
|
||||
// [SO OV CA 0] << 3
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
|
||||
MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
|
||||
MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable));
|
||||
MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
|
||||
|
||||
// Clear XER[0-3]
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF));
|
||||
MOV(8, PPCSTATE(xer_ca), Imm8(0));
|
||||
MOV(8, PPCSTATE(xer_so_ov), Imm8(0));
|
||||
}
|
||||
|
||||
void Jit64::crXXX(UGeckoInstruction inst)
|
||||
|
|
|
@ -1027,9 +1027,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
break;
|
||||
|
||||
X64Reg reg = regFindFreeReg(RI);
|
||||
Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER]));
|
||||
Jit->SHR(32, R(reg), Imm8(29));
|
||||
Jit->AND(32, R(reg), Imm8(1));
|
||||
Jit->MOVZX(32, 8, reg, PPCSTATE(xer_ca));
|
||||
RI.regs[reg] = I;
|
||||
break;
|
||||
}
|
||||
|
@ -1107,7 +1105,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
|||
Jit->JitSetCA();
|
||||
FixupBranch cont = Jit->J();
|
||||
Jit->SetJumpTarget(nocarry);
|
||||
Jit->JitClearCAOV(true, false);
|
||||
Jit->JitClearCA();
|
||||
Jit->SetJumpTarget(cont);
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
|
|
@ -896,30 +896,23 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
|||
void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
|
||||
{
|
||||
if (oe)
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); //XER.OV = 0
|
||||
BTR(32, PPCSTATE(spr[SPR_XER]), Imm8(29)); //carry = XER.CA, XER.CA = 0
|
||||
AND(8, PPCSTATE(xer_so_ov), Imm32(~XER_OV_MASK)); //XER.OV = 0
|
||||
SHR(8, PPCSTATE(xer_ca), Imm8(1)); //carry = XER.CA, XER.CA = 0
|
||||
}
|
||||
|
||||
// Emits code that sets XER.CA to 1.
// NOTE(review): the OR on spr[SPR_XER] appears to be the earlier form and the
// byte store to xer_ca its replacement; both are shown in this listing.
void EmuCodeBlock::JitSetCA()
|
||||
{
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
|
||||
MOV(8, PPCSTATE(xer_ca), Imm8(1)); //XER.CA = 1
|
||||
}
|
||||
|
||||
// Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so
|
||||
// branchless calculation of CA is probably faster in general.
|
||||
// Emits code that sets XER.CA to the result of the given host condition code.
// NOTE(review): the five-instruction sequence through RSCRATCH and
// spr[SPR_XER] appears to be the earlier form; the single SETcc into xer_ca
// is its replacement.
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
||||
{
|
||||
// Earlier form: materialise the condition as 0/1, move it to the CA bit
// position, then replace the CA bit inside the stored XER word.
SETcc(conditionCode, R(RSCRATCH));
|
||||
MOVZX(32, 8, RSCRATCH, R(RSCRATCH));
|
||||
SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT));
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK));
|
||||
OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = condition
|
||||
// Replacement form: write the condition result straight into the carry byte.
SETcc(conditionCode, PPCSTATE(xer_ca));
|
||||
}
|
||||
|
||||
// Emits code that clears carry state.
// NOTE(review): two signatures are shown because this listing appears to
// interleave both sides of the change. JitClearCAOV(ca, oe) built an AND mask
// over spr[SPR_XER] from the selected flags and emitted nothing when neither
// was selected; JitClearCA() just stores 0 into xer_ca.
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
|
||||
void EmuCodeBlock::JitClearCA()
|
||||
{
|
||||
// Earlier form: clear CA and/or OV in the stored XER word with one AND.
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
|
||||
if (mask == 0xFFFFFFFF)
|
||||
return;
|
||||
AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask));
|
||||
// Replacement form: zero the carry byte.
MOV(8, PPCSTATE(xer_ca), Imm8(0));
|
||||
}
|
||||
|
|
|
@ -121,7 +121,7 @@ public:
|
|||
void JitGetAndClearCAOV(bool oe);
|
||||
void JitSetCA();
|
||||
void JitSetCAIf(Gen::CCFlags conditionCode);
|
||||
void JitClearCAOV(bool ca, bool oe);
|
||||
void JitClearCA();
|
||||
|
||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||
|
|
|
@ -61,6 +61,12 @@ struct GC_ALIGNED64(PowerPCState)
|
|||
// This variable should be inside of the CoreTiming namespace if we wanted to be correct.
|
||||
int downcount;
|
||||
|
||||
// XER, reformatted into byte fields for easier access.
|
||||
u8 xer_ca;
|
||||
u8 xer_so_ov; // format: (SO << 1) | OV
|
||||
// The Broadway CPU implements bits 16-23 of the XER register... even though it doesn't support lscbx
|
||||
u16 xer_stringctrl;
|
||||
|
||||
#if _M_X86_64
|
||||
// This member exists for the purpose of an assertion in x86 JitBase.cpp
|
||||
// that its offset <= 0x100. To minimize code size on x86, we want as much
|
||||
|
@ -252,35 +258,40 @@ inline u32 GetCR()
|
|||
return PowerPC::CompactCR();
|
||||
}
|
||||
|
||||
// SetCarry/GetCarry may speed up soon.
|
||||
// Stores the carry flag.
// NOTE(review): the bitfield write to spr[SPR_XER] appears to be the earlier
// form and the xer_ca store its replacement. The one-bit CA bitfield kept only
// the lowest bit of `ca`, whereas xer_ca is a u8 and keeps the low eight bits;
// callers are presumably expected to pass 0 or 1 -- confirm.
inline void SetCarry(int ca)
|
||||
{
|
||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca;
|
||||
PowerPC::ppcState.xer_ca = ca;
|
||||
}
|
||||
|
||||
// Returns the carry flag.
// NOTE(review): the bitfield read from spr[SPR_XER] appears to be the earlier
// form; the read of the xer_ca byte is its replacement.
inline int GetCarry()
|
||||
{
|
||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA;
|
||||
return PowerPC::ppcState.xer_ca;
|
||||
}
|
||||
|
||||
// Reassembles the architectural XER value from the separately stored fields:
// xer_stringctrl supplies the low 16 bits, xer_ca is placed at XER_CA_SHIFT,
// and xer_so_ov (format: (SO << 1) | OV) is placed at XER_OV_SHIFT.
inline UReg_XER GetXER()
{
	// xer_ca and xer_so_ov are u8 and would be promoted to signed int before
	// the shift; widen them to u32 first so that shifting SO up to bit 31 is
	// done in unsigned arithmetic.
	u32 xer = 0;
	xer |= PowerPC::ppcState.xer_stringctrl;
	xer |= static_cast<u32>(PowerPC::ppcState.xer_ca) << XER_CA_SHIFT;
	xer |= static_cast<u32>(PowerPC::ppcState.xer_so_ov) << XER_OV_SHIFT;
	return xer;
}
|
||||
|
||||
// Stores a full XER value.
// NOTE(review): the whole-word assignment to spr[SPR_XER] appears to be the
// earlier form; the three field stores below it are its replacement. Only the
// named bitfields of new_xer are carried over by the replacement.
inline void SetXER(UReg_XER new_xer)
|
||||
{
|
||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]) = new_xer;
|
||||
// BYTE_COUNT goes in the low bits, BYTE_CMP in bits 8 and up.
PowerPC::ppcState.xer_stringctrl = new_xer.BYTE_COUNT + (new_xer.BYTE_CMP << 8);
|
||||
PowerPC::ppcState.xer_ca = new_xer.CA;
|
||||
// Packed as (SO << 1) + OV: OV in bit 0, SO in bit 1.
PowerPC::ppcState.xer_so_ov = (new_xer.SO << 1) + new_xer.OV;
|
||||
}
|
||||
|
||||
// Returns the SO flag.
// NOTE(review): the bitfield read from spr[SPR_XER] appears to be the earlier
// form; the replacement shifts xer_so_ov right by one, which yields SO when
// the field holds only (SO << 1) | OV.
inline int GetXER_SO()
|
||||
{
|
||||
return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO;
|
||||
return PowerPC::ppcState.xer_so_ov >> 1;
|
||||
}
|
||||
|
||||
// Sets the SO flag.
// NOTE(review): the bitfield assignment to spr[SPR_XER] appears to be the
// earlier form and the |= on xer_so_ov its replacement. The two differ when
// value == 0: the assignment cleared SO, while the OR leaves it unchanged.
// Confirm that this set-only behaviour is intended.
inline void SetXER_SO(int value)
|
||||
{
|
||||
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
|
||||
PowerPC::ppcState.xer_so_ov |= value << 1;
|
||||
}
|
||||
|
||||
void UpdateFPRF(double dvalue);
|
||||
|
|
|
@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
|
|||
static std::thread g_save_thread;
|
||||
|
||||
// Don't forget to increase this after doing changes on the savestate system
|
||||
static const u32 STATE_VERSION = 33;
|
||||
static const u32 STATE_VERSION = 34;
|
||||
|
||||
enum
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue