Merge pull request #852 from FioraAeterna/optimizeca
JIT64: optimize CA calculations
This commit is contained in:
commit
97420c6ec6
|
@ -175,16 +175,15 @@ struct Rectangle
|
||||||
|
|
||||||
} // namespace MathUtil
|
} // namespace MathUtil
|
||||||
|
|
||||||
// Returns the square of a single-precision value.
inline float pow2f(float x)
{
	const float squared = x * x;
	return squared;
}
|
|
||||||
// Returns the square of a double-precision value.
inline double pow2(double x)
{
	const double squared = x * x;
	return squared;
}
|
|
||||||
|
|
||||||
float MathFloatVectorSum(const std::vector<float>&);
|
float MathFloatVectorSum(const std::vector<float>&);
|
||||||
|
|
||||||
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
|
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))
|
||||||
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
|
#define ROUND_DOWN(x, a) ((x) & ~((a) - 1))
|
||||||
|
|
||||||
|
// Returns true when imm is a power of two, i.e. has exactly one bit set.
// Zero has no bits set and is therefore rejected; without the explicit
// check, (0 & (0 - 1)) == 0 would wrongly classify it as a power of two.
inline bool IsPow2(u32 imm) {return imm != 0 && (imm & (imm - 1)) == 0;}
|
||||||
|
|
||||||
// Rounds down. 0 -> undefined
|
// Rounds down. 0 -> undefined
|
||||||
inline int Log2(u64 val)
|
inline int IntLog2(u64 val)
|
||||||
{
|
{
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
return 63 - __builtin_clzll(val);
|
return 63 - __builtin_clzll(val);
|
||||||
|
|
|
@ -331,9 +331,12 @@ union UFPR
|
||||||
float f[2];
|
float f[2];
|
||||||
};
|
};
|
||||||
|
|
||||||
#define XER_CA_MASK 0x20000000
|
#define XER_CA_SHIFT 29
|
||||||
#define XER_OV_MASK 0x40000000
|
#define XER_OV_SHIFT 30
|
||||||
#define XER_SO_MASK 0x80000000
|
#define XER_SO_SHIFT 31
|
||||||
|
#define XER_CA_MASK (1U << XER_CA_SHIFT)
|
||||||
|
#define XER_OV_MASK (1U << XER_OV_SHIFT)
|
||||||
|
#define XER_SO_MASK (1U << XER_SO_SHIFT)
|
||||||
// XER
|
// XER
|
||||||
union UReg_XER
|
union UReg_XER
|
||||||
{
|
{
|
||||||
|
|
|
@ -34,7 +34,7 @@ static GekkoOPTemplate primarytable[] =
|
||||||
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
{10, Interpreter::cmpli, {"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||||
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
{11, Interpreter::cmpi, {"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn, 1, 0, 0, 0}},
|
||||||
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
|
{12, Interpreter::addic, {"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA, 1, 0, 0, 0}},
|
||||||
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0, 1, 0, 0, 0}},
|
{13, Interpreter::addic_rc, {"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA | FL_SET_CR0, 1, 0, 0, 0}},
|
||||||
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
{14, Interpreter::addi, {"addi", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||||
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
{15, Interpreter::addis, {"addis", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A0, 1, 0, 0, 0}},
|
||||||
|
|
||||||
|
@ -180,8 +180,8 @@ static GekkoOPTemplate table31[] =
|
||||||
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{922, Interpreter::extshx, {"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{954, Interpreter::extsbx, {"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{536, Interpreter::srwx, {"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{792, Interpreter::srawx, {"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{824, Interpreter::srawix, {"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
{24, Interpreter::slwx, {"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT, 1, 0, 0, 0}},
|
||||||
|
|
||||||
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
|
{54, Interpreter::dcbst, {"dcbst", OPTYPE_DCACHE, 0, 5, 0, 0, 0}},
|
||||||
|
@ -260,7 +260,7 @@ static GekkoOPTemplate table31[] =
|
||||||
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
|
{339, Interpreter::mfspr, {"mfspr", OPTYPE_SPR, FL_OUT_D, 1, 0, 0, 0}},
|
||||||
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
|
{467, Interpreter::mtspr, {"mtspr", OPTYPE_SPR, 0, 2, 0, 0, 0}},
|
||||||
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
|
{371, Interpreter::mftb, {"mftb", OPTYPE_SYSTEM, FL_OUT_D | FL_TIMER, 1, 0, 0, 0}},
|
||||||
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, 0, 1, 0, 0, 0}},
|
{512, Interpreter::mcrxr, {"mcrxr", OPTYPE_SYSTEM, FL_READ_CA | FL_SET_CA, 1, 0, 0, 0}},
|
||||||
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
{595, Interpreter::mfsr, {"mfsr", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||||
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
{659, Interpreter::mfsrin, {"mfsrin", OPTYPE_SYSTEM, FL_OUT_D, 3, 0, 0, 0}},
|
||||||
|
|
||||||
|
|
|
@ -100,13 +100,15 @@ public:
|
||||||
void GenerateConstantOverflow(bool overflow);
|
void GenerateConstantOverflow(bool overflow);
|
||||||
void GenerateConstantOverflow(s64 val);
|
void GenerateConstantOverflow(s64 val);
|
||||||
void GenerateOverflow();
|
void GenerateOverflow();
|
||||||
void FinalizeCarryOverflow(bool oe, bool inv = false);
|
void FinalizeCarryOverflow(bool ca, bool oe, bool inv = false);
|
||||||
void GetCarryEAXAndClear();
|
|
||||||
void FinalizeCarryGenerateOverflowEAX(bool oe, bool inv = false);
|
|
||||||
void GenerateCarry();
|
|
||||||
void GenerateRC();
|
|
||||||
void ComputeRC(const Gen::OpArg & arg);
|
void ComputeRC(const Gen::OpArg & arg);
|
||||||
|
|
||||||
|
// use to extract bytes from a register using the regcache. offset is in bytes.
|
||||||
|
Gen::OpArg ExtractFromReg(int reg, int offset);
|
||||||
|
void AndWithMask(Gen::X64Reg reg, u32 mask);
|
||||||
|
bool CheckMergedBranch(int crf);
|
||||||
|
void DoMergedBranch();
|
||||||
|
|
||||||
// Reads a given bit of a given CR register part. Clobbers ABI_PARAM1,
|
// Reads a given bit of a given CR register part. Clobbers ABI_PARAM1,
|
||||||
// don't forget to xlock it before.
|
// don't forget to xlock it before.
|
||||||
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
|
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
|
||||||
|
@ -118,6 +120,8 @@ public:
|
||||||
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
|
Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set = true);
|
||||||
void SetFPRFIfNeeded(UGeckoInstruction inst, Gen::X64Reg xmm);
|
void SetFPRFIfNeeded(UGeckoInstruction inst, Gen::X64Reg xmm);
|
||||||
|
|
||||||
|
void MultiplyImmediate(u32 imm, int a, int d, bool overflow);
|
||||||
|
|
||||||
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg), UGeckoInstruction inst, bool roundRHS = false);
|
||||||
typedef u32 (*Operation)(u32 a, u32 b);
|
typedef u32 (*Operation)(u32 a, u32 b);
|
||||||
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (Gen::XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
|
||||||
|
|
|
@ -193,8 +193,8 @@ static GekkoOPTemplate table31[] =
|
||||||
{922, &Jit64::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
{922, &Jit64::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||||
{954, &Jit64::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
{954, &Jit64::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
|
||||||
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{536, &Jit64::srwx}, //"srwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||||
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{792, &Jit64::srawx}, //"srawx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||||
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{824, &Jit64::srawix}, //"srawix", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_SET_CA | FL_RC_BIT}},
|
||||||
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
{24, &Jit64::slwx}, //"slwx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_B | FL_IN_S | FL_RC_BIT}},
|
||||||
|
|
||||||
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
|
{54, &Jit64::dcbst}, //"dcbst", OPTYPE_DCACHE, 0, 4}},
|
||||||
|
|
|
@ -314,7 +314,10 @@ void RegCache::StoreFromRegister(size_t i, FlushMode mode)
|
||||||
|
|
||||||
void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc)
|
void GPRRegCache::LoadRegister(size_t preg, X64Reg newLoc)
|
||||||
{
|
{
|
||||||
emit->MOV(32, ::Gen::R(newLoc), regs[preg].location);
|
if (regs[preg].location.IsImm() && !regs[preg].location.offset)
|
||||||
|
emit->XOR(32, ::Gen::R(newLoc), ::Gen::R(newLoc));
|
||||||
|
else
|
||||||
|
emit->MOV(32, ::Gen::R(newLoc), regs[preg].location);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPRRegCache::StoreRegister(size_t preg, OpArg newLoc)
|
void GPRRegCache::StoreRegister(size_t preg, OpArg newLoc)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1104,7 +1104,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
|
||||||
Jit->JitSetCA();
|
Jit->JitSetCA();
|
||||||
FixupBranch cont = Jit->J();
|
FixupBranch cont = Jit->J();
|
||||||
Jit->SetJumpTarget(nocarry);
|
Jit->SetJumpTarget(nocarry);
|
||||||
Jit->JitClearCA();
|
Jit->JitClearCAOV(true, false);
|
||||||
Jit->SetJumpTarget(cont);
|
Jit->SetJumpTarget(cont);
|
||||||
regNormalRegClear(RI, I);
|
regNormalRegClear(RI, I);
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -802,10 +802,11 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm)
|
||||||
OR(32, M(&FPSCR), R(EAX));
|
OR(32, M(&FPSCR), R(EAX));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void EmuCodeBlock::JitGetAndClearCAOV(bool oe)
|
||||||
void EmuCodeBlock::JitClearCA()
|
|
||||||
{
|
{
|
||||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
if (oe)
|
||||||
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_OV_MASK)); //XER.OV = 0
|
||||||
|
BTR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm8(29)); //carry = XER.CA, XER.CA = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::JitSetCA()
|
void EmuCodeBlock::JitSetCA()
|
||||||
|
@ -813,10 +814,20 @@ void EmuCodeBlock::JitSetCA()
|
||||||
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::JitClearCAOV(bool oe)
|
// Some testing shows CA is set roughly 1/3 of the time (relative to clears), so
|
||||||
|
// branchless calculation of CA is probably faster in general.
|
||||||
|
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode)
|
||||||
{
|
{
|
||||||
if (oe)
|
SETcc(conditionCode, R(EAX));
|
||||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK & ~XER_OV_MASK)); //XER.CA, XER.OV = 0
|
MOVZX(32, 8, EAX, R(AL));
|
||||||
else
|
SHL(32, R(EAX), Imm8(XER_CA_SHIFT));
|
||||||
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
|
OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); //XER.CA |= condition (EAX is 0 or XER_CA_MASK)
|
||||||
|
}
|
||||||
|
|
||||||
|
void EmuCodeBlock::JitClearCAOV(bool ca, bool oe)
|
||||||
|
{
|
||||||
|
u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF);
|
||||||
|
if (mask == 0xFFFFFFFF)
|
||||||
|
return;
|
||||||
|
AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(mask));
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,9 +50,10 @@ public:
|
||||||
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
|
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
|
||||||
|
|
||||||
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
|
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
|
||||||
void JitClearCA();
|
void JitGetAndClearCAOV(bool oe);
|
||||||
void JitSetCA();
|
void JitSetCA();
|
||||||
void JitClearCAOV(bool oe);
|
void JitSetCAIf(Gen::CCFlags conditionCode);
|
||||||
|
void JitClearCAOV(bool ca, bool oe);
|
||||||
|
|
||||||
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
void ForceSinglePrecisionS(Gen::X64Reg xmm);
|
||||||
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
void ForceSinglePrecisionP(Gen::X64Reg xmm);
|
||||||
|
|
|
@ -430,7 +430,6 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||||
{
|
{
|
||||||
code->wantsCR0 = false;
|
code->wantsCR0 = false;
|
||||||
code->wantsCR1 = false;
|
code->wantsCR1 = false;
|
||||||
code->wantsPS1 = false;
|
|
||||||
|
|
||||||
if (opinfo->flags & FL_USE_FPU)
|
if (opinfo->flags & FL_USE_FPU)
|
||||||
block->m_fpa->any = true;
|
block->m_fpa->any = true;
|
||||||
|
@ -458,6 +457,15 @@ void PPCAnalyzer::SetInstructionStats(CodeBlock *block, CodeOp *code, GekkoOPInf
|
||||||
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
code->outputFPRF = (opinfo->flags & FL_SET_FPRF) ? true : false;
|
||||||
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
code->canEndBlock = (opinfo->flags & FL_ENDBLOCK) ? true : false;
|
||||||
|
|
||||||
|
code->wantsCA = (opinfo->flags & FL_READ_CA) ? true : false;
|
||||||
|
code->outputCA = (opinfo->flags & FL_SET_CA) ? true : false;
|
||||||
|
|
||||||
|
// mfspr/mtspr can affect/use XER, so be super careful here
|
||||||
|
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 339) // mfspr
|
||||||
|
code->wantsCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||||
|
if (code->inst.OPCD == 31 && code->inst.SUBOP10 == 467) // mtspr
|
||||||
|
code->outputCA = ((code->inst.SPRU << 5) | (code->inst.SPRL & 0x1F)) == SPR_XER;
|
||||||
|
|
||||||
int numOut = 0;
|
int numOut = 0;
|
||||||
int numIn = 0;
|
int numIn = 0;
|
||||||
if (opinfo->flags & FL_OUT_A)
|
if (opinfo->flags & FL_OUT_A)
|
||||||
|
@ -715,26 +723,30 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
|
||||||
block->m_broken = true;
|
block->m_broken = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan for CR0 dependency
|
// Scan for flag dependencies; assume the next block (or any branch that can leave the block)
|
||||||
// assume next block wants flags to be safe
|
// wants flags, to be safe.
|
||||||
bool wantsCR0 = true;
|
bool wantsCR0 = true;
|
||||||
bool wantsCR1 = true;
|
bool wantsCR1 = true;
|
||||||
bool wantsPS1 = true;
|
|
||||||
bool wantsFPRF = true;
|
bool wantsFPRF = true;
|
||||||
|
bool wantsCA = true;
|
||||||
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
for (int i = block->m_num_instructions - 1; i >= 0; i--)
|
||||||
{
|
{
|
||||||
wantsCR0 |= code[i].wantsCR0 || code[i].canEndBlock;
|
bool opWantsCR0 = code[i].wantsCR0;
|
||||||
wantsCR1 |= code[i].wantsCR1 || code[i].canEndBlock;
|
bool opWantsCR1 = code[i].wantsCR1;
|
||||||
wantsPS1 |= code[i].wantsPS1 || code[i].canEndBlock;
|
bool opWantsFPRF = code[i].wantsFPRF;
|
||||||
wantsFPRF |= code[i].wantsFPRF || code[i].canEndBlock;
|
bool opWantsCA = code[i].wantsCA;
|
||||||
code[i].wantsCR0 = wantsCR0;
|
wantsCR0 |= opWantsCR0 || code[i].canEndBlock;
|
||||||
code[i].wantsCR1 = wantsCR1;
|
wantsCR1 |= opWantsCR1 || code[i].canEndBlock;
|
||||||
code[i].wantsPS1 = wantsPS1;
|
wantsFPRF |= opWantsFPRF || code[i].canEndBlock;
|
||||||
|
wantsCA |= opWantsCA || code[i].canEndBlock;
|
||||||
|
code[i].wantsCR0 = wantsCR0;
|
||||||
|
code[i].wantsCR1 = wantsCR1;
|
||||||
code[i].wantsFPRF = wantsFPRF;
|
code[i].wantsFPRF = wantsFPRF;
|
||||||
wantsCR0 &= !code[i].outputCR0;
|
code[i].wantsCA = wantsCA;
|
||||||
wantsCR1 &= !code[i].outputCR1;
|
wantsCR0 &= !code[i].outputCR0 || opWantsCR0;
|
||||||
wantsPS1 &= !code[i].outputPS1;
|
wantsCR1 &= !code[i].outputCR1 || opWantsCR1;
|
||||||
wantsFPRF &= !code[i].outputFPRF;
|
wantsFPRF &= !code[i].outputFPRF || opWantsFPRF;
|
||||||
|
wantsCA &= !code[i].outputCA || opWantsCA;
|
||||||
}
|
}
|
||||||
return address;
|
return address;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,12 +33,12 @@ struct CodeOp //16B
|
||||||
bool isBranchTarget;
|
bool isBranchTarget;
|
||||||
bool wantsCR0;
|
bool wantsCR0;
|
||||||
bool wantsCR1;
|
bool wantsCR1;
|
||||||
bool wantsPS1;
|
|
||||||
bool wantsFPRF;
|
bool wantsFPRF;
|
||||||
|
bool wantsCA;
|
||||||
bool outputCR0;
|
bool outputCR0;
|
||||||
bool outputCR1;
|
bool outputCR1;
|
||||||
bool outputPS1;
|
|
||||||
bool outputFPRF;
|
bool outputFPRF;
|
||||||
|
bool outputCA;
|
||||||
bool canEndBlock;
|
bool canEndBlock;
|
||||||
bool skip; // followed BL-s for example
|
bool skip; // followed BL-s for example
|
||||||
};
|
};
|
||||||
|
|
|
@ -397,7 +397,7 @@ static wxString NiceSizeFormat(u64 _size)
|
||||||
// Find largest power of 2 less than _size.
|
// Find largest power of 2 less than _size.
|
||||||
// div 10 to get largest named unit less than _size
|
// div 10 to get largest named unit less than _size
|
||||||
// 10 == log2(1024) (number of B in a KiB, KiB in a MiB, etc)
|
// 10 == log2(1024) (number of B in a KiB, KiB in a MiB, etc)
|
||||||
const u64 unit = Log2(std::max<u64>(_size, 1)) / 10;
|
const u64 unit = IntLog2(std::max<u64>(_size, 1)) / 10;
|
||||||
const u64 unit_size = (1 << (unit * 10));
|
const u64 unit_size = (1 << (unit * 10));
|
||||||
|
|
||||||
// mul 1000 for 3 decimal places, add 5 to round up, div 10 for 2 decimal places
|
// mul 1000 for 3 decimal places, add 5 to round up, div 10 for 2 decimal places
|
||||||
|
|
|
@ -23,7 +23,7 @@ static u32 genBuffer()
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamBuffer::StreamBuffer(u32 type, u32 size)
|
StreamBuffer::StreamBuffer(u32 type, u32 size)
|
||||||
: m_buffer(genBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)), m_bit_per_slot(Log2(ROUND_UP_POW2(size) / SYNC_POINTS))
|
: m_buffer(genBuffer()), m_buffertype(type), m_size(ROUND_UP_POW2(size)), m_bit_per_slot(IntLog2(ROUND_UP_POW2(size) / SYNC_POINTS))
|
||||||
{
|
{
|
||||||
m_iterator = 0;
|
m_iterator = 0;
|
||||||
m_used_iterator = 0;
|
m_used_iterator = 0;
|
||||||
|
|
|
@ -91,8 +91,8 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
||||||
|
|
||||||
WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1));
|
WRITE(p, " int y_block_position = uv1.y & %d;\n", ~(blkH - 1));
|
||||||
WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1);
|
WRITE(p, " int y_offset_in_block = uv1.y & %d;\n", blkH - 1);
|
||||||
WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", Log2(samples));
|
WRITE(p, " int x_virtual_position = (uv1.x << %d) + y_offset_in_block * position.z;\n", IntLog2(samples));
|
||||||
WRITE(p, " int x_block_position = (x_virtual_position >> %d) & %d;\n", Log2(blkH), ~(blkW - 1));
|
WRITE(p, " int x_block_position = (x_virtual_position >> %d) & %d;\n", IntLog2(blkH), ~(blkW - 1));
|
||||||
if (samples == 1)
|
if (samples == 1)
|
||||||
{
|
{
|
||||||
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
|
// 32 bit textures (RGBA8 and Z24) are stored in 2 cache line increments
|
||||||
|
@ -100,7 +100,7 @@ static void WriteSwizzler(char*& p, u32 format, API_TYPE ApiType)
|
||||||
WRITE(p, " x_virtual_position = x_virtual_position << 1;\n");
|
WRITE(p, " x_virtual_position = x_virtual_position << 1;\n");
|
||||||
}
|
}
|
||||||
WRITE(p, " int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1);
|
WRITE(p, " int x_offset_in_block = x_virtual_position & %d;\n", blkW - 1);
|
||||||
WRITE(p, " int y_offset = (x_virtual_position >> %d) & %d;\n", Log2(blkW), blkH - 1);
|
WRITE(p, " int y_offset = (x_virtual_position >> %d) & %d;\n", IntLog2(blkW), blkH - 1);
|
||||||
|
|
||||||
WRITE(p, " sampleUv.x = x_offset_in_block + x_block_position;\n");
|
WRITE(p, " sampleUv.x = x_offset_in_block + x_block_position;\n");
|
||||||
WRITE(p, " sampleUv.y = y_block_position + y_offset;\n");
|
WRITE(p, " sampleUv.y = y_block_position + y_offset;\n");
|
||||||
|
|
|
@ -44,17 +44,17 @@ TEST(MathUtil, IsSNAN)
|
||||||
EXPECT_TRUE(MathUtil::IsSNAN(std::numeric_limits<double>::signaling_NaN()));
|
EXPECT_TRUE(MathUtil::IsSNAN(std::numeric_limits<double>::signaling_NaN()));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(MathUtil, Log2)
|
TEST(MathUtil, IntLog2)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(0, Log2(1));
|
EXPECT_EQ(0, IntLog2(1));
|
||||||
EXPECT_EQ(1, Log2(2));
|
EXPECT_EQ(1, IntLog2(2));
|
||||||
EXPECT_EQ(2, Log2(4));
|
EXPECT_EQ(2, IntLog2(4));
|
||||||
EXPECT_EQ(3, Log2(8));
|
EXPECT_EQ(3, IntLog2(8));
|
||||||
EXPECT_EQ(63, Log2(0x8000000000000000ull));
|
EXPECT_EQ(63, IntLog2(0x8000000000000000ull));
|
||||||
|
|
||||||
// Rounding behavior.
|
// Rounding behavior.
|
||||||
EXPECT_EQ(3, Log2(15));
|
EXPECT_EQ(3, IntLog2(15));
|
||||||
EXPECT_EQ(63, Log2(0xFFFFFFFFFFFFFFFFull));
|
EXPECT_EQ(63, IntLog2(0xFFFFFFFFFFFFFFFFull));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(MathUtil, FlushToZero)
|
TEST(MathUtil, FlushToZero)
|
||||||
|
|
Loading…
Reference in New Issue