diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp index a73a02d172..a0ebd65a9c 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.cpp @@ -585,6 +585,18 @@ InstLoc IRBuilder::FoldInterpreterFallback(InstLoc Op1, InstLoc Op2) { return EmitBiOp(InterpreterFallback, Op1, Op2); } +InstLoc IRBuilder::FoldDoubleBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2) { + if (getOpcode(*Op1) == InsertDoubleInMReg) { + return FoldDoubleBiOp(Opcode, getOp1(Op1), Op2); + } + + if (getOpcode(*Op2) == InsertDoubleInMReg) { + return FoldDoubleBiOp(Opcode, Op1, getOp1(Op2)); + } + + return EmitBiOp(Opcode, Op1, Op2); +} + InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned extra) { switch (Opcode) { case Add: return FoldAdd(Op1, Op2); @@ -601,6 +613,7 @@ InstLoc IRBuilder::FoldBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2, unsigned case ICmpSgt: case ICmpSlt: case ICmpSge: case ICmpSle: return FoldICmp(Opcode, Op1, Op2); case InterpreterFallback: return FoldInterpreterFallback(Op1, Op2); + case FDMul: case FDAdd: case FDSub: return FoldDoubleBiOp(Opcode, Op1, Op2); default: return EmitBiOp(Opcode, Op1, Op2, extra); } } diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h index 1762459286..9c6de955b3 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/IR.h @@ -88,7 +88,6 @@ namespace IREmitter { Store16, Store32, BranchCond, -#if 0 // Floating-point // There are three floating-point formats: single, double, // and packed. For any operation where the format of the @@ -98,59 +97,15 @@ namespace IREmitter { // The "mreg" format is a pair of doubles; this is the // most general possible represenation which is used // in the register state. 
- // This might seem like overkill, but it's a huge advantage - // to keep operands in the right format because extra - // precision can screw up games. - // FIXME: Does the slight loss of precision due to not - // having a madd instruction matter? It would be a - // performance loss for singles because the operations - // would have to be done in double precision, and a completely - // accurate double madd would require an extremely expensive - // fallback. - FDAdd, - FDSub, - FDMul, - FDDiv, - FDNeg, - FSAdd, - FSSub, - FSMul, - FSDiv, - FSNeg, - FPSAdd, - FPSSub, - FPSMul, - FPSDiv, - FPSNeg, - // FP Loads - LoadSingle, - LoadDouble, - // LoadPacked, // FIXME: Work out how this instruction should - // be implemented - // FP Stores - StoreSingle, - StoreDouble, - // StorePacked, // FIXME: Work out how this instruction should - // be implemented - PackedToSingle, // Extract PS0 from packed (type-pun) - // PackedToDouble == PackedToSingle+SingleToDouble - PackedToMReg, // Convert from packed format to mreg format (CVTPS2PD) - SingleToDouble, // Widen single to double (CVTSS2SD) - SingleToPacked, // Duplicate single to packed - // SingleToMReg == SingleToPacked+PackedToMReg - MRegToPacked, // Convert from mreg format to packed format (CVTPD2PS) - MRegToDouble, // Extract bottom half from mreg format. (type-pun) - // MRegToSingle == MRegToDouble + DoubleToSingle - DoubleToMReg, // Convert from double format to mreg format - DoubleToSingle, // Convert from double to single format (CVTSD2SS) - // DoubleToPacked should never be needed - - ForceToPacked, // ForceTo* are "virtual"; they should be - // folded into the above conversions. - ForceToSingle, - ForceToDouble, - ForceToMReg, -#endif + // This might seem like overkill, but the semantics require + // having the different formats. + // FIXME: Check the accuracy of the mapping: + // 1. 
Is paired arithmetic always rounded to single-precision + // first, or does it do double-to-single like the + // single-precision instructions? + // 2. The implementation of madd is slightly off, and + // the implementation of fmuls is very slightly off; + // likely nothing cares, though. FResult_Start, LoadSingle, LoadDouble, @@ -263,6 +218,7 @@ namespace IREmitter { InstLoc FoldXor(InstLoc Op1, InstLoc Op2); InstLoc FoldBranchCond(InstLoc Op1, InstLoc Op2); InstLoc FoldICmp(unsigned Opcode, InstLoc Op1, InstLoc Op2); + InstLoc FoldDoubleBiOp(unsigned Opcode, InstLoc Op1, InstLoc Op2); InstLoc FoldInterpreterFallback(InstLoc Op1, InstLoc Op2); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.cpp index 9009dc9e77..53ad8c8127 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.cpp @@ -178,7 +178,7 @@ namespace CPUCompare CODE_SIZE = 1024*1024*8*8; jo.optimizeStack = true; - jo.enableBlocklink = true; // Speed boost, but not 100% safe + jo.enableBlocklink = false; // Disabled: block linking is a speed boost, but not 100% safe #ifdef _M_X64 jo.enableFastMem = Core::GetStartupParameter().bUseFastMem; #else @@ -200,6 +200,13 @@ namespace CPUCompare asm_routines.Init(); } + void Jit64::ClearCache() + { + blocks.Clear(); + trampolines.ClearCodeSpace(); + ClearCodeSpace(); + } + void Jit64::Shutdown() { FreeCodeSpace(); diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h index 7672d1676c..dd3dc844a1 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64IL/Jit.h @@ -146,11 +146,7 @@ public: void NotifyBreakpoint(u32 em_address, bool set); - void ClearCache() - { - blocks.Clear(); - trampolines.ClearCodeSpace(); - } + void ClearCache(); // Run!