Some WIP work on the JIT... only marginally usable at the moment, but I
wanted to back this up somewhere, and the people familiar with the JIT might have comments. There's a big comment in Jit64IL/IR.cpp with a high-level overview of what this is. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1724 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
1d0d106736
commit
68c451f008
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,322 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef IR_H
|
||||
#define IR_H
|
||||
|
||||
#include "x64Emitter.h"
|
||||
#include <vector>
|
||||
|
||||
namespace IREmitter {
|
||||
|
||||
// Opcodes of the intermediate representation. Instructions are grouped by
// arity (zero-operand, unary, binary) and by function; several predicate
// helpers below (isImm, isUnary, isBinary, ...) test enumerator *ranges*,
// so do not reorder these casually.
enum Opcode {
	Nop = 0,

	// "Zero-operand" operators
	// Register load operators
	LoadGReg,
	LoadLink,
	LoadCR,
	LoadCarry,
	LoadCTR,
	LoadMSR,

	// Unary operators
	// Integer unary operators
	SExt8,
	SExt16,
	BSwap32,
	BSwap16,
	Load8, // These loads zext
	Load16,
	Load32,
	// Branches
	BranchUncond,
	// Register store operators
	StoreGReg,
	StoreCR,
	StoreLink,
	StoreCarry,
	StoreCTR,
	StoreMSR,
	// Arbitrary interpreter instruction
	InterpreterFallback,

	// Binary operators
	// Commutative integer operators
	Add,
	Mul,
	And,
	Or,
	Xor,
	// Non-commutative integer operators
	Sub,
	Shl, // Note that shifts ignore bits above the bottom 5
	Shrl,
	Sarl,
	Rol,
	ICmpCRSigned, // CR for signed int compare
	ICmpCRUnsigned, // CR for unsigned int compare
	ICmpEq, // One if equal, zero otherwise
	ICmpUgt, // One if op1 > op2, zero otherwise
	// Memory store operators
	Store8,
	Store16,
	Store32,
	BranchCond,

	// "Trinary" operators
	// FIXME: Need to change representation!
	//Select, // Equivalent to C "Op1 ? Op2 : Op3"

	// Integer constants
	CInt16,
	CInt32,

	// "Opcode" representing a register too far away to
	// reference directly; this is a size optimization
	Tramp,
	// "Opcode"s representing the start and end
	BlockStart, BlockEnd
};
|
||||
|
||||
typedef unsigned Inst;
|
||||
typedef Inst* InstLoc;
|
||||
|
||||
// Extract the Opcode field from a packed instruction word (low 8 bits).
unsigned inline getOpcode(Inst i) {
	return i & 0xFF;
}
|
||||
|
||||
// True if the instruction is an integer constant (CInt16 or CInt32).
unsigned inline isImm(Inst i) {
	const unsigned op = getOpcode(i);
	return op >= CInt16 && op <= CInt32;
}
|
||||
|
||||
// True if the instruction is an integer unary operator
// (SExt8..BSwap16 in enum order).
unsigned inline isUnary(Inst i) {
	const unsigned op = getOpcode(i);
	return op >= SExt8 && op <= BSwap16;
}
|
||||
|
||||
// True if the instruction is a binary arithmetic/compare operator.
// NOTE(review): the range Add..ICmpCRUnsigned excludes ICmpEq and ICmpUgt,
// which are also two-operand compares — confirm whether that is intended.
unsigned inline isBinary(Inst i) {
	return getOpcode(i) >= Add && getOpcode(i) <= ICmpCRUnsigned;
}
|
||||
|
||||
// True if the instruction is a memory load (Load8/Load16/Load32).
unsigned inline isMemLoad(Inst i) {
	const unsigned op = getOpcode(i);
	return op >= Load8 && op <= Load32;
}
|
||||
|
||||
// True if the instruction is a memory store (Store8/Store16/Store32).
unsigned inline isMemStore(Inst i) {
	const unsigned op = getOpcode(i);
	return op >= Store8 && op <= Store32;
}
|
||||
|
||||
// True if the instruction reads emulated register state.
// NOTE(review): the range LoadGReg..LoadCR excludes LoadCarry, LoadCTR and
// LoadMSR — confirm whether those are deliberately left out.
unsigned inline isRegLoad(Inst i) {
	return getOpcode(i) >= LoadGReg && getOpcode(i) <= LoadCR;
}
|
||||
|
||||
// True if the instruction writes emulated register state.
// BUG FIX: this previously tested the *load* opcode range
// (LoadGReg..LoadCR), an apparent copy-paste of isRegLoad, so it never
// matched any store opcode. Test the register-store range instead
// (StoreGReg..StoreMSR covers StoreGReg, StoreCR, StoreLink, StoreCarry,
// StoreCTR and StoreMSR in enum order).
unsigned inline isRegStore(Inst i) {
	return getOpcode(i) >= StoreGReg && getOpcode(i) <= StoreMSR;
}
|
||||
|
||||
// True if the instruction is a branch (unconditional or conditional).
unsigned inline isBranch(Inst i) {
	const unsigned op = getOpcode(i);
	return op >= BranchUncond && op <= BranchCond;
}
|
||||
|
||||
// True if the instruction punts to the interpreter.
unsigned inline isInterpreterFallback(Inst i) {
	return InterpreterFallback == getOpcode(i);
}
|
||||
|
||||
// Operands are encoded as small backward distances: bits 8..15 of the
// instruction word hold (distance - 1) from this instruction back to its
// first operand in the instruction stream.
InstLoc inline getOp1(InstLoc i) {
	return i - 1 - ((*i >> 8) & 255);
}
|
||||
|
||||
// Second operand: bits 16..23 hold (distance - 1) back to operand 2.
// Operands further than 255 slots away go through a Tramp instruction.
InstLoc inline getOp2(InstLoc i) {
	return i - 1 - ((*i >> 16) & 255);
}
|
||||
|
||||
class IRBuilder {
|
||||
InstLoc EmitZeroOp(unsigned Opcode, unsigned extra);
|
||||
InstLoc EmitUOp(unsigned OpCode, InstLoc Op1,
|
||||
unsigned extra = 0);
|
||||
InstLoc EmitBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
|
||||
|
||||
InstLoc FoldAdd(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldAnd(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldOr(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldRol(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldShl(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldShrl(InstLoc Op1, InstLoc Op2);
|
||||
InstLoc FoldXor(InstLoc Op1, InstLoc Op2);
|
||||
|
||||
InstLoc FoldInterpreterFallback(InstLoc Op1, InstLoc Op2);
|
||||
|
||||
InstLoc FoldZeroOp(unsigned Opcode, unsigned extra);
|
||||
InstLoc FoldUOp(unsigned OpCode, InstLoc Op1,
|
||||
unsigned extra = 0);
|
||||
InstLoc FoldBiOp(unsigned OpCode, InstLoc Op1, InstLoc Op2);
|
||||
|
||||
public:
|
||||
InstLoc EmitIntConst(unsigned value);
|
||||
InstLoc EmitStoreLink(InstLoc val) {
|
||||
return FoldUOp(StoreLink, val);
|
||||
}
|
||||
InstLoc EmitBranchUncond(InstLoc val) {
|
||||
return FoldUOp(BranchUncond, val);
|
||||
}
|
||||
InstLoc EmitBranchCond(InstLoc check, InstLoc dest) {
|
||||
return FoldBiOp(BranchCond, check, dest);
|
||||
}
|
||||
InstLoc EmitLoadCR(unsigned crreg) {
|
||||
return FoldZeroOp(LoadCR, crreg);
|
||||
}
|
||||
InstLoc EmitStoreCR(InstLoc value, unsigned crreg) {
|
||||
return FoldUOp(StoreCR, value, crreg);
|
||||
}
|
||||
InstLoc EmitLoadLink() {
|
||||
return FoldZeroOp(LoadLink, 0);
|
||||
}
|
||||
InstLoc EmitLoadMSR() {
|
||||
return FoldZeroOp(LoadMSR, 0);
|
||||
}
|
||||
InstLoc EmitStoreMSR(InstLoc val) {
|
||||
return FoldUOp(StoreMSR, val);
|
||||
}
|
||||
InstLoc EmitLoadGReg(unsigned reg) {
|
||||
return FoldZeroOp(LoadGReg, reg);
|
||||
}
|
||||
InstLoc EmitStoreGReg(InstLoc value, unsigned reg) {
|
||||
return FoldUOp(StoreGReg, value, reg);
|
||||
}
|
||||
InstLoc EmitAnd(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(And, op1, op2);
|
||||
}
|
||||
InstLoc EmitXor(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Xor, op1, op2);
|
||||
}
|
||||
InstLoc EmitSub(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Sub, op1, op2);
|
||||
}
|
||||
InstLoc EmitOr(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Or, op1, op2);
|
||||
}
|
||||
InstLoc EmitAdd(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Add, op1, op2);
|
||||
}
|
||||
InstLoc EmitMul(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Mul, op1, op2);
|
||||
}
|
||||
InstLoc EmitRol(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Rol, op1, op2);
|
||||
}
|
||||
InstLoc EmitShl(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Shl, op1, op2);
|
||||
}
|
||||
InstLoc EmitShrl(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Shrl, op1, op2);
|
||||
}
|
||||
InstLoc EmitSarl(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Sarl, op1, op2);
|
||||
}
|
||||
InstLoc EmitLoadCTR() {
|
||||
return FoldZeroOp(LoadCTR, 0);
|
||||
}
|
||||
InstLoc EmitStoreCTR(InstLoc op1) {
|
||||
return FoldUOp(StoreCTR, op1);
|
||||
}
|
||||
InstLoc EmitICmpEq(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(ICmpEq, op1, op2);
|
||||
}
|
||||
InstLoc EmitICmpUgt(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(ICmpUgt, op1, op2);
|
||||
}
|
||||
InstLoc EmitLoad8(InstLoc op1) {
|
||||
return FoldUOp(Load8, op1);
|
||||
}
|
||||
InstLoc EmitLoad16(InstLoc op1) {
|
||||
return FoldUOp(Load16, op1);
|
||||
}
|
||||
InstLoc EmitLoad32(InstLoc op1) {
|
||||
return FoldUOp(Load32, op1);
|
||||
}
|
||||
InstLoc EmitStore8(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Store8, op1, op2);
|
||||
}
|
||||
InstLoc EmitStore16(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Store16, op1, op2);
|
||||
}
|
||||
InstLoc EmitStore32(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(Store32, op1, op2);
|
||||
}
|
||||
InstLoc EmitSExt16(InstLoc op1) {
|
||||
return FoldUOp(SExt16, op1);
|
||||
}
|
||||
InstLoc EmitSExt8(InstLoc op1) {
|
||||
return FoldUOp(SExt8, op1);
|
||||
}
|
||||
InstLoc EmitICmpCRSigned(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(ICmpCRSigned, op1, op2);
|
||||
}
|
||||
InstLoc EmitICmpCRUnsigned(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(ICmpCRUnsigned, op1, op2);
|
||||
}
|
||||
InstLoc EmitInterpreterFallback(InstLoc op1, InstLoc op2) {
|
||||
return FoldBiOp(InterpreterFallback, op1, op2);
|
||||
}
|
||||
InstLoc EmitStoreCarry(InstLoc op1) {
|
||||
return FoldUOp(StoreCarry, op1);
|
||||
}
|
||||
|
||||
void StartBackPass() { curReadPtr = &InstList[InstList.size()]; }
|
||||
void StartForwardPass() { curReadPtr = &InstList[0]; }
|
||||
InstLoc ReadForward() { return curReadPtr++; }
|
||||
InstLoc ReadBackward() { return --curReadPtr; }
|
||||
InstLoc getFirstInst() { return &InstList[0]; }
|
||||
unsigned getNumInsts() { return InstList.size(); }
|
||||
unsigned ReadInst(InstLoc I) { return *I; }
|
||||
unsigned GetImmValue(InstLoc I);
|
||||
|
||||
void Reset() {
|
||||
InstList.clear();
|
||||
InstList.reserve(100000);
|
||||
for (unsigned i = 0; i < 32; i++) {
|
||||
GRegCache[i] = 0;
|
||||
GRegCacheStore[i] = 0;
|
||||
}
|
||||
CarryCache = 0;
|
||||
CarryCacheStore = 0;
|
||||
for (unsigned i = 0; i < 8; i++) {
|
||||
CRCache[i] = 0;
|
||||
CRCacheStore[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
IRBuilder() { Reset(); }
|
||||
|
||||
private:
|
||||
std::vector<Inst> InstList; // FIXME: We must ensure this is
|
||||
// continuous!
|
||||
std::vector<unsigned> ConstList;
|
||||
InstLoc curReadPtr;
|
||||
InstLoc GRegCache[32];
|
||||
InstLoc GRegCacheStore[32];
|
||||
InstLoc CarryCache;
|
||||
InstLoc CarryCacheStore;
|
||||
InstLoc CRCache[8];
|
||||
InstLoc CRCacheStore[8];
|
||||
};
|
||||
|
||||
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,528 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "Common.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
#include "Thunk.h"
|
||||
#include "../../HLE/HLE.h"
|
||||
#include "../../Core.h"
|
||||
#include "../../PatchEngine.h"
|
||||
#include "../../CoreTiming.h"
|
||||
#include "../../Debugger/Debugger_BreakPoints.h"
|
||||
#include "../PowerPC.h"
|
||||
#include "../Profiler.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "../PPCAnalyst.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "Jit.h"
|
||||
#include "JitAsm.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
using namespace Gen;
|
||||
using namespace PowerPC;
|
||||
|
||||
extern int blocksExecuted;
|
||||
|
||||
// Dolphin's PowerPC->x86 JIT dynamic recompiler
|
||||
// (Nearly) all code by ector (hrydgard)
|
||||
// Features:
|
||||
// * x86 & x64 support, lots of shared code.
|
||||
// * Basic block linking
|
||||
// * Fast dispatcher
|
||||
|
||||
// Unfeatures:
|
||||
// * Does not recompile all instructions - sometimes falls back to inserting a CALL to the corresponding JIT function.
|
||||
|
||||
// Various notes below
|
||||
|
||||
// Register allocation
|
||||
// RAX - Generic quicktemp register
|
||||
// RBX - point to base of memory map
|
||||
// RSI RDI R12 R13 R14 R15 - free for allocation
|
||||
// RCX RDX R8 R9 R10 R11 - allocate in emergencies. These need to be flushed before functions are called.
|
||||
// RSP - stack pointer, do not generally use, very dangerous
|
||||
// RBP - ?
|
||||
|
||||
// IMPORTANT:
|
||||
// Make sure that all generated code and all emulator state sits under the 2GB boundary so that
|
||||
// RIP addressing can be used easily. Windows will always allocate static code under the 2GB boundary.
|
||||
// Also make sure to use VirtualAlloc and specify EXECUTE permission.
|
||||
|
||||
// Open questions
|
||||
// * Should there be any statically allocated registers? r3, r4, r5, r8, r0 come to mind.. maybe sp
|
||||
// * Does it make sense to finish off the remaining non-jitted instructions? Seems we are hitting diminishing returns.
|
||||
// * Why is the FPU exception handling not working 100%? Several games still get corrupted floating point state.
|
||||
// This can even be seen in one homebrew Wii demo - RayTracer.elf
|
||||
|
||||
// Other considerations
|
||||
//
|
||||
// Many instructions have shorter forms for EAX. However, I believe their performance boost
|
||||
// will be so small as to be negligible, so I haven't dirtied up the code with that. AMD recommends it in their
|
||||
// optimization manuals, though.
|
||||
//
|
||||
// We support block linking. Reserve space at the exits of every block for a full 5-byte jmp. Save 16-bit offsets
|
||||
// from the starts of each block, marking the exits so that they can be nicely patched at any time.
|
||||
//
|
||||
// Blocks do NOT use call/ret, they only jmp to each other and to the dispatcher when necessary.
|
||||
//
|
||||
// All blocks that can be precompiled will be precompiled. Code will be memory protected - any write will mark
|
||||
// the region as non-compilable, and all links to the page will be torn out and replaced with dispatcher jmps.
|
||||
//
|
||||
// Alternatively, icbi instruction SHOULD mark where we can't compile
|
||||
//
|
||||
// Seldom-happening events are handled by adding a decrement of a counter to all blr instructions (which are
|
||||
// expensive anyway since we need to return to dispatcher, except when they can be predicted).
|
||||
|
||||
// TODO: SERIOUS synchronization problem with the video plugin setting tokens and breakpoints in dual core mode!!!
|
||||
// Somewhat fixed by disabling idle skipping when certain interrupts are enabled
|
||||
// This is not a permanent, reliable fix
|
||||
// TODO: Zeldas go whacko when you hang the gfx thread
|
||||
|
||||
// Idea - Accurate exception handling
|
||||
// Compute register state at a certain instruction by running the JIT in "dry mode", and stopping at the right place.
|
||||
// Not likely to be done :P
|
||||
|
||||
|
||||
// Optimization Ideas -
|
||||
/*
|
||||
* Assume SP is in main RAM (in Wii mode too?) - partly done
|
||||
* Assume all floating point loads and double precision loads+stores are to/from main ram
|
||||
(single precision can be used in write gather pipe, specialized fast check added)
|
||||
* AMD only - use movaps instead of movapd when loading ps from memory?
|
||||
* HLE functions like floorf, sin, memcpy, etc - they can be much faster
|
||||
* ABI optimizations - drop F0-F13 on blr, for example. Watch out for context switching.
|
||||
CR2-CR4 are non-volatile, rest of CR is volatile -> dropped on blr.
|
||||
R5-R12 are volatile -> dropped on blr.
|
||||
* classic inlining across calls.
|
||||
|
||||
Low hanging fruit:
|
||||
stfd -- guaranteed in memory
|
||||
cmpl
|
||||
mulli
|
||||
stfs
|
||||
stwu
|
||||
lb/stzx
|
||||
|
||||
bcx - optimize!
|
||||
bcctr
|
||||
stfs
|
||||
psq_st
|
||||
addx
|
||||
orx
|
||||
rlwimix
|
||||
fcmpo
|
||||
DSP_UpdateARAMDMA
|
||||
lfd
|
||||
stwu
|
||||
cntlzwx
|
||||
bcctrx
|
||||
WriteBigEData
|
||||
|
||||
TODO
|
||||
lha
|
||||
srawx
|
||||
addic_rc
|
||||
addex
|
||||
subfcx
|
||||
subfex
|
||||
|
||||
fmaddx
|
||||
fmulx
|
||||
faddx
|
||||
fnegx
|
||||
frspx
|
||||
frsqrtex
|
||||
ps_sum0
|
||||
ps_muls0
|
||||
ps_adds1
|
||||
|
||||
*/
|
||||
|
||||
// The singleton JIT instance used by the global Jit() entry point below.
Jit64 jit;

// Size of the JIT code cache in bytes; may be enlarged in Jit64::Init()
// when the unlimited-cache option is set.
int CODE_SIZE = 1024*1024*16;

namespace CPUCompare
{
	extern u32 m_BlockStart;
}
|
||||
|
||||
// Global entry point called by the dispatcher when no compiled block exists
// for em_address: compile it with the singleton JIT instance.
void Jit(u32 em_address)
{
	jit.Jit(em_address);
}
|
||||
|
||||
// One-time initialization: configure JIT options, hook the register
// allocators up to this emitter, and allocate the trampoline and code
// caches. Must run before any call to Jit()/DoJit().
void Jit64::Init()
{
	asm_routines.compareEnabled = ::Core::g_CoreStartupParameter.bRunCompareClient;
	if (Core::g_CoreStartupParameter.bJITUnlimitedCache)
		CODE_SIZE = 1024*1024*8*8;

	jo.optimizeStack = true;
	jo.enableBlocklink = true; // Speed boost, but not 100% safe
#ifdef _M_X64
	jo.enableFastMem = Core::GetStartupParameter().bUseFastMem;
#else
	// Fast memory is only implemented for 64-bit builds.
	jo.enableFastMem = false;
#endif
	jo.assumeFPLoadFromMem = true;
	jo.fpAccurateFlags = true;
	jo.optimizeGatherPipe = true;
	jo.fastInterrupts = false;
	jo.accurateSinglePrecision = false;

	gpr.SetEmitter(this);
	fpr.SetEmitter(this);

	trampolines.Init();
	AllocCodeSpace(CODE_SIZE);

	blocks.Init();
	asm_routines.Init();
}
|
||||
|
||||
// Tear down everything Init() set up: release the code cache, then shut
// down the block cache, trampolines, and asm routines.
void Jit64::Shutdown()
{
	FreeCodeSpace();

	blocks.Shutdown();
	trampolines.Shutdown();
	asm_routines.Shutdown();
}
|
||||
|
||||
|
||||
// Emit a call into the interpreter for a single instruction. All cached
// registers must be flushed first so the interpreter sees consistent
// state. For the last instruction of a block, PC/NPC are set up before
// the call and the block exits through the exception-checking path using
// whatever NPC the interpreter produced (it may have branched).
void Jit64::WriteCallInterpreter(UGeckoInstruction inst)
{
	gpr.Flush(FLUSH_ALL);
	fpr.Flush(FLUSH_ALL);
	if (js.isLastInstruction)
	{
		MOV(32, M(&PC), Imm32(js.compilerPC));
		MOV(32, M(&NPC), Imm32(js.compilerPC + 4));
	}
	Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst);
	ABI_CallFunctionC((void*)instr, inst.hex);
	if (js.isLastInstruction)
	{
		MOV(32, R(EAX), M(&NPC));
		WriteRfiExitDestInEAX();
	}
}
|
||||
|
||||
// Handler for opcodes the tables don't recognize at all: report and move on.
void Jit64::unknown_instruction(UGeckoInstruction inst)
{
	//	CCPU::Break();
	PanicAlert("unknown_instruction %08x - Fix me ;)", inst.hex);
}
|
||||
|
||||
// Default handler for instructions without a dedicated IR translation:
// emit an InterpreterFallback IR node carrying the raw instruction word
// and its address as integer constants.
void Jit64::Default(UGeckoInstruction _inst)
{
	ibuild.EmitInterpreterFallback(
		ibuild.EmitIntConst(_inst.hex),
		ibuild.EmitIntConst(js.compilerPC));
}
|
||||
|
||||
// Replace the instruction with a call into the HLE subsystem, then exit
// the block to whatever NPC the HLE handler set. Registers are flushed
// first since HLE::Execute can touch arbitrary emulated state.
void Jit64::HLEFunction(UGeckoInstruction _inst)
{
	gpr.Flush(FLUSH_ALL);
	fpr.Flush(FLUSH_ALL);
	ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
	MOV(32, R(EAX), M(&NPC));
	WriteExitDestInEAX(0);
}
|
||||
|
||||
// Intentional no-op handler (e.g. for instructions that need no codegen).
void Jit64::DoNothing(UGeckoInstruction _inst)
{
	// Yup, just don't do anything.
}
|
||||
|
||||
// Invalidate the compiled block starting at em_address so that it is
// recompiled the next time it is reached.
// NOTE(review): the 'set' flag is ignored — the block is destroyed whether
// the breakpoint is being added or removed; confirm this is intended.
void Jit64::NotifyBreakpoint(u32 em_address, bool set)
{
	int block_num = blocks.GetBlockNumberFromStartAddress(em_address);
	if (block_num >= 0)
	{
		blocks.DestroyBlock(block_num, false);
	}
}
|
||||
|
||||
// Compile-time switches for the ImHere() trace helper below.
static const bool ImHereDebug = false; // call ImHere() at every block entry
static const bool ImHereLog = false;   // additionally log each PC to a file
// Visit counter per PC, used to throttle repeated log output.
static std::map<u32, int> been_here;
|
||||
|
||||
void ImHere()
|
||||
{
|
||||
static FILE *f = 0;
|
||||
if (ImHereLog) {
|
||||
if (!f)
|
||||
{
|
||||
#ifdef _M_X64
|
||||
f = fopen("log64.txt", "w");
|
||||
#else
|
||||
f = fopen("log32.txt", "w");
|
||||
#endif
|
||||
}
|
||||
fprintf(f, "%08x\n", PC);
|
||||
}
|
||||
if (been_here.find(PC) != been_here.end()) {
|
||||
been_here.find(PC)->second++;
|
||||
if ((been_here.find(PC)->second) & 1023)
|
||||
return;
|
||||
}
|
||||
LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR);
|
||||
printf("I'm here - PC = %08x , LR = %08x", PC, LR);
|
||||
been_here[PC] = 1;
|
||||
}
|
||||
|
||||
// Per-exit housekeeping: if the gather-pipe optimization accumulated FIFO
// bytes in this block, emit a call to flush them before leaving JIT code.
void Jit64::Cleanup()
{
	if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0)
		ABI_CallFunction((void *)&GPFifo::CheckGatherPipe);
}
|
||||
|
||||
// Emit the epilogue for a block exit to a statically known PPC address:
// run cleanup, charge the block's cycle count against the downcounter,
// record the exit in the JitBlock (so the block-linker can patch it later),
// and either jump straight to an already-compiled target block or fall back
// to the dispatcher.
void Jit64::WriteExit(u32 destination, int exit_num)
{
	Cleanup();
	// Imm8 form saves code bytes when the amount fits in a signed byte.
	SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));

	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;
	b->exitAddress[exit_num] = destination;
	b->exitPtrs[exit_num] = GetWritableCodePtr();

	// Link opportunity!
	int block = blocks.GetBlockNumberFromStartAddress(destination);
	if (block >= 0 && jo.enableBlocklink)
	{
		// It exists! Joy of joy!
		JMP(blocks.GetBlock(block)->checkedEntry, true);
		b->linkStatus[exit_num] = true;
	}
	else
	{
		MOV(32, M(&PC), Imm32(destination));
		JMP(asm_routines.dispatcher, true);
	}
}
|
||||
|
||||
// Exit the block to the PPC address currently held in EAX (used for
// computed branches), always going through the dispatcher — no block
// linking is possible since the target is only known at runtime.
// NOTE(review): exit_num is unused here; confirm whether it should be
// recorded like in WriteExit.
void Jit64::WriteExitDestInEAX(int exit_num)
{
	MOV(32, M(&PC), R(EAX));
	Cleanup();
	SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
	JMP(asm_routines.dispatcher, true);
}
|
||||
|
||||
// Like WriteExitDestInEAX, but exits through the exception-test routine
// instead of the plain dispatcher (used after rfi and interpreter calls,
// where pending exceptions must be re-checked).
void Jit64::WriteRfiExitDestInEAX()
{
	MOV(32, M(&PC), R(EAX));
	Cleanup();
	SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
	JMP(asm_routines.testExceptions, true);
}
|
||||
|
||||
// Exit the block raising the given exception bit: OR it into the pending
// exception mask, point PC past the current instruction, and jump to the
// exception-test routine.
void Jit64::WriteExceptionExit(u32 exception)
{
	Cleanup();
	OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(exception));
	MOV(32, M(&PC), Imm32(js.compilerPC + 4));
	JMP(asm_routines.testExceptions, true);
}
|
||||
|
||||
// Enter generated code through the asm entry stub.
void STACKALIGN Jit64::Run()
{
	CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode;
	pExecAddr();
	//Will return when PowerPC::state changes
}
|
||||
|
||||
// Single-instruction stepping — currently a stub; the sketched approach
// (compile a one-instruction block and run it) is commented out below.
void Jit64::SingleStep()
{
	// NOT USED, NOT TESTED, PROBABLY NOT WORKING YET
	// PanicAlert("Single");
	/*
	JitBlock temp_block;
	PPCAnalyst::CodeBuffer temp_codebuffer(1); // Only room for one instruction! Single step!
	const u8 *code = DoJit(PowerPC::ppcState.pc, &temp_codebuffer, &temp_block);
	CompiledCode pExecAddr = (CompiledCode)code;
	pExecAddr();*/
}
|
||||
|
||||
// Compile the block starting at em_address and register it in the block
// cache. If the code space or block table is (nearly) exhausted, the whole
// cache is cleared first — block links would dangle otherwise.
void STACKALIGN Jit64::Jit(u32 em_address)
{
	if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
	{
		LOG(DYNA_REC, "JIT cache full - clearing.")
		if (Core::g_CoreStartupParameter.bJITUnlimitedCache)
		{
			// With an unlimited cache this should be unreachable.
			PanicAlert("What? JIT cache still full - clearing.");
		}
		ClearCache();
	}
	int block_num = blocks.AllocateBlock(em_address);
	JitBlock *b = blocks.GetBlock(block_num);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink, DoJit(em_address, &code_buffer, b));
}
|
||||
|
||||
// Compile one PPC block into x86 code. Returns the "normal" entry point;
// the block also gets a preceding checked entry that bails to doTiming when
// the downcounter has run out.
const u8* Jit64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b)
{
	// WIP: force every instruction group off — presumably so that all
	// instructions route through Default()/the IR fallback while the IR
	// backend is being developed. TODO confirm; remove once stable.
	Core::g_CoreStartupParameter.bJITLoadStoreOff = true;
	Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff = true;
	Core::g_CoreStartupParameter.bJITLoadStorePairedOff = true;
	Core::g_CoreStartupParameter.bJITFloatingPointOff = true;
	Core::g_CoreStartupParameter.bJITIntegerOff = true;
	Core::g_CoreStartupParameter.bJITPairedOff = true;
	Core::g_CoreStartupParameter.bJITSystemRegistersOff = true;
	Core::g_CoreStartupParameter.bJITBranchOff = true;
	if (em_address == 0)
		PanicAlert("ERROR : Trying to compile at 0. LR=%08x", LR);

	// Per-block compiler state.
	int size;
	js.isLastInstruction = false;
	js.blockStart = em_address;
	js.fifoBytesThisBlock = 0;
	js.curBlock = b;
	js.blockSetsQuantizers = false;
	js.block_flags = 0;
	js.cancel = false;

	//Analyze the block, collect all instructions it is made of (including inlining,
	//if that is enabled), reorder instructions for optimal performance, and join joinable instructions.
	PPCAnalyst::Flatten(em_address, &size, &js.st, &js.gpa, &js.fpa, code_buffer);
	PPCAnalyst::CodeOp *ops = code_buffer->codebuffer;

	const u8 *start = AlignCode4(); //TODO: Test if this or AlignCode16 make a difference from GetCodePtr
	b->checkedEntry = start;
	b->runCount = 0;

	// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
	FixupBranch skip = J_CC(CC_NBE);
	MOV(32, M(&PC), Imm32(js.blockStart));
	JMP(asm_routines.doTiming, true); // downcount hit zero - go doTiming.
	SetJumpTarget(skip);

	const u8 *normalEntry = GetCodePtr();
	js.normalEntry = (u8*)normalEntry;

	if (ImHereDebug)
		ABI_CallFunction((void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful

	// Disabled (if (false)): FP-exception bailout for blocks that use the FPU.
	if (false && js.fpa.any)
	{
		//This block uses FPU - needs to add FP exception bailout
		TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit
		FixupBranch b1 = J_CC(CC_NZ);
		MOV(32, M(&PC), Imm32(js.blockStart));
		JMP(asm_routines.fpException, true);
		SetJumpTarget(b1);
	}

	// Disabled (if (false)): fast interrupt check at block entry.
	if (false && jo.fastInterrupts)
	{
		// This does NOT yet work.
		TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF));
		FixupBranch b1 = J_CC(CC_Z);
		MOV(32, M(&PC), Imm32(js.blockStart));
		JMP(asm_routines.testExceptions, true);
		SetJumpTarget(b1);
	}

	// Conditionally add profiling code.
	if (Profiler::g_ProfileBlocks) {
		ADD(32, M(&b->runCount), Imm8(1));
#ifdef _WIN32
		b->ticCounter.QuadPart = 0;
		b->ticStart.QuadPart = 0;
		b->ticStop.QuadPart = 0;
#else
		//TODO
#endif
		// get start tic
		PROFILER_QUERY_PERFORMACE_COUNTER(&b->ticStart);
	}

	//Start up the register allocators
	//They use the information in gpa/fpa to preload commonly used registers.
	//gpr.Start(js.gpa);
	//fpr.Start(js.fpa);
	ibuild.Reset();

	js.downcountAmount = js.st.numCycles + PatchEngine::GetSpeedhackCycles(em_address);
	js.blockSize = size;
	// Translate instructions
	for (int i = 0; i < (int)size; i++)
	{
		// gpr.Flush(FLUSH_ALL);
		// if (PPCTables::UsesFPU(_inst))
		//	fpr.Flush(FLUSH_ALL);
		js.compilerPC = ops[i].address;
		js.op = &ops[i];
		js.instructionNumber = i;
		if (i == (int)size - 1)
		{
			// WARNING - cmp->branch merging will screw this up.
			js.isLastInstruction = true;
			js.next_inst = 0;
			if (Profiler::g_ProfileBlocks) {
				// CAUTION!!! push on stack regs you use, do your stuff, then pop
				PROFILER_VPUSH;
				// get end tic
				PROFILER_QUERY_PERFORMACE_COUNTER(&b->ticStop);
				// tic counter += (end tic - start tic)
				PROFILER_ADD_DIFF_LARGE_INTEGER(&b->ticCounter, &b->ticStop, &b->ticStart);
				PROFILER_VPOP;
			}
		}
		else
		{
			// help peephole optimizations
			js.next_inst = ops[i + 1].inst;
			js.next_compilerPC = ops[i + 1].address;
		}

		// Flush batched gather-pipe writes once 32 bytes have accumulated.
		if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
		{
			js.fifoBytesThisBlock -= 32;
			ABI_CallFunction(thunks.ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
		}

		// If starting from the breakpointed instruction, we don't break.
		// NOTE(review): the breakpoint branch body is empty — breakpoint
		// handling is evidently not implemented yet in this backend.
		if (em_address != ops[i].address && BreakPoints::IsAddressBreakPoint(ops[i].address))
		{

		}

		if (!ops[i].skip)
			PPCTables::CompileInstruction(ops[i].inst);

		gpr.SanityCheck();
		fpr.SanityCheck();
		if (js.cancel)
			break;
	}

	// Lower the accumulated IR to x86.
	WriteCode();

	b->flags = js.block_flags;
	b->codeSize = (u32)(GetCodePtr() - normalEntry);
	b->originalSize = size;
	return normalEntry;
}
|
|
@ -0,0 +1,299 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// ========================
|
||||
// See comments in Jit.cpp.
|
||||
// ========================
|
||||
|
||||
// Mystery: Capcom vs SNK 800aa278
|
||||
|
||||
// CR flags approach:
|
||||
// * Store that "N+Z flag contains CR0" or "S+Z flag contains CR3".
|
||||
// * All flag altering instructions flush this
|
||||
// * A flush simply does a conditional write to the appropriate CRx.
|
||||
// * If flag available, branch code can become absolutely trivial.
|
||||
|
||||
#ifndef _JIT_H
|
||||
#define _JIT_H
|
||||
|
||||
#include "../PPCAnalyst.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "x64Analyzer.h"
|
||||
#include "IR.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#else
|
||||
|
||||
// A bit of a hack to get things building under linux. We manually fill in this structure as needed
|
||||
// from the real context.
|
||||
// Minimal stand-in for the Win32 CONTEXT structure on non-Windows builds.
// Only the instruction-pointer and accumulator fields are provided; they
// are filled in manually from the real signal context (see the comment
// above this struct in the file).
struct CONTEXT
{
#ifdef _M_X64
	u64 Rip;
	u64 Rax;
#else
	u32 Eip;
	u32 Eax;
#endif
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Code cache holding generated read/write trampolines, used by the memory
// backpatcher to redirect faulting fast-path accesses to checked slow paths.
class TrampolineCache : public Gen::XCodeBlock
{
public:
	void Init();
	void Shutdown();

	// Return (generating if necessary) a trampoline matching the faulting
	// access described by info.
	const u8 *GetReadTrampoline(const InstructionInfo &info);
	const u8 *GetWriteTrampoline(const InstructionInfo &info);
};
|
||||
|
||||
|
||||
// The x86/x64 PowerPC recompiler (IR-based WIP variant). Owns the block
// cache, register caches, and per-block compilation state, and declares
// one member function per handled PPC instruction.
class Jit64 : public Gen::XCodeBlock
{
private:
	// Per-block compilation state; reset for every block that is jitted.
	struct JitState
	{
		u32 compilerPC;        // address of the instruction being compiled
		u32 next_compilerPC;
		u32 blockStart;        // emulated address the current block begins at
		bool cancel;           // abort compilation of the current block
		UGeckoInstruction next_inst;  // for easy peephole opt.
		int blockSize;         // number of PPC instructions in the block
		int instructionNumber;
		int downcountAmount;   // cycles to subtract when the block exits
		int block_flags;

		bool isLastInstruction;
		bool blockSetsQuantizers;
		bool forceUnsafeLoad;

		int fifoBytesThisBlock;  // gather-pipe bytes written by this block

		PPCAnalyst::BlockStats st;
		PPCAnalyst::BlockRegStats gpa;
		PPCAnalyst::BlockRegStats fpa;
		PPCAnalyst::CodeOp *op;   // current op in the analysed code buffer
		u8* normalEntry;          // entry point skipping the downcount check

		JitBlock *curBlock;
	};

	// Global knobs controlling what optimizations the jit may apply.
	struct JitOptions
	{
		bool optimizeStack;
		bool assumeFPLoadFromMem;
		bool enableBlocklink;
		bool fpAccurateFlags;
		bool enableFastMem;
		bool optimizeGatherPipe;
		bool fastInterrupts;
		bool accurateSinglePrecision;
	};

	JitBlockCache blocks;
	TrampolineCache trampolines;
	GPRRegCache gpr;
	FPURegCache fpr;

	// The default code buffer. We keep it around to not have to alloc/dealloc a
	// large chunk of memory for each recompiled block.
	PPCAnalyst::CodeBuffer code_buffer;

public:
	Jit64() : code_buffer(32000) {}
	~Jit64() {}

	JitState js;
	JitOptions jo;
	IREmitter::IRBuilder ibuild;  // IR construction for the Jit64IL path

	// Initialization, etc

	void Init();
	void Shutdown();

	// Jit!

	// Compile (or re-dispatch) the block starting at em_address.
	void Jit(u32 em_address);
	const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buffer, JitBlock *b);

	JitBlockCache *GetBlockCache() { return &blocks; }

	void NotifyBreakpoint(u32 em_address, bool set);

	// Drops every compiled block and the trampoline code space.
	void ClearCache()
	{
		blocks.Clear();
		trampolines.ClearCodeSpace();
	}

	// Run!

	void Run();
	void SingleStep();

	// Fixes up a faulting fastmem access; see JitBackpatch.cpp.
	const u8 *BackPatch(u8 *codePtr, int accessType, u32 em_address, CONTEXT *ctx);

#define JIT_OPCODE 0

	// Utilities for use by opcodes

	void WriteExit(u32 destination, int exit_num);
	void WriteExitDestInEAX(int exit_num);
	void WriteExceptionExit(u32 exception);
	void WriteRfiExitDestInEAX();
	void WriteCallInterpreter(UGeckoInstruction _inst);
	void Cleanup();

	void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
	void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
	void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
	void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);

	void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
	void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
	void GenerateCarry(Gen::X64Reg temp_reg);

	void ForceSinglePrecisionS(Gen::X64Reg xmm);
	void ForceSinglePrecisionP(Gen::X64Reg xmm);
	void JitClearCA();
	void JitSetCA();
	void tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
	typedef u32 (*Operation)(u32 a, u32 b);
	void regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc = false, bool carry = false);
	void fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg));

	void WriteCode();

	// OPCODES
	void unknown_instruction(UGeckoInstruction _inst);
	void Default(UGeckoInstruction _inst);  // fall back to the interpreter
	void DoNothing(UGeckoInstruction _inst);
	void HLEFunction(UGeckoInstruction _inst);

	// Sub-table dispatchers for the multi-op PPC opcode groups.
	void DynaRunTable4(UGeckoInstruction _inst);
	void DynaRunTable19(UGeckoInstruction _inst);
	void DynaRunTable31(UGeckoInstruction _inst);
	void DynaRunTable59(UGeckoInstruction _inst);
	void DynaRunTable63(UGeckoInstruction _inst);

	// Integer arithmetic / logic
	void addx(UGeckoInstruction inst);
	void orx(UGeckoInstruction inst);
	void xorx(UGeckoInstruction inst);
	void andx(UGeckoInstruction inst);
	void mulli(UGeckoInstruction inst);
	void mulhwux(UGeckoInstruction inst);
	void mullwx(UGeckoInstruction inst);
	void divwux(UGeckoInstruction inst);
	void srawix(UGeckoInstruction inst);
	void srawx(UGeckoInstruction inst);
	void addex(UGeckoInstruction inst);

	void extsbx(UGeckoInstruction inst);
	void extshx(UGeckoInstruction inst);

	void sc(UGeckoInstruction _inst);
	void rfi(UGeckoInstruction _inst);

	// Branches
	void bx(UGeckoInstruction inst);
	void bclrx(UGeckoInstruction _inst);
	void bcctrx(UGeckoInstruction _inst);
	void bcx(UGeckoInstruction inst);

	// SPR / MSR / CR moves
	void mtspr(UGeckoInstruction inst);
	void mfspr(UGeckoInstruction inst);
	void mtmsr(UGeckoInstruction inst);
	void mfmsr(UGeckoInstruction inst);
	void mftb(UGeckoInstruction inst);
	void mtcrf(UGeckoInstruction inst);
	void mfcr(UGeckoInstruction inst);

	void reg_imm(UGeckoInstruction inst);

	// Paired singles
	void ps_sel(UGeckoInstruction inst);
	void ps_mr(UGeckoInstruction inst);
	void ps_sign(UGeckoInstruction inst); //aggregate
	void ps_arith(UGeckoInstruction inst); //aggregate
	void ps_mergeXX(UGeckoInstruction inst);
	void ps_maddXX(UGeckoInstruction inst);
	void ps_rsqrte(UGeckoInstruction inst);
	void ps_sum(UGeckoInstruction inst);
	void ps_muls(UGeckoInstruction inst);

	// Floating point
	void fp_arith_s(UGeckoInstruction inst);

	void fcmpx(UGeckoInstruction inst);
	void fmrx(UGeckoInstruction inst);

	void cmpXX(UGeckoInstruction inst);

	void cntlzwx(UGeckoInstruction inst);

	// Loads / stores
	void lfs(UGeckoInstruction inst);
	void lfd(UGeckoInstruction inst);
	void stfd(UGeckoInstruction inst);
	void stfs(UGeckoInstruction inst);
	void stfsx(UGeckoInstruction inst);
	void psq_l(UGeckoInstruction inst);
	void psq_st(UGeckoInstruction inst);

	void fmaddXX(UGeckoInstruction inst);
	void stX(UGeckoInstruction inst); //stw sth stb
	void lXz(UGeckoInstruction inst);
	void lha(UGeckoInstruction inst);
	void rlwinmx(UGeckoInstruction inst);
	void rlwimix(UGeckoInstruction inst);
	void rlwnmx(UGeckoInstruction inst);
	void negx(UGeckoInstruction inst);
	void slwx(UGeckoInstruction inst);
	void srwx(UGeckoInstruction inst);
	void dcbz(UGeckoInstruction inst);
	void lfsx(UGeckoInstruction inst);

	void subfic(UGeckoInstruction inst);
	void subfcx(UGeckoInstruction inst);
	void subfx(UGeckoInstruction inst);
	void subfex(UGeckoInstruction inst);

	void lbzx(UGeckoInstruction inst);
	void lwzx(UGeckoInstruction inst);
	void lhax(UGeckoInstruction inst);

	void lwzux(UGeckoInstruction inst);

	void stXx(UGeckoInstruction inst);

	void lmw(UGeckoInstruction inst);
	void stmw(UGeckoInstruction inst);
};
|
||||
|
||||
extern Jit64 jit;
|
||||
|
||||
void Jit(u32 em_address);
|
||||
|
||||
void ProfiledReJit();
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,277 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "ABI.h"
|
||||
#include "x64Emitter.h"
|
||||
|
||||
#include "../../HW/Memmap.h"
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../../CoreTiming.h"
|
||||
#include "MemoryUtil.h"
|
||||
|
||||
#include "ABI.h"
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
|
||||
#include "../../HW/CPUCompare.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "../../Core.h"
|
||||
#include "JitAsm.h"
|
||||
|
||||
using namespace Gen;

// Count of dispatched JIT blocks; only incremented by the dispatcher when
// enableStatistics is set.
int blocksExecuted;

// Scratch slot used by GenFifoFloatWrite to move a float from XMM0 to a GPR.
static int temp32;

// NOTE(review): presumably gates the CPUCompare debugging path — confirm.
bool compareEnabled = false;

//TODO - make an option
//#if _DEBUG
static bool enableDebug = false;
//#else
//	bool enableDebug = false;
//#endif

static bool enableStatistics = false;

//GLOBAL STATIC ALLOCATIONS x86
//EAX - ubiquitous scratch register - EVERYBODY scratches this

//GLOBAL STATIC ALLOCATIONS x64
//EAX - ubiquitous scratch register - EVERYBODY scratches this
//RBX - Base pointer of memory
//R15 - Pointer to array of block pointers

// The single global dispatcher/asm-stub generator instance.
AsmRoutineManager asm_routines;
|
||||
|
||||
// PLAN: no more block numbers - crazy opcodes just contain offset within
|
||||
// dynarec buffer
|
||||
// At this offset - 4, there is an int specifying the block number.
|
||||
|
||||
|
||||
// Emits the main dispatcher loop driving execution of compiled blocks:
// enter -> advance CoreTiming -> look up the block for the current PC ->
// jump into it; fall back to the recompiler when no block exists yet.
// Also emits the FP-unavailable exception stub and the exit landing pads.
void AsmRoutineManager::Generate()
{
	enterCode = AlignCode16();
	ABI_PushAllCalleeSavedRegsAndAdjustStack();
#ifndef _M_IX86
	// Two statically allocated registers.
	MOV(64, R(RBX), Imm64((u64)Memory::base));
	MOV(64, R(R15), Imm64((u64)jit.GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough
#endif

	const u8 *outerLoop = GetCodePtr();
	ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
	FixupBranch skipToRealDispatch = J(); //skip the sync and compare first time

	dispatcher = GetCodePtr();
	//This is the place for CPUCompare!

	//The result of slice decrementation should be in flags if somebody jumped here
	FixupBranch bail = J_CC(CC_S);
	SetJumpTarget(skipToRealDispatch);

	dispatcherNoCheck = GetCodePtr();
	MOV(32, R(EAX), M(&PowerPC::ppcState.pc));
	dispatcherPcInEAX = GetCodePtr();
	// Translate the emulated PC to a host address and load the word there.
#ifdef _M_IX86
	AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
	MOV(32, R(EBX), Imm32((u32)Memory::base));
	MOV(32, R(EAX), MComplex(EBX, EAX, SCALE_1, 0));
#else
	MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
#endif
	// FinalizeBlock overwrites compiled addresses with (JIT_OPCODE << 26) |
	// block_num, and JIT_OPCODE is 0 — so a nonzero primary-opcode field in
	// the first byte means the block has not been compiled yet.
	TEST(32, R(EAX), Imm32(0xFC));
	FixupBranch notfound = J_CC(CC_NZ);
	BSWAP(32, EAX);
	//IDEA - we have 26 bits, why not just use offsets from base of code?
	if (enableStatistics)
	{
		ADD(32, M(&blocksExecuted), Imm8(1));
	}
	if (enableDebug)
	{
		ADD(32, M(&PowerPC::ppcState.DebugCount), Imm8(1));
	}
	//grab from list and jump to it
#ifdef _M_IX86
	MOV(32, R(EDX), ImmPtr(jit.GetBlockCache()->GetCodePointers()));
	JMPptr(MComplex(EDX, EAX, 4, 0));
#else
	JMPptr(MComplex(R15, RAX, 8, 0));
#endif
	SetJumpTarget(notfound);

	//Ok, no block, let's jit
#ifdef _M_IX86
	ABI_AlignStack(4);
	PUSH(32, M(&PowerPC::ppcState.pc));
	CALL(reinterpret_cast<void *>(&Jit));
	ABI_RestoreStack(4);
#else
	MOV(32, R(ABI_PARAM1), M(&PowerPC::ppcState.pc));
	CALL((void *)&Jit);
#endif
	JMP(dispatcherNoCheck); // no point in special casing this

	//FP blocks test for FPU available, jump here if false
	fpException = AlignCode4();
	MOV(32, R(EAX), M(&PC));
	MOV(32, M(&NPC), R(EAX));
	OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(EXCEPTION_FPU_UNAVAILABLE));
	ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
	MOV(32, R(EAX), M(&NPC));
	MOV(32, M(&PC), R(EAX));
	JMP(dispatcher);

	SetJumpTarget(bail);
	doTiming = GetCodePtr();

	ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));

	// Service any pending PPC exceptions before re-entering the outer loop.
	testExceptions = GetCodePtr();
	TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF));
	FixupBranch skipExceptions = J_CC(CC_Z);
	MOV(32, R(EAX), M(&PC));
	MOV(32, M(&NPC), R(EAX));
	ABI_CallFunction(reinterpret_cast<void *>(&PowerPC::CheckExceptions));
	MOV(32, R(EAX), M(&NPC));
	MOV(32, M(&PC), R(EAX));
	SetJumpTarget(skipExceptions);

	// Keep looping while the CPU state machine still says "running" (0).
	TEST(32, M((void*)&PowerPC::state), Imm32(0xFFFFFFFF));
	J_CC(CC_Z, outerLoop, true);

	//Landing pad for drec space
	ABI_PopAllCalleeSavedRegsAndAdjustStack();
	RET();

	breakpointBailout = GetCodePtr();
	//Landing pad for drec space
	ABI_PopAllCalleeSavedRegsAndAdjustStack();
	RET();

	GenerateCommon();
}
|
||||
|
||||
|
||||
// Emits a stub that byteswaps an 8/16/32-bit value and appends it to the
// GPFifo gather pipe, advancing m_gatherPipeCount by size/8 bytes.
void AsmRoutineManager::GenFifoWrite(int size)
{
	// Assume value in ABI_PARAM1
	PUSH(ESI);
	if (size != 32)
		PUSH(EDX);  // EDX is only needed as a temp for the narrow stores
	BSWAP(size, ABI_PARAM1);
	MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
	MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
	if (size != 32) {
		MOV(32, R(EDX), R(ABI_PARAM1));
		MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
	} else {
		MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
	}
	ADD(32, R(ESI), Imm8(size >> 3));  // bump count by the byte width
	MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
	if (size != 32)
		POP(EDX);
	POP(ESI);
	RET();
}
|
||||
|
||||
// Emits a stub that byteswaps the float in XMM0 (staged through the temp32
// scratch slot) and appends it to the GPFifo gather pipe.
void AsmRoutineManager::GenFifoFloatWrite()
{
	// Assume value in XMM0
	PUSH(ESI);
	PUSH(EDX);
	MOVSS(M(&temp32), XMM0);     // spill the float so it can be bswapped in a GPR
	MOV(32, R(EDX), M(&temp32));
	BSWAP(32, EDX);
	MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
	MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
	MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
	ADD(32, R(ESI), Imm8(4));
	MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
	POP(EDX);
	POP(ESI);
	RET();
}
|
||||
|
||||
// Emits a stub that appends the 8-byte value in XMM0 to the gather pipe.
void AsmRoutineManager::GenFifoXmm64Write()
{
	// Assume value in XMM0. Assume pre-byteswapped (unlike the others here!)
	PUSH(ESI);
	MOV(32, R(EAX), Imm32((u32)(u64)GPFifo::m_gatherPipe));
	MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
	MOVQ_xmm(MComplex(RAX, RSI, 1, 0), XMM0);
	ADD(32, R(ESI), Imm8(8));
	MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
	POP(ESI);
	RET();
}
|
||||
|
||||
// Emits the shared helper stubs: the CR0 record-bit computation, the five
// direct gather-pipe writers, and the profiled-rejit entry point.
void AsmRoutineManager::GenerateCommon()
{
	// Sets cr_fast[0] from the sign of EAX: LT=0x8, GT=0x4, EQ=0x2.
	// USES_CR
	computeRc = AlignCode16();
	CMP(32, R(EAX), Imm8(0));
	FixupBranch pLesser = J_CC(CC_L);
	FixupBranch pGreater = J_CC(CC_G);
	MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x2)); // _x86Reg == 0
	RET();
	SetJumpTarget(pGreater);
	MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x4)); // _x86Reg > 0
	RET();
	SetJumpTarget(pLesser);
	MOV(8, M(&PowerPC::ppcState.cr_fast[0]), Imm8(0x8)); // _x86Reg < 0
	RET();

	fifoDirectWrite8 = AlignCode4();
	GenFifoWrite(8);
	fifoDirectWrite16 = AlignCode4();
	GenFifoWrite(16);
	fifoDirectWrite32 = AlignCode4();
	GenFifoWrite(32);
	fifoDirectWriteFloat = AlignCode4();
	GenFifoFloatWrite();
	fifoDirectWriteXmm64 = AlignCode4();
	GenFifoXmm64Write();

	// Recompiles a hot block, then re-checks the downcount sign flag and
	// jumps back into the dispatcher.
	doReJit = AlignCode4();
	ABI_AlignStack(0);
	CALL(reinterpret_cast<void *>(&ProfiledReJit));
	ABI_RestoreStack(0);
	SUB(32, M(&CoreTiming::downcount), Imm8(0));  // SUB 0: only refreshes flags
	JMP(dispatcher, true);

	computeRcFp = AlignCode16();  // not yet implemented
	//CMPSD(R(XMM0), M(&zero),
	// TODO

	// Fast write routines - special case the most common hardware write
	// TODO: use this.
	// Even in x86, the param values will be in the right registers.
	/*
	const u8 *fastMemWrite8 = AlignCode16();
	CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
	FixupBranch skip_fast_write = J_CC(CC_NE, false);
	MOV(32, EAX, M(&m_gatherPipeCount));
	MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
	ADD(32, 1, M(&m_gatherPipeCount));
	RET();
	SetJumpTarget(skip_fast_write);
	CALL((void *)&Memory::Write_U8);*/
}
|
|
@ -0,0 +1,88 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _JITASM_H
|
||||
#define _JITASM_H
|
||||
|
||||
#include "x64Emitter.h"
|
||||
|
||||
// In Dolphin, we don't use inline assembly. Instead, we generate all machine-near
|
||||
// code at runtime. In the case of fixed code like this, after writing it, we write
|
||||
// protect the memory, essentially making it work just like precompiled code.
|
||||
|
||||
// There are some advantages to this approach:
|
||||
// 1) No need to setup an external assembler in the build.
|
||||
// 2) Cross platform, as long as it's x86/x64.
|
||||
// 3) Can optimize code at runtime for the specific CPU model.
|
||||
// There aren't really any disadvantages other than having to maintain a x86 emitter,
|
||||
// which we have to do anyway :)
|
||||
//
|
||||
// To add a new asm routine, just add another const here, and add the code to Generate.
|
||||
// Also, possibly increase the size of the code buffer.
|
||||
|
||||
// Owns the runtime-generated fixed assembly stubs (dispatcher loop, CR
// helpers, gather-pipe writers). Generate() fills an 8 KB code space once
// at Init(); the pointers below are entry points into it.
class AsmRoutineManager : public Gen::XCodeBlock
{
private:
	void Generate();
	void GenerateCommon();
	void GenFifoWrite(int size);
	void GenFifoFloatWrite();
	void GenFifoXmm64Write();

public:
	void Init() {
		AllocCodeSpace(8192);
		Generate();
		WriteProtect();  // stubs are fixed after generation
	}

	void Shutdown() {
		FreeCodeSpace();
	}


	// Public generated functions. Just CALL(M((void*)func)) them.

	const u8 *enterCode;       // top-level entry: push regs, run outer loop

	const u8 *dispatcher;          // expects downcount flags already set
	const u8 *dispatcherNoCheck;   // reloads PC, skips the flags check
	const u8 *dispatcherPcInEAX;   // expects emulated PC already in EAX

	const u8 *fpException;     // raises EXCEPTION_FPU_UNAVAILABLE
	const u8 *computeRc;       // CR0 from the sign of EAX
	const u8 *computeRcFp;     // TODO: not generated yet
	const u8 *testExceptions;
	// NOTE(review): appears to be an unused duplicate of dispatcherPcInEAX
	// (never assigned in Generate()) — confirm before relying on it.
	const u8 *dispatchPcInEAX;
	const u8 *doTiming;

	const u8 *fifoDirectWrite8;
	const u8 *fifoDirectWrite16;
	const u8 *fifoDirectWrite32;
	const u8 *fifoDirectWriteFloat;
	const u8 *fifoDirectWriteXmm64;

	const u8 *breakpointBailout;  // landing pad that pops regs and returns

	const u8 *doReJit;            // recompile hot block via ProfiledReJit

	// NOTE(review): shadows the file-scope compareEnabled in JitAsm.cpp —
	// confirm which one is intended to be authoritative.
	bool compareEnabled;
};
|
||||
|
||||
extern AsmRoutineManager asm_routines;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,215 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "Common.h"
|
||||
#include "disasm.h"
|
||||
#include "JitAsm.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
#include "Thunk.h"
|
||||
#include "x64Analyzer.h"
|
||||
|
||||
#include "StringUtil.h"
|
||||
#include "Jit.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
extern u8 *trampolineCodePtr;
|
||||
|
||||
void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) {
|
||||
u64 code_addr = (u64)codePtr;
|
||||
disassembler disasm;
|
||||
char disbuf[256];
|
||||
memset(disbuf, 0, 256);
|
||||
#ifdef _M_IX86
|
||||
disasm.disasm32(0, code_addr, codePtr, disbuf);
|
||||
#else
|
||||
disasm.disasm64(0, code_addr, codePtr, disbuf);
|
||||
#endif
|
||||
PanicAlert("%s\n\n"
|
||||
"Error encountered accessing emulated address %08x.\n"
|
||||
"Culprit instruction: \n%s\nat %08x%08x",
|
||||
text.c_str(), emAddress, disbuf, code_addr>>32, code_addr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
void TrampolineCache::Init()
|
||||
{
|
||||
AllocCodeSpace(1024 * 1024);
|
||||
}
|
||||
|
||||
void TrampolineCache::Shutdown()
|
||||
{
|
||||
AllocCodeSpace(1024 * 1024);
|
||||
}
|
||||
|
||||
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
|
||||
const u8 *TrampolineCache::GetReadTrampoline(const InstructionInfo &info)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
const u8 *trampoline = GetCodePtr();
|
||||
#ifdef _M_X64
|
||||
// It's a read. Easy.
|
||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||
if (addrReg != ABI_PARAM1)
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
|
||||
if (info.displacement) {
|
||||
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
|
||||
}
|
||||
switch (info.operandSize) {
|
||||
case 4:
|
||||
CALL(thunks.ProtectFunction((void *)&Memory::Read_U32, 1));
|
||||
break;
|
||||
}
|
||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||
MOV(32, R(dataReg), R(EAX));
|
||||
RET();
|
||||
#endif
|
||||
return trampoline;
|
||||
}
|
||||
|
||||
// Extremely simplistic - just generate the requested trampoline. May reuse them in the future.
|
||||
const u8 *TrampolineCache::GetWriteTrampoline(const InstructionInfo &info)
|
||||
{
|
||||
if (GetSpaceLeft() < 1024)
|
||||
PanicAlert("Trampoline cache full");
|
||||
|
||||
X64Reg addrReg = (X64Reg)info.scaledReg;
|
||||
X64Reg dataReg = (X64Reg)info.regOperandReg;
|
||||
if (dataReg != EAX)
|
||||
PanicAlert("Backpatch write - not through EAX");
|
||||
|
||||
const u8 *trampoline = GetCodePtr();
|
||||
|
||||
#ifdef _M_X64
|
||||
|
||||
// It's a write. Yay. Remember that we don't have to be super efficient since it's "just" a
|
||||
// hardware access - we can take shortcuts.
|
||||
//if (emAddress == 0xCC008000)
|
||||
// PanicAlert("caught a fifo write");
|
||||
CMP(32, R(addrReg), Imm32(0xCC008000));
|
||||
FixupBranch skip_fast = J_CC(CC_NE, false);
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
CALL((void*)asm_routines.fifoDirectWrite32);
|
||||
RET();
|
||||
SetJumpTarget(skip_fast);
|
||||
ABI_PushAllCallerSavedRegsAndAdjustStack();
|
||||
if (addrReg != ABI_PARAM1) {
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||
} else {
|
||||
MOV(32, R(ABI_PARAM2), R((X64Reg)addrReg));
|
||||
MOV(32, R(ABI_PARAM1), R((X64Reg)dataReg));
|
||||
}
|
||||
if (info.displacement) {
|
||||
ADD(32, R(ABI_PARAM2), Imm32(info.displacement));
|
||||
}
|
||||
switch (info.operandSize) {
|
||||
case 4:
|
||||
CALL(thunks.ProtectFunction((void *)&Memory::Write_U32, 2));
|
||||
break;
|
||||
}
|
||||
ABI_PopAllCallerSavedRegsAndAdjustStack();
|
||||
RET();
|
||||
#endif
|
||||
|
||||
return trampoline;
|
||||
}
|
||||
|
||||
|
||||
// This generates some fairly heavy trampolines, but:
|
||||
// 1) It's really necessary. We don't know anything about the context.
|
||||
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
|
||||
// that many of them in a typical program/game.
|
||||
// This generates some fairly heavy trampolines, but:
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
//    that many of them in a typical program/game.
//
// Called from the fault handler when a fastmem MOV touched unmapped/MMIO
// memory: rewrites the faulting instruction in place into a CALL to a
// generated trampoline (padding the remainder with NOPs) and returns the
// address execution should resume at, or 0 on failure.
// accessType: 0 = read, 1 = write.
const u8 *Jit64::BackPatch(u8 *codePtr, int accessType, u32 emAddress, CONTEXT *ctx)
{
#ifdef _M_X64
	if (!jit.IsInCodeSpace(codePtr))
		return 0;  // this will become a regular crash real soon after this

	InstructionInfo info;
	if (!DisassembleMov(codePtr, info, accessType)) {
		BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
	}

	/*
	if (info.isMemoryWrite) {
		if (!Memory::IsRAMAddress(emAddress, true)) {
			PanicAlert("Exception: Caught write to invalid address %08x", emAddress);
			return;
		}
		BackPatchError("BackPatch - determined that MOV is write, not yet supported and should have been caught before",
			codePtr, emAddress);
	}*/

	if (info.operandSize != 4) {
		BackPatchError(StringFromFormat("BackPatch - no support for operand size %i", info.operandSize), codePtr, emAddress);
	}

	if (info.otherReg != RBX)
		PanicAlert("BackPatch : Base reg not RBX."
		           "\n\nAttempted to access %08x.", emAddress);

	if (accessType == OP_ACCESS_WRITE)
		PanicAlert("BackPatch : Currently only supporting reads."
		           "\n\nAttempted to write to %08x.", emAddress);

	// In the first iteration, we assume that all accesses are 32-bit. We also only deal with reads.
	if (accessType == 0)
	{
		XEmitter emitter(codePtr);
		int bswapNopCount;
		// Check the following BSWAP for REX byte
		if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
			bswapNopCount = 3;
		else
			bswapNopCount = 2;
		const u8 *trampoline = trampolines.GetReadTrampoline(info);
		emitter.CALL((void *)trampoline);
		// Pad what remains of the original MOV + BSWAP after the 5-byte CALL.
		emitter.NOP((int)info.instructionSize + bswapNopCount - 5);
		// Re-execute from the (now patched) faulting instruction.
		return codePtr;
	}
	else if (accessType == 1)
	{
		// TODO: special case FIFO writes. Also, support 32-bit mode.
		// Also, debug this so that it actually works correctly :P
		XEmitter emitter(codePtr - 2);
		// We know it's EAX so the BSWAP before will be two byte. Overwrite it.
		const u8 *trampoline = trampolines.GetWriteTrampoline(info);
		emitter.CALL((void *)trampoline);
		emitter.NOP((int)info.instructionSize - 3);
		if (info.instructionSize < 3)
			PanicAlert("instruction too small");
		// We entered here with a BSWAP-ed EAX. We'll have to swap it back.
		ctx->Rax = Common::swap32((u32)ctx->Rax);
		// Resume at the overwritten BSWAP so the new CALL executes.
		return codePtr - 2;
	}
	return 0;
#else
	return 0;
#endif
}
|
||||
|
|
@ -0,0 +1,346 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// Enable define below to enable oprofile integration. For this to work,
|
||||
// it requires at least oprofile version 0.9.4, and changing the build
|
||||
// system to link the Dolphin executable against libopagent. Since the
|
||||
// dependency is a little inconvenient and this is possibly a slight
|
||||
// performance hit, it's not enabled by default, but it's useful for
|
||||
// locating performance issues.
|
||||
|
||||
#define OPROFILE_REPORT
|
||||
|
||||
#include "Common.h"
|
||||
#include "../../Core.h"
|
||||
#include "MemoryUtil.h"
|
||||
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../../CoreTiming.h"
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "../PPCAnalyst.h"
|
||||
|
||||
#include "x64Emitter.h"
|
||||
#include "x64Analyzer.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
|
||||
#include "disasm.h"
|
||||
|
||||
#ifdef OPROFILE_REPORT
|
||||
#include <opagent.h>
|
||||
#endif
|
||||
|
||||
#ifdef OPROFILE_REPORT
|
||||
op_agent_t agent;
|
||||
#endif
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
#define INVALID_EXIT 0xFFFFFFFF
|
||||
|
||||
|
||||
// Does this block's original PPC code range cover em_address?
bool JitBlock::ContainsAddress(u32 em_address)
{
	// WARNING - THIS DOES NOT WORK WITH INLINING ENABLED.
	const u32 rangeBegin = originalAddress;
	const u32 rangeEnd = originalAddress + originalSize;
	return em_address >= rangeBegin && em_address < rangeEnd;
}
|
||||
|
||||
bool JitBlockCache::IsFull() const
|
||||
{
|
||||
return GetNumBlocks() >= MAX_NUM_BLOCKS - 1;
|
||||
}
|
||||
|
||||
// Allocates the block metadata and code-pointer arrays, opens the oprofile
// JIT agent when enabled, and leaves the cache in a cleared state.
void JitBlockCache::Init()
{
	MAX_NUM_BLOCKS = 65536*2;
	if (Core::g_CoreStartupParameter.bJITUnlimitedCache)
	{
		// 4x larger cache when the user opted out of cache eviction.
		MAX_NUM_BLOCKS = 65536*8;
	}

#ifdef OPROFILE_REPORT
	agent = op_open_agent();
#endif
	blocks = new JitBlock[MAX_NUM_BLOCKS];
	blockCodePointers = new const u8*[MAX_NUM_BLOCKS];

	Clear();
}
|
||||
|
||||
// Frees all block storage and closes the oprofile agent. Safe to call
// before a subsequent Init() (see Reset()).
void JitBlockCache::Shutdown()
{
	delete [] blocks;
	delete [] blockCodePointers;
	// Null the pointers so GetBlockNumberFromStartAddress can detect the
	// torn-down state.
	blocks = 0;
	blockCodePointers = 0;
	num_blocks = 0;
#ifdef OPROFILE_REPORT
	op_close_agent(agent);
#endif
}
|
||||
|
||||
// This clears the JIT cache. It's called from JitCache.cpp when the JIT cache
|
||||
// is full and when saving and loading states.
|
||||
// This clears the JIT cache. It's called from JitCache.cpp when the JIT cache
// is full and when saving and loading states.
void JitBlockCache::Clear()
{
	Core::DisplayMessage("Cleared code cache.", 3000);
	// Is destroying the blocks really necessary?
	for (int i = 0; i < num_blocks; i++)
	{
		// false: restore the original opcode, don't invalidate-only.
		DestroyBlock(i, false);
	}
	links_to.clear();
	num_blocks = 0;
	memset(blockCodePointers, 0, sizeof(u8*)*MAX_NUM_BLOCKS);
}
|
||||
|
||||
// Tears down every block whose flags contain death_flag.
void JitBlockCache::DestroyBlocksWithFlag(BlockFlag death_flag)
{
	for (int block_num = 0; block_num < num_blocks; ++block_num)
	{
		if (blocks[block_num].flags & death_flag)
			DestroyBlock(block_num, false);
	}
}
|
||||
|
||||
// Full teardown followed by re-initialization: frees and reallocates all
// block storage (and reopens the oprofile agent).
void JitBlockCache::Reset()
{
	Shutdown();
	Init();
}
|
||||
|
||||
// Direct, unchecked access to a block's metadata by index.
JitBlock *JitBlockCache::GetBlock(int no)
{
	return blocks + no;
}
|
||||
|
||||
// Number of blocks currently committed to the cache.
int JitBlockCache::GetNumBlocks() const
{
	return num_blocks;
}
|
||||
|
||||
bool JitBlockCache::RangeIntersect(int s1, int e1, int s2, int e2) const
|
||||
{
|
||||
// check if any endpoint is inside the other range
|
||||
if ((s1 >= s2 && s1 <= e2) ||
|
||||
(e1 >= s2 && e1 <= e2) ||
|
||||
(s2 >= s1 && s2 <= e1) ||
|
||||
(e2 >= s1 && e2 <= e1))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
// Claims the next free block slot for the code at em_address, initializes
// its metadata (remembering the original first opcode so it can be restored
// on destruction), and returns the new block number.
int JitBlockCache::AllocateBlock(u32 em_address)
{
	JitBlock &b = blocks[num_blocks];
	b.invalid = false;
	b.originalAddress = em_address;
	b.originalFirstOpcode = Memory::ReadFast32(em_address);
	b.exitAddress[0] = INVALID_EXIT;
	b.exitAddress[1] = INVALID_EXIT;
	b.exitPtrs[0] = 0;
	b.exitPtrs[1] = 0;
	b.linkStatus[0] = false;
	b.linkStatus[1] = false;
	num_blocks++; //commit the current block
	return num_blocks - 1;
}
|
||||
|
||||
// Register the finished native code for block_num, patch the PPC opcode at
// the block's start address so future lookups decode straight to this block,
// and (when block_link is set) wire the block into the link graph.
void JitBlockCache::FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr)
{
	blockCodePointers[block_num] = code_ptr;
	JitBlock &b = blocks[block_num];
	// Overwrite the first PPC opcode with a fake JIT_OPCODE instruction
	// carrying the block number in its low 26 bits (see
	// GetBlockNumberFromStartAddress, which decodes it).
	Memory::WriteUnchecked_U32((JIT_OPCODE << 26) | block_num, blocks[block_num].originalAddress);
	if (block_link)
	{
		// Record which addresses this block exits to, so blocks compiled
		// later at those addresses can be linked back to us.
		for (int i = 0; i < 2; i++)
		{
			if (b.exitAddress[i] != INVALID_EXIT)
				links_to.insert(std::pair<u32, int>(b.exitAddress[i], block_num));
		}

		LinkBlock(block_num);
		LinkBlockExits(block_num);
	}

#ifdef OPROFILE_REPORT
	// Report the emitted code range to oprofile so profiles get symbol names.
	char buf[100];
	sprintf(buf, "EmuCode%x", b.originalAddress);
	const u8* blockStart = blockCodePointers[block_num];
	op_write_native_code(agent, buf, (uint64_t)blockStart,
		blockStart, b.codeSize);
#endif
}
|
||||
|
||||
// Raw table of native entry points, indexed by block number.
const u8 **JitBlockCache::GetCodePointers()
{
	return blockCodePointers;
}
|
||||
|
||||
// Fast lookup: decode the block index that FinalizeBlock embedded into the
// first opcode at addr. Returns -1 when addr is not the verified start of a
// known block.
int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr)
{
	if (!blocks)
		return -1;
	const u32 code = Memory::ReadFast32(addr);
	if ((code >> 26) != JIT_OPCODE)
		return -1;
	// Jitted code: low 26 bits carry the block number.
	const unsigned int block = code & 0x03FFFFFF;
	if (block >= (unsigned int)num_blocks)
		return -1;
	if (blocks[block].originalAddress != addr)
	{
		//_assert_msg_(DYNA_REC, 0, "GetBlockFromAddress %08x - No match - This is BAD", addr);
		return -1;
	}
	return block;
}
|
||||
|
||||
// Slow lookup: append the index of every block whose code range contains
// em_address. Blocks can overlap, so multiple results are possible.
void JitBlockCache::GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers)
{
	for (int block = 0; block < num_blocks; ++block)
	{
		if (blocks[block].ContainsAddress(em_address))
			block_numbers->push_back(block);
	}
}
|
||||
|
||||
// Return the original PPC opcode at address, undoing the JIT_OPCODE patch
// when the address starts a jitted block.
u32 JitBlockCache::GetOriginalCode(u32 address)
{
	const int block_num = GetBlockNumberFromStartAddress(address);
	if (block_num >= 0)
		return blocks[block_num].originalFirstOpcode;
	return Memory::ReadUnchecked_U32(address);
}
|
||||
|
||||
// Native entry point of a block, viewed as a callable function pointer.
CompiledCode JitBlockCache::GetCompiledCodeFromBlock(int blockNumber)
{
	return (CompiledCode)blockCodePointers[blockNumber];
}
|
||||
|
||||
//Block linker
|
||||
//Make sure to have as many blocks as possible compiled before calling this
|
||||
//It's O(N), so it's fast :)
|
||||
//Can be faster by doing a queue for blocks to link up, and only process those
|
||||
//Should probably be done
|
||||
|
||||
void JitBlockCache::LinkBlockExits(int i)
|
||||
{
|
||||
JitBlock &b = blocks[i];
|
||||
if (b.invalid)
|
||||
{
|
||||
// This block is dead. Don't relink it.
|
||||
return;
|
||||
}
|
||||
for (int e = 0; e < 2; e++)
|
||||
{
|
||||
if (b.exitAddress[e] != INVALID_EXIT && !b.linkStatus[e])
|
||||
{
|
||||
int destinationBlock = GetBlockNumberFromStartAddress(b.exitAddress[e]);
|
||||
if (destinationBlock != -1)
|
||||
{
|
||||
XEmitter emit(b.exitPtrs[e]);
|
||||
emit.JMP(blocks[destinationBlock].checkedEntry, true);
|
||||
b.linkStatus[e] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
using namespace std;
|
||||
|
||||
void JitBlockCache::LinkBlock(int i)
|
||||
{
|
||||
LinkBlockExits(i);
|
||||
JitBlock &b = blocks[i];
|
||||
std::map<u32, int>::iterator iter;
|
||||
pair<multimap<u32, int>::iterator, multimap<u32, int>::iterator> ppp;
|
||||
// equal_range(b) returns pair<iterator,iterator> representing the range
|
||||
// of element with key b
|
||||
ppp = links_to.equal_range(b.originalAddress);
|
||||
if (ppp.first == ppp.second)
|
||||
return;
|
||||
for (multimap<u32, int>::iterator iter2 = ppp.first; iter2 != ppp.second; ++iter2) {
|
||||
// PanicAlert("Linking block %i to block %i", iter2->second, i);
|
||||
LinkBlockExits(iter2->second);
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidate a single block: mark it invalid, restore the original PPC
// opcode (when our patch is still intact), and rewrite both of the block's
// native entry points to bounce back to the dispatcher so stale links from
// other blocks cannot execute dead code.
void JitBlockCache::DestroyBlock(int blocknum, bool invalidate)
{
	// Reconstruct the fake opcode FinalizeBlock wrote at the block's start.
	u32 codebytes = (JIT_OPCODE << 26) | blocknum; //generate from i
	JitBlock &b = blocks[blocknum];
	b.invalid = 1;
	if (codebytes == Memory::ReadFast32(b.originalAddress))
	{
		//nobody has changed it, good
		Memory::WriteUnchecked_U32(b.originalFirstOpcode, b.originalAddress);
	}
	else if (!invalidate)
	{
		//PanicAlert("Detected code overwrite");
		//else, we may be in trouble, since we apparently know of this block but it's been
		//overwritten. We should have thrown it out before, on instruction cache invalidate or something.
		//Not necessarily bad though, if a game has simply thrown away a lot of code and is now using the space
		//for something else, then it's fine.
		LOG(MASTER_LOG, "WARNING - ClearCache detected code overwrite @ %08x", blocks[blocknum].originalAddress);
	}

	// We don't unlink blocks, we just send anyone who tries to run them back to the dispatcher.
	// Not entirely ideal, but .. pretty good.

	// TODO - make sure that the below stuff really is safe.

	// Spurious entrances from previously linked blocks can only come through checkedEntry
	XEmitter emit((u8 *)b.checkedEntry);
	emit.MOV(32, M(&PC), Imm32(b.originalAddress));
	emit.JMP(asm_routines.dispatcher, true);

	// Do the same at the main (unchecked) entry point.
	emit.SetCodePtr((u8 *)blockCodePointers[blocknum]);
	emit.MOV(32, M(&PC), Imm32(b.originalAddress));
	emit.JMP(asm_routines.dispatcher, true);
}
|
||||
|
||||
|
||||
// Destroy all blocks overlapping [address, address + length).
// NOTE(review): the second, unconditional "return" below disables the whole
// function - the invalidation loop after it is dead code. Presumably a WIP
// switch-off from this commit; confirm intent before removing it.
void JitBlockCache::InvalidateCodeRange(u32 address, u32 length)
{
	if (!jit.jo.enableBlocklink)
		return;
	return;
	//This is slow but should be safe (zelda needs it for block linking)
	for (int i = 0; i < num_blocks; i++)
	{
		if (RangeIntersect(blocks[i].originalAddress, blocks[i].originalAddress + blocks[i].originalSize,
			address, address + length))
		{
			DestroyBlock(i, true);
		}
	}
}
|
|
@ -0,0 +1,116 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _JITCACHE_H
|
||||
#define _JITCACHE_H
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "../Gekko.h"
|
||||
#include "../PPCAnalyst.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Per-block flag bits, one per quantization register (GQR) - stored in
// JitBlock::flags and tested by JitBlockCache::DestroyBlocksWithFlag.
enum BlockFlag
{
	BLOCK_USE_GQR0 = 0x1, BLOCK_USE_GQR1 = 0x2, BLOCK_USE_GQR2 = 0x4, BLOCK_USE_GQR3 = 0x8,
	BLOCK_USE_GQR4 = 0x10, BLOCK_USE_GQR5 = 0x20, BLOCK_USE_GQR6 = 0x40, BLOCK_USE_GQR7 = 0x80,
};
|
||||
|
||||
// TODO(ector) - optimize this struct for size
// Metadata for one compiled block of PowerPC code.
struct JitBlock
{
	u32 exitAddress[2]; // 0xFFFFFFFF == unknown
	u8 *exitPtrs[2];    // to be able to rewrite the exit jump
	bool linkStatus[2]; // true once the corresponding exit has been linked

	u32 originalAddress;     // PPC address this block was compiled from
	u32 originalFirstOpcode; // to be able to restore the patched first opcode
	u32 codeSize;            // size of the emitted native code
	u32 originalSize;        // size of the source PPC code (used for range invalidation)
	int runCount; // for profiling.

#ifdef _WIN32
	// we don't really need to save start and stop
	// TODO (mb2): ticStart and ticStop -> "local var" mean "in block" ... low priority ;)
	LARGE_INTEGER ticStart;   // for profiling - time.
	LARGE_INTEGER ticStop;    // for profiling - time.
	LARGE_INTEGER ticCounter; // for profiling - time.
#endif
	// Entry point used when entering from previously-linked blocks
	// (see JitBlockCache::DestroyBlock, which repatches it).
	const u8 *checkedEntry;
	bool invalid; // set when the block has been destroyed
	int flags;    // BlockFlag bitmask

	bool ContainsAddress(u32 em_address);
};
|
||||
|
||||
typedef void (*CompiledCode)();
|
||||
|
||||
// Owns all compiled blocks: allocation, lookup by PPC address, exit
// linking, and invalidation/destruction.
class JitBlockCache
{
	const u8 **blockCodePointers;     // native entry point per block index
	JitBlock *blocks;                 // block metadata, indexed by block number
	int num_blocks;                   // number of committed blocks
	std::multimap<u32, int> links_to; // PPC address -> blocks exiting to it
	int MAX_NUM_BLOCKS;

	bool RangeIntersect(int s1, int e1, int s2, int e2) const;
	void LinkBlockExits(int i);
	void LinkBlock(int i);

public:
	JitBlockCache() {}

	int AllocateBlock(u32 em_address);
	void FinalizeBlock(int block_num, bool block_link, const u8 *code_ptr);

	void Clear();
	void Init();
	void Shutdown();
	void Reset();

	bool IsFull() const;

	// Code Cache
	JitBlock *GetBlock(int block_num);
	int GetNumBlocks() const;
	const u8 **GetCodePointers();

	// Fast way to get a block. Only works on the first ppc instruction of a block.
	int GetBlockNumberFromStartAddress(u32 em_address);

	// slower, but can get numbers from within blocks, not just the first instruction.
	// WARNING! WILL NOT WORK WITH INLINING ENABLED (not yet a feature but will be soon)
	// Returns a list of block numbers - only one block can start at a particular address, but they CAN overlap.
	// This one is slow so should only be used for one-shots from the debugger UI, not for anything during runtime.
	void GetBlockNumbersFromAddress(u32 em_address, std::vector<int> *block_numbers);

	// Original (unpatched) opcode at address.
	u32 GetOriginalCode(u32 address);
	CompiledCode GetCompiledCodeFromBlock(int blockNumber);

	// DOES NOT WORK CORRECTLY WITH INLINING
	void InvalidateCodeRange(u32 em_address, u32 length);
	void DestroyBlock(int blocknum, bool invalidate);

	// Not currently used
	void DestroyBlocksWithFlag(BlockFlag death_flag);
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,395 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "../PPCAnalyst.h"
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
using namespace Gen;
|
||||
using namespace PowerPC;
|
||||
|
||||
// Reset the cache for a new block: every x64 register becomes free, clean
// and unlocked, and every PPC register lives at its default memory location.
void RegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	for (int x = 0; x < NUMXREGS; ++x)
	{
		xregs[x].free = true;
		xregs[x].dirty = false;
		xlocks[x] = false;
	}
	for (int p = 0; p < 32; ++p)
	{
		regs[p].location = GetDefaultLocation(p);
		regs[p].away = false;
	}

	// todo: sort to find the most popular regs and preload them
	// (load bursts ain't bad) - but only preload a reg if it is written,
	// or read at least 3 times. Sketch (disabled):
	/*
	int maxPreload = 2;
	for (int i = 0; i < 32; i++)
	{
		if (stats.numReads[i] > 2 || stats.numWrites[i] >= 2)
		{
			LoadToX64(i, true, false); //stats.firstRead[i] <= stats.firstWrite[i], false);
			maxPreload--;
			if (!maxPreload)
				break;
		}
	}*/
}
|
||||
|
||||
// these are powerpc reg indices
|
||||
void RegCache::Lock(int p1, int p2, int p3, int p4)
|
||||
{
|
||||
locks[p1] = true;
|
||||
if (p2 != 0xFF) locks[p2] = true;
|
||||
if (p3 != 0xFF) locks[p3] = true;
|
||||
if (p4 != 0xFF) locks[p4] = true;
|
||||
}
|
||||
|
||||
// these are x64 reg indices
|
||||
void RegCache::LockX(int x1, int x2, int x3, int x4)
|
||||
{
|
||||
if (xlocks[x1]) {
|
||||
PanicAlert("RegCache: x %i already locked!");
|
||||
}
|
||||
xlocks[x1] = true;
|
||||
if (x2 != 0xFF) xlocks[x2] = true;
|
||||
if (x3 != 0xFF) xlocks[x3] = true;
|
||||
if (x4 != 0xFF) xlocks[x4] = true;
|
||||
}
|
||||
|
||||
bool RegCache::IsFreeX(int xreg) const
|
||||
{
|
||||
return xregs[xreg].free && !xlocks[xreg];
|
||||
}
|
||||
|
||||
void RegCache::UnlockAll()
|
||||
{
|
||||
for (int i = 0; i < 32; i++)
|
||||
locks[i] = false;
|
||||
}
|
||||
|
||||
void RegCache::UnlockAllX()
|
||||
{
|
||||
for (int i = 0; i < NUMXREGS; i++)
|
||||
xlocks[i] = false;
|
||||
}
|
||||
|
||||
// Hand out a host register, following the platform allocation order.
// Prefer a register that is already free; otherwise spill an unlocked
// register's PPC value back to memory and reuse it. Asserts if every
// candidate is locked.
X64Reg RegCache::GetFreeXReg()
{
	int aCount;
	const int *aOrder = GetAllocationOrder(aCount);

	// First pass: any register that is both free and unlocked.
	for (int idx = 0; idx < aCount; ++idx)
	{
		const X64Reg candidate = (X64Reg)aOrder[idx];
		if (!xlocks[candidate] && xregs[candidate].free)
			return candidate;
	}

	// Okay, not found :( Force grab one whose PPC register isn't locked.
	//TODO - add a pass to grab xregs whose ppcreg is not used in the next 3 instructions
	for (int idx = 0; idx < aCount; ++idx)
	{
		const X64Reg candidate = (X64Reg)aOrder[idx];
		if (xlocks[candidate])
			continue;
		const int preg = xregs[candidate].ppcReg;
		if (locks[preg])
			continue;
		StoreFromX64(preg);
		return candidate;
	}

	//Still no dice? Die!
	_assert_msg_(DYNA_REC, 0, "Regcache ran out of regs");
	return (X64Reg) -1;
}
|
||||
|
||||
void RegCache::SaveState()
|
||||
{
|
||||
memcpy(saved_locks, locks, sizeof(locks));
|
||||
memcpy(saved_xlocks, xlocks, sizeof(xlocks));
|
||||
memcpy(saved_regs, regs, sizeof(regs));
|
||||
memcpy(saved_xregs, xregs, sizeof(xregs));
|
||||
}
|
||||
|
||||
void RegCache::LoadState()
|
||||
{
|
||||
memcpy(xlocks, saved_xlocks, sizeof(xlocks));
|
||||
memcpy(locks, saved_locks, sizeof(locks));
|
||||
memcpy(regs, saved_regs, sizeof(regs));
|
||||
memcpy(xregs, saved_xregs, sizeof(xregs));
|
||||
}
|
||||
|
||||
// Write the PPC register currently held in reg back to memory, if any.
void RegCache::FlushR(X64Reg reg)
{
	if (reg >= NUMXREGS)
		PanicAlert("Flushing non existent reg");
	if (xregs[reg].free)
		return;
	StoreFromX64(xregs[reg].ppcReg);
}
|
||||
|
||||
void RegCache::SanityCheck() const
|
||||
{
|
||||
for (int i = 0; i < 32; i++) {
|
||||
if (regs[i].away) {
|
||||
if (regs[i].location.IsSimpleReg()) {
|
||||
Gen::X64Reg simple = regs[i].location.GetSimpleReg();
|
||||
if (xlocks[simple]) {
|
||||
PanicAlert("%08x : PPC Reg %i is in locked x64 register %i", /*js.compilerPC*/ 0, i, regs[i].location.GetSimpleReg());
|
||||
}
|
||||
if (xregs[simple].ppcReg != i) {
|
||||
PanicAlert("%08x : Xreg/ppcreg mismatch");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RegCache::DiscardRegContentsIfCached(int preg)
|
||||
{
|
||||
if (regs[preg].away && regs[preg].location.IsSimpleReg())
|
||||
{
|
||||
xregs[regs[preg].location.GetSimpleReg()].free = true;
|
||||
xregs[regs[preg].location.GetSimpleReg()].dirty = false;
|
||||
regs[preg].away = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void GPRRegCache::SetImmediate32(int preg, u32 immValue)
|
||||
{
|
||||
DiscardRegContentsIfCached(preg);
|
||||
regs[preg].away = true;
|
||||
regs[preg].location = Imm32(immValue);
|
||||
}
|
||||
|
||||
// Reset the GPR cache for a new block; all work is done by the base class.
void GPRRegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	RegCache::Start(stats);
}
|
||||
|
||||
// Reset the FPR cache for a new block; all work is done by the base class.
void FPURegCache::Start(PPCAnalyst::BlockRegStats &stats)
{
	RegCache::Start(stats);
}
|
||||
|
||||
// Platform-specific preference order for allocating integer host
// registers; count is set to the table length.
const int *GPRRegCache::GetAllocationOrder(int &count)
{
	static const int allocationOrder[] =
	{
#ifdef _M_X64
#ifdef _WIN32
		RSI, RDI, R12, R13, R14, R8, R9, R10, R11 //, RCX
#else
		RBP, R12, R13, R14, R8, R9, R10, R11, //, RCX
#endif
#elif _M_IX86
		ESI, EDI, EBX, EBP, EDX, ECX,
#endif
	};
	count = sizeof(allocationOrder) / sizeof(const int);
	return allocationOrder;
}
|
||||
|
||||
// Platform-specific preference order for allocating XMM host registers;
// count is set to the table length.
const int *FPURegCache::GetAllocationOrder(int &count)
{
	static const int allocationOrder[] =
	{
#ifdef _M_X64
		XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5
#elif _M_IX86
		XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
#endif
	};
	count = sizeof(allocationOrder) / sizeof(int);
	return allocationOrder;
}
|
||||
|
||||
// A GPR's home location is its slot in ppcState.gpr.
OpArg GPRRegCache::GetDefaultLocation(int reg) const
{
	return M(&ppcState.gpr[reg]);
}
|
||||
|
||||
// An FPR's home location is the first element of its paired-single slot
// in ppcState.ps.
OpArg FPURegCache::GetDefaultLocation(int reg) const
{
	return M(&ppcState.ps[reg][0]);
}
|
||||
|
||||
void RegCache::KillImmediate(int preg)
|
||||
{
|
||||
if (regs[preg].away && regs[preg].location.IsImm())
|
||||
{
|
||||
LoadToX64(preg, true, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Pull PPC register i into a host register (loading its current value when
// doLoad is set), optionally marking it dirty. If it's already in a host
// register, only the dirty flag is updated.
// NOTE(review): the unconditional PanicAlert("BADNESS!") below fires on
// every call - it looks like a WIP/debug marker left in by this commit;
// confirm before relying on this path.
void GPRRegCache::LoadToX64(int i, bool doLoad, bool makeDirty)
{
	PanicAlert("BADNESS!");

	// A register can't be "at home" and an immediate at the same time.
	if (!regs[i].away && regs[i].location.IsImm())
		PanicAlert("Bad immedaite");

	if (!regs[i].away || (regs[i].away && regs[i].location.IsImm()))
	{
		// Not yet in a host register (at home in memory, or an immediate):
		// grab a free host register and bind it.
		X64Reg xr = GetFreeXReg();
		if (xregs[xr].dirty) PanicAlert("Xreg already dirty");
		if (xlocks[xr]) PanicAlert("GetFreeXReg returned locked register");
		xregs[xr].free = false;
		xregs[xr].ppcReg = i;
		// An immediate has no memory copy yet, so it is born dirty.
		xregs[xr].dirty = makeDirty || regs[i].location.IsImm();
		OpArg newloc = ::Gen::R(xr);
		if (doLoad)
			emit->MOV(32, newloc, regs[i].location);
		// Consistency check: no other PPC register may claim this host reg.
		for (int j = 0; j < 32; j++)
		{
			if (i != j && regs[j].location.IsSimpleReg() && regs[j].location.GetSimpleReg() == xr)
			{
				Crash();
			}
		}
		regs[i].away = true;
		regs[i].location = newloc;
	}
	else
	{
		// reg location must be simplereg; memory locations
		// and immediates are taken care of above.
		xregs[RX(i)].dirty |= makeDirty;
	}
	if (xlocks[RX(i)]) {
		PanicAlert("Seriously WTF, this reg should have been flushed");
	}
}
|
||||
|
||||
// Write PPC register i back to its home memory location and release the
// host register (or immediate) that held it. No-op when i is already home.
void GPRRegCache::StoreFromX64(int i)
{
	if (regs[i].away)
	{
		bool doStore;
		if (regs[i].location.IsSimpleReg())
		{
			// Free the host register and remember whether it was dirty.
			X64Reg xr = RX(i);
			xregs[xr].free = true;
			xregs[xr].ppcReg = -1;
			doStore = xregs[xr].dirty;
			xregs[xr].dirty = false;
		}
		else
		{
			//must be immediate - do nothing
			doStore = true;
		}
		OpArg newLoc = GetDefaultLocation(i);
		// The store is intentionally unconditional:
		// if (doStore) //<-- Breaks JIT compilation
		emit->MOV(32, newLoc, regs[i].location);
		regs[i].location = newLoc;
		regs[i].away = false;
	}
}
|
||||
|
||||
// Pull FPR i into an XMM register, optionally loading its current value.
// If it is already in a register, only the dirty flag is updated.
void FPURegCache::LoadToX64(int i, bool doLoad, bool makeDirty)
{
	_assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - load - imm");
	if (!regs[i].away)
	{
		// Reg is at home in the memory register file. Let's pull it out.
		X64Reg xr = GetFreeXReg();
		_assert_msg_(DYNA_REC, xr >= 0 && xr < NUMXREGS, "WTF - load - invalid reg");
		xregs[xr].ppcReg = i;
		xregs[xr].free = false;
		xregs[xr].dirty = makeDirty;
		OpArg newloc = ::Gen::R(xr);
		if (doLoad)
		{
			// MOVAPD requires 16-byte alignment; catch a misaligned home early.
			if (!regs[i].location.IsImm() && (regs[i].location.offset & 0xF))
			{
				PanicAlert("WARNING - misaligned fp register location %i", i);
			}
			emit->MOVAPD(xr, regs[i].location);
		}
		regs[i].location = newloc;
		regs[i].away = true;
	} else {
		// There are no immediates in the FPR reg file, so we already had this in a register. Make dirty as necessary.
		xregs[RX(i)].dirty |= makeDirty;
	}
}
|
||||
|
||||
// Write FPR i from its XMM register back to its home memory location and
// free the register. No-op when i is already home.
void FPURegCache::StoreFromX64(int i)
{
	_assert_msg_(DYNA_REC, !regs[i].location.IsImm(), "WTF - store - imm");
	if (regs[i].away)
	{
		X64Reg xr = regs[i].location.GetSimpleReg();
		_assert_msg_(DYNA_REC, xr >= 0 && xr < NUMXREGS, "WTF - store - invalid reg");
		xregs[xr].free = true;
		xregs[xr].dirty = false;
		xregs[xr].ppcReg = -1;
		OpArg newLoc = GetDefaultLocation(i);
		emit->MOVAPD(newLoc, xr);
		regs[i].location = newLoc;
		regs[i].away = false;
	}
	else
	{
		// Already at home in memory - nothing to do.
		// _assert_msg_(DYNA_REC,0,"already stored");
	}
}
|
||||
|
||||
// Write every cached PPC register back to its home memory location.
// All host- and PPC-register locks must have been released first.
void RegCache::Flush(FlushMode mode)
{
	for (int i = 0; i < NUMXREGS; i++) {
		if (xlocks[i])
			// BUGFIX: message typo "Somone" -> "Someone".
			PanicAlert("Someone forgot to unlock X64 reg %i.", i);
	}
	for (int i = 0; i < 32; i++)
	{
		if (locks[i])
		{
			PanicAlert("Somebody forgot to unlock PPC reg %i.", i);
		}
		if (regs[i].away)
		{
			if (regs[i].location.IsSimpleReg())
			{
				X64Reg xr = RX(i);
				StoreFromX64(i);
				xregs[xr].dirty = false;
			}
			else if (regs[i].location.IsImm())
			{
				StoreFromX64(i);
			}
			else
			{
				_assert_msg_(DYNA_REC,0,"Jit64 - Flush unhandled case, reg %i", i);
			}
		}
	}
}
|
|
@ -0,0 +1,150 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#ifndef _JITREGCACHE_H
|
||||
#define _JITREGCACHE_H
|
||||
|
||||
#include "x64Emitter.h"
|
||||
|
||||
using namespace Gen;
|
||||
// How much of the cache Flush() should write back. Only "everything" exists.
enum FlushMode
{
	FLUSH_ALL
};

// Intended access pattern when grabbing a register.
// NOTE(review): not referenced by the code visible in this file - presumably
// for future use; verify against callers.
enum GrabMode
{
	M_READ = 1,
	M_WRITE = 2,
	M_READWRITE = 3,
};

// Cache state for one PPC register.
struct PPCCachedReg
{
	OpArg location; // where the value currently lives (memory, host reg, or imm)
	bool away; // value not in source register
};

// Cache state for one host (x64) register.
struct X64CachedReg
{
	int ppcReg;  // PPC register currently held here (meaningful when !free)
	bool dirty;  // value differs from the memory copy and must be written back
	bool free;   // register holds no PPC value
};

typedef int XReg;
typedef int PReg;

// Number of host registers available to the allocator per platform.
#ifdef _M_X64
#define NUMXREGS 16
#elif _M_IX86
#define NUMXREGS 8
#endif
|
||||
|
||||
// Base register cache: tracks, per PPC register, whether its value lives in
// the memory register file, a host register, or an immediate, and manages
// allocation/locking of host registers. Subclassed for GPRs and FPRs.
class RegCache
{
private:
	bool locks[32];             // PPC-register locks (locked regs may not be spilled)
	bool saved_locks[32];       // snapshot storage for SaveState/LoadState
	bool saved_xlocks[NUMXREGS];

protected:
	bool xlocks[NUMXREGS];        // host-register locks
	PPCCachedReg regs[32];        // per-PPC-register state
	X64CachedReg xregs[NUMXREGS]; // per-host-register state

	PPCCachedReg saved_regs[32];  // snapshot storage for SaveState/LoadState
	X64CachedReg saved_xregs[NUMXREGS];

	// Drop a cached binding for preg without writing it back.
	void DiscardRegContentsIfCached(int preg);
	// Platform-specific preference order for allocating host registers.
	virtual const int *GetAllocationOrder(int &count) = 0;

	XEmitter *emit; // emitter used for spill/fill code

public:
	virtual ~RegCache() {}
	virtual void Start(PPCAnalyst::BlockRegStats &stats) = 0;

	void SetEmitter(XEmitter *emitter) {emit = emitter;}

	// Write the PPC register held in reg (if any) back to memory.
	void FlushR(X64Reg reg);
	void FlushR(X64Reg reg, X64Reg reg2) {FlushR(reg); FlushR(reg2);}
	// Flush then lock a host register for scratch use.
	void FlushLockX(X64Reg reg) {
		FlushR(reg);
		LockX(reg);
	}
	void FlushLockX(X64Reg reg1, X64Reg reg2) {
		FlushR(reg1); FlushR(reg2);
		LockX(reg1); LockX(reg2);
	}
	// Write all cached registers back to their home locations.
	virtual void Flush(FlushMode mode);
	virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);}
	void SanityCheck() const;
	// Force an immediate-valued register into a host register.
	void KillImmediate(int preg);

	//TODO - instead of doload, use "read", "write"
	//read only will not set dirty flag
	virtual void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true) = 0;
	virtual void StoreFromX64(int preg) = 0;

	// Current location of a PPC register (memory, host reg, or immediate).
	const OpArg &R(int preg) const {return regs[preg].location;}
	// The host register holding preg; panics if preg is not in a register.
	X64Reg RX(int preg) const
	{
		if (regs[preg].away && regs[preg].location.IsSimpleReg())
			return regs[preg].location.GetSimpleReg();
		PanicAlert("Not so simple - %i", preg);
		return (X64Reg)-1;
	}
	// Home memory location for a PPC register.
	virtual OpArg GetDefaultLocation(int reg) const = 0;

	// Register locking.
	void Lock(int p1, int p2=0xff, int p3=0xff, int p4=0xff);
	void LockX(int x1, int x2=0xff, int x3=0xff, int x4=0xff);
	void UnlockAll();
	void UnlockAllX();

	bool IsFreeX(int xreg) const;

	X64Reg GetFreeXReg();

	// Snapshot / restore the entire cache state.
	void SaveState();
	void LoadState();
};
|
||||
|
||||
// Register cache for the 32 general purpose (integer) registers.
class GPRRegCache : public RegCache
{
public:
	void Start(PPCAnalyst::BlockRegStats &stats);
	void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true);
	void StoreFromX64(int preg);
	OpArg GetDefaultLocation(int reg) const;
	const int *GetAllocationOrder(int &count);
	// Cache preg as a constant; no host register is used until needed.
	void SetImmediate32(int preg, u32 immValue);
};
|
||||
|
||||
|
||||
// Register cache for the floating point registers, held in XMM registers.
class FPURegCache : public RegCache
{
public:
	void Start(PPCAnalyst::BlockRegStats &stats);
	void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true);
	void StoreFromX64(int preg);
	const int *GetAllocationOrder(int &count);
	OpArg GetDefaultLocation(int reg) const;
};
|
||||
|
||||
#endif
|
|
@ -0,0 +1,200 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
#include "Common.h"
|
||||
#include "Thunk.h"
|
||||
|
||||
#include "../../Core.h"
|
||||
#include "../PowerPC.h"
|
||||
#include "../../CoreTiming.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitRegCache.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
|
||||
// The branches are known good, or at least reasonably good.
|
||||
// No need for a disable-mechanism.
|
||||
|
||||
// If defined, clears CR0 at blr and bl-s. If the assumption that
|
||||
// flags never carry over between functions holds, then the task for
|
||||
// an optimizer becomes much easier.
|
||||
|
||||
// #define ACID_TEST
|
||||
|
||||
// Zelda and many more games seem to pass the Acid Test.
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
// sc: system call. Flush all cached registers, then exit the block through
// the syscall exception path.
void Jit64::sc(UGeckoInstruction inst)
{
	// turn off from debugger
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
	{
		Default(inst);
		return;
	}

	gpr.Flush(FLUSH_ALL);
	fpr.Flush(FLUSH_ALL);
	WriteExceptionExit(EXCEPTION_SYSCALL);
}
|
||||
|
||||
// rfi: return from interrupt. Restores the masked MSR bits from SRR1 and
// jumps to the address saved in SRR0.
void Jit64::rfi(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
		{Default(inst); return;} // turn off from debugger

	gpr.Flush(FLUSH_ALL);
	fpr.Flush(FLUSH_ALL);
	//Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR.
	//MSR[13] is set to 0.
	const u32 mask = 0x87C0FF73;
	// MSR = (MSR & ~mask) | (SRR1 & mask);
	MOV(32, R(EAX), M(&MSR));
	MOV(32, R(ECX), M(&SRR1));
	AND(32, R(EAX), Imm32(~mask));
	AND(32, R(ECX), Imm32(mask));
	OR(32, R(EAX), R(ECX));
	// MSR &= 0xFFFDFFFF; //TODO: VERIFY
	AND(32, R(EAX), Imm32(0xFFFDFFFF));
	MOV(32, M(&MSR), R(EAX));
	// NPC = SRR0;
	MOV(32, R(EAX), M(&SRR0));
	WriteRfiExitDestInEAX();
}
|
||||
|
||||
// bx: unconditional branch. LK=1 also stores the return address into the
// link register; AA=1 means the target is absolute, not PC-relative.
void Jit64::bx(UGeckoInstruction inst)
{
	if (inst.LK)
		ibuild.EmitStoreLink(ibuild.EmitIntConst(js.compilerPC + 4));

	const u32 displacement = SignExt26(inst.LI << 2);
	const u32 destination = inst.AA ? displacement : js.compilerPC + displacement;

	ibuild.EmitBranchUncond(ibuild.EmitIntConst(destination));
}
|
||||
|
||||
// TODO - optimize to hell and beyond
// TODO - make nice easy to optimize special cases for the most common
// variants of this instruction.
// bcx: conditional branch. Builds IR that tests a CR bit and/or the CTR
// according to the BO field, then emits a conditional branch to the target
// followed by an unconditional branch to the fall-through address.
void Jit64::bcx(UGeckoInstruction inst)
{
	if (inst.LK)
		ibuild.EmitStoreLink(
			ibuild.EmitIntConst(js.compilerPC + 4));

	IREmitter::InstLoc CRTest = 0, CTRTest = 0;
	if ((inst.BO & 16) == 0) // Test a CR bit
	{
		// Isolate the selected bit of the selected CR field.
		IREmitter::InstLoc CRReg = ibuild.EmitLoadCR(inst.BI >> 2);
		IREmitter::InstLoc CRCmp = ibuild.EmitIntConst(8 >> (inst.BI & 3));
		CRTest = ibuild.EmitAnd(CRReg, CRCmp);
		// BO bit 8 flips the sense of the CR test (XOR toggles the bit).
		if (inst.BO & 8)
			CRTest = ibuild.EmitXor(CRTest, CRCmp);
	}

	// Decrement CTR unless BO says not to.
	if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) {
		IREmitter::InstLoc c = ibuild.EmitLoadCTR();
		c = ibuild.EmitSub(c, ibuild.EmitIntConst(1));
		ibuild.EmitStoreCTR(c);
	}

	// Optional CTR condition: test CTR == 0, or use CTR itself (nonzero test).
	if ((inst.BO & 4) == 0) {
		IREmitter::InstLoc c = ibuild.EmitLoadCTR();
		if (!(inst.BO & 2)) {
			CTRTest = ibuild.EmitICmpEq(c,
					ibuild.EmitIntConst(0));
		} else {
			CTRTest = c;
		}
	}

	// Combine the CR and CTR tests into a single branch condition.
	// NOTE(review): the tests are combined with OR - confirm this matches
	// the intended BO semantics for the supported encodings.
	IREmitter::InstLoc Test = CRTest;
	if (CTRTest) {
		if (Test)
			Test = ibuild.EmitOr(Test, CTRTest);
		else
			Test = CTRTest;
	}

	if (!Test) {
		PanicAlert("Unconditional conditional branch?!");
	}

	u32 destination;
	if(inst.AA)
		destination = SignExt16(inst.BD << 2);
	else
		destination = js.compilerPC + SignExt16(inst.BD << 2);

	ibuild.EmitBranchCond(Test, ibuild.EmitIntConst(destination));
	ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
}
|
||||
|
||||
// bcctrx: branch to CTR. Only the always-branch form (BO bit 4 set) is
// supported; the conditional form panics and asserts.
void Jit64::bcctrx(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITBranchOff)
		{Default(inst); return;} // turn off from debugger

	gpr.Flush(FLUSH_ALL);
	fpr.Flush(FLUSH_ALL);

	// bool fastway = true;

	if ((inst.BO & 16) == 0)
	{
		// Conditional bcctr is not implemented in this path.
		PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex);
		_assert_msg_(DYNA_REC, 0, "Bizarro bcctrx");
		/*
		fastway = false;
		MOV(32, M(&PC), Imm32(js.compilerPC+4));
		MOV(32, R(EAX), M(&CR));
		XOR(32, R(ECX), R(ECX));
		AND(32, R(EAX), Imm32(0x80000000 >> inst.BI));

		CCFlags branch;
		if(inst.BO & 8)
			branch = CC_NZ;
		else
			branch = CC_Z;
		*/
		// TODO(ector): Why is this commented out?
		//SETcc(branch, R(ECX));
		// check for EBX
		//TEST(32, R(ECX), R(ECX));
		//linkEnd = J_CC(branch);
	}
	// NPC = CTR & 0xfffffffc;
	MOV(32, R(EAX), M(&CTR));
	if (inst.LK)
		MOV(32, M(&LR), Imm32(js.compilerPC + 4)); // LR = PC + 4;
	AND(32, R(EAX), Imm32(0xFFFFFFFC));
	WriteExitDestInEAX(0);
}
|
||||
|
||||
|
||||
// Compile bclrx (branch conditional to LR). Only the exact "blr" encoding
// (0x4e800020: branch always, no link) goes through the IR builder; every
// other variant falls back to the interpreter.
void Jit64::bclrx(UGeckoInstruction inst)
{
	if (inst.hex != 0x4e800020) {
		Default(inst);
		return;
	}
	// blr: unconditional jump to the address held in the link register.
	IREmitter::InstLoc target = ibuild.EmitLoadLink();
	ibuild.EmitBranchUncond(target);
}
|
||||
|
|
@ -0,0 +1,224 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "Common.h"
|
||||
|
||||
#include "../../Core.h"
|
||||
#include "../PowerPC.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
#define INSTRUCTION_START
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
|
||||
// 16-byte-aligned SSE constants used by the FP emitters below:
// sign-bit mask, absolute-value mask, and {1.0, 1.0} for paired singles.
const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};

// Emit a two-operand scalar-double FP operation d = a <op> b, choosing a
// register-shuffle strategy based on which operands alias d.
//   reversible: op is commutative, so d==b can be computed as d <op> a.
//   dupe:       result must be rounded to single and duplicated into both
//               halves of the register (paired-single semantics, OPCD 59).
//   op:         XEmitter member (ADDSD/SUBSD/MULSD/DIVSD).
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
{
	fpr.Lock(d, a, b);
	if (d == a)
	{
		// d op= b in place.
		fpr.LoadToX64(d, true);
		(this->*op)(fpr.RX(d), fpr.R(b));
	}
	else if (d == b && reversible)
	{
		// Commutative: compute d op= a in place.
		fpr.LoadToX64(d, true);
		(this->*op)(fpr.RX(d), fpr.R(a));
	}
	else if (a != d && b != d)
	{
		// Sources different from d, can use rather quick solution
		fpr.LoadToX64(d, !dupe);
		MOVSD(fpr.RX(d), fpr.R(a));
		(this->*op)(fpr.RX(d), fpr.R(b));
	}
	else if (b != d)
	{
		// d == a was handled above, so save b through XMM0 first.
		fpr.LoadToX64(d, !dupe);
		MOVSD(XMM0, fpr.R(b));
		MOVSD(fpr.RX(d), fpr.R(a));
		(this->*op)(fpr.RX(d), Gen::R(XMM0));
	}
	else // Other combo, must use two temps :(
	{
		// d == b and op not reversible: compute in XMM0/XMM1, then store.
		MOVSD(XMM0, fpr.R(a));
		MOVSD(XMM1, fpr.R(b));
		fpr.LoadToX64(d, !dupe);
		(this->*op)(XMM0, Gen::R(XMM1));
		MOVSD(fpr.RX(d), Gen::R(XMM0));
	}
	if (dupe) {
		// Round to single precision and replicate into both doubles.
		ForceSinglePrecisionS(fpr.RX(d));
		MOVDDUP(fpr.RX(d), fpr.R(d));
	}
	fpr.UnlockAll();
}
|
||||
|
||||
// Dispatch scalar FP arithmetic (fadd/fsub/fmul/fdiv and their single
// variants) to fp_tri_op. OPCD 59 is the single-precision form, which
// requires the dupe (round + duplicate) behavior. fsel/fres and any
// Rc (record) form fall back to the interpreter.
void Jit64::fp_arith_s(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		Default(inst); return;
	}
	bool dupe = inst.OPCD == 59;
	switch (inst.SUBOP5)
	{
	case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
	case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
	case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
	case 23: //sel
		Default(inst);
		break;
	case 24: //res
		Default(inst);
		break;
	// Note: fmul takes its second operand from FC, not FB.
	case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
	default:
		_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
	}
}
|
||||
|
||||
// Compile the fused multiply-add family (fmsub/fmadd/fnmsub/fnmadd and
// their single forms). Computes a*c +/- b in XMM0 (as separate mul and
// add/sub, not a true fused operation), negates via sign-bit XOR for the
// nm* forms, then stores to FD. Rc forms fall back to the interpreter.
void Jit64::fmaddXX(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		Default(inst); return;
	}

	// OPCD 59 = single-precision variant: result must be rounded and duped.
	bool single_precision = inst.OPCD == 59;

	int a = inst.FA;
	int b = inst.FB;
	int c = inst.FC;
	int d = inst.FD;

	fpr.Lock(a, b, c, d);
	MOVSD(XMM0, fpr.R(a));
	switch (inst.SUBOP5)
	{
	case 28: //msub
		MULSD(XMM0, fpr.R(c));
		SUBSD(XMM0, fpr.R(b));
		break;
	case 29: //madd
		MULSD(XMM0, fpr.R(c));
		ADDSD(XMM0, fpr.R(b));
		break;
	case 30: //nmsub
		MULSD(XMM0, fpr.R(c));
		SUBSD(XMM0, fpr.R(b));
		// Negate by flipping the sign bit (note: also flips the sign of NaNs).
		XORPD(XMM0, M((void*)&psSignBits2));
		break;
	case 31: //nmadd
		MULSD(XMM0, fpr.R(c));
		ADDSD(XMM0, fpr.R(b));
		XORPD(XMM0, M((void*)&psSignBits2));
		break;
	}
	fpr.LoadToX64(d, false);
	//YES it is necessary to dupe the result :(
	//TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
	if (single_precision) {
		ForceSinglePrecisionS(XMM0);
		MOVDDUP(fpr.RX(d), R(XMM0));
	} else {
		MOVSD(fpr.RX(d), R(XMM0));
	}
	fpr.UnlockAll();
}
|
||||
|
||||
// Compile fmr: copy FB's low double into FD, preserving FD's upper half
// (MOVSD into a register only writes the low 64 bits). Rc form falls back.
// NOTE(review): unlike the other FP emitters, this never calls
// fpr.Lock/UnlockAll around the register-cache accesses — confirm whether
// that is safe here or an oversight.
void Jit64::fmrx(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		Default(inst); return;
	}
	int d = inst.FD;
	int b = inst.FB;
	fpr.LoadToX64(d, true); // we don't want to destroy the high bit
	MOVSD(fpr.RX(d), fpr.R(b));
}
|
||||
|
||||
// Compile fcmpu/fcmpo: compare FA with FB and write 8/4/2 (LT/GT/EQ) into
// the fast CR field for CRFD, using COMISD/UCOMISD and the x86 flags.
// NOTE(review): the unordered (NaN) result, FPSCR.FPRF, and exception
// bits are not set here — when accurate flags are requested this defers
// to the interpreter.
void Jit64::fcmpx(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (jo.fpAccurateFlags)
	{
		Default(inst);
		return;
	}
	// SUBOP10 32 is fcmpo (ordered); 0 is fcmpu (unordered).
	bool ordered = inst.SUBOP10 == 32;
	/*
	double fa = rPS0(_inst.FA);
	double fb = rPS0(_inst.FB);
	u32 compareResult;

	if(IsNAN(fa) || IsNAN(fb)) compareResult = 1;
	else if(fa < fb) compareResult = 8;
	else if(fa > fb) compareResult = 4;
	else compareResult = 2;

	FPSCR.FPRF = compareResult;
	CR = (CR & (~(0xf0000000 >> (_inst.CRFD * 4)))) | (compareResult << ((7 - _inst.CRFD) * 4));
	*/
	int a = inst.FA;
	int b = inst.FB;
	int crf = inst.CRFD;
	int shift = crf * 4; // currently unused; kept from the bit-shift CR scheme
	//FPSCR
	//XOR(32,R(EAX),R(EAX));

	fpr.Lock(a,b);
	if (a != b)
		fpr.LoadToX64(a, true);

	// USES_CR
	// COMISD/UCOMISD set CF/ZF like an unsigned compare: B = less, A = greater.
	if (ordered)
		COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
	else
		UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
	FixupBranch pLesser = J_CC(CC_B);
	FixupBranch pGreater = J_CC(CC_A);
	// Fallthrough: equal (NaN also lands here since ZF,CF set on unordered).
	MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
	FixupBranch continue1 = J();
	// _x86Reg > 0
	SetJumpTarget(pGreater);
	MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
	FixupBranch continue2 = J();
	// _x86Reg < 0
	SetJumpTarget(pLesser);
	MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
	SetJumpTarget(continue1);
	SetJumpTarget(continue2);
	fpr.UnlockAll();
}
|
|
@ -0,0 +1,520 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "../../Core.h" // include "Common.h", "CoreParameter.h", SCoreStartupParameter
|
||||
#include "../PowerPC.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
#include "JitAsm.h"
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
|
||||
// Emit IR that updates CR0 from a result value, as the Rc (record) bit
// requires: a signed compare of val against zero stored into CR field 0.
static void ComputeRC(IREmitter::IRBuilder& ibuild,
		      IREmitter::InstLoc val) {
	IREmitter::InstLoc res =
		ibuild.EmitICmpCRSigned(val, ibuild.EmitIntConst(0));
	ibuild.EmitStoreCR(res, 0);
}
|
||||
|
||||
// Compile the register-immediate integer group (addi/addis, ori/oris,
// andi./andis., xori/xoris, addic/addic.) through the IR builder.
// Note the PowerPC quirks encoded here: addi/addis treat rA==0 as the
// literal value 0 (no register read), and andi./andis. always set CR0.
void Jit64::reg_imm(UGeckoInstruction inst)
{
	int d = inst.RD, a = inst.RA, s = inst.RS;
	IREmitter::InstLoc val, test, c;
	switch (inst.OPCD)
	{
	case 14: //addi
		val = ibuild.EmitIntConst(inst.SIMM_16);
		if (a)
			val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), val);
		ibuild.EmitStoreGReg(val, d);
		break;
	case 15: //addis
		val = ibuild.EmitIntConst(inst.SIMM_16 << 16);
		if (a)
			val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), val);
		ibuild.EmitStoreGReg(val, d);
		break;
	case 24: //ori
		val = ibuild.EmitIntConst(inst.UIMM);
		val = ibuild.EmitOr(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 25: //oris
		val = ibuild.EmitIntConst(inst.UIMM << 16);
		val = ibuild.EmitOr(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 28: //andi
		val = ibuild.EmitIntConst(inst.UIMM);
		val = ibuild.EmitAnd(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		// andi. always records into CR0 (no Rc bit in this encoding).
		ComputeRC(ibuild, val);
		break;
	case 29: //andis
		val = ibuild.EmitIntConst(inst.UIMM << 16);
		val = ibuild.EmitAnd(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		ComputeRC(ibuild, val);
		break;
	case 26: //xori
		val = ibuild.EmitIntConst(inst.UIMM);
		val = ibuild.EmitXor(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 27: //xoris
		val = ibuild.EmitIntConst(inst.UIMM << 16);
		val = ibuild.EmitXor(ibuild.EmitLoadGReg(s), val);
		ibuild.EmitStoreGReg(val, a);
		break;
	case 12: //addic
	case 13: //addic_rc
		c = ibuild.EmitIntConst(inst.SIMM_16);
		val = ibuild.EmitAdd(ibuild.EmitLoadGReg(a), c);
		ibuild.EmitStoreGReg(val, d);
		// Unsigned carry out of the add: result wrapped iff result < addend.
		test = ibuild.EmitICmpUgt(c, val);
		ibuild.EmitStoreCarry(test);
		if (inst.OPCD == 13)
			ComputeRC(ibuild, val);
		break;
	default:
		Default(inst);
		break;
	}
}
|
||||
|
||||
// Compile the compare family: cmp/cmpl (OPCD 31, register-register),
// cmpli (OPCD 10, unsigned immediate) and cmpi (OPCD 11, signed
// immediate). The CR-typed compare result is stored into field CRFD.
void Jit64::cmpXX(UGeckoInstruction inst)
{
	IREmitter::InstLoc lhs, rhs, res;
	lhs = ibuild.EmitLoadGReg(inst.RA);
	if (inst.OPCD == 31) {
		rhs = ibuild.EmitLoadGReg(inst.RB);
		// SUBOP10 32 = cmpl (unsigned), 0 = cmp (signed).
		if (inst.SUBOP10 == 32) {
			res = ibuild.EmitICmpCRUnsigned(lhs, rhs);
		} else {
			res = ibuild.EmitICmpCRSigned(lhs, rhs);
		}
	} else if (inst.OPCD == 10) {
		rhs = ibuild.EmitIntConst(inst.UIMM);
		res = ibuild.EmitICmpCRUnsigned(lhs, rhs);
	} else { // inst.OPCD == 11
		rhs = ibuild.EmitIntConst(inst.SIMM_16);
		res = ibuild.EmitICmpCRSigned(lhs, rhs);
	}

	ibuild.EmitStoreCR(res, inst.CRFD);
}
|
||||
|
||||
// rA = rS | rB, optionally recording CR0 (this also covers mr, which is
// encoded as or rA,rS,rS).
void Jit64::orx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
	val = ibuild.EmitOr(ibuild.EmitLoadGReg(inst.RS), val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}


// m_GPR[_inst.RA] = m_GPR[_inst.RS] ^ m_GPR[_inst.RB];
void Jit64::xorx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
	val = ibuild.EmitXor(ibuild.EmitLoadGReg(inst.RS), val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}

// rA = rS & rB, optionally recording CR0.
void Jit64::andx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
	val = ibuild.EmitAnd(ibuild.EmitLoadGReg(inst.RS), val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}

// extsb: rA = sign-extended low byte of rS.
void Jit64::extsbx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
	val = ibuild.EmitSExt8(val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}

// extsh: rA = sign-extended low halfword of rS.
void Jit64::extshx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
	val = ibuild.EmitSExt16(val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}
|
||||
|
||||
// subfic: rD = SIMM - rA, computed as ~rA + (SIMM + 1) in raw x86.
// NOTE(review): subfic architecturally sets the carry bit, but the
// GenerateCarry call below is commented out — carry is NOT updated here.
void Jit64::subfic(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
		{Default(inst); return;} // turn off from debugger

	INSTRUCTION_START;
	int a = inst.RA, d = inst.RD;
	gpr.FlushLockX(ECX);
	gpr.Lock(a, d);
	gpr.LoadToX64(d, a == d, true);
	int imm = inst.SIMM_16;
	MOV(32, R(EAX), gpr.R(a));
	NOT(32, R(EAX));
	ADD(32, R(EAX), Imm32(imm + 1));
	MOV(32, gpr.R(d), R(EAX));
	//GenerateCarry(ECX);
	gpr.UnlockAll();
	gpr.UnlockAllX();
	// This instruction has no RC flag
}
|
||||
|
||||
// subfc: not implemented — always deferred to the interpreter. The
// commented block is the interpreter reference semantics (carry included).
void Jit64::subfcx(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	Default(inst);
	return;
	/*
	u32 a = m_GPR[_inst.RA];
	u32 b = m_GPR[_inst.RB];
	m_GPR[_inst.RD] = b - a;
	SetCarry(a == 0 || Helper_Carry(b, 0-a));

	if (_inst.OE) PanicAlert("OE: subfcx");
	if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]);
	*/
}

// subfe: not implemented — always deferred to the interpreter. The
// commented block is the interpreter reference semantics (carry-in used).
void Jit64::subfex(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	Default(inst);
	return;
	/*
	u32 a = m_GPR[_inst.RA];
	u32 b = m_GPR[_inst.RB];
	int carry = GetCarry();
	m_GPR[_inst.RD] = (~a) + b + carry;
	SetCarry(Helper_Carry(~a, b) || Helper_Carry((~a) + b, carry));

	if (_inst.OE) PanicAlert("OE: subfcx");
	if (_inst.Rc) Helper_UpdateCR0(m_GPR[_inst.RD]);
	*/
}
|
||||
|
||||
// subf: rD = rB - rA (note the reversed operand order vs. "sub").
// Overflow (OE) is not implemented.
void Jit64::subfx(UGeckoInstruction inst)
{
	if (inst.OE) PanicAlert("OE: subfx");
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
	val = ibuild.EmitSub(val, ibuild.EmitLoadGReg(inst.RA));
	ibuild.EmitStoreGReg(val, inst.RD);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}

// mulli: rD = rA * SIMM (low 32 bits). No flags are affected.
void Jit64::mulli(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA);
	val = ibuild.EmitMul(val, ibuild.EmitIntConst(inst.SIMM_16));
	ibuild.EmitStoreGReg(val, inst.RD);
}

// mullw: rD = low 32 bits of rA * rB, optionally recording CR0.
void Jit64::mullwx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RB);
	val = ibuild.EmitMul(ibuild.EmitLoadGReg(inst.RA), val);
	ibuild.EmitStoreGReg(val, inst.RD);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}
|
||||
|
||||
// mulhwu: rD = high 32 bits of the unsigned 64-bit product rA * rB.
// Uses the x86 MUL instruction, which leaves the high half in EDX.
void Jit64::mulhwux(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
		{Default(inst); return;} // turn off from debugger

	INSTRUCTION_START;
	int a = inst.RA, b = inst.RB, d = inst.RD;
	// MUL clobbers EDX:EAX, so EDX must not be holding a cached guest reg.
	gpr.FlushLockX(EDX);
	gpr.Lock(a, b, d);
	if (d != a && d != b) {
		gpr.LoadToX64(d, false, true);
	} else {
		gpr.LoadToX64(d, true, true);
	}
	if (gpr.RX(d) == EDX)
		PanicAlert("mulhwux : WTF");
	MOV(32, R(EAX), gpr.R(a));
	gpr.KillImmediate(b);
	MUL(32, gpr.R(b));
	gpr.UnlockAll();
	gpr.UnlockAllX();
	if (inst.Rc) {
		// computeRc expects the value in EAX.
		MOV(32, R(EAX), R(EDX));
		MOV(32, gpr.R(d), R(EDX));
		// result is already in eax
		CALL((u8*)asm_routines.computeRc);
	} else {
		MOV(32, gpr.R(d), R(EDX));
	}
}
|
||||
|
||||
// skipped some of the special handling in here - if we get crashes, let the interpreter handle this op
// NOTE(review): the emitter below is currently disabled by the early
// "Default(inst); return;" — everything after it is dead code kept for
// when divide-by-zero handling is added (DIV faults on a zero divisor).
void Jit64::divwux(UGeckoInstruction inst) {
	Default(inst); return;

	int a = inst.RA, b = inst.RB, d = inst.RD;
	gpr.FlushLockX(EDX);
	gpr.Lock(a, b, d);
	if (d != a && d != b) {
		gpr.LoadToX64(d, false, true);
	} else {
		gpr.LoadToX64(d, true, true);
	}
	MOV(32, R(EAX), gpr.R(a));
	XOR(32, R(EDX), R(EDX));
	gpr.KillImmediate(b);
	DIV(32, gpr.R(b));
	MOV(32, gpr.R(d), R(EAX));
	gpr.UnlockAll();
	gpr.UnlockAllX();
	if (inst.Rc) {
		CALL((u8*)asm_routines.computeRc);
	}
}
|
||||
|
||||
// Build the 32-bit PowerPC rotate mask selecting bits mb..me inclusive,
// in big-endian bit numbering (bit 0 is the most significant bit).
// When mb > me the selection wraps around, which is the complement of
// the straight mb..me range.
u32 Helper_Mask(u8 mb, u8 me)
{
	const u32 from_mb = (u32)-1 >> mb;
	const u32 past_me = (me >= 31) ? 0 : ((u32)-1 >> (me + 1));
	const u32 straight = from_mb ^ past_me;
	return (mb > me) ? ~straight : straight;
}
|
||||
|
||||
// add: rD = rA + rB, optionally recording CR0 via the Rc bit.
void Jit64::addx(UGeckoInstruction inst)
{
	IREmitter::InstLoc rhs = ibuild.EmitLoadGReg(inst.RB);
	IREmitter::InstLoc sum = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), rhs);
	ibuild.EmitStoreGReg(sum, inst.RD);
	if (inst.Rc)
		ComputeRC(ibuild, sum);
}
|
||||
|
||||
// This can be optimized
// adde: rD = rA + rB + carry. Currently disabled — the early return
// defers to the interpreter; the dead code below is the x86 path that
// reloads carry from XER and uses ADC, kept for future use.
// NOTE(review): in the dead path, GenerateCarry is commented out, so
// carry-out would not be written back.
void Jit64::addex(UGeckoInstruction inst)
{
	Default(inst); return;
	// USES_XER
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
		{Default(inst); return;} // turn off from debugger

	INSTRUCTION_START;
	int a = inst.RA, b = inst.RB, d = inst.RD;
	gpr.FlushLockX(ECX);
	gpr.Lock(a, b, d);
	if (d != a && d != b)
		gpr.LoadToX64(d, false);
	else
		gpr.LoadToX64(d, true);
	MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
	SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
	MOV(32, R(EAX), gpr.R(a));
	ADC(32, R(EAX), gpr.R(b));
	MOV(32, gpr.R(d), R(EAX));
	//GenerateCarry(ECX);
	gpr.UnlockAll();
	gpr.UnlockAllX();
	if (inst.Rc)
	{
		CALL((u8*)asm_routines.computeRc);
	}
}
|
||||
|
||||
// rlwinm: rA = (rS rotated left by SH) & mask(MB,ME). This single
// instruction encodes PowerPC's shifts, extracts and clears.
void Jit64::rlwinmx(UGeckoInstruction inst)
{
	unsigned mask = Helper_Mask(inst.MB, inst.ME);
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
	val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH));
	val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask));
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}


// rlwimi: insert (rS rotated left by SH) into rA under mask(MB,ME);
// bits of rA outside the mask are preserved.
void Jit64::rlwimix(UGeckoInstruction inst)
{
	unsigned mask = Helper_Mask(inst.MB, inst.ME);
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS);
	val = ibuild.EmitRol(val, ibuild.EmitIntConst(inst.SH));
	val = ibuild.EmitAnd(val, ibuild.EmitIntConst(mask));
	IREmitter::InstLoc ival = ibuild.EmitLoadGReg(inst.RA);
	ival = ibuild.EmitAnd(ival, ibuild.EmitIntConst(~mask));
	val = ibuild.EmitOr(ival, val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}
|
||||
|
||||
// rlwnm: rA = (rS rotated left by rB & 31) & mask(MB,ME). Emitted as
// raw x86 (ROL needs the count in CL). Immediate-valued rA defers to
// the interpreter.
void Jit64::rlwnmx(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
		{Default(inst); return;} // turn off from debugger

	INSTRUCTION_START;
	int a = inst.RA, b = inst.RB, s = inst.RS;
	if (gpr.R(a).IsImm())
	{
		Default(inst);
		return;
	}

	u32 mask = Helper_Mask(inst.MB, inst.ME);
	// x86 variable rotates take the count in CL.
	gpr.FlushLockX(ECX);
	gpr.Lock(a, b, s);
	MOV(32, R(EAX), gpr.R(s));
	MOV(32, R(ECX), gpr.R(b));
	AND(32, R(ECX), Imm32(0x1f));
	ROL(32, R(EAX), R(ECX));
	AND(32, R(EAX), Imm32(mask));
	MOV(32, gpr.R(a), R(EAX));
	gpr.UnlockAll();
	gpr.UnlockAllX();
	if (inst.Rc)
	{
		MOV(32, R(EAX), gpr.R(a));
		CALL((u8*)asm_routines.computeRc);
	}
}
|
||||
|
||||
// neg: rD = -rA, emitted as 0 - rA; optionally records CR0.
void Jit64::negx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RA);
	val = ibuild.EmitSub(ibuild.EmitIntConst(0), val);
	ibuild.EmitStoreGReg(val, inst.RD);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}
|
||||
|
||||
// srw: rA = rS >> (rB & 63), where shift amounts of 32..63 must yield 0.
// Since 32-bit shifts only honor the low 5 bits, a correction mask is
// built from bit 5 of the amount: (amt << 26) >> 31 is all-ones when
// bit 5 is set, which after the XOR with -1 zeroes the result.
void Jit64::srwx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
	                   samt = ibuild.EmitLoadGReg(inst.RB),
	                   corr;
	// FIXME: We can do better with a cmov
	// FIXME: We can do better on 64-bit
	val = ibuild.EmitShrl(val, samt);
	corr = ibuild.EmitShl(samt, ibuild.EmitIntConst(26));
	corr = ibuild.EmitSarl(corr, ibuild.EmitIntConst(31));
	corr = ibuild.EmitXor(corr, ibuild.EmitIntConst(-1));
	val = ibuild.EmitAnd(corr, val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}

// slw: rA = rS << (rB & 63); same big-shift-to-zero correction as srwx.
void Jit64::slwx(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS),
	                   samt = ibuild.EmitLoadGReg(inst.RB),
	                   corr;
	// FIXME: We can do better with a cmov
	// FIXME: We can do better on 64-bit
	val = ibuild.EmitShl(val, samt);
	corr = ibuild.EmitShl(samt, ibuild.EmitIntConst(26));
	corr = ibuild.EmitSarl(corr, ibuild.EmitIntConst(31));
	corr = ibuild.EmitXor(corr, ibuild.EmitIntConst(-1));
	val = ibuild.EmitAnd(corr, val);
	ibuild.EmitStoreGReg(val, inst.RA);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}
|
||||
|
||||
// sraw: rA = rS arithmetically shifted right by rB, with carry set when
// the shifted-out bits of a negative value are non-zero. The double
// Sarl and the (amt<<26)>>31 & 31 trick handle shift amounts >= 32
// (which must saturate the arithmetic shift at 31).
// NOTE(review): the carry derivation via the twice-shifted mask is
// intricate — verify against the interpreter before relying on it.
void Jit64::srawx(UGeckoInstruction inst)
{
	// FIXME: We can do a lot better on 64-bit
	IREmitter::InstLoc val, samt, mask, mask2, test;
	val = ibuild.EmitLoadGReg(inst.RS);
	samt = ibuild.EmitLoadGReg(inst.RB);
	mask = ibuild.EmitIntConst(-1);
	val = ibuild.EmitSarl(val, samt);
	mask = ibuild.EmitShl(mask, samt);
	samt = ibuild.EmitShl(samt, ibuild.EmitIntConst(26));
	samt = ibuild.EmitSarl(samt, ibuild.EmitIntConst(31));
	samt = ibuild.EmitAnd(samt, ibuild.EmitIntConst(31));
	val = ibuild.EmitSarl(val, samt);
	ibuild.EmitStoreGReg(val, inst.RA);
	mask = ibuild.EmitShl(mask, samt);
	mask2 = ibuild.EmitAnd(mask, ibuild.EmitIntConst(0x7FFFFFFF));
	test = ibuild.EmitOr(val, mask2);
	test = ibuild.EmitICmpUgt(test, mask);
	ibuild.EmitStoreCarry(test);
}

// srawi: rA = rS arithmetically shifted right by the immediate SH;
// carry is set when rS is negative and any shifted-out bit was 1.
void Jit64::srawix(UGeckoInstruction inst)
{
	IREmitter::InstLoc val = ibuild.EmitLoadGReg(inst.RS), test;
	val = ibuild.EmitSarl(val, ibuild.EmitIntConst(inst.SH));
	ibuild.EmitStoreGReg(val, inst.RA);
	unsigned mask = -1u << inst.SH;
	test = ibuild.EmitOr(val, ibuild.EmitIntConst(mask & 0x7FFFFFFF));
	test = ibuild.EmitICmpUgt(test, ibuild.EmitIntConst(mask));

	ibuild.EmitStoreCarry(test);
	if (inst.Rc)
		ComputeRC(ibuild, val);
}
|
||||
|
||||
// count leading zeroes
// cntlzw: rA = number of leading zero bits in rS (0..32). BSR gives the
// index of the highest set bit; XOR with 31 converts it to a leading-zero
// count. BSR leaves the destination undefined and sets ZF on zero input,
// so the MOV of 63 on that path yields 63 ^ 31 = 32, the correct result.
void Jit64::cntlzwx(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
		{Default(inst); return;} // turn off from debugger

	INSTRUCTION_START;
	int a = inst.RA;
	int s = inst.RS;
	if (gpr.R(a).IsImm() || gpr.R(s).IsImm() || s == a)
	{
		Default(inst);
		return;
	}
	gpr.Lock(a,s);
	gpr.LoadToX64(a,false);
	BSR(32, gpr.R(a).GetSimpleReg(), gpr.R(s));
	FixupBranch gotone = J_CC(CC_NZ);
	MOV(32, gpr.R(a), Imm32(63));
	SetJumpTarget(gotone);
	XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
	gpr.UnlockAll();

	if (inst.Rc)
	{
		MOV(32, R(EAX), gpr.R(a));
		CALL((u8*)asm_routines.computeRc);
		// TODO: Check PPC manual too
	}
}
|
|
@ -0,0 +1,198 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
|
||||
// Should give a very noticable speed boost to paired single heavy code.
|
||||
|
||||
#include "Common.h"
|
||||
#include "Thunk.h"
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../../Core.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "../../HW/CommandProcessor.h"
|
||||
#include "../../HW/PixelEngine.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
|
||||
// lbzx: rD = zero-extended byte at EA = (rA|0) + rB.
// rA == 0 means the literal value 0, not r0.
void Jit64::lbzx(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
	if (inst.RA)
		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
	ibuild.EmitStoreGReg(ibuild.EmitLoad8(addr), inst.RD);
}

// lwzx: rD = word at EA = (rA|0) + rB.
void Jit64::lwzx(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
	if (inst.RA)
		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
	ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD);
}

// lhax: rD = sign-extended halfword at EA = (rA|0) + rB.
void Jit64::lhax(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
	if (inst.RA)
		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
	IREmitter::InstLoc val = ibuild.EmitLoad16(addr);
	val = ibuild.EmitSExt16(val);
	ibuild.EmitStoreGReg(val, inst.RD);
}
|
||||
|
||||
// lwz/lhz/lbz: zero-extending load, rD = mem[(rA|0) + SIMM].
// The opcode selects the access width (32 = lwz, 40 = lhz, 34 = lbz).
void Jit64::lXz(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16);
	if (inst.RA)
		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
	IREmitter::InstLoc val;
	switch (inst.OPCD)
	{
	case 32: val = ibuild.EmitLoad32(addr); break; //lwz
	case 40: val = ibuild.EmitLoad16(addr); break; //lhz
	case 34: val = ibuild.EmitLoad8(addr); break; //lbz
	default:
		// BUG FIX: previously fell through with 'val' uninitialized and
		// stored garbage to rD. Report the bad size and emit nothing.
		PanicAlert("lXz: invalid access size");
		return;
	}
	ibuild.EmitStoreGReg(val, inst.RD);
}
|
||||
|
||||
// lha: rD = sign-extended halfword at EA = (rA|0) + SIMM.
void Jit64::lha(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr =
		ibuild.EmitIntConst((s32)(s16)inst.SIMM_16);
	if (inst.RA)
		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
	IREmitter::InstLoc val = ibuild.EmitLoad16(addr);
	val = ibuild.EmitSExt16(val);
	ibuild.EmitStoreGReg(val, inst.RD);
}

// lwzux: rD = word at EA = rA + rB, with rA updated to the EA.
// NOTE(review): lwzux with rA == 0 is an invalid form; here it degrades
// to a plain indexed load without update.
void Jit64::lwzux(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB);
	if (inst.RA) {
		addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
		ibuild.EmitStoreGReg(addr, inst.RA);
	}
	ibuild.EmitStoreGReg(ibuild.EmitLoad32(addr), inst.RD);
}
|
||||
|
||||
// Zero cache line.
// dcbz: zero the 32-byte cache block containing EA = (rA|0) + rB,
// implemented as two 16-byte aligned SSE stores straight into emulated
// memory (EBX holds the memory base on x64).
void Jit64::dcbz(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	MOV(32, R(EAX), gpr.R(inst.RB));
	if (inst.RA)
		ADD(32, R(EAX), gpr.R(inst.RA));
	// Round the address down to the 32-byte cache-block boundary.
	AND(32, R(EAX), Imm32(~31));
	XORPD(XMM0, R(XMM0));
#ifdef _M_X64
	MOVAPS(MComplex(EBX, EAX, SCALE_1, 0), XMM0);
	MOVAPS(MComplex(EBX, EAX, SCALE_1, 16), XMM0);
#else
	AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
	MOVAPS(MDisp(EAX, (u32)Memory::base), XMM0);
	MOVAPS(MDisp(EAX, (u32)Memory::base + 16), XMM0);
#endif
}
|
||||
|
||||
// stw/sth/stb and their update forms: mem[(rA|0) + SIMM] = rS.
// The low opcode bit marks the "update" variant (stwu/sthu/stbu),
// which also writes the effective address back into rA.
void Jit64::stX(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr = ibuild.EmitIntConst(inst.SIMM_16),
	                   value = ibuild.EmitLoadGReg(inst.RS);
	if (inst.RA)
		addr = ibuild.EmitAdd(ibuild.EmitLoadGReg(inst.RA), addr);
	if (inst.OPCD & 1)
		ibuild.EmitStoreGReg(addr, inst.RA);
	switch (inst.OPCD & ~1)
	{
	case 36: ibuild.EmitStore32(value, addr); break; //stw
	case 44: ibuild.EmitStore16(value, addr); break; //sth
	case 38: ibuild.EmitStore8(value, addr); break; //stb
	default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
	}
}

// stwx/sthx/stbx and their update forms: mem[rA + rB] = rS.
// Bit 5 of SUBOP10 marks the update variant (e.g. 183 = stwux vs
// 151 = stwx), which writes the EA back into rA.
void Jit64::stXx(UGeckoInstruction inst)
{
	IREmitter::InstLoc addr = ibuild.EmitLoadGReg(inst.RB),
	                   value = ibuild.EmitLoadGReg(inst.RS);
	addr = ibuild.EmitAdd(addr, ibuild.EmitLoadGReg(inst.RA));
	if (inst.SUBOP10 & 32)
		ibuild.EmitStoreGReg(addr, inst.RA);
	switch (inst.SUBOP10 & ~32)
	{
	case 151: ibuild.EmitStore32(value, addr); break; //stw
	case 407: ibuild.EmitStore16(value, addr); break; //sth
	case 215: ibuild.EmitStore8(value, addr); break; //stb
	default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
	}
}
|
||||
|
||||
// A few games use these heavily in video codecs.
// lmw: load rD..r31 from consecutive words at (rA|0) + SIMM, byte-swapping
// each word. x64-only (relies on EBX holding the emulated memory base);
// 32-bit builds defer to the interpreter.
void Jit64::lmw(UGeckoInstruction inst)
{
#ifdef _M_IX86
	Default(inst); return;
#else
	gpr.FlushLockX(ECX);
	MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
	if (inst.RA)
		ADD(32, R(EAX), gpr.R(inst.RA));
	for (int i = inst.RD; i < 32; i++)
	{
		// Load big-endian word, swap to host order, write into the reg cache.
		MOV(32, R(ECX), MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4));
		BSWAP(32, ECX);
		gpr.LoadToX64(i, false, true);
		MOV(32, gpr.R(i), R(ECX));
	}
	gpr.UnlockAllX();
#endif
}

// stmw: store rD..r31 to consecutive words at (rA|0) + SIMM, byte-swapping
// each word. x64-only for the same reason as lmw.
void Jit64::stmw(UGeckoInstruction inst)
{
#ifdef _M_IX86
	Default(inst); return;
#else
	gpr.FlushLockX(ECX);
	MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
	if (inst.RA)
		ADD(32, R(EAX), gpr.R(inst.RA));
	for (int i = inst.RD; i < 32; i++)
	{
		MOV(32, R(ECX), gpr.R(i));
		BSWAP(32, ECX);
		MOV(32, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4), R(ECX));
	}
	gpr.UnlockAllX();
#endif
}
|
|
@ -0,0 +1,322 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
|
||||
// Should give a very noticable speed boost to paired single heavy code.
|
||||
|
||||
#include "Common.h"
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../../Core.h" // include "Common.h", "CoreParameter.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "../../HW/CommandProcessor.h"
|
||||
#include "../../HW/PixelEngine.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "CPUDetect.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
|
||||
// pshufb todo: MOVQ
|
||||
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
const u8 GC_ALIGNED16(bswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
|
||||
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
|
||||
|
||||
namespace {
|
||||
|
||||
u64 GC_ALIGNED16(temp64);
|
||||
u32 GC_ALIGNED16(temp32);
|
||||
}
|
||||
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
|
||||
// and pshufb could help a lot.
|
||||
// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.
|
||||
|
||||
// lfs: load a single-precision float from RA+SIMM_16 into FD, widened to
// double and duplicated into both paired-single halves.
void Jit64::lfs(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	int d = inst.RD;
	int a = inst.RA;
	if (!a)
	{
		// RA == 0 means EA is just the immediate; punt to the interpreter.
		Default(inst);
		return;
	}
	s32 offset = (s32)(s16)inst.SIMM_16;
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	MOV(32, R(ABI_PARAM1), gpr.R(a));
	if (jo.assumeFPLoadFromMem)
		UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false);
	else
		SafeLoadRegToEAX(ABI_PARAM1, 32, offset);

	// Bounce the 32-bit value through memory, then widen and duplicate.
	MOV(32, M(&temp32), R(EAX));
	fpr.Lock(d);
	fpr.LoadToX64(d, false);
	CVTSS2SD(fpr.RX(d), M(&temp32));
	MOVDDUP(fpr.RX(d), fpr.R(d));
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
// lfd: load a big-endian double from RA+SIMM_16 into the low half of FD.
// The upper double of the paired register must be preserved, hence the
// load-previous-value + MOVSD merge below.
void Jit64::lfd(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	int d = inst.RD;
	int a = inst.RA;
	if (!a)
	{
		Default(inst);
		return;
	}
	s32 offset = (s32)(s16)inst.SIMM_16;
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	MOV(32, R(ABI_PARAM1), gpr.R(a));
	// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
	fpr.LoadToX64(d, true);
	fpr.Lock(d);
	X64Reg xd = fpr.RX(d);
	if (cpu_info.bSSSE3) {
		// SSSE3: byteswap in the XMM register with PSHUFB (the "Dupe"
		// shuffle replicates the swapped qword into both halves).
#ifdef _M_X64
		MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
#else
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
#endif
		PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
		MOVSD(xd, R(XMM0));
	} else {
#ifdef _M_X64
		// 64-bit GPR byteswap, then bounce through temp64 into XMM.
		MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
		BSWAP(64, EAX);
		MOV(64, M(&temp64), R(EAX));
		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));
#else
		// 32-bit: swap the two words individually, writing them crossed
		// so temp64 ends up as a little-endian double.
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
		BSWAP(32, EAX);
		MOV(32, M((void*)((u32)&temp64+4)), R(EAX));
		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
		BSWAP(32, EAX);
		MOV(32, M(&temp64), R(EAX));
		MOVSD(XMM0, M(&temp64));
		MOVSD(xd, R(XMM0));
#if 0
		// Alternate implementation; possibly faster
		AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
		PSHUFLW(XMM0, R(XMM0), 0x1B);
		PSRLW(XMM0, 8);
		MOVSD(xd, R(XMM0));
		MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
		PSHUFLW(XMM0, R(XMM0), 0x1B);
		PSLLW(XMM0, 8);
		POR(xd, R(XMM0));
#endif
#endif
	}
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
// stfd: store the low double of FS to RA+SIMM_16, big-endian.
// NOTE(review): this path writes straight to the memory view with no MMIO
// check — presumably fine for the games exercising it; verify if stfd to
// hardware addresses ever matters.
void Jit64::stfd(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	int s = inst.RS;
	int a = inst.RA;
	if (!a)
	{
		Default(inst);
		return;
	}
	s32 offset = (s32)(s16)inst.SIMM_16;
	gpr.FlushLockX(ABI_PARAM1);
	gpr.Lock(a);
	fpr.Lock(s);
	MOV(32, R(ABI_PARAM1), gpr.R(a));
#ifdef _M_IX86
	// 32-bit has no RBX fastmem base; mask the address into the mem view.
	AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
#endif
	if (cpu_info.bSSSE3) {
		// Byteswap in the XMM register, then store the qword directly.
		MOVAPD(XMM0, fpr.R(s));
		PSHUFB(XMM0, M((void *)bswapShuffle1x8));
#ifdef _M_X64
		MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, offset), XMM0);
#else
		MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base + offset), XMM0);
#endif
	} else {
#ifdef _M_X64
		// Bounce through temp64, byteswap the whole qword in a GPR.
		fpr.LoadToX64(s, true, false);
		MOVSD(M(&temp64), fpr.RX(s));
		MOV(64, R(EAX), M(&temp64));
		BSWAP(64, EAX);
		MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, offset), R(EAX));
#else
		// 32-bit: swap the two words, storing them crossed for big-endian.
		fpr.LoadToX64(s, true, false);
		MOVSD(M(&temp64), fpr.RX(s));
		MOV(32, R(EAX), M(&temp64));
		BSWAP(32, EAX);
		MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4), R(EAX));
		MOV(32, R(EAX), M((void*)((u32)&temp64 + 4)));
		BSWAP(32, EAX);
		MOV(32, MDisp(ABI_PARAM1, (u32)Memory::base + offset), R(EAX));
#endif
	}
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
// stfs: store FS as a single-precision float at RA+SIMM_16.
// The update form (stfsu) and RA == 0 are rejected up front and handled by
// the interpreter, so in all emitted paths below `update` is false.
void Jit64::stfs(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	bool update = inst.OPCD & 1;
	int s = inst.RS;
	int a = inst.RA;
	s32 offset = (s32)(s16)inst.SIMM_16;
	if (!a || update) {
		Default(inst);
		return;
	}

	// Fast paths when the effective address is a compile-time constant.
	if (gpr.R(a).IsImm())
	{
		u32 addr = (u32)(gpr.R(a).offset + offset);
		if (Memory::IsRAMAddress(addr))
		{
			if (cpu_info.bSSSE3) {
				// Convert to single, byteswap in the XMM register, store direct.
				CVTSD2SS(XMM0, fpr.R(s));
				PSHUFB(XMM0, M((void *)bswapShuffle1x4));
				WriteFloatToConstRamAddress(XMM0, addr);
				return;
			}
		}
		else if (addr == 0xCC008000)
		{
			// Float directly to write gather pipe! Fun!
			CVTSD2SS(XMM0, fpr.R(s));
			CALL((void*)asm_routines.fifoDirectWriteFloat);
			// TODO
			js.fifoBytesThisBlock += 4;
			return;
		}
	}

	// Generic path: compute EA, convert to single precision, safe write
	// (handles MMIO / non-RAM destinations).
	gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
	gpr.Lock(a);
	fpr.Lock(s);
	MOV(32, R(ABI_PARAM2), gpr.R(a));
	ADD(32, R(ABI_PARAM2), Imm32(offset));
	// Removed: a dead "if (update && offset)" RA-writeback that stood here —
	// `update` is always false past the early reject above, so the branch
	// could never emit any code.
	CVTSD2SS(XMM0, fpr.R(s));
	MOVSS(M(&temp32), XMM0);
	MOV(32, R(ABI_PARAM1), M(&temp32));
	SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
	gpr.UnlockAll();
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
// stfsx: indexed single-precision float store, EA = RA + RB (RA may be 0).
void Jit64::stfsx(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
	gpr.FlushLockX(ABI_PARAM1);
	fpr.Lock(inst.RS);
	MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
	if (inst.RA)
		ADD(32, R(ABI_PARAM1), gpr.R(inst.RA));
	// Narrow to single, move the raw bits to EAX, unsafe (fastmem) write.
	CVTSD2SS(XMM0, fpr.R(inst.RS));
	MOVD_xmm(R(EAX), XMM0);
	UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
	gpr.UnlockAllX();
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
// lfsx: indexed single-precision float load, EA = RA + RB (RA may be 0).
// Result is widened to double and duplicated into both paired halves.
void Jit64::lfsx(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreFloatingOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	fpr.Lock(inst.RS);
	fpr.LoadToX64(inst.RS, false, true);
	MOV(32, R(EAX), gpr.R(inst.RB));
	if (inst.RA)
		ADD(32, R(EAX), gpr.R(inst.RA));
	if (cpu_info.bSSSE3) {
		// Load + byteswap + widen entirely in the destination XMM register.
		X64Reg r = fpr.R(inst.RS).GetSimpleReg();
#ifdef _M_IX86
		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
		MOVD_xmm(r, MDisp(EAX, (u32)Memory::base));
#else
		MOVD_xmm(r, MComplex(RBX, EAX, SCALE_1, 0));
#endif
		PSHUFB(r, M((void *)bswapShuffle1x4));
		CVTSS2SD(r, R(r));
		MOVDDUP(r, R(r));
	} else {
		// NOTE(review): argument list here differs from the lfs call site
		// ((reg, reg, size, signExtend?) vs (..., offset, signExtend)) —
		// presumably relies on a defaulted parameter; confirm against the
		// UnsafeLoadRegToReg declaration.
		UnsafeLoadRegToReg(EAX, EAX, 32, false);
		MOV(32, M(&temp32), R(EAX));
		CVTSS2SD(XMM0, M(&temp32));
		MOVDDUP(fpr.R(inst.RS).GetSimpleReg(), R(XMM0));
	}
	fpr.UnlockAll();
}
|
||||
|
|
@ -0,0 +1,458 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
|
||||
// Should give a very noticable speed boost to paired single heavy code.
|
||||
|
||||
#include "Common.h"
|
||||
|
||||
#include "Thunk.h"
|
||||
#include "../PowerPC.h"
|
||||
#include "../../Core.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "../../HW/CommandProcessor.h"
|
||||
#include "../../HW/PixelEngine.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "CPUDetect.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
#define INSTRUCTION_START
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
|
||||
const u8 GC_ALIGNED16(pbswapShuffle2x4[16]) = {3, 2, 1, 0, 7, 6, 5, 4, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
const u8 GC_ALIGNED16(pbswapShuffleNoop[16]) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
|
||||
static double GC_ALIGNED16(psTemp[2]) = {1.0, 1.0};
|
||||
static u64 GC_ALIGNED16(temp64);
|
||||
|
||||
// TODO(ector): Improve 64-bit version
|
||||
// Slow-path helper for paired stores: writes a 64-bit value as two
// 32-bit guest writes, high word at `address`, low word at `address + 4`.
// TODO(ector): Improve 64-bit version
static void WriteDual32(u64 value, u32 address)
{
	Memory::Write_U32((u32)(value >> 32), address);
	Memory::Write_U32((u32)value, address + 4);
}
|
||||
|
||||
const double GC_ALIGNED16(m_quantizeTableD[]) =
|
||||
{
|
||||
(1 << 0), (1 << 1), (1 << 2), (1 << 3),
|
||||
(1 << 4), (1 << 5), (1 << 6), (1 << 7),
|
||||
(1 << 8), (1 << 9), (1 << 10), (1 << 11),
|
||||
(1 << 12), (1 << 13), (1 << 14), (1 << 15),
|
||||
(1 << 16), (1 << 17), (1 << 18), (1 << 19),
|
||||
(1 << 20), (1 << 21), (1 << 22), (1 << 23),
|
||||
(1 << 24), (1 << 25), (1 << 26), (1 << 27),
|
||||
(1 << 28), (1 << 29), (1 << 30), (1 << 31),
|
||||
1.0 / (1ULL << 32), 1.0 / (1 << 31), 1.0 / (1 << 30), 1.0 / (1 << 29),
|
||||
1.0 / (1 << 28), 1.0 / (1 << 27), 1.0 / (1 << 26), 1.0 / (1 << 25),
|
||||
1.0 / (1 << 24), 1.0 / (1 << 23), 1.0 / (1 << 22), 1.0 / (1 << 21),
|
||||
1.0 / (1 << 20), 1.0 / (1 << 19), 1.0 / (1 << 18), 1.0 / (1 << 17),
|
||||
1.0 / (1 << 16), 1.0 / (1 << 15), 1.0 / (1 << 14), 1.0 / (1 << 13),
|
||||
1.0 / (1 << 12), 1.0 / (1 << 11), 1.0 / (1 << 10), 1.0 / (1 << 9),
|
||||
1.0 / (1 << 8), 1.0 / (1 << 7), 1.0 / (1 << 6), 1.0 / (1 << 5),
|
||||
1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1),
|
||||
};
|
||||
|
||||
const double GC_ALIGNED16(m_dequantizeTableD[]) =
|
||||
{
|
||||
1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3),
|
||||
1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7),
|
||||
1.0 / (1 << 8), 1.0 / (1 << 9), 1.0 / (1 << 10), 1.0 / (1 << 11),
|
||||
1.0 / (1 << 12), 1.0 / (1 << 13), 1.0 / (1 << 14), 1.0 / (1 << 15),
|
||||
1.0 / (1 << 16), 1.0 / (1 << 17), 1.0 / (1 << 18), 1.0 / (1 << 19),
|
||||
1.0 / (1 << 20), 1.0 / (1 << 21), 1.0 / (1 << 22), 1.0 / (1 << 23),
|
||||
1.0 / (1 << 24), 1.0 / (1 << 25), 1.0 / (1 << 26), 1.0 / (1 << 27),
|
||||
1.0 / (1 << 28), 1.0 / (1 << 29), 1.0 / (1 << 30), 1.0 / (1 << 31),
|
||||
(1ULL << 32), (1 << 31), (1 << 30), (1 << 29),
|
||||
(1 << 28), (1 << 27), (1 << 26), (1 << 25),
|
||||
(1 << 24), (1 << 23), (1 << 22), (1 << 21),
|
||||
(1 << 20), (1 << 19), (1 << 18), (1 << 17),
|
||||
(1 << 16), (1 << 15), (1 << 14), (1 << 13),
|
||||
(1 << 12), (1 << 11), (1 << 10), (1 << 9),
|
||||
(1 << 8), (1 << 7), (1 << 6), (1 << 5),
|
||||
(1 << 4), (1 << 3), (1 << 2), (1 << 1),
|
||||
};
|
||||
|
||||
// The big problem is likely instructions that set the quantizers in the same block.
|
||||
// We will have to break block after quantizers are written to.
|
||||
// psq_st / psq_stu: store a pair of singles quantized per GQR[inst.I].
// The big problem is likely instructions that set the quantizers in the
// same block — we will have to break the block after quantizers are
// written to; until then, blocks that touch them are interpreted.
void Jit64::psq_st(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	js.block_flags |= BLOCK_USE_GQR0 << inst.I;

	if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
	{
		Default(inst);
		return;
	}
	if (!inst.RA)
	{
		// This really should never happen. Unless we change this to also support stwux
		Default(inst);
		return;
	}

	const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
	const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
	int stScale = gqr.ST_SCALE;
	bool update = inst.OPCD == 61; // psq_stu

	int offset = inst.SIMM_12;
	int a = inst.RA;
	int s = inst.RS; // Fp numbers

	if (inst.W) {
		// W=1 stores only ps0. It's fairly common that games write stuff to
		// the pipe using this; then it's pretty much only floats, so that's
		// what we'll work on.
		switch (stType)
		{
		case QUANTIZE_FLOAT:
			{
				// This one has quite a bit of optimization potential.
				if (gpr.R(a).IsImm())
				{
					PanicAlert("Imm: %08x", gpr.R(a).offset);
				}
				gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
				gpr.Lock(a);
				fpr.Lock(s);
				if (update)
					gpr.LoadToX64(a, true, true);
				MOV(32, R(ABI_PARAM2), gpr.R(a));
				if (offset)
					ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
				// Cheap address classification: any bit in 0x0C000000 set
				// means "not plain RAM" — take the Write_U32 slow path.
				TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
				if (update && offset)
					MOV(32, gpr.R(a), R(ABI_PARAM2));
				CVTSD2SS(XMM0, fpr.R(s));
				MOVD_xmm(M(&temp64), XMM0);
				MOV(32, R(ABI_PARAM1), M(&temp64));
				FixupBranch argh = J_CC(CC_NZ);
				BSWAP(32, ABI_PARAM1);
#ifdef _M_X64
				MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
#else
				MOV(32, R(EAX), R(ABI_PARAM2));
				AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
				MOV(32, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
#endif
				FixupBranch skip_call = J();
				SetJumpTarget(argh);
				ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
				SetJumpTarget(skip_call);
				gpr.UnlockAll();
				gpr.UnlockAllX();
				fpr.UnlockAll();
				return;
			}
		default:
			Default(inst);
			return;
		}
		return;
	}

	if (stType == QUANTIZE_FLOAT)
	{
		// Known constant FIFO address: skip the generic path entirely.
		if (gpr.R(a).IsImm() && !update && cpu_info.bSSSE3)
		{
			u32 addr = (u32)(gpr.R(a).offset + offset);
			if (addr == 0xCC008000) {
				// Writing to FIFO. Let's do fast method.
				CVTPD2PS(XMM0, fpr.R(s));
				PSHUFB(XMM0, M((void*)&pbswapShuffle2x4));
				CALL((void*)asm_routines.fifoDirectWriteXmm64);
				js.fifoBytesThisBlock += 8;
				return;
			}
		}

		gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
		gpr.Lock(a);
		fpr.Lock(s);
		if (update)
			gpr.LoadToX64(a, true, true);
		MOV(32, R(ABI_PARAM2), gpr.R(a));
		if (offset)
			ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
		TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
		if (update && offset)
			MOV(32, gpr.R(a), R(ABI_PARAM2));
		// Pack both doubles to singles, swap ps0/ps1 so ps0 lands at the
		// lower guest address after the byteswapped store.
		CVTPD2PS(XMM0, fpr.R(s));
		SHUFPS(XMM0, R(XMM0), 1);
		MOVQ_xmm(M(&temp64), XMM0);
#ifdef _M_X64
		MOV(64, R(ABI_PARAM1), M(&temp64));
		FixupBranch argh = J_CC(CC_NZ);
		BSWAP(64, ABI_PARAM1);
		MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
		FixupBranch arg2 = J();
		SetJumpTarget(argh);
		CALL(thunks.ProtectFunction((void *)&WriteDual32, 0));
#else
		FixupBranch argh = J_CC(CC_NZ);
		MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
		BSWAP(32, ABI_PARAM1);
		AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
		MOV(32, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
		MOV(32, R(ABI_PARAM1), M(&temp64));
		BSWAP(32, ABI_PARAM1);
		MOV(32, MDisp(ABI_PARAM2, 4+(u32)Memory::base), R(ABI_PARAM1));
		FixupBranch arg2 = J();
		SetJumpTarget(argh);
		// Slow path: two thunked Write_U32 calls, high word first.
		MOV(32, R(ABI_PARAM1), M(((char*)&temp64) + 4));
		ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
		MOV(32, R(ABI_PARAM1), M(((char*)&temp64)));
		ADD(32, R(ABI_PARAM2), Imm32(4));
		ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
#endif
		SetJumpTarget(arg2);
		gpr.UnlockAll();
		gpr.UnlockAllX();
		fpr.UnlockAll();
	}
	else if (stType == QUANTIZE_U8)
	{
		gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
		gpr.Lock(a);
		fpr.Lock(s);
		if (update)
			gpr.LoadToX64(a, true, update);
		MOV(32, R(ABI_PARAM2), gpr.R(a));
		if (offset)
			ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
		if (update && offset)
			MOV(32, gpr.R(a), R(ABI_PARAM2));
		// Scale by the quantization factor, convert to int32, then pack
		// down to unsigned bytes (with saturation).
		MOVAPD(XMM0, fpr.R(s));
		MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
		MULPD(XMM0, R(XMM1));
		CVTPD2DQ(XMM0, R(XMM0));
		PACKSSDW(XMM0, R(XMM0));
		PACKUSWB(XMM0, R(XMM0));
		MOVD_xmm(M(&temp64), XMM0);
		MOV(16, R(ABI_PARAM1), M(&temp64));
#ifdef _M_X64
		MOV(16, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
#else
		MOV(32, R(EAX), R(ABI_PARAM2));
		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
		MOV(16, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
#endif
		// NOTE(review): this second writeback is redundant with the
		// "update && offset" one above (harmless: same value), kept for
		// byte-identical behavior.
		if (update)
			MOV(32, gpr.R(a), R(ABI_PARAM2));
		gpr.UnlockAll();
		gpr.UnlockAllX();
		fpr.UnlockAll();
	}
	else if (stType == QUANTIZE_S16)
	{
		gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
		gpr.Lock(a);
		fpr.Lock(s);
		if (update)
			gpr.LoadToX64(a, true, update);
		MOV(32, R(ABI_PARAM2), gpr.R(a));
		if (offset)
			ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
		if (update)
			MOV(32, gpr.R(a), R(ABI_PARAM2));
		// Scale, swap halves so the pair lands in guest order, convert to
		// int32 and pack to signed 16-bit with saturation.
		MOVAPD(XMM0, fpr.R(s));
		MOVDDUP(XMM1, M((void*)&m_quantizeTableD[stScale]));
		MULPD(XMM0, R(XMM1));
		SHUFPD(XMM0, R(XMM0), 1);
		CVTPD2DQ(XMM0, R(XMM0));
		PACKSSDW(XMM0, R(XMM0));
		MOVD_xmm(M(&temp64), XMM0);
		MOV(32, R(ABI_PARAM1), M(&temp64));
		BSWAP(32, ABI_PARAM1);
#ifdef _M_X64
		MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
#else
		MOV(32, R(EAX), R(ABI_PARAM2));
		AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
		MOV(32, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
#endif
		gpr.UnlockAll();
		gpr.UnlockAllX();
		fpr.UnlockAll();
	}
	else {
		// Dodger uses this.
		// mario tennis
		//PanicAlert("st %i:%i", stType, inst.W);
		Default(inst);
	}
}
|
||||
|
||||
// psq_l / psq_lu: load a pair of values dequantized per GQR[inst.I] into FD.
void Jit64::psq_l(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStorePairedOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	js.block_flags |= BLOCK_USE_GQR0 << inst.I;

	if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
	{
		Default(inst);
		return;
	}

	const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
	const EQuantizeType ldType = static_cast<EQuantizeType>(gqr.LD_TYPE);
	int ldScale = gqr.LD_SCALE;
	bool update = inst.OPCD == 57; // psq_lu
	if (!inst.RA || inst.W)
	{
		// 0 1 during load
		//PanicAlert("ld:%i %i", ldType, (int)inst.W);
		Default(inst);
		return;
	}
	int offset = inst.SIMM_12;
	switch (ldType) {
	case QUANTIZE_FLOAT: // We know this is from RAM, so we don't need to check the address.
		{
#ifdef _M_X64
			gpr.LoadToX64(inst.RA, true, update);
			fpr.LoadToX64(inst.RS, false);
			if (cpu_info.bSSSE3) {
				// Load both singles, byteswap in-register, widen to doubles.
				X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
				MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
				PSHUFB(xd, M((void *)pbswapShuffle2x4));
				CVTPS2PD(xd, R(xd));
			} else {
				// 64-bit GPR byteswap reverses the pair too; SHUFPD fixes
				// the ordering after the widen.
				MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
				BSWAP(64, RAX);
				MOV(64, M(&psTemp[0]), R(RAX));
				X64Reg r = fpr.R(inst.RS).GetSimpleReg();
				CVTPS2PD(r, M(&psTemp[0]));
				SHUFPD(r, R(r), 1);
			}
			if (update && offset != 0)
				ADD(32, gpr.R(inst.RA), Imm32(offset));
			break;
#else
			if (cpu_info.bSSSE3) {
				gpr.LoadToX64(inst.RA, true, update);
				fpr.LoadToX64(inst.RS, false);
				X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
				MOV(32, R(EAX), gpr.R(inst.RA));
				AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
				MOVQ_xmm(xd, MDisp(EAX, (u32)Memory::base + offset));
				PSHUFB(xd, M((void *)pbswapShuffle2x4));
				CVTPS2PD(xd, R(xd));
			} else {
				gpr.FlushLockX(ECX);
				gpr.LoadToX64(inst.RA, true, update);
				// This can probably be optimized somewhat.
				LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
				AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
				MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
				BSWAP(32, RAX);
				MOV(32, M(&psTemp[0]), R(RAX));
				MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
				BSWAP(32, RAX);
				MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
				fpr.LoadToX64(inst.RS, false, true);
				X64Reg r = fpr.R(inst.RS).GetSimpleReg();
				CVTPS2PD(r, M(&psTemp[0]));
				gpr.UnlockAllX();
			}
			if (update && offset != 0)
				ADD(32, gpr.R(inst.RA), Imm32(offset));
			break;
#endif
		}
	case QUANTIZE_U8:
		{
			gpr.LoadToX64(inst.RA, true, update);
#ifdef _M_X64
			MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else
			LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
			AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
			MOVZX(32, 16, EAX, MDisp(EAX, (u32)Memory::base));
#endif
			MOV(32, M(&temp64), R(EAX));
			MOVD_xmm(XMM0, M(&temp64));
			// SSE4 optimization opportunity here.
			// Zero-extend the two bytes up to int32 lanes, then to doubles.
			PXOR(XMM1, R(XMM1));
			PUNPCKLBW(XMM0, R(XMM1));
			PUNPCKLWD(XMM0, R(XMM1));
			CVTDQ2PD(XMM0, R(XMM0));
			fpr.LoadToX64(inst.RS, false, true);
			X64Reg r = fpr.R(inst.RS).GetSimpleReg();
			MOVDDUP(r, M((void *)&m_dequantizeTableD[ldScale]));
			MULPD(r, R(XMM0));
			if (update && offset != 0)
				ADD(32, gpr.R(inst.RA), Imm32(offset));
		}
		break;
	case QUANTIZE_S16:
		{
			gpr.LoadToX64(inst.RA, true, update);
#ifdef _M_X64
			MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else
			LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
			AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
			MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base));
#endif
			BSWAP(32, EAX);
			MOV(32, M(&temp64), R(EAX));
			fpr.LoadToX64(inst.RS, false, true);
			X64Reg r = fpr.R(inst.RS).GetSimpleReg();
			MOVD_xmm(XMM0, M(&temp64));
			PUNPCKLWD(XMM0, R(XMM0)); // unpack to higher word in each dword..
			PSRAD(XMM0, 16); // then use this signed shift to sign extend. clever eh? :P
			CVTDQ2PD(XMM0, R(XMM0));
			MOVDDUP(r, M((void*)&m_dequantizeTableD[ldScale]));
			MULPD(r, R(XMM0));
			SHUFPD(r, R(r), 1);
			if (update && offset != 0)
				ADD(32, gpr.R(inst.RA), Imm32(offset));
		}
		break;

	/*
	Dynamic quantizer. Todo when we have a test set.
	MOVZX(32, 8, EAX, M(((char *)&PowerPC::ppcState.spr[SPR_GQR0 + inst.I]) + 3)); // it's in the high byte.
	AND(32, R(EAX), Imm8(0x3F));
	MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD));
	MOVDDUP(r, MComplex(RCX, EAX, 8, 0));
	*/
	default:
		// 4 0
		// 6 0 //power tennis
		// 5 0
		// PanicAlert("ld:%i %i", ldType, (int)inst.W);
		Default(inst);
		return;
	}

	//u32 EA = (m_GPR[_inst.RA] + _inst.SIMM_12) : _inst.SIMM_12;
}
|
|
@ -0,0 +1,407 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "Common.h"
|
||||
|
||||
#include "../../Core.h"
|
||||
#include "../PowerPC.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
// TODO
|
||||
// ps_madds0
|
||||
// ps_muls0
|
||||
// ps_madds1
|
||||
// ps_sel
|
||||
// cmppd, andpd, andnpd, or
|
||||
// lfsx, ps_merge01 etc
|
||||
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
#define INSTRUCTION_START
|
||||
|
||||
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
|
||||
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
|
||||
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
|
||||
const double GC_ALIGNED16(psZeroZero[2]) = {0.0, 0.0};
|
||||
|
||||
// ps_mr: paired-single register move, FD = FB. Rc forms go to the interpreter.
void Jit64::ps_mr(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		Default(inst); return;
	}
	int d = inst.FD;
	int b = inst.FB;
	// Moving a register onto itself is a no-op.
	if (d == b)
		return;
	fpr.LoadToX64(d, false);
	MOVAPD(fpr.RX(d), fpr.R(b));
}
|
||||
|
||||
// ps_sel: paired select, FD = (FA >= 0) ? FC : FB per component.
// Currently DISABLED — the unconditional Default/return below bounces
// everything to the interpreter; the emitter code after it is kept for
// reference but never runs.
void Jit64::ps_sel(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	Default(inst);
	return;

	if (inst.Rc) {
		Default(inst); return;
	}
	// GRR can't get this to work 100%. Getting artifacts in D.O.N. intro.
	int d = inst.FD;
	int a = inst.FA;
	int b = inst.FB;
	int c = inst.FC;
	fpr.FlushLockX(XMM7);
	fpr.FlushLockX(XMM6);
	fpr.Lock(a, b, c, d);
	fpr.LoadToX64(a, true, false);
	fpr.LoadToX64(d, false, true);
	// BLENDPD would have been nice...
	MOVAPD(XMM7, fpr.R(a));
	CMPPD(XMM7, M((void*)psZeroZero), 1); //less-than = 111111
	MOVAPD(XMM6, R(XMM7));
	ANDPD(XMM7, fpr.R(d));
	ANDNPD(XMM6, fpr.R(c));
	MOVAPD(fpr.RX(d), R(XMM7));
	ORPD(fpr.RX(d), R(XMM6));
	fpr.UnlockAll();
	fpr.UnlockAllX();
}
|
||||
|
||||
// ps_neg / ps_nabs / ps_abs: paired sign-bit manipulation, dispatched on
// SUBOP10. Implemented as bitwise ops on the packed sign-bit masks.
void Jit64::ps_sign(UGeckoInstruction inst)
{
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
		{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		Default(inst); return;
	}
	int d = inst.FD;
	int b = inst.FB;

	fpr.Lock(d, b);
	if (d != b)
	{
		// Fresh destination: no need to preserve its old contents.
		fpr.LoadToX64(d, false);
		MOVAPD(fpr.RX(d), fpr.R(b));
	}
	else
	{
		// In-place: load the existing value so we can flip bits on it.
		fpr.LoadToX64(d, true);
	}

	switch (inst.SUBOP10)
	{
	case 40: //neg
		XORPD(fpr.RX(d), M((void*)&psSignBits));
		break;
	case 136: //nabs
		ORPD(fpr.RX(d), M((void*)&psSignBits));
		break;
	case 264: //abs
		ANDPD(fpr.RX(d), M((void*)&psAbsMask));
		break;
	}

	fpr.UnlockAll();
}
|
||||
|
||||
// ps_rsqrte: paired-single reciprocal square-root estimate.
// Implemented as an exact 1.0 / sqrt(b) on both elements rather than the
// hardware's table-based estimate.
void Jit64::ps_rsqrte(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		// Record forms (Rc=1) go to the interpreter.
		Default(inst); return;
	}
	int d = inst.FD;
	int b = inst.FB;
	fpr.Lock(d, b);
	SQRTPD(XMM0, fpr.R(b));
	MOVAPD(XMM1, M((void*)&psOneOne));
	DIVPD(XMM1, R(XMM0));
	// NOTE(review): unlike the other ps_* handlers, d is never LoadToX64'd
	// here, so the store goes to wherever d currently lives (register or
	// memory); also no ForceSinglePrecisionP — confirm both are intended.
	MOVAPD(fpr.R(d), XMM1);
	fpr.UnlockAll();
}
|
||||
|
||||
//add a, b, c
|
||||
|
||||
//mov a, b
|
||||
//add a, c
|
||||
//we need:
|
||||
/*
|
||||
psq_l
|
||||
psq_stu
|
||||
*/
|
||||
|
||||
/*
|
||||
add a,b,a
|
||||
*/
|
||||
|
||||
//There's still a little bit more optimization that can be squeezed out of this
|
||||
// Emits code for a three-operand paired-double operation d = a <op> b, where
// op is a two-operand SSE instruction (dest <op>= src). Handles every
// register-aliasing combination of d, a and b while minimizing copies.
//
// d, a, b:     PPC FPR indices (destination and the two sources).
// reversible:  true when (a op b) == (b op a), enabling the in-place d==b
//              shortcut with swapped operands.
// op:          pointer to the XEmitter member that emits the SSE op.
//
// The result is squashed through ForceSinglePrecisionP for games that
// depend on single-precision rounding.
void Jit64::tri_op(int d, int a, int b, bool reversible, void (XEmitter::*op)(X64Reg, OpArg))
{
	fpr.Lock(d, a, b);

	if (d == a)
	{
		// d aliases the first source: operate in place.
		fpr.LoadToX64(d, true);
		(this->*op)(fpr.RX(d), fpr.R(b));
	}
	else if (d == b && reversible)
	{
		// d aliases the second source and the op commutes: operate in
		// place with the operands swapped.
		fpr.LoadToX64(d, true);
		(this->*op)(fpr.RX(d), fpr.R(a));
	}
	else if (b != d)
	{
		// d is distinct from both sources (a != d is implied because the
		// d == a case was handled first): copy a into d, then apply b.
		fpr.LoadToX64(d, false);
		MOVAPD(fpr.RX(d), fpr.R(a));
		(this->*op)(fpr.RX(d), fpr.R(b));
	}
	else //Other combo, must use two temps :(
	{
		// d == b with a non-commutative op: consume b's value in
		// temporaries before d is overwritten.
		// (The original code also had an 'else if (b != d)' branch here;
		// it was unreachable — once d != a holds, the previous condition
		// only fails when b == d — and has been removed.)
		MOVAPD(XMM0, fpr.R(a));
		MOVAPD(XMM1, fpr.R(b));
		fpr.LoadToX64(d, false);
		(this->*op)(XMM0, Gen::R(XMM1));
		MOVAPD(fpr.RX(d), Gen::R(XMM0));
	}
	ForceSinglePrecisionP(fpr.RX(d));
	fpr.UnlockAll();
}
|
||||
|
||||
// Dispatcher for the paired-single arithmetic group (SUBOP5 of opcode 4).
// div/sub/add/mul are emitted via tri_op; ps_sel and ps_res still fall back
// to the interpreter, as do all record (Rc=1) forms.
void Jit64::ps_arith(UGeckoInstruction inst)
{
	// Allow the debugger to force this instruction group to the interpreter.
	if (Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
	{
		Default(inst);
		return;
	}
	INSTRUCTION_START;
	if (inst.Rc)
	{
		// Record forms update CR1; punt to the interpreter.
		Default(inst);
		return;
	}
	switch (inst.SUBOP5)
	{
	case 18: // ps_div (not commutative)
		tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::DIVPD);
		break;
	case 20: // ps_sub (not commutative)
		tri_op(inst.FD, inst.FA, inst.FB, false, &XEmitter::SUBPD);
		break;
	case 21: // ps_add (commutative)
		tri_op(inst.FD, inst.FA, inst.FB, true, &XEmitter::ADDPD);
		break;
	case 25: // ps_mul (commutative; second source is FC)
		tri_op(inst.FD, inst.FA, inst.FC, true, &XEmitter::MULPD);
		break;
	case 23: // ps_sel — not JITed yet
	case 24: // ps_res — not JITed yet
		Default(inst);
		break;
	default:
		_assert_msg_(DYNA_REC, 0, "ps_arith WTF!!!");
	}
}
|
||||
|
||||
// ps_sum0 (SUBOP5=10): d.ps0 = a.ps0 + b.ps1;  d.ps1 = c.ps1
// ps_sum1 (SUBOP5=11): d.ps0 = c.ps0;          d.ps1 = a.ps0 + b.ps1
// In XMM registers, ps0 lives in the low double and ps1 in the high double.
void Jit64::ps_sum(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		// Record forms (Rc=1) go to the interpreter.
		Default(inst); return;
	}
	int d = inst.FD;
	int a = inst.FA;
	int b = inst.FB;
	int c = inst.FC;
	fpr.Lock(a,b,c,d);
	// Load d for writing; preserve its old value only if it aliases a source.
	fpr.LoadToX64(d, d == a || d == b || d == c, true);
	switch (inst.SUBOP5)
	{
	case 10:
		// Do the sum in upper subregisters, merge uppers
		MOVDDUP(XMM0, fpr.R(a));   // XMM0 = {a.ps0, a.ps0}
		MOVAPD(XMM1, fpr.R(b));
		ADDPD(XMM0, R(XMM1));      // XMM0 = {a.ps0+b.ps0, a.ps0+b.ps1}
		UNPCKHPD(XMM0, fpr.R(c)); //merge -> {a.ps0+b.ps1, c.ps1}
		MOVAPD(fpr.R(d), XMM0);
		break;
	case 11:
		// Do the sum in lower subregisters, merge lowers
		MOVAPD(XMM0, fpr.R(a));
		MOVAPD(XMM1, fpr.R(b));
		SHUFPD(XMM1, R(XMM1), 5); // copy higher to lower -> {b.ps1, b.ps0}
		ADDPD(XMM0, R(XMM1)); // sum lowers -> low = a.ps0 + b.ps1
		MOVAPD(XMM1, fpr.R(c));
		UNPCKLPD(XMM1, R(XMM0)); // merge -> {c.ps0, a.ps0+b.ps1}
		MOVAPD(fpr.R(d), XMM1);
		break;
	default:
		PanicAlert("ps_sum WTF!!!");
	}
	ForceSinglePrecisionP(fpr.RX(d));
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
// ps_muls0 (SUBOP5=12): d = a * {c.ps0, c.ps0}
// ps_muls1 (SUBOP5=13): d = a * {c.ps1, c.ps1}
// Both elements of a are multiplied by a single broadcast element of c.
void Jit64::ps_muls(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	if (inst.Rc) {
		// Record forms (Rc=1) go to the interpreter.
		Default(inst); return;
	}
	int d = inst.FD;
	int a = inst.FA;
	int c = inst.FC;
	fpr.Lock(a, c, d);
	// Load d for writing; preserve its old value only if it aliases a source.
	fpr.LoadToX64(d, d == a || d == c, true);
	switch (inst.SUBOP5)
	{
	case 12:
		// ps_muls0: MOVDDUP replicates the LOW double, i.e. c.ps0.
		// TODO - faster version for when regs are different
		MOVAPD(XMM0, fpr.R(a));
		MOVDDUP(XMM1, fpr.R(c));   // XMM1 = {c.ps0, c.ps0}
		MULPD(XMM0, R(XMM1));
		MOVAPD(fpr.R(d), XMM0);
		break;
	case 13:
		// ps_muls1: broadcast the HIGH double (c.ps1) to both lanes.
		// TODO - faster version for when regs are different
		MOVAPD(XMM0, fpr.R(a));
		MOVAPD(XMM1, fpr.R(c));
		SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower -> {c.ps1, c.ps1}
		MULPD(XMM0, R(XMM1));
		MOVAPD(fpr.R(d), XMM0);
		break;
	default:
		PanicAlert("ps_muls WTF!!!");
	}
	ForceSinglePrecisionP(fpr.RX(d));
	fpr.UnlockAll();
}
|
||||
|
||||
|
||||
//TODO: find easy cases and optimize them, do a breakout like ps_arith
|
||||
void Jit64::ps_mergeXX(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int d = inst.FD;
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
fpr.Lock(a,b,d);
|
||||
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
switch (inst.SUBOP10)
|
||||
{
|
||||
case 528:
|
||||
UNPCKLPD(XMM0, fpr.R(b)); //unpck is faster than shuf
|
||||
break; //00
|
||||
case 560:
|
||||
SHUFPD(XMM0, fpr.R(b), 2); //must use shuf here
|
||||
break; //01
|
||||
case 592:
|
||||
SHUFPD(XMM0, fpr.R(b), 1);
|
||||
break; //10
|
||||
case 624:
|
||||
UNPCKHPD(XMM0, fpr.R(b));
|
||||
break; //11
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_merge - invalid op");
|
||||
}
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
||||
//TODO: add optimized cases
|
||||
void Jit64::ps_maddXX(UGeckoInstruction inst)
|
||||
{
|
||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITPairedOff)
|
||||
{Default(inst); return;} // turn off from debugger
|
||||
INSTRUCTION_START;
|
||||
if (inst.Rc) {
|
||||
Default(inst); return;
|
||||
}
|
||||
int a = inst.FA;
|
||||
int b = inst.FB;
|
||||
int c = inst.FC;
|
||||
int d = inst.FD;
|
||||
fpr.Lock(a,b,c,d);
|
||||
|
||||
MOVAPD(XMM0, fpr.R(a));
|
||||
switch (inst.SUBOP5)
|
||||
{
|
||||
case 14: //madds0
|
||||
MOVDDUP(XMM1, fpr.R(c));
|
||||
MULPD(XMM0, R(XMM1));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 15: //madds1
|
||||
MOVAPD(XMM1, fpr.R(c));
|
||||
SHUFPD(XMM1, R(XMM1), 3); // copy higher to lower
|
||||
MULPD(XMM0, R(XMM1));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 28: //msub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 29: //madd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
break;
|
||||
case 30: //nmsub
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
SUBPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
case 31: //nmadd
|
||||
MULPD(XMM0, fpr.R(c));
|
||||
ADDPD(XMM0, fpr.R(b));
|
||||
XORPD(XMM0, M((void*)&psSignBits));
|
||||
break;
|
||||
default:
|
||||
_assert_msg_(DYNA_REC, 0, "ps_maddXX WTF!!!");
|
||||
//Default(inst);
|
||||
//fpr.UnlockAll();
|
||||
return;
|
||||
}
|
||||
fpr.LoadToX64(d, false);
|
||||
MOVAPD(fpr.RX(d), Gen::R(XMM0));
|
||||
ForceSinglePrecisionP(fpr.RX(d));
|
||||
fpr.UnlockAll();
|
||||
}
|
|
@ -0,0 +1,149 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "Common.h"
|
||||
|
||||
#include "../../Core.h"
|
||||
#include "../../CoreTiming.h"
|
||||
#include "../../HW/SystemTimers.h"
|
||||
#include "../PowerPC.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
#include "Thunk.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
#define INSTRUCTION_START
|
||||
// #define INSTRUCTION_START Default(inst); return;
|
||||
|
||||
// mtspr (IR builder version): move RD to a special-purpose register.
// Only LR and CTR are handled in the IR; everything else falls back to the
// interpreter after logging the unhandled SPR number.
void Jit64::mtspr(UGeckoInstruction inst)
{
	// The 10-bit SPR number is split across two 5-bit fields, swapped.
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	switch(iIndex) {
	case SPR_LR:
		ibuild.EmitStoreLink(ibuild.EmitLoadGReg(inst.RD));
		return;
	case SPR_CTR:
		ibuild.EmitStoreCTR(ibuild.EmitLoadGReg(inst.RD));
		return;
	default:
		// Fix: iIndex is u32 (use %u) and the message needs a newline so
		// successive log lines don't run together.
		printf("mtspr case %u\n", iIndex);
		Default(inst);
		return;
	}
}
|
||||
|
||||
// mfspr (IR builder version): move a special-purpose register to RD.
// Only LR and CTR are handled in the IR; everything else falls back to the
// interpreter after logging the unhandled SPR number.
void Jit64::mfspr(UGeckoInstruction inst)
{
	// The 10-bit SPR number is split across two 5-bit fields, swapped.
	u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F);
	switch (iIndex)
	{
	case SPR_LR:
		ibuild.EmitStoreGReg(ibuild.EmitLoadLink(), inst.RD);
		return;
	case SPR_CTR:
		ibuild.EmitStoreGReg(ibuild.EmitLoadCTR(), inst.RD);
		return;
	default:
		// Fix: iIndex is u32 (use %u) and the message needs a newline so
		// successive log lines don't run together.
		printf("mfspr case %u\n", iIndex);
		Default(inst);
		return;
	}
}
|
||||
|
||||
|
||||
// =======================================================================================
|
||||
// Don't interpret this, if we do we get thrown out
|
||||
// --------------
|
||||
// mtmsr (IR builder version): move RS into the machine state register, then
// end the block with an unconditional branch to the next instruction so the
// new MSR state takes effect before any further translated code runs.
void Jit64::mtmsr(UGeckoInstruction inst)
{
	ibuild.EmitStoreMSR(ibuild.EmitLoadGReg(inst.RS));
	ibuild.EmitBranchUncond(ibuild.EmitIntConst(js.compilerPC + 4));
}
|
||||
// ==============
|
||||
|
||||
|
||||
// mfmsr (IR builder version): copy the machine state register into RD.
void Jit64::mfmsr(UGeckoInstruction inst)
{
	ibuild.EmitStoreGReg(ibuild.EmitLoadMSR(), inst.RD);
}
|
||||
|
||||
// mftb: move-from-time-base. Delegates to mfspr, which shares the same
// split SPR-number encoding; the time-base SPRs currently hit mfspr's
// default case and thus fall back to the interpreter.
void Jit64::mftb(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	mfspr(inst);
}
|
||||
|
||||
// mfcr: assemble the architectural 32-bit CR value from the eight 4-bit
// cr_fast fields (cr_fast[0] ends up in the most significant nibble) and
// store the result in RD.
void Jit64::mfcr(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;
	// USES_CR
	int d = inst.RD;
	gpr.LoadToX64(d, false, true);
	// The 8-bit load leaves EAX bits 8-31 stale, but the seven 4-bit shifts
	// below (28 bits total) push all of that garbage past bit 31, so the
	// final 32-bit value is correct.
	MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
	SHL(32, R(EAX), Imm8(4));
	for (int i = 1; i < 7; i++) {
		// OR in the next field at the bottom, then make room for another.
		OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
		SHL(32, R(EAX), Imm8(4));
	}
	OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
	MOV(32, gpr.R(d), R(EAX));
}
|
||||
|
||||
// mtcrf: move fields of RS into the condition register. Only the full-mask
// case (CRM == 0xFF, i.e. mtcr) is JITed: each 4-bit nibble of RS is
// extracted into the corresponding cr_fast[] byte. Partial masks fall back
// to the interpreter; the code after that 'return' is dead, kept as a TODO
// for the old packed-CR model.
void Jit64::mtcrf(UGeckoInstruction inst)
{
	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
	{Default(inst); return;} // turn off from debugger
	INSTRUCTION_START;

	// USES_CR
	u32 mask = 0;   // only used by the dead partial-mask path below
	u32 crm = inst.CRM;
	if (crm == 0xFF) {
		gpr.FlushLockX(ECX);
		MOV(32, R(EAX), gpr.R(inst.RS));
		for (int i = 0; i < 8; i++) {
			// cr_fast[i] = (RS >> (28 - 4*i)) & 0xF
			MOV(32, R(ECX), R(EAX));
			SHR(32, R(ECX), Imm8(28 - (i * 4)));
			AND(32, R(ECX), Imm32(0xF));
			MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
		}
		gpr.UnlockAllX();
	} else {
		Default(inst);
		return;

		// Dead code from here down (unreachable after the return above).
		// TODO: translate this to work in new CR model.
		for (int i = 0; i < 8; i++) {
			if (crm & (1 << i))
				mask |= 0xF << (i*4);
		}
		MOV(32, R(EAX), gpr.R(inst.RS));
		MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
		AND(32, R(EAX), Imm32(mask));
		AND(32, R(ECX), Imm32(~mask));
		OR(32, R(EAX), R(ECX));
		MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
	}
}
|
|
@ -0,0 +1,161 @@
|
|||
// Copyright (C) 2003-2008 Dolphin Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official SVN repository and contact information can be found at
|
||||
// http://code.google.com/p/dolphin-emu/
|
||||
|
||||
#include "Common.h"
|
||||
#include "Thunk.h"
|
||||
|
||||
#include "../PowerPC.h"
|
||||
#include "../../Core.h"
|
||||
#include "../../HW/GPFifo.h"
|
||||
#include "../../HW/CommandProcessor.h"
|
||||
#include "../../HW/PixelEngine.h"
|
||||
#include "../../HW/Memmap.h"
|
||||
#include "../PPCTables.h"
|
||||
#include "x64Emitter.h"
|
||||
#include "ABI.h"
|
||||
|
||||
#include "Jit.h"
|
||||
#include "JitCache.h"
|
||||
#include "JitAsm.h"
|
||||
#include "JitRegCache.h"
|
||||
|
||||
// Emits code that clears the carry (CA) bit in the emulated XER register.
void Jit64::JitClearCA()
{
	AND(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(~XER_CA_MASK)); //XER.CA = 0
}
|
||||
|
||||
// Emits code that sets the carry (CA) bit in the emulated XER register.
void Jit64::JitSetCA()
{
	OR(32, M(&PowerPC::ppcState.spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1
}
|
||||
|
||||
// Emits a "fast path" load (no address-translation / MMIO check) of
// accessSize bits from emulated memory at [reg_addr + offset] into
// reg_value, byte-swapping from PPC big-endian and zero- or sign-extending
// to 32 bits. On x86-32 the address is masked and Memory::base added
// explicitly; on x86-64 RBX is used as the memory base register.
// Note: for accessSize == 32 the signExtend flag is irrelevant (the value
// already fills the register).
void Jit64::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
#ifdef _M_IX86
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
	MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
#else
	MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
#endif
	if (accessSize == 32)
	{
		BSWAP(32, reg_value);
	}
	else if (accessSize == 16)
	{
		// Swap all 32 bits, which parks the 16-bit value in the top half,
		// then shift it back down — arithmetic shift doubles as the sign
		// extension.
		BSWAP(32, reg_value);
		if (signExtend)
			SAR(32, R(reg_value), Imm8(16));
		else
			SHR(32, R(reg_value), Imm8(16));
	} else if (signExtend) {
		// TODO: bake 8-bit into the original load.
		MOVSX(32, accessSize, reg_value, R(reg_value));
	}
}
|
||||
|
||||
// Emits a load of accessSize bits from [reg + offset] into EAX. Addresses
// with any of bits 0x0C000000 set may be MMIO/uncached and are routed
// through the Memory::Read_U* helpers (wrapped in thunks that preserve
// registers); all other addresses take the fast UnsafeLoadRegToReg path.
// Clobbers reg (offset is added into it) and EAX.
void Jit64::SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
{
	if (offset)
		ADD(32, R(reg), Imm32((u32)offset));
	TEST(32, R(reg), Imm32(0x0C000000));
	FixupBranch argh = J_CC(CC_Z);
	// Slow path: call into the memory system.
	switch (accessSize)
	{
	case 32: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
	case 16: ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
	case 8:  ABI_CallFunctionR(thunks.ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
	}
	if (signExtend && accessSize < 32) {
		// Need to sign extend values coming from the Read_U* functions.
		MOVSX(32, accessSize, EAX, R(EAX));
	}
	FixupBranch arg2 = J();
	// Fast path: direct read from the memory view.
	SetJumpTarget(argh);
	UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
	SetJumpTarget(arg2);
}
|
||||
|
||||
// Emits a "fast path" store (no address-translation / MMIO check) of
// accessSize bits from reg_value to emulated memory at [reg_addr + offset],
// byte-swapping to PPC big-endian first. Destroys reg_value (it gets
// byte-swapped in place) and, on x86-32, masks reg_addr in place.
void Jit64::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
{
	if (accessSize == 8 && reg_value >= 4) {
		// NOTE(review): presumably because an 8-bit store needs a
		// byte-addressable register encoding (AL..BL without REX) —
		// registers >= 4 can't be encoded on x86-32.
		PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
	}
	BSWAP(accessSize, reg_value);
#ifdef _M_IX86
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
	MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#else
	MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#endif
}
|
||||
|
||||
// Destroys both arg registers
|
||||
void Jit64::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
|
||||
{
|
||||
if (offset)
|
||||
ADD(32, R(reg_addr), Imm32(offset));
|
||||
TEST(32, R(reg_addr), Imm32(0x0C000000));
|
||||
FixupBranch argh = J_CC(CC_Z);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), reg_value, reg_addr); break;
|
||||
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), reg_value, reg_addr); break;
|
||||
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), reg_value, reg_addr); break;
|
||||
}
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
|
||||
SetJumpTarget(arg2);
|
||||
}
|
||||
|
||||
// Emits a store of arg (accessSize bits) to a compile-time-constant RAM
// address. No byte swap and no MMIO check — callers must handle both.
// On x86-64 the address is masked and applied as a displacement off the
// RBX memory base; on x86-32 it is resolved to an absolute host address.
void Jit64::WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
{
#ifdef _M_X64
	MOV(accessSize, MDisp(RBX, address & 0x3FFFFFFF), arg);
#else
	MOV(accessSize, M((void*)(Memory::base + (address & Memory::MEMVIEW32_MASK))), arg);
#endif
}
|
||||
|
||||
// Emits a store of the single-precision float in xmm_reg to a compile-time-
// constant RAM address.
// NOTE(review): unlike WriteToConstRamAddress, the x86-64 path does not
// mask the address before adding it to RBX — confirm callers only pass
// addresses the memory view can absorb unmasked.
void Jit64::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
{
#ifdef _M_X64
	MOV(32, R(RAX), Imm32(address));
	MOVSS(MComplex(RBX, RAX, 1, 0), xmm_reg);
#else
	MOVSS(M((void*)((u32)Memory::base + (address & Memory::MEMVIEW32_MASK))), xmm_reg);
#endif
}
|
||||
|
||||
// Rounds the scalar double in xmm to single precision and back, emulating
// the PPC single-precision result rounding. Emitted only when the
// accurateSinglePrecision JIT option is enabled.
void Jit64::ForceSinglePrecisionS(X64Reg xmm) {
	// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
	if (jo.accurateSinglePrecision)
	{
		CVTSD2SS(xmm, R(xmm));
		CVTSS2SD(xmm, R(xmm));
	}
}
|
||||
|
||||
// Rounds both packed doubles in xmm to single precision and back, emulating
// the PPC paired-single result rounding. Emitted only when the
// accurateSinglePrecision JIT option is enabled.
void Jit64::ForceSinglePrecisionP(X64Reg xmm) {
	// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
	if (jo.accurateSinglePrecision)
	{
		CVTPD2PS(xmm, R(xmm));
		CVTPS2PD(xmm, R(xmm));
	}
}
|
Loading…
Reference in New Issue