Phew, Beyond Good and Evil is fixed in 64-bit (no longer setting DAZ). Plus some cleanup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@183 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-12 23:27:36 +00:00
parent ac6d34cd10
commit 70a6054d3c
18 changed files with 97 additions and 116 deletions

View File

@ -59,6 +59,7 @@ void Thunk_Init()
MOV(64, M(saved_gpr_state + 48), R(RSI));
MOV(64, M(saved_gpr_state + 56), R(RDI));
#endif
MOV(64, M(saved_gpr_state + 64), R(RBX));
#else
MOV(32, M(saved_gpr_state + 0 ), R(RCX));
MOV(32, M(saved_gpr_state + 4 ), R(RDX));
@ -78,6 +79,7 @@ void Thunk_Init()
MOV(64, R(RSI), M(saved_gpr_state + 48));
MOV(64, R(RDI), M(saved_gpr_state + 56));
#endif
MOV(64, R(RBX), M(saved_gpr_state + 64));
#else
MOV(32, R(RCX), M(saved_gpr_state + 0 ));
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
@ -128,7 +130,6 @@ void *ProtectFunction(void *function, int num_params)
#endif
RET();
#else
//INT3();
CALL((void*)save_regs);
// Re-push parameters from previous stack frame
for (int i = 0; i < num_params; i++) {

View File

@ -189,6 +189,7 @@
StringPooling="true"
RuntimeLibrary="0"
BufferSecurityCheck="false"
EnableEnhancedInstructionSet="2"
FloatingPointModel="2"
UsePrecompiledHeader="2"
AssemblerListingLocation="$(IntDir)\"

View File

@ -131,9 +131,19 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress)
if ((_uValue != 0x80000001) && (_uValue != 0x80000005)) // DVDLowReset
{
TCHAR szTemp[256];
sprintf(szTemp, "Game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue);
PanicAlert(szTemp);
switch (_uValue) {
case 3:
PanicAlert("Game wants to go to memory card manager. Since BIOS is being HLE:d - can't do that.\n"
"We might pop up a fake memcard manager here and then reset the game in the future :)\n");
break;
default:
{
TCHAR szTemp[256];
sprintf(szTemp, "Game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue);
PanicAlert(szTemp);
}
break;
}
}
}
break;

View File

@ -54,8 +54,10 @@ void UpdateSSEState(int round, bool daz)
// Also handle denormals as zero (FZ + DAZ)
csr &= ~0x8020;
if (daz)
csr |= 0x8020;
// SETTING DAZ KILLS BEYOND GOOD AND EVIL
// if (daz)
// csr |= 0x8020;
_mm_setcsr(csr);
}
@ -249,7 +251,7 @@ void CInterpreter::mfspr(UGeckoInstruction _inst)
//(or if it's full, not sure)
//MessageBox(NULL, "Read from SPR_WPAR", "????", MB_OK);
//Paper Mario reads here, this should be investigated ... TODO(ector)
bool wpar_empty = false;
bool wpar_empty = true;
if (!wpar_empty)
rSPR(iIndex) |= 1; // BNE = buffer not empty
else

View File

@ -275,7 +275,13 @@ namespace Jit64
static FILE *f = 0;
if (ImHereLog) {
if (!f)
f = fopen("log.txt", "w");
{
#ifdef _M_X64
f = fopen("log64.txt", "w");
#else
f = fopen("log32.txt", "w");
#endif
}
fprintf(f, "%08x\n", PC);
}
if (been_here.find(PC) != been_here.end()) {
@ -288,13 +294,6 @@ namespace Jit64
been_here[PC] = 1;
}
void FlushRegCaches()
{
//Flush allocators
gpr.End();
fpr.End();
}
void WriteExit(u32 destination, int exit_num)
{
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
@ -367,8 +366,7 @@ namespace Jit64
if (js.fpa.any)
{
//This block uses FPU - needs to add FP exception bailout
// TODO(ector): change to large J_CC(CC_Z) when verified that it still works
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1<<13)); //Test FP bit
TEST(32, M(&PowerPC::ppcState.msr), Imm32(1 << 13)); //Test FP enabled bit
FixupBranch b1 = J_CC(CC_NZ);
MOV(32, M(&PC), Imm32(js.blockStart));
JMP(Asm::fpException, true);
@ -382,18 +380,20 @@ namespace Jit64
js.downcountAmount = js.st.numCycles;
js.blockSize = size;
//Okay, let's emit instructions
//Version 1 - Don't do intra branch analysis
// Translate instructions
for (int i = 0; i < (int)size; i++)
{
//gpr.Flush(js.op);
//if (PPCTables::UsesFPU(_inst))
//fpr.Flush(js.op);
// gpr.Flush(FLUSH_ALL);
// if (PPCTables::UsesFPU(_inst))
// fpr.Flush(FLUSH_ALL);
js.compilerPC = ops[i].address;
js.op = &ops[i];
js.instructionNumber = i;
if (i == (int)size - 1) js.isLastInstruction = true;
PPCTables::CompileInstruction(ops[i].inst);
// if (js.isLastInstruction)
PPCTables::CompileInstruction(ops[i].inst);
// else
// Default(ops[i].inst);
gpr.SanityCheck();
fpr.SanityCheck();
}

View File

@ -65,7 +65,7 @@ namespace Jit64
struct JitOptions
{
bool optimizeStack;
bool noAssumeFPLoadFromMem;
bool assumeFPLoadFromMem;
bool enableBlocklink;
bool fpAccurateFlags;
bool enableFastMem;
@ -84,8 +84,6 @@ namespace Jit64
void HLEFunction(UGeckoInstruction _inst);
void FlushRegCaches();
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
@ -129,6 +127,7 @@ namespace Jit64
void fp_arith_s(UGeckoInstruction inst);
void fcmpx(UGeckoInstruction inst);
void fmrx(UGeckoInstruction inst);
void cmpli(UGeckoInstruction inst);
void cmpi(UGeckoInstruction inst);

View File

@ -83,7 +83,7 @@ namespace Jit64
#else
jo.enableFastMem = false;
#endif
jo.noAssumeFPLoadFromMem = false;
jo.assumeFPLoadFromMem = true;
jo.fpAccurateFlags = true;
codeCache = (u8*)AllocateExecutableMemory(CODE_SIZE);
@ -216,7 +216,7 @@ namespace Jit64
if (!blocks)
return -1;
u32 code = Memory::ReadFast32(addr);
if ((code>>26) == JIT_OPCODE)
if ((code >> 26) == JIT_OPCODE)
{
//jitted code
unsigned int blockNum = code & 0x03FFFFFF;

View File

@ -173,29 +173,6 @@ namespace Jit64
}
}
void GPRRegCache::GetReadyForOp(int dest, int source)
{
if (regs[dest].location.CanDoOpWith(regs[source].location))
return;
LoadToX64(dest);
if (!regs[dest].location.CanDoOpWith(regs[source].location))
{
_assert_msg_(DYNA_REC, 0, "GetReadyForOp failed");
}
}
void FPURegCache::GetReadyForOp(int dest, int source)
{
if (regs[dest].location.IsSimpleReg())
return;
LoadToX64(dest); //all fp ops have reg as destination
if (!regs[dest].location.CanDoOpWith(regs[source].location))
{
_assert_msg_(DYNA_REC, 0, "GetReadyForOp failed");
}
}
bool GPRRegCache::IsXRegVolatile(X64Reg reg) const
{
#ifdef _WIN32

View File

@ -87,7 +87,6 @@ namespace Jit64
}
virtual void Flush(FlushMode mode);
virtual void Flush(PPCAnalyst::CodeOp *op) {Flush(FLUSH_ALL);}
void End() {Flush(FLUSH_ALL);}
void SanityCheck() const;
void KillImmediate(int preg);
@ -98,8 +97,6 @@ namespace Jit64
virtual void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true) = 0;
virtual void StoreFromX64(int preg) = 0;
virtual void GetReadyForOp(int dest, int source) = 0;
const OpArg &R(int preg) const {return regs[preg].location;}
X64Reg RX(int preg) const
{
@ -132,7 +129,6 @@ namespace Jit64
void Start(PPCAnalyst::BlockRegStats &stats);
void LoadToX64(int preg, bool doLoad = true, bool makeDirty = true);
void StoreFromX64(int preg);
void GetReadyForOp(int dest, int source);
OpArg GetDefaultLocation(int reg) const;
const int *GetAllocationOrder(int &count);
bool IsXRegVolatile(X64Reg reg) const;
@ -148,7 +144,6 @@ namespace Jit64
void StoreFromX64(int preg);
const int *GetAllocationOrder(int &count);
bool IsXRegVolatile(X64Reg reg) const;
void GetReadyForOp(int dest, int source);
OpArg GetDefaultLocation(int reg) const;
};

View File

@ -41,13 +41,15 @@ namespace Jit64
{
void sc(UGeckoInstruction _inst)
{
FlushRegCaches();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
WriteExceptionExit(EXCEPTION_SYSCALL);
}
void rfi(UGeckoInstruction _inst)
{
FlushRegCaches();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
//Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR.
//MSR[13] is set to 0.
const int mask = 0x87C0FF73;
@ -69,7 +71,8 @@ namespace Jit64
{
if (inst.LK)
MOV(32, M(&LR), Imm32(js.compilerPC + 4));
FlushRegCaches();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
if (js.isLastInstruction)
{
@ -98,7 +101,8 @@ namespace Jit64
{
_assert_msg_(DYNA_REC, js.isLastInstruction, "bcx not last instruction of block");
FlushRegCaches();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
CCFlags branch;
@ -178,7 +182,8 @@ namespace Jit64
void bcctrx(UGeckoInstruction inst)
{
FlushRegCaches();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
bool fastway = true;
@ -216,7 +221,8 @@ namespace Jit64
void bclrx(UGeckoInstruction inst)
{
FlushRegCaches();
gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
//Special case BLR
if (inst.hex == 0x4e800020)
{

View File

@ -27,7 +27,7 @@
#ifdef _WIN32
#define INSTRUCTION_START
//#define INSTRUCTION_START Default(inst); return;
// #define INSTRUCTION_START Default(inst); return;
#else
#define INSTRUCTION_START Default(inst); return;
#endif
@ -43,12 +43,12 @@ namespace Jit64
fpr.Lock(d, a, b);
if (d == a)
{
fpr.GetReadyForOp(d, b);
fpr.LoadToX64(d, true);
op(fpr.RX(d), fpr.R(b));
}
else if (d == b && reversible)
{
fpr.GetReadyForOp(d, a);
fpr.LoadToX64(d, true);
op(fpr.RX(d), fpr.R(a));
}
else if (a != d && b != d)
@ -56,7 +56,6 @@ namespace Jit64
// Sources different from d, can use rather quick solution
fpr.LoadToX64(d, !dupe);
MOVSD(fpr.RX(d), fpr.R(a));
fpr.GetReadyForOp(d, b);
op(fpr.RX(d), fpr.R(b));
}
else if (b != d)
@ -90,10 +89,10 @@ namespace Jit64
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &DIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &SUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &ADDSD); break; //add
case 23://sel
case 23: //sel
Default(inst);
break;
case 24://res
case 24: //res
Default(inst);
break;
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &MULSD); break; //mul
@ -110,7 +109,7 @@ namespace Jit64
int c = inst.FC;
int d = inst.FD;
fpr.Lock(a,b,c,d);
fpr.Lock(a, b, c, d);
MOVSD(XMM0, fpr.R(a));
switch (inst.SUBOP5)
{
@ -144,9 +143,10 @@ namespace Jit64
void fmrx(UGeckoInstruction inst)
{
INSTRUCTION_START;
Default(inst); return;
int d = inst.FD;
int b = inst.FB;
fpr.LoadToX64(d, true); // we don't want to destroy the high bit
MOVSD(fpr.RX(d), fpr.R(b));
}
void fcmpx(UGeckoInstruction inst)

View File

@ -45,7 +45,7 @@ namespace Jit64
void regimmop(int d, int a, bool binary, u32 value, Operation doop, void(*op)(int, const OpArg&, const OpArg&), bool Rc = false, bool carry = false)
{
gpr.Lock(d,a);
gpr.Lock(d, a);
if (a || binary || carry) // yeh nasty special case addic
{
if (a == d)
@ -72,8 +72,9 @@ namespace Jit64
GenerateCarry(EAX);
}
}
else if (doop == Add && !carry)
else if (doop == Add)
{
// a == 0, which for these instructions imply value = 0
gpr.SetImmediate32(d, value);
}
else
@ -82,6 +83,7 @@ namespace Jit64
}
if (Rc)
{
// Todo - special case immediates.
MOV(32, R(EAX), gpr.R(d));
CALL((u8*)Asm::computeRc);
}
@ -97,10 +99,10 @@ namespace Jit64
case 14: regimmop(d, a, false, (u32)(s32)inst.SIMM_16, Add, ADD); break; //addi
case 15: regimmop(d, a, false, (u32)inst.SIMM_16 << 16, Add, ADD); break; //addis
case 24:
if (a == 0 && s == 0 && inst.UIMM == 0) //check for nop
{NOP(); return;} //make the nop visible.. or turn to int3? we shouldn't get nops
if (a == 0 && s == 0 && inst.UIMM == 0 && !inst.Rc) //check for nop
{NOP(); return;} //make the nop visible in the generated code. not much use but interesting if we see one.
regimmop(a, s, true, inst.UIMM, Or, OR);
break;//ori
break; //ori
case 25: regimmop(a, s, true, inst.UIMM << 16, Or, OR, false); break;//oris
case 28: regimmop(a, s, true, inst.UIMM, And, AND, true); break;
case 29: regimmop(a, s, true, inst.UIMM << 16, And, AND, true); break;
@ -289,7 +291,8 @@ namespace Jit64
void extsbx(UGeckoInstruction inst)
{
INSTRUCTION_START;
int a = inst.RA, s = inst.RS;
int a = inst.RA,
s = inst.RS;
gpr.LoadToX64(a, a == s, true);
gpr.KillImmediate(s);
MOV(32, R(EAX), gpr.R(s));
@ -319,14 +322,11 @@ namespace Jit64
int a = inst.RA, d = inst.RD;
gpr.FlushLockX(ECX);
gpr.Lock(a, d);
if (a != d)
gpr.LoadToX64(d, false, true);
else
gpr.LoadToX64(a, true, true);
gpr.LoadToX64(d, a == d, true);
int imm = inst.SIMM_16;
MOV(32, R(EAX), gpr.R(a));
NOT(32, R(EAX));
ADD(32, R(EAX), Imm32(imm+1));
ADD(32, R(EAX), Imm32(imm + 1));
MOV(32, gpr.R(d), R(EAX));
GenerateCarry(ECX);
gpr.UnlockAll();
@ -571,7 +571,7 @@ namespace Jit64
{
SHL(32, gpr.R(a), Imm8(inst.SH));
}
else if (inst.ME==31 && inst.MB==32-inst.SH)
else if (inst.ME == 31 && inst.MB == 32 - inst.SH)
{
SHR(32, gpr.R(a), Imm8(inst.MB));
}
@ -732,7 +732,6 @@ namespace Jit64
}
}
// another crazy instruction :(
void srawix(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -775,6 +774,7 @@ namespace Jit64
}
}
// count leading zeroes
void cntlzwx(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -791,14 +791,14 @@ namespace Jit64
FixupBranch gotone = J_CC(CC_NZ);
MOV(32, gpr.R(a), Imm32(63));
SetJumpTarget(gotone);
XOR(32, gpr.R(a), Imm8(0x1f)); //flip order
XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
gpr.UnlockAll();
if (inst.Rc)
{
MOV(32, R(EAX), gpr.R(a));
CALL((u8*)Asm::computeRc);
//Check PPC manual too
// TODO: Check PPC manual too
}
}

View File

@ -110,12 +110,12 @@ namespace Jit64
if (offset)
ADD(32, R(reg_addr), Imm32(offset));
TEST(32, R(reg_addr), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
FixupBranch unsafe_addr = J_CC(CC_NZ);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
FixupBranch arg2 = J();
SetJumpTarget(argh);
FixupBranch skip_call = J();
SetJumpTarget(unsafe_addr);
ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
SetJumpTarget(arg2);
SetJumpTarget(skip_call);
}
void lbzx(UGeckoInstruction inst)
@ -259,11 +259,7 @@ namespace Jit64
int s = inst.RS;
int a = inst.RA;
bool update = false;
if (inst.OPCD & 1)
{
update = true;
}
bool update = inst.OPCD & 1;
s32 offset = (s32)(s16)inst.SIMM_16;
if (a || update)
@ -356,7 +352,7 @@ namespace Jit64
MOV(32, gpr.R(a), R(ABI_PARAM2));
}
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
FixupBranch unsafe_addr = J_CC(CC_NZ);
if (accessSize == 32)
BSWAP(32, ABI_PARAM1);
else if (accessSize == 16)
@ -371,15 +367,15 @@ namespace Jit64
AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
#endif
FixupBranch arg2 = J();
SetJumpTarget(argh);
FixupBranch skip_call = J();
SetJumpTarget(unsafe_addr);
switch (accessSize)
{
case 32: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break;
case 16: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break;
case 8: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break;
}
SetJumpTarget(arg2);
SetJumpTarget(skip_call);
gpr.UnlockAll();
gpr.UnlockAllX();
}
@ -389,12 +385,13 @@ namespace Jit64
}
}
// A few games use these heavily.
// A few games use these heavily in video codecs.
void lmw(UGeckoInstruction inst)
{
INSTRUCTION_START;
Default(inst);
return;
/*
/// BUGGY
//return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
gpr.Flush(FLUSH_ALL);
@ -413,7 +410,7 @@ namespace Jit64
ADD(32, R(ECX), Imm8(1));
CMP(32, R(ECX), Imm8(32));
J_CC(CC_NE, loopPtr, false);
gpr.UnlockAllX();
gpr.UnlockAllX();*/
}
void stmw(UGeckoInstruction inst)

View File

@ -76,7 +76,7 @@ void lfs(UGeckoInstruction inst)
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a));
if (!jo.noAssumeFPLoadFromMem)
if (jo.assumeFPLoadFromMem)
{
UnsafeLoadRegToReg(ABI_PARAM1, EAX, 32, offset, false);
}
@ -174,7 +174,6 @@ void stfd(UGeckoInstruction inst)
void stfs(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
bool update = inst.OPCD & 1;
int s = inst.RS;
int a = inst.RA;

View File

@ -37,7 +37,7 @@
#include "JitAsm.h"
#include "JitRegCache.h"
//#define INSTRUCTION_START Default(inst); return;
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
#ifdef _M_IX86

View File

@ -114,12 +114,12 @@ namespace Jit64
if (d == a)
{
fpr.GetReadyForOp(d, b);
fpr.LoadToX64(d, true);
op(fpr.RX(d), fpr.R(b));
}
else if (d == b && reversible)
{
fpr.GetReadyForOp(d, a);
fpr.LoadToX64(d, true);
op(fpr.RX(d), fpr.R(a));
}
else if (a != d && b != d)
@ -127,7 +127,6 @@ namespace Jit64
//sources different from d, can use rather quick solution
fpr.LoadToX64(d, false);
MOVAPD(fpr.RX(d), fpr.R(a));
fpr.GetReadyForOp(d, b);
op(fpr.RX(d), fpr.R(b));
}
else if (b != d)

View File

@ -27,14 +27,8 @@
#include "JitCache.h"
#include "JitRegCache.h"
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
#ifdef _M_IX86
#define DISABLE_32BIT Default(inst); return;
#else
#define DISABLE_32BIT ;
#endif
// #define INSTRUCTION_START Default(inst); return;
namespace Jit64
{
@ -83,6 +77,7 @@ namespace Jit64
int d = inst.RD;
switch (iIndex)
{
// case SPR_DEC:
//MessageBox(NULL, "Read from DEC", "????", MB_OK);
//break;

View File

@ -425,7 +425,7 @@ GekkoOPTemplate table63[] =
{0, CInterpreter::fcmpu, Jit64::fcmpx, {"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}},
{14, CInterpreter::fctiwx, Jit64::Default, {"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}},
{15, CInterpreter::fctiwzx, Jit64::Default, {"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, CInterpreter::fmrx, Jit64::Default, {"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{72, CInterpreter::fmrx, Jit64::fmrx, {"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},
{136, CInterpreter::fnabsx, Jit64::Default, {"fnabsx", OPTYPE_FPU, FL_RC_BIT_F}},
{40, CInterpreter::fnegx, Jit64::Default, {"fnegx", OPTYPE_FPU, FL_RC_BIT_F}},
{12, CInterpreter::frspx, Jit64::Default, {"frspx", OPTYPE_FPU, FL_RC_BIT_F}},