Lots of various changes. CPU detect fix. Maybe a minor speed increase. CPU bugs remain.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@180 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-12 20:05:45 +00:00
parent 29102ecbc6
commit 575bdd9166
27 changed files with 400 additions and 192 deletions

View File

@ -659,6 +659,14 @@
RelativePath=".\Src\Thread.h" RelativePath=".\Src\Thread.h"
> >
</File> </File>
<File
RelativePath=".\Src\Thunk.cpp"
>
</File>
<File
RelativePath=".\Src\Thunk.h"
>
</File>
<File <File
RelativePath=".\Src\Timer.cpp" RelativePath=".\Src\Timer.cpp"
> >

View File

@ -28,7 +28,7 @@
// * Caller fixes stack after call // * Caller fixes stack after call
// * function subtract from stack for local storage only. // * function subtract from stack for local storage only.
// Scratch: EAX ECX EDX // Scratch: EAX ECX EDX
// Callee-save: EBX ESI EDI EBP // Callee-save: EBX ESI EDI EBP
// Parameters: - // Parameters: -
// Windows 64-bit // Windows 64-bit
@ -103,6 +103,11 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack();
void ABI_PushAllCallerSavedRegsAndAdjustStack(); void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack(); void ABI_PopAllCallerSavedRegsAndAdjustStack();
// Number of XMM registers the build target exposes:
// 8 when compiling for 32-bit x86 (_M_IX86 defined), 16 otherwise.
inline int ABI_GetNumXMMRegs()
{
#ifdef _M_IX86
	const int xmm_reg_count = 8;
#else
	const int xmm_reg_count = 16;
#endif
	return xmm_reg_count;
}
#endif // _JIT_ABI_H #endif // _JIT_ABI_H

View File

@ -21,7 +21,7 @@
//#include <config/i386/cpuid.h> //#include <config/i386/cpuid.h>
#include <xmmintrin.h> #include <xmmintrin.h>
void __cpuid(int info[4], int x) {} void __cpuid(int info[4], int x) {memset(info, 0, 4 * sizeof(int));} // info decays to int*, so sizeof(info) would only cover one pointer, not all four ints
#endif #endif
@ -72,94 +72,94 @@ void CPUInfoStruct::Detect()
isAMD = true; isAMD = true;
} }
// Get the information associated with each valid Id if (nIds >= 2)
for (unsigned int i = 0; i <= nIds; ++i)
{ {
__cpuid(CPUInfo, i); // Get the information associated with each valid Id
__cpuid(CPUInfo, 1);
nSteppingID = CPUInfo[0] & 0xf;
nModel = (CPUInfo[0] >> 4) & 0xf;
nFamily = (CPUInfo[0] >> 8) & 0xf;
nProcessorType = (CPUInfo[0] >> 12) & 0x3;
nExtendedmodel = (CPUInfo[0] >> 16) & 0xf;
nExtendedfamily = (CPUInfo[0] >> 20) & 0xff;
nBrandIndex = CPUInfo[1] & 0xff;
nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8;
nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff;
bSSE3 = (CPUInfo[2] & 0x1) || false;
bSSSE3 = (CPUInfo[2] & 0x200) || false;
bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false;
bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false;
bThermalMonitor2 = (CPUInfo[2] & 0x100) || false;
nFeatureInfo = CPUInfo[3];
// Interpret CPU feature information. if (CPUInfo[2] & (1 << 23))
if (i == 1)
{ {
nSteppingID = CPUInfo[0] & 0xf; bPOPCNT = true;
nModel = (CPUInfo[0] >> 4) & 0xf; }
nFamily = (CPUInfo[0] >> 8) & 0xf;
nProcessorType = (CPUInfo[0] >> 12) & 0x3;
nExtendedmodel = (CPUInfo[0] >> 16) & 0xf;
nExtendedfamily = (CPUInfo[0] >> 20) & 0xff;
nBrandIndex = CPUInfo[1] & 0xff;
nCLFLUSHcachelinesize = ((CPUInfo[1] >> 8) & 0xff) * 8;
nAPICPhysicalID = (CPUInfo[1] >> 24) & 0xff;
bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false;
bSSSE3NewInstructions = (CPUInfo[2] & 0x200) || false;
bMONITOR_MWAIT = (CPUInfo[2] & 0x8) || false;
bCPLQualifiedDebugStore = (CPUInfo[2] & 0x10) || false;
bThermalMonitor2 = (CPUInfo[2] & 0x100) || false;
nFeatureInfo = CPUInfo[3];
if (CPUInfo[2] & (1 << 23)) if (CPUInfo[2] & (1 << 19))
{ {
bPOPCNT = true; bSSE4_1 = true;
} }
if (CPUInfo[2] & (1 << 19)) if (CPUInfo[2] & (1 << 20))
{ {
bSSE4_1 = true; bSSE4_2 = true;
}
if (CPUInfo[2] & (1 << 20))
{
bSSE4_2 = true;
}
} }
} }
// Calling __cpuid with 0x80000000 as the InfoType argument if (bSSE3)
// gets the number of valid extended IDs.
__cpuid(CPUInfo, 0x80000000);
nExIds = CPUInfo[0];
memset(CPUBrandString, 0, sizeof(CPUBrandString));
// Get the information associated with each extended ID.
for (unsigned int i = 0x80000000; i <= nExIds; ++i)
{ {
__cpuid(CPUInfo, i); // Only SSE3 CPU-s support extended infotypes
// Calling __cpuid with 0x80000000 as the InfoType argument
// gets the number of valid extended IDs.
__cpuid(CPUInfo, 0x80000000);
nExIds = CPUInfo[0];
memset(CPUBrandString, 0, sizeof(CPUBrandString));
// Interpret CPU brand string and cache information. // Get the information associated with each extended ID.
if (i == 0x80000001) for (unsigned int i = 0x80000000; i <= nExIds; ++i)
{ {
// This block seems bugged. __cpuid(CPUInfo, i);
nFeatureInfo2 = CPUInfo[1]; // ECX
bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false;
bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false;
bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false;
bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false;
CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false; // Interpret CPU brand string and cache information.
} if (i == 0x80000001)
else if (i == 0x80000002) {
{ // This block seems bugged.
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo)); nFeatureInfo2 = CPUInfo[1]; // ECX
} bSSE5 = (nFeatureInfo2 & (1 << 11)) ? true : false;
else if (i == 0x80000003) bLZCNT = (nFeatureInfo2 & (1 << 5)) ? true : false;
{ bSSE4A = (nFeatureInfo2 & (1 << 6)) ? true : false;
memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo)); bLAHFSAHF64 = (nFeatureInfo2 & (1 << 0)) ? true : false;
}
else if (i == 0x80000004) CPU64bit = (CPUInfo[2] & (1 << 29)) ? true : false;
{ }
memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo)); else if (i == 0x80000002)
} {
else if (i == 0x80000006) memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
{ }
nCacheLineSize = CPUInfo[2] & 0xff; else if (i == 0x80000003)
nL2Associativity = (CPUInfo[2] >> 12) & 0xf; {
nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff; memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
} }
else if (i == 0x80000008) else if (i == 0x80000004)
{ {
int numLSB = (CPUInfo[2] >> 12) & 0xF; memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
numCores = 1 << numLSB; }
//int coresPerDie = CPUInfo[2] & 0xFF; else if (i == 0x80000006)
// numCores = coresPerDie; {
nCacheLineSize = CPUInfo[2] & 0xff;
nL2Associativity = (CPUInfo[2] >> 12) & 0xf;
nCacheSizeK = (CPUInfo[2] >> 16) & 0xffff;
}
else if (i == 0x80000008)
{
int numLSB = (CPUInfo[2] >> 12) & 0xF;
numCores = 1 << numLSB;
//int coresPerDie = CPUInfo[2] & 0xFF;
// numCores = coresPerDie;
}
} }
} }
@ -222,9 +222,9 @@ void CPUInfoStruct::Detect()
nIds <<= 1; nIds <<= 1;
bFXSAVE_FXRSTOR = (nFeatureInfo & nIds) ? true : false; bFXSAVE_FXRSTOR = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1; nIds <<= 1;
bSSEExtensions = (nFeatureInfo & nIds) ? true : false; bSSE = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1; nIds <<= 1;
bSSE2Extensions = (nFeatureInfo & nIds) ? true : false; bSSE2 = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1; nIds <<= 1;
bSelfSnoop = (nFeatureInfo & nIds) ? true : false; bSelfSnoop = (nFeatureInfo & nIds) ? true : false;
nIds <<= 1; nIds <<= 1;

View File

@ -77,16 +77,16 @@ struct CPUInfoStruct
bool bThermalMonitorandClockCtrl; bool bThermalMonitorandClockCtrl;
bool bMMXTechnology; bool bMMXTechnology;
bool bFXSAVE_FXRSTOR; bool bFXSAVE_FXRSTOR;
bool bSSEExtensions;
bool bSSE2Extensions;
bool bSSE3NewInstructions;
bool bSSSE3NewInstructions;
bool bSelfSnoop; bool bSelfSnoop;
bool bHyper_threadingTechnology; bool bHyper_threadingTechnology;
bool bThermalMonitor; bool bThermalMonitor;
bool bUnknown4; bool bUnknown4;
bool bPendBrkEN; bool bPendBrkEN;
bool bSSE;
bool bSSE2;
bool bSSE3;
bool bSSSE3;
bool bPOPCNT; bool bPOPCNT;
bool bSSE4_1; bool bSSE4_1;
bool bSSE4_2; bool bSSE4_2;

View File

@ -17,6 +17,7 @@ files = ["ABI.cpp",
"PortableSockets.cpp", "PortableSockets.cpp",
"StringUtil.cpp", "StringUtil.cpp",
"TestFramework.cpp", "TestFramework.cpp",
"Thunk.cpp",
"Timer.cpp", "Timer.cpp",
"Thread.cpp", "Thread.cpp",
"x64Emitter.cpp", "x64Emitter.cpp",

View File

@ -0,0 +1,147 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <map>
#include "Common.h"
#include "Thunk.h"
#include "x64Emitter.h"
#include "MemoryUtil.h"
#include "ABI.h"
using namespace Gen;
// Size in bytes of the executable arena that holds the shared save/load
// routines and every generated thunk. Parenthesized so the product stays a
// single operand wherever the macro is expanded.
#define THUNK_ARENA_SIZE (1024*1024*1)

namespace {
// Cache of already generated thunks: wrapped function -> thunk entry point.
static std::map<void *, const u8 *> thunks;
// Global spill areas written by save_regs and read back by load_regs.
// 16 slots of 16 bytes for XMM registers, 16 slots of 8 bytes for GPRs.
u8 GC_ALIGNED32(saved_fp_state[16 * 4 * 4]);
u8 GC_ALIGNED32(saved_gpr_state[16 * 8]);
}

// Start of the executable arena (0 while the thunk system is not initialized).
static u8 *thunk_memory;
// Next free byte inside the arena; advanced every time code is emitted.
static u8 *thunk_code;
// Entry points of the shared routines emitted by Thunk_Init().
static const u8 *save_regs;
static const u8 *load_regs;
// Not referenced by the code in this file; it has external linkage, so it is
// kept in case another translation unit refers to it.
u32 saved_return;
void Thunk_Init()
{
thunk_memory = (u8 *)AllocateExecutableMemory(THUNK_ARENA_SIZE);
thunk_code = thunk_memory;
GenContext ctx(&thunk_code);
save_regs = GetCodePtr();
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS(M(saved_fp_state + i * 16), (X64Reg)(XMM0 + i));
#ifdef _M_X64
MOV(64, M(saved_gpr_state + 0 ), R(RCX));
MOV(64, M(saved_gpr_state + 8 ), R(RDX));
MOV(64, M(saved_gpr_state + 16), R(R8) );
MOV(64, M(saved_gpr_state + 24), R(R9) );
MOV(64, M(saved_gpr_state + 32), R(R10));
MOV(64, M(saved_gpr_state + 40), R(R11));
#ifndef _WIN32
MOV(64, M(saved_gpr_state + 48), R(RSI));
MOV(64, M(saved_gpr_state + 56), R(RDI));
#endif
#else
MOV(32, M(saved_gpr_state + 0 ), R(RCX));
MOV(32, M(saved_gpr_state + 4 ), R(RDX));
#endif
RET();
load_regs = GetCodePtr();
for (int i = 2; i < ABI_GetNumXMMRegs(); i++)
MOVAPS((X64Reg)(XMM0 + i), M(saved_fp_state + i * 16));
#ifdef _M_X64
MOV(64, R(RCX), M(saved_gpr_state + 0 ));
MOV(64, R(RDX), M(saved_gpr_state + 8 ));
MOV(64, R(R8) , M(saved_gpr_state + 16));
MOV(64, R(R9) , M(saved_gpr_state + 24));
MOV(64, R(R10), M(saved_gpr_state + 32));
MOV(64, R(R11), M(saved_gpr_state + 40));
#ifndef _WIN32
MOV(64, R(RSI), M(saved_gpr_state + 48));
MOV(64, R(RDI), M(saved_gpr_state + 56));
#endif
#else
MOV(32, R(RCX), M(saved_gpr_state + 0 ));
MOV(32, R(RDX), M(saved_gpr_state + 4 ));
#endif
RET();
}
void Thunk_Reset()
{
thunks.clear();
thunk_code = thunk_memory;
}
// Tears the thunk system down: drops the thunk bookkeeping, releases the
// executable arena and clears both arena pointers.
void Thunk_Shutdown()
{
	// Clear the thunk map while the arena pointer is still valid.
	Thunk_Reset();

	FreeMemoryPages(thunk_memory, THUNK_ARENA_SIZE);

	// Null pointers mark the system as uninitialized; ProtectFunction()
	// tests thunk_memory for exactly this.
	thunk_memory = 0;
	thunk_code = 0;
}
// Returns the entry point of a generated thunk that calls save_regs, then
// `function`, then load_regs, so that generated code can call `function`
// without losing the registers those routines preserve.
// Thunks are cached per function, so repeated requests return the same pointer.
//
// function   - address of the C/C++ function to wrap.
// num_params - number of 4-byte stack parameters to forward; only used on
//              32-bit builds, where they are re-pushed for the callee.
//              64-bit builds ignore it and forward no stack parameters.
//
// Returns 0 (after a PanicAlert) when the thunk arena has not been allocated.
void *ProtectFunction(void *function, int num_params)
{
	std::map<void *, const u8 *>::iterator iter;
	iter = thunks.find(function);
	if (iter != thunks.end())
		return (void *)iter->second;

	if (!thunk_memory)
	{
		PanicAlert("Trying to protect functions before the emu is started. Bad bad bad.");
		// There is no arena to emit into: carrying on would write the thunk
		// through a null code pointer.
		return 0;
	}

	GenContext gen(&thunk_code);
	const u8 *call_point = GetCodePtr();
	// Make sure to align stack.
#ifdef _M_X64
#ifdef _WIN32
	// 0x28 = 0x20 bytes reserved for the callee + 8 to realign after our own return address.
	SUB(64, R(ESP), Imm8(0x28));
#else
	SUB(64, R(ESP), Imm8(0x8));
#endif
	CALL((void*)save_regs);
	CALL((void*)function);
	CALL((void*)load_regs);
#ifdef _WIN32
	ADD(64, R(ESP), Imm8(0x28));
#else
	ADD(64, R(ESP), Imm8(0x8));
#endif
	RET();
#else
	//INT3();
	CALL((void*)save_regs);
	// Re-push parameters from previous stack frame
	for (int i = 0; i < num_params; i++) {
		// Every PUSH lowers ESP by 4, so the same displacement walks the
		// caller's parameters from last to first; i is only a counter.
		PUSH(32, MDisp(ESP, (num_params) * 4));
	}
	CALL(function);
	// Drop the re-pushed copies again.
	if (num_params)
		ADD(32, R(ESP), Imm8(num_params * 4));
	CALL((void*)load_regs);
	RET();
#endif

	thunks[function] = call_point;
	return (void *)call_point;
}

View File

@ -0,0 +1,39 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _THUNK_H
#define _THUNK_H
// This simple module creates a wrapper around a C/C++ function that saves register state
// (most XMM registers and the volatile general-purpose registers) before entering it, and
// restores it upon exit. This is required to be able to selectively call functions from
// generated code, without incurring the performance hit and the increase in complexity
// that protecting the generated code itself from this problem would mean.
// This process is called thunking.
// There will only ever be one level of thunking on the stack, plus,
// we don't want to pollute the stack, so we store away regs somewhere global.
// NOT THREAD SAFE. This may only be used from the CPU thread.
// Any other thread using this stuff will be FATAL.
// Allocates the executable thunk arena and generates the shared register
// save/restore routines. Must be called before ProtectFunction().
void Thunk_Init();
// Forgets all generated thunks. Pointers previously returned by
// ProtectFunction() must not be used afterwards.
void Thunk_Reset();
// Resets the thunk state and frees the executable arena.
void Thunk_Shutdown();
// Returns a pointer to generated code that saves registers, calls `function`
// and restores the registers again. Results are cached per function.
// num_params is the number of 4-byte stack parameters to forward to
// `function`; it is only used on 32-bit builds.
void *ProtectFunction(void *function, int num_params);
#endif

View File

@ -1156,7 +1156,7 @@ namespace Gen
void MOVDDUP(X64Reg regOp, OpArg arg) void MOVDDUP(X64Reg regOp, OpArg arg)
{ {
// TODO(ector): check SSE3 flag // TODO(ector): check SSE3 flag
if (cpu_info.bSSE3NewInstructions) if (cpu_info.bSSE3)
{ {
WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3 WriteSSEOp(64, 0x12, false, regOp, arg); //SSE3
} }
@ -1205,7 +1205,7 @@ namespace Gen
} }
void PSHUFB(X64Reg dest, OpArg arg) { void PSHUFB(X64Reg dest, OpArg arg) {
if (!cpu_info.bSSE3NewInstructions) { if (!cpu_info.bSSSE3) {
PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer."); PanicAlert("Trying to use PSHUFB on a system that doesn't support it. Bad programmer.");
} }
Write8(0x66); Write8(0x66);

View File

@ -14,6 +14,9 @@
// Official SVN repository and contact information can be found at // Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
#ifndef _DOLPHIN_INTEL_CODEGEN #ifndef _DOLPHIN_INTEL_CODEGEN
#define _DOLPHIN_INTEL_CODEGEN #define _DOLPHIN_INTEL_CODEGEN
@ -92,6 +95,26 @@ namespace Gen
const u8 *GetCodePtr(); const u8 *GetCodePtr();
u8 *GetWritableCodePtr(); u8 *GetWritableCodePtr();
// Safe way to temporarily redirect the code generator.
//
// On construction the emitter's current write pointer is remembered and the
// emitter is pointed at *code_ptr_ptr. On destruction the emitter's (advanced)
// write pointer is stored back into *code_ptr_ptr and the remembered pointer
// is reinstated. The pointer passed in must stay valid for the lifetime of
// the GenContext.
class GenContext
{
	u8 **code_ptr_ptr;
	u8 *saved_ptr;

	// Not copyable: a copy would run the restoring destructor a second time
	// and clobber both *code_ptr_ptr and the emitter's write pointer.
	// Declared but intentionally left undefined.
	GenContext(const GenContext &);
	GenContext &operator=(const GenContext &);

public:
	GenContext(u8 **code_ptr_ptr_)
	{
		saved_ptr = GetWritableCodePtr();
		code_ptr_ptr = code_ptr_ptr_;
		SetCodePtr(*code_ptr_ptr);
	}
	~GenContext()
	{
		// Publish how far the redirected emission got, then go back to
		// where the emitter was before this context was created.
		*code_ptr_ptr = GetWritableCodePtr();
		SetCodePtr(saved_ptr);
	}
};
enum NormalOp { enum NormalOp {
nrmADD, nrmADD,
nrmADC, nrmADC,

View File

@ -26,6 +26,7 @@
#include "Console.h" #include "Console.h"
#include "Core.h" #include "Core.h"
#include "CPUDetect.h"
#include "CoreTiming.h" #include "CoreTiming.h"
#include "Boot/Boot.h" #include "Boot/Boot.h"
#include "PatchEngine.h" #include "PatchEngine.h"
@ -140,7 +141,9 @@ bool Init(const SCoreStartupParameter _CoreParameter)
// all right ... here we go // all right ... here we go
Host_SetWaitCursor(false); Host_SetWaitCursor(false);
DisplayMessage("Emulation started.", 3000); DisplayMessage(cpu_info.CPUBrandString, 3000);
DisplayMessage(_CoreParameter.m_strFilename, 3000);
//RegisterPanicAlertHandler(PanicAlertToVideo); //RegisterPanicAlertHandler(PanicAlertToVideo);

View File

@ -16,6 +16,7 @@
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#include "Common.h" #include "Common.h"
#include "Thunk.h"
#include "../Core.h" #include "../Core.h"
#include "HW.h" #include "HW.h"
#include "../PowerPC/PowerPC.h" #include "../PowerPC/PowerPC.h"
@ -42,6 +43,7 @@ namespace HW
{ {
void Init() void Init()
{ {
Thunk_Init(); // not really hw, but this way we know it's inited first :P
// Init the whole Hardware // Init the whole Hardware
PixelEngine::Init(); PixelEngine::Init();
CommandProcessor::Init(); CommandProcessor::Init();
@ -72,5 +74,6 @@ namespace HW
WII_IPC_HLE_Interface::Shutdown(); WII_IPC_HLE_Interface::Shutdown();
WII_IPCInterface::Shutdown(); WII_IPCInterface::Shutdown();
Thunk_Shutdown();
} }
} }

View File

@ -1054,10 +1054,14 @@ void SDRUpdated()
u32 CheckDTLB(u32 _Address, XCheckTLBFlag _Flag) u32 CheckDTLB(u32 _Address, XCheckTLBFlag _Flag)
{ {
PanicAlert("TLB: %s unknown memory (0x%08x)\n" PanicAlert("TLB: %s unknown memory (0x%08x)\n"
"This is either the game crashing randomly, or a TLB write."
"Several games uses the TLB to map memory. This\n" "Several games uses the TLB to map memory. This\n"
"function is not support in dolphin. Cheers!", "function is not supported in Dolphin. "
"Also, unfortunately there is no way to recover from this error,"
"so Dolphin will now exit abruptly. Sorry!",
_Flag == FLAG_WRITE ? "Write to" : "Read from", _Address); _Flag == FLAG_WRITE ? "Write to" : "Read from", _Address);
exit(0);
u32 sr = PowerPC::ppcState.sr[EA_SR(_Address)]; u32 sr = PowerPC::ppcState.sr[EA_SR(_Address)];
u32 offset = EA_Offset(_Address); // 12 bit u32 offset = EA_Offset(_Address); // 12 bit

View File

@ -132,7 +132,7 @@ void CPeripheralInterface::Write32(const u32 _uValue, const u32 _iAddress)
if ((_uValue != 0x80000001) && (_uValue != 0x80000005)) // DVDLowReset if ((_uValue != 0x80000001) && (_uValue != 0x80000005)) // DVDLowReset
{ {
TCHAR szTemp[256]; TCHAR szTemp[256];
sprintf(szTemp, "Unknown write to PI_RESET_CODE (%08x)", _uValue); sprintf(szTemp, "Game wants to reset the machine. PI_RESET_CODE: (%08x)", _uValue);
PanicAlert(szTemp); PanicAlert(szTemp);
} }
} }

View File

@ -145,7 +145,7 @@ void CInterpreter::lmw(UGeckoInstruction _inst)
u32 TempReg = Memory::Read_U32(uAddress); u32 TempReg = Memory::Read_U32(uAddress);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{ {
PanicAlert("DSI exception in lmv. This is very bad."); PanicAlert("DSI exception in lmv.");
return; return;
} }
@ -500,9 +500,9 @@ void CInterpreter::lswi(UGeckoInstruction _inst)
u32 n; u32 n;
if (_inst.NB == 0) if (_inst.NB == 0)
n=32; n = 32;
else else
n=_inst.NB; n = _inst.NB;
int r = _inst.RD - 1; int r = _inst.RD - 1;
int i = 0; int i = 0;
@ -511,22 +511,22 @@ void CInterpreter::lswi(UGeckoInstruction _inst)
if (i==0) if (i==0)
{ {
r++; r++;
r&=31; r &= 31;
m_GPR[r] = 0; m_GPR[r] = 0;
} }
u32 TempValue = Memory::Read_U8(EA) << (24-i); u32 TempValue = Memory::Read_U8(EA) << (24 - i);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{ {
PanicAlert("DSI exception in lsw. This is very bad."); PanicAlert("DSI exception in lsw.");
return; return;
} }
m_GPR[r] |= TempValue; m_GPR[r] |= TempValue;
i+=8; i += 8;
if (i==32) if (i == 32)
i=0; i = 0;
EA++; EA++;
n--; n--;
} }
@ -546,26 +546,26 @@ void CInterpreter::stswi(UGeckoInstruction _inst)
u32 n; u32 n;
if (_inst.NB == 0) if (_inst.NB == 0)
n=32; n = 32;
else else
n=_inst.NB; n = _inst.NB;
int r = _inst.RS - 1; int r = _inst.RS - 1;
int i = 0; int i = 0;
while (n>0) while (n > 0)
{ {
if (i==0) if (i == 0)
{ {
r++; r++;
r&=31; r &= 31;
} }
Memory::Write_U8((m_GPR[r] >> (24-i)) & 0xFF, EA); Memory::Write_U8((m_GPR[r] >> (24 - i)) & 0xFF, EA);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
return; return;
i+=8; i += 8;
if (i==32) if (i == 32)
i=0; i = 0;
EA++; EA++;
n--; n--;
} }

View File

@ -220,7 +220,7 @@ subfex
// Evil // Evil
namespace CPUCompare namespace CPUCompare
{ {
extern u32 m_BlockStart; extern u32 m_BlockStart;
} }
@ -231,9 +231,8 @@ namespace Jit64
void WriteCallInterpreter(UGeckoInstruction _inst) void WriteCallInterpreter(UGeckoInstruction _inst)
{ {
gpr.Flush(js.op); gpr.Flush(FLUSH_ALL);
if (PPCTables::UsesFPU(_inst)) fpr.Flush(FLUSH_ALL);
fpr.Flush(js.op);
if (js.isLastInstruction) if (js.isLastInstruction)
{ {
MOV(32, M(&PC), Imm32(js.compilerPC)); MOV(32, M(&PC), Imm32(js.compilerPC));
@ -250,7 +249,8 @@ namespace Jit64
void HLEFunction(UGeckoInstruction _inst) void HLEFunction(UGeckoInstruction _inst)
{ {
FlushRegCaches(); gpr.Flush(FLUSH_ALL);
fpr.Flush(FLUSH_ALL);
ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex); ABI_CallFunctionCC((void*)&HLE::Execute, js.compilerPC, _inst.hex);
MOV(32, R(EAX), M(&NPC)); MOV(32, R(EAX), M(&NPC));
WriteExitDestInEAX(0); WriteExitDestInEAX(0);

View File

@ -24,6 +24,7 @@
#include "x64Emitter.h" #include "x64Emitter.h"
#include "ABI.h" #include "ABI.h"
#include "Thunk.h"
#include "x64Analyzer.h" #include "x64Analyzer.h"
#include "StringUtil.h" #include "StringUtil.h"
@ -109,7 +110,7 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress)
// break; // break;
case 4: case 4:
// THIS FUNCTION CANNOT TOUCH FLOATING POINT REGISTERS. // THIS FUNCTION CANNOT TOUCH FLOATING POINT REGISTERS.
CALL((void *)&Memory::Read_U32); CALL(ProtectFunction((void *)&Memory::Read_U32, 1));
break; break;
default: default:
BackPatchError(StringFromFormat("We don't handle the size %i yet in backpatch", info.operandSize), codePtr, emAddress); BackPatchError(StringFromFormat("We don't handle the size %i yet in backpatch", info.operandSize), codePtr, emAddress);

View File

@ -259,12 +259,12 @@ namespace Jit64
{ {
#ifdef _M_X64 #ifdef _M_X64
#ifdef _WIN32 #ifdef _WIN32
RSI, RDI, R12, R13, R14, R8, R9, RDX, R10, R11 //, RCX RSI, RDI, R12, R13, R14, R8, R9, R10, R11 //, RCX
#else #else
R12, R13, R14, R8, R9, R10, R11, RSI, RDI //, RCX R12, R13, R14, R8, R9, R10, R11, RSI, RDI //, RCX
#endif #endif
#elif _M_IX86 #elif _M_IX86
ESI, EDI, EBX, EBP, EDX ESI, EDI, EBX, EBP, EDX, ECX,
#endif #endif
}; };
count = sizeof(allocationOrder) / sizeof(const int); count = sizeof(allocationOrder) / sizeof(const int);
@ -412,10 +412,7 @@ namespace Jit64
if (regs[i].location.IsSimpleReg()) if (regs[i].location.IsSimpleReg())
{ {
X64Reg xr = RX(i); X64Reg xr = RX(i);
if (mode != FLUSH_VOLATILE || IsXRegVolatile(xr)) StoreFromX64(i);
{
StoreFromX64(i);
}
xregs[xr].dirty = false; xregs[xr].dirty = false;
} }
else if (regs[i].location.IsImm()) else if (regs[i].location.IsImm())

View File

@ -25,7 +25,6 @@ namespace Jit64
using namespace Gen; using namespace Gen;
enum FlushMode enum FlushMode
{ {
FLUSH_VOLATILE,
// FLUSH_ALLNONSTATIC, // FLUSH_ALLNONSTATIC,
FLUSH_ALL FLUSH_ALL
}; };

View File

@ -48,7 +48,7 @@ namespace Jit64
void rfi(UGeckoInstruction _inst) void rfi(UGeckoInstruction _inst)
{ {
FlushRegCaches(); FlushRegCaches();
//Bits SRR1[0,5-9,16<31>23, 25<32>27, 30<33>31] are placed into the corresponding bits of the MSR. //Bits SRR1[0, 5-9, 16-23, 25-27, 30-31] are placed into the corresponding bits of the MSR.
//MSR[13] is set to 0. //MSR[13] is set to 0.
const int mask = 0x87C0FF73; const int mask = 0x87C0FF73;
// MSR = (MSR & ~mask) | (SRR1 & mask); // MSR = (MSR & ~mask) | (SRR1 & mask);
@ -105,7 +105,7 @@ namespace Jit64
const bool only_counter_check = ((inst.BO >> 4) & 1); const bool only_counter_check = ((inst.BO >> 4) & 1);
const bool only_condition_check = ((inst.BO >> 2) & 1); const bool only_condition_check = ((inst.BO >> 2) & 1);
if (only_condition_check && only_counter_check) if (only_condition_check && only_counter_check)
PanicAlert("Stupid bcx encountered. Likely bad or corrupt code."); PanicAlert("Bizarre bcx encountered. Likely bad or corrupt code.");
bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0; bool doFullTest = (inst.BO & 16) == 0 && (inst.BO & 4) == 0;
bool ctrDecremented = false; bool ctrDecremented = false;
@ -182,7 +182,7 @@ namespace Jit64
bool fastway = true; bool fastway = true;
if((inst.BO & 16) == 0) if ((inst.BO & 16) == 0)
{ {
PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex); PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex);
_assert_msg_(DYNA_REC, 0, "Bizarro bcctrx"); _assert_msg_(DYNA_REC, 0, "Bizarro bcctrx");

View File

@ -27,6 +27,7 @@
#ifdef _WIN32 #ifdef _WIN32
#define INSTRUCTION_START #define INSTRUCTION_START
//#define INSTRUCTION_START Default(inst); return;
#else #else
#define INSTRUCTION_START Default(inst); return; #define INSTRUCTION_START Default(inst); return;
#endif #endif

View File

@ -317,8 +317,7 @@ namespace Jit64
{ {
INSTRUCTION_START; INSTRUCTION_START;
int a = inst.RA, d = inst.RD; int a = inst.RA, d = inst.RD;
gpr.FlushR(ECX); gpr.FlushLockX(ECX);
gpr.LockX(ECX);
gpr.Lock(a, d); gpr.Lock(a, d);
if (a != d) if (a != d)
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, false, true);
@ -376,8 +375,7 @@ namespace Jit64
{ {
INSTRUCTION_START; INSTRUCTION_START;
int a = inst.RA, d = inst.RD; int a = inst.RA, d = inst.RD;
gpr.FlushR(EDX); gpr.FlushLockX(EDX);
gpr.LockX(EDX);
gpr.Lock(a, d); gpr.Lock(a, d);
if (d != a) { if (d != a) {
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, false, true);
@ -396,8 +394,7 @@ namespace Jit64
{ {
INSTRUCTION_START; INSTRUCTION_START;
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(EDX); gpr.FlushLockX(EDX);
gpr.LockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) { if (d != a && d != b) {
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, false, true);
@ -420,8 +417,7 @@ namespace Jit64
{ {
INSTRUCTION_START; INSTRUCTION_START;
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(EDX); gpr.FlushLockX(EDX);
gpr.LockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) { if (d != a && d != b) {
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, false, true);
@ -450,8 +446,7 @@ namespace Jit64
Default(inst); return; Default(inst); return;
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(EDX); gpr.FlushLockX(EDX);
gpr.LockX(EDX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) { if (d != a && d != b) {
gpr.LoadToX64(d, false, true); gpr.LoadToX64(d, false, true);
@ -534,8 +529,7 @@ namespace Jit64
{ {
INSTRUCTION_START; INSTRUCTION_START;
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.FlushR(ECX); gpr.FlushLockX(ECX);
gpr.LockX(ECX);
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
if (d != a && d != b) if (d != a && d != b)
gpr.LoadToX64(d, false); gpr.LoadToX64(d, false);
@ -649,8 +643,7 @@ namespace Jit64
} }
u32 mask = Helper_Mask(inst.MB, inst.ME); u32 mask = Helper_Mask(inst.MB, inst.ME);
gpr.FlushR(ECX); gpr.FlushLockX(ECX);
gpr.LockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
MOV(32, R(EAX), gpr.R(s)); MOV(32, R(EAX), gpr.R(s));
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
@ -691,8 +684,7 @@ namespace Jit64
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
int s = inst.RS; int s = inst.RS;
gpr.FlushR(ECX); gpr.FlushLockX(ECX);
gpr.LockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.LoadToX64(a, a == s || a == b || s == b, true); gpr.LoadToX64(a, a == s || a == b || s == b, true);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));
@ -719,8 +711,7 @@ namespace Jit64
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
int s = inst.RS; int s = inst.RS;
gpr.FlushR(ECX); gpr.FlushLockX(ECX);
gpr.LockX(ECX);
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
gpr.LoadToX64(a, a == s || a == b || s == b, true); gpr.LoadToX64(a, a == s || a == b || s == b, true);
MOV(32, R(ECX), gpr.R(b)); MOV(32, R(ECX), gpr.R(b));

View File

@ -19,6 +19,7 @@
// Should give a very noticable speed boost to paired single heavy code. // Should give a very noticable speed boost to paired single heavy code.
#include "Common.h" #include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h" #include "../PowerPC.h"
#include "../../Core.h" #include "../../Core.h"
@ -82,9 +83,9 @@ namespace Jit64
SetJumpTarget(argh); SetJumpTarget(argh);
switch (accessSize) switch (accessSize)
{ {
case 32: ABI_CallFunctionR((void *)&Memory::Read_U32, reg); break; case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
case 16: ABI_CallFunctionR((void *)&Memory::Read_U16, reg); break; case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
case 8: ABI_CallFunctionR((void *)&Memory::Read_U8, reg); break; case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
} }
SetJumpTarget(arg2); SetJumpTarget(arg2);
} }
@ -97,9 +98,9 @@ namespace Jit64
BSWAP(32, reg_value); BSWAP(32, reg_value);
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base), R(reg_value)); MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#else #else
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, 0), R(reg_value)); MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#endif #endif
} }
@ -113,17 +114,16 @@ namespace Jit64
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0); UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
FixupBranch arg2 = J(); FixupBranch arg2 = J();
SetJumpTarget(argh); SetJumpTarget(argh);
ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
SetJumpTarget(arg2); SetJumpTarget(arg2);
} }
void lbzx(UGeckoInstruction inst) void lbzx(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.FlushLockX(ABI_PARAM1);
if (b == d || a == d) if (b == d || a == d)
gpr.LoadToX64(d, true, true); gpr.LoadToX64(d, true, true);
else else
@ -134,6 +134,7 @@ namespace Jit64
SafeLoadRegToEAX(ABI_PARAM1, 8, 0); SafeLoadRegToEAX(ABI_PARAM1, 8, 0);
MOV(32, gpr.R(d), R(EAX)); MOV(32, gpr.R(d), R(EAX));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
} }
void lXz(UGeckoInstruction inst) void lXz(UGeckoInstruction inst)
@ -145,7 +146,6 @@ namespace Jit64
// TODO(ector): Make it dynamically enable/disable idle skipping where appropriate // TODO(ector): Make it dynamically enable/disable idle skipping where appropriate
// Will give nice boost to dual core mode // Will give nice boost to dual core mode
// if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping()) // if (CommandProcessor::AllowIdleSkipping() && PixelEngine::AllowIdleSkipping())
if (!Core::GetStartupParameter().bUseDualCore && if (!Core::GetStartupParameter().bUseDualCore &&
inst.OPCD == 32 && inst.OPCD == 32 &&
(inst.hex & 0xFFFF0000) == 0x800D0000 && (inst.hex & 0xFFFF0000) == 0x800D0000 &&
@ -172,7 +172,7 @@ namespace Jit64
{ {
case 32: accessSize = 32; break; //lwz case 32: accessSize = 32; break; //lwz
case 40: accessSize = 16; break; //lhz case 40: accessSize = 16; break; //lhz
case 34: accessSize = 8; break; //lbz case 34: accessSize = 8; break; //lbz
default: _assert_msg_(DYNA_REC, 0, "lXz: invalid access size"); return; default: _assert_msg_(DYNA_REC, 0, "lXz: invalid access size"); return;
} }
@ -183,8 +183,6 @@ namespace Jit64
if (true) { if (true) {
#endif #endif
// Safe and boring // Safe and boring
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a); gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
@ -221,8 +219,6 @@ namespace Jit64
int a = inst.RA; int a = inst.RA;
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
// Safe and boring // Safe and boring
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1); gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(d, a); gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
@ -272,8 +268,6 @@ namespace Jit64
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
if (a || update) if (a || update)
{ {
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
int accessSize; int accessSize;
switch (inst.OPCD & ~1) switch (inst.OPCD & ~1)
{ {
@ -358,6 +352,7 @@ namespace Jit64
ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
if (update && offset) if (update && offset)
{ {
gpr.LoadToX64(a, true, true);
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
} }
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
@ -380,9 +375,9 @@ namespace Jit64
SetJumpTarget(argh); SetJumpTarget(argh);
switch (accessSize) switch (accessSize)
{ {
case 32: ABI_CallFunctionRR((void *)&Memory::Write_U32, ABI_PARAM1, ABI_PARAM2); break; case 32: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break;
case 16: ABI_CallFunctionRR((void *)&Memory::Write_U16, ABI_PARAM1, ABI_PARAM2); break; case 16: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break;
case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, ABI_PARAM1, ABI_PARAM2); break; case 8: ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break;
} }
SetJumpTarget(arg2); SetJumpTarget(arg2);
gpr.UnlockAll(); gpr.UnlockAll();

View File

@ -65,7 +65,6 @@ static u32 GC_ALIGNED16(temp32);
void lfs(UGeckoInstruction inst) void lfs(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
DISABLE_32BIT;
int d = inst.RD; int d = inst.RD;
int a = inst.RA; int a = inst.RA;
if (!a) if (!a)
@ -74,15 +73,8 @@ void lfs(UGeckoInstruction inst)
return; return;
} }
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
if (jo.noAssumeFPLoadFromMem) { gpr.Lock(a);
// We might call a function.
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1);
}
gpr.Lock(d, a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
if (!jo.noAssumeFPLoadFromMem) if (!jo.noAssumeFPLoadFromMem)
{ {
@ -103,6 +95,7 @@ void lfs(UGeckoInstruction inst)
fpr.UnlockAll(); fpr.UnlockAll();
} }
void lfd(UGeckoInstruction inst) void lfd(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
@ -115,11 +108,12 @@ void lfd(UGeckoInstruction inst)
return; return;
} }
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a); gpr.Lock(a);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
fpr.LoadToX64(d, false); fpr.LoadToX64(d, false);
fpr.Lock(d); fpr.Lock(d);
if (cpu_info.bSSE3NewInstructions) { if (cpu_info.bSSSE3) {
X64Reg xd = fpr.RX(d); X64Reg xd = fpr.RX(d);
MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset)); MOVQ_xmm(xd, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
PSHUFB(xd, M((void *)bswapShuffle1x8Dupe)); PSHUFB(xd, M((void *)bswapShuffle1x8Dupe));
@ -130,13 +124,14 @@ void lfd(UGeckoInstruction inst)
MOVDDUP(fpr.RX(d), M(&temp64)); MOVDDUP(fpr.RX(d), M(&temp64));
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();
} }
void stfd(UGeckoInstruction inst) void stfd(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
if (!cpu_info.bSSSE3NewInstructions) if (!cpu_info.bSSSE3)
{ {
DISABLE_32BIT; DISABLE_32BIT;
} }
@ -148,14 +143,14 @@ void stfd(UGeckoInstruction inst)
return; return;
} }
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
gpr.FlushLockX(ABI_PARAM1);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a)); MOV(32, R(ABI_PARAM1), gpr.R(a));
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
#endif #endif
if (cpu_info.bSSSE3NewInstructions) { if (cpu_info.bSSSE3) {
MOVAPS(XMM0, fpr.R(s)); MOVAPS(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x8)); PSHUFB(XMM0, M((void *)bswapShuffle1x8));
#ifdef _M_X64 #ifdef _M_X64
@ -175,6 +170,7 @@ void stfd(UGeckoInstruction inst)
fpr.UnlockAll(); fpr.UnlockAll();
} }
void stfs(UGeckoInstruction inst) void stfs(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
@ -185,12 +181,11 @@ void stfs(UGeckoInstruction inst)
s32 offset = (s32)(s16)inst.SIMM_16; s32 offset = (s32)(s16)inst.SIMM_16;
if (a && !update) if (a && !update)
{ {
gpr.Flush(FLUSH_VOLATILE); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
fpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM2), gpr.R(a)); MOV(32, R(ABI_PARAM2), gpr.R(a));
ADD(32, R(ABI_PARAM2), Imm32(offset));
if (update && offset) if (update && offset)
{ {
MOV(32, gpr.R(a), R(ABI_PARAM2)); MOV(32, gpr.R(a), R(ABI_PARAM2));
@ -198,7 +193,7 @@ void stfs(UGeckoInstruction inst)
CVTSD2SS(XMM0, fpr.R(s)); CVTSD2SS(XMM0, fpr.R(s));
MOVSS(M(&temp32), XMM0); MOVSS(M(&temp32), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp32)); MOV(32, R(ABI_PARAM1), M(&temp32));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, offset); SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
fpr.UnlockAll(); fpr.UnlockAll();
@ -209,6 +204,7 @@ void stfs(UGeckoInstruction inst)
} }
} }
void lfsx(UGeckoInstruction inst) void lfsx(UGeckoInstruction inst)
{ {
INSTRUCTION_START; INSTRUCTION_START;
@ -217,8 +213,7 @@ void lfsx(UGeckoInstruction inst)
MOV(32, R(EAX), gpr.R(inst.RB)); MOV(32, R(EAX), gpr.R(inst.RB));
if (inst.RA) if (inst.RA)
ADD(32, R(EAX), gpr.R(inst.RA)); ADD(32, R(EAX), gpr.R(inst.RA));
if (cpu_info.bSSSE3NewInstructions) { if (cpu_info.bSSSE3) {
// PanicAlert("SSE3 supported!");
X64Reg r = fpr.R(inst.RS).GetSimpleReg(); X64Reg r = fpr.R(inst.RS).GetSimpleReg();
#ifdef _M_IX86 #ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK)); AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));

View File

@ -20,6 +20,7 @@
#include "Common.h" #include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h" #include "../PowerPC.h"
#include "../../Core.h" #include "../../Core.h"
#include "../../HW/GPFifo.h" #include "../../HW/GPFifo.h"
@ -36,7 +37,7 @@
#include "JitAsm.h" #include "JitAsm.h"
#include "JitRegCache.h" #include "JitRegCache.h"
// #define INSTRUCTION_START Default(inst); return; //#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START #define INSTRUCTION_START
#ifdef _M_IX86 #ifdef _M_IX86
@ -125,8 +126,6 @@ void psq_st(UGeckoInstruction inst)
if (stType == QUANTIZE_FLOAT) if (stType == QUANTIZE_FLOAT)
{ {
DISABLE_32BIT; DISABLE_32BIT;
gpr.Flush(FLUSH_VOLATILE);
fpr.Flush(FLUSH_VOLATILE);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a); gpr.Lock(a);
fpr.Lock(s); fpr.Lock(s);
@ -147,7 +146,7 @@ void psq_st(UGeckoInstruction inst)
MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); MOV(64, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
FixupBranch arg2 = J(); FixupBranch arg2 = J();
SetJumpTarget(argh); SetJumpTarget(argh);
CALL((void *)&WriteDual32); CALL(ProtectFunction((void *)&WriteDual32, 0));
SetJumpTarget(arg2); SetJumpTarget(arg2);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -255,7 +254,7 @@ void psq_l(UGeckoInstruction inst)
#ifdef _M_X64 #ifdef _M_X64
gpr.LoadToX64(inst.RA, true, update); gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false); fpr.LoadToX64(inst.RS, false);
if (cpu_info.bSSSE3NewInstructions) { if (cpu_info.bSSSE3) {
X64Reg xd = fpr.R(inst.RS).GetSimpleReg(); X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset)); MOVQ_xmm(xd, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
PSHUFB(xd, M((void *)pbswapShuffle2x4)); PSHUFB(xd, M((void *)pbswapShuffle2x4));
@ -272,7 +271,7 @@ void psq_l(UGeckoInstruction inst)
ADD(32, gpr.R(inst.RA), Imm32(offset)); ADD(32, gpr.R(inst.RA), Imm32(offset));
break; break;
#else #else
if (cpu_info.bSSSE3NewInstructions) { if (cpu_info.bSSSE3) {
gpr.LoadToX64(inst.RA, true, update); gpr.LoadToX64(inst.RA, true, update);
fpr.LoadToX64(inst.RS, false); fpr.LoadToX64(inst.RS, false);
X64Reg xd = fpr.R(inst.RS).GetSimpleReg(); X64Reg xd = fpr.R(inst.RS).GetSimpleReg();
@ -282,8 +281,7 @@ void psq_l(UGeckoInstruction inst)
PSHUFB(xd, M((void *)pbswapShuffle2x4)); PSHUFB(xd, M((void *)pbswapShuffle2x4));
CVTPS2PD(xd, R(xd)); CVTPS2PD(xd, R(xd));
} else { } else {
gpr.FlushR(ECX); gpr.FlushLockX(ECX);
gpr.LockX(ECX);
gpr.LoadToX64(inst.RA); gpr.LoadToX64(inst.RA);
// This can probably be optimized somewhat. // This can probably be optimized somewhat.
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset)); LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));

View File

@ -93,8 +93,6 @@ namespace Jit64
fpr.UnlockAll(); fpr.UnlockAll();
} }
//add a, b, c //add a, b, c
//mov a, b //mov a, b

View File

@ -59,7 +59,7 @@ namespace Jit64
case SPR_GQR0 + 5: case SPR_GQR0 + 5:
case SPR_GQR0 + 6: case SPR_GQR0 + 6:
case SPR_GQR0 + 7: case SPR_GQR0 + 7:
js.blockSetsQuantizers = false; js.blockSetsQuantizers = true;
// Prevent recompiler from compiling in old quantizer values. // Prevent recompiler from compiling in old quantizer values.
// TODO - actually save the set state and use it in following quantizer ops. // TODO - actually save the set state and use it in following quantizer ops.
break; break;

View File

@ -45,7 +45,7 @@ bool DolphinApp::OnInit()
#ifdef _WIN32 #ifdef _WIN32
// TODO: if First Boot // TODO: if First Boot
if (!cpu_info.bSSE2Extensions) if (!cpu_info.bSSE2)
{ {
MessageBox(0, _T("Hi,\n\nDolphin requires that your CPU has support for SSE2 extensions.\n" MessageBox(0, _T("Hi,\n\nDolphin requires that your CPU has support for SSE2 extensions.\n"
"Unfortunately your CPU does not support them, so Dolphin will not run.\n\n" "Unfortunately your CPU does not support them, so Dolphin will not run.\n\n"