32-bit speedup (videos mostly affected). Lots of various cleanup and future proofing. A small debugger feature.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@162 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-09 16:56:24 +00:00
parent 61398ea83f
commit e3d21c0b11
27 changed files with 604 additions and 448 deletions

View File

@ -100,6 +100,7 @@ void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2)
}
#ifdef _WIN32
// Win64 Specific Code
// ====================================
void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
@ -107,27 +108,54 @@ void ABI_PushAllCalleeSavedRegsAndAdjustStack() {
PUSH(RBX);
PUSH(RSI);
PUSH(RDI);
//PUSH(RBP);
PUSH(RBP);
PUSH(R12);
PUSH(R13);
PUSH(R14);
PUSH(R15);
//TODO: Also preserve XMM0-3?
SUB(64, R(RSP), Imm8(0x20));
SUB(64, R(RSP), Imm8(0x28));
}
void ABI_PopAllCalleeSavedRegsAndAdjustStack() {
ADD(64, R(RSP), Imm8(0x20));
ADD(64, R(RSP), Imm8(0x28));
POP(R15);
POP(R14);
POP(R13);
POP(R12);
//POP(RBP);
POP(RBP);
POP(RDI);
POP(RSI);
POP(RBX);
}
// Win64 Specific Code
// ====================================
// Emits code that pushes RCX, RDX, RSI, RDI and R8-R11 (eight 8-byte pushes)
// and then lowers RSP by a further 0x28 bytes.
// Must be paired with ABI_PopAllCallerSavedRegsAndAdjustStack(), which undoes
// these steps in exactly the reverse order.
// RAX is not in the list - presumably so that a return value produced between
// the push and the pop survives; confirm against callers.
void ABI_PushAllCallerSavedRegsAndAdjustStack() {
PUSH(RCX);
PUSH(RDX);
PUSH(RSI);
PUSH(RDI);
PUSH(R8);
PUSH(R9);
PUSH(R10);
PUSH(R11);
//TODO: Also preserve XMM0-15?
// NOTE(review): 0x28 looks like 0x20 of Win64 shadow space plus 8 bytes of
// alignment padding - confirm against the call sites' stack alignment.
SUB(64, R(RSP), Imm8(0x28));
}
// Emits the mirror image of ABI_PushAllCallerSavedRegsAndAdjustStack():
// raises RSP by 0x28 bytes, then pops R11..R8, RDI, RSI, RDX, RCX.
// The adjustment size and pop order must stay in sync with the push routine.
void ABI_PopAllCallerSavedRegsAndAdjustStack() {
// Release the extra stack area first so the pops see the saved registers.
ADD(64, R(RSP), Imm8(0x28));
POP(R11);
POP(R10);
POP(R9);
POP(R8);
POP(RDI);
POP(RSI);
POP(RDX);
POP(RCX);
}
#else
// Unix64 Specific Code
// ====================================
@ -151,6 +179,16 @@ void ABI_PopAllCalleeSavedRegsAndAdjustStack() {
POP(RBX);
}
// Unix64 placeholder: emits only an INT3 instead of a register save.
// The caller-saved register save is not implemented for this ABI yet.
void ABI_PushAllCallerSavedRegsAndAdjustStack() {
INT3();
//not yet supported
}
// Unix64 placeholder: emits only an INT3 instead of a register restore.
// The caller-saved register restore is not implemented for this ABI yet.
void ABI_PopAllCallerSavedRegsAndAdjustStack() {
INT3();
//not yet supported
}
#endif
#endif

View File

@ -92,8 +92,17 @@ void ABI_CallFunctionAC(void *func, const Gen::OpArg &arg1, u32 param2);
void ABI_CallFunctionR(void *func, Gen::X64Reg reg1);
void ABI_CallFunctionRR(void *func, Gen::X64Reg reg1, Gen::X64Reg reg2);
// A function that doesn't have any control over what it will do to regs,
// such as the dispatcher, should be surrounded by these.
void ABI_PushAllCalleeSavedRegsAndAdjustStack();
void ABI_PopAllCalleeSavedRegsAndAdjustStack();
// A function that doesn't know anything about its surroundings should
// be surrounded by these to establish a safe environment, where it can roam free.
// An example is a backpatch injected function.
void ABI_PushAllCallerSavedRegsAndAdjustStack();
void ABI_PopAllCallerSavedRegsAndAdjustStack();
#endif // _JIT_ABI_H

View File

@ -26,6 +26,9 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc
//Check for regular prefix
info.operandSize = 4;
info.zeroExtend = false;
info.signExtend = false;
info.hasImmediate = false;
info.isMemoryWrite = false;
int addressSize = 8;
u8 modRMbyte = 0;
@ -33,7 +36,6 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc
bool hasModRM = false;
bool hasSIBbyte = false;
bool hasDisplacement = false;
info.hasImmediate = false;
int displacementSize = 0;
@ -136,6 +138,7 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc
if (accessType == 1)
{
info.isMemoryWrite = true;
//Write access
switch (codeByte)
{
@ -179,7 +182,9 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc
}
else
{
//mov eax,dword ptr [rax] == 8b 00
// Memory read
//mov eax, dword ptr [rax] == 8b 00
switch (codeByte)
{
case 0x0F:
@ -193,6 +198,14 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo &info, int acc
info.zeroExtend = true;
info.operandSize = 2;
break;
case 0xBE: //movsx on byte
info.signExtend = true;
info.operandSize = 1;
break;
case 0xBF:
info.signExtend = true;
info.operandSize = 2;
break;
default:
return false;
}

View File

@ -27,7 +27,9 @@ struct InstructionInfo
int otherReg;
int scaledReg;
bool zeroExtend;
bool signExtend;
bool hasImmediate;
bool isMemoryWrite;
u64 immediate;
s32 displacement;
};

View File

@ -879,6 +879,10 @@
RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStore.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStoreFloating.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\Jit64\Jit_LoadStorePaired.cpp"
>

View File

@ -185,10 +185,7 @@ void CBoot::EmulatedBIOS(bool _bDebug)
// return
PC = PowerPC::ppcState.gpr[3];
//
// --- preinit some stuff from bios ---
//
// Bus Clock Speed
Memory::Write_U32(0x09a7ec80, 0x800000F8);

View File

@ -186,9 +186,16 @@ THREAD_RETURN CpuThread(void *pArg)
if (_CoreParameter.bLockThreads)
Common::Thread::SetCurrentThreadAffinity(1); //Force to first core
// Let's run under memory watch
EMM::InstallExceptionHandler();
// StartConsoleThread();
if (_CoreParameter.bUseFastMem)
{
#ifdef _M_X64
// Let's run under memory watch
EMM::InstallExceptionHandler();
#else
PanicAlert("32-bit platforms do not support fastmem yet. Report this bug.");
#endif
}
CCPU::Run();
if (_CoreParameter.bRunCompareServer || _CoreParameter.bRunCompareClient)

View File

@ -144,7 +144,7 @@ template <class T, u8* P> void HWCALL HW_Write_Memory(T _Data, const u32 _Addres
void InitHWMemFuncs()
{
for (int i=0; i<NUMHWMEMFUN; i++)
for (int i = 0; i < NUMHWMEMFUN; i++)
{
hwWrite8 [i] = HW_Default_Write<u8>;
hwWrite16[i] = HW_Default_Write<u16>;
@ -156,7 +156,7 @@ void InitHWMemFuncs()
hwRead64 [i] = HW_Default_Read<u64&>;
}
for (int i=0; i<BLOCKSIZE; i++)
for (int i = 0; i < BLOCKSIZE; i++)
{
hwRead16 [i] = CommandProcessor::Read16;
hwWrite16[i] = CommandProcessor::Write16;
@ -322,13 +322,13 @@ writeFn32 GetHWWriteFun32(const u32 _Address)
PanicAlert("READ: Invalid address: %08x", _Address); \
else \
{ \
if ((_Address & 0xFE000000) == 0x7e000000) \
if (bFakeVMEM && (_Address & 0xFE000000) == 0x7e000000) \
{ \
_var = bswap((*(u##_type*)&m_pFakeVMEM[_Address & FAKEVMEM_MASK])); \
} \
else {/* LOG(MEMMAP,"READ (unknown): %08x (PC: %08x)",_Address,PC);*/ \
/*CCPU::EnableStepping(TRUE);*/ \
/*PanicAlert("READ: Unknown Address", "1", MB_OK);*/ \
/*PanicAlert("READ: Unknown Address", "1", MB_OK);*/ \
u32 tmpAddress = CheckDTLB(EffictiveAddress, flag); \
tmpAddress =(tmpAddress & 0xFFFFFFF0) | (_Address & 0xF); \
if (!(PowerPC::ppcState.Exceptions & EXCEPTION_DSI)) \
@ -393,7 +393,7 @@ writeFn32 GetHWWriteFun32(const u32 _Address)
} \
else \
{ \
if ((_Address & 0xFE000000) == 0x7e000000) \
if (bFakeVMEM && (_Address & 0xFE000000) == 0x7e000000) \
{ \
*(u##_type*)&m_pFakeVMEM[_Address & FAKEVMEM_MASK] = bswap(_Data); \
return; \
@ -1100,7 +1100,7 @@ u32 CheckDTLB(u32 _Address, XCheckTLBFlag _Flag)
// hash function no 2 "not" .360
hash1 = ~hash1;
pteg_addr = ((hash1 & pagetable_hashmask)<<6) | pagetable_base;
for (int i=0; i<8; i++)
for (int i = 0; i < 8; i++)
{
u32 pte = bswap(*(u32*)&pRAM[pteg_addr]);
if ((pte & PTE1_V) && (pte & PTE1_H))

View File

@ -153,10 +153,8 @@ int timeHistory[HISTORYLENGTH] = {0,0,0,0,0};
void Throttle(u64 userdata, int cyclesLate)
{
#ifndef _WIN32
// had some weird problem in linux. will investigate.
return;
#endif
if (!Core::GetStartupParameter().bThrottle)
return;
static Common::Timer timer;
for (int i=0; i<HISTORYLENGTH-1; i++)
@ -208,9 +206,9 @@ void Init()
Common::Timer::IncreaseResolution();
memset(timeHistory, 0, sizeof(timeHistory));
CoreTiming::Clear();
if (Core::GetStartupParameter().bThrottle) {
CoreTiming::ScheduleEvent((int)(GetTicksPerSecond()/ThrottleFrequency), &Throttle, "Throttle");
}
CoreTiming::ScheduleEvent((int)(GetTicksPerSecond() / ThrottleFrequency), &Throttle, "Throttle");
CoreTiming::ScheduleEvent(AI_PERIOD, &AICallback, "AICallback");
CoreTiming::ScheduleEvent(VI_PERIOD, &VICallback, "VICallback");
CoreTiming::ScheduleEvent(DSP_PERIOD, &DSPCallback, "DSPCallback");

View File

@ -20,7 +20,6 @@
#ifdef _WIN32
#include <windows.h>
#include <vector>
#include "Common.h"
@ -32,179 +31,6 @@
namespace EMM
{
/* DESIGN
THIS IS NOT THE CURRENT STATE OF THIS FILE - IT'S UNFINISHED
We grab 4GB of virtual address space, and locate memories in there. The memories are either
VirtualAlloc or mapped swapfile.
I/O areas are mapped into the virtual memspace, and VirtualProtected where necessary.
Every chunk is mapped twice into memory, once into the virtual memspace, and once elsewhere.
This second mapping is used when a "read+writable" pointer is requested for a region. This
would generally be for internal use by IO functions, and for actually performing the writes
and reads after detecting them.
There is individual read and write protection for each chunk of memory.
Every region has a default read-write handler. If an exception is caught, this is executed.
The default read-write handlers use the "writable" pointers.
There should be a method to mark a region for "write notification". Dynarecs can use this
to flush their code caches if a region is written to.
At this moment, there can only be one wrapped memspace at a time.
*/
DWORD_PTR memspaceBottom = 0;
DWORD_PTR memspaceTop = 0;
// Bit flags describing a memory-space entry. Every enumerator occupies its
// own bit, so several flags can be combined in one integer field.
enum MSFlags
{
	MEMSPACE_MIRROR_FIRST_PART  = 1 << 0,
	MEMSPACE_MIRROR_OF_PREVIOUS = 1 << 1,
	MEMSPACE_MAPPED_HARDWARE    = 1 << 2,
};
// One region of the emulated address space.
struct MemSpaceEntry
{
// First emulated address covered by the region.
u64 emulatedBase;
// Length of the region in bytes.
u64 emulatedSize;
// Combination of MSFlags bits (the GCMemSpace table stores MSFlags values here).
u32 flags;
};
// Number of bytes in one megabyte. The expansion is parenthesized so the macro
// behaves as a single value in any expression (e.g. `x / MEGABYTE`).
#define MEGABYTE (1024*1024)
// Table of emulated memory regions, one {base, size, flags} triple per entry.
// NOTE(review): the two 24 MB entries look like the main-RAM view and its
// mirror - confirm against the memory map documentation.
const MemSpaceEntry GCMemSpace[] =
{
{0x80000000, 24*MEGABYTE, MEMSPACE_MIRROR_FIRST_PART},
{0xC0000000, 24*MEGABYTE, MEMSPACE_MIRROR_OF_PREVIOUS},
{0xCC000000, 0x10000, MEMSPACE_MAPPED_HARDWARE},
{0xE0000000, 0x4000, 0}, //cache
};
// A registered watch over a range of emulated addresses.
struct Watch
{
// Identifier returned by AddWatchRegion() and passed back to the callback.
int ID;
// Range bounds; Notify() treats the range as [startAddr, endAddr).
EAddr startAddr;
EAddr endAddr;
// Access kind the watch was registered for.
WR watchFor;
// Function invoked by Notify() when a matching access is reported.
WatchCallback callback;
WatchType type;
// Opaque value supplied at registration time.
u64 userData;
};
// All currently registered watches, in registration order.
std::vector<Watch> watches;
// Hook called after a watch is registered for [startAddr, endAddr).
// Currently an empty stub: it performs no work.
void UpdateProtection(EAddr startAddr, EAddr endAddr)
{
}
// Registers a watch over the given address range and returns its ID.
// IDs are handed out sequentially, starting at 0, from a function-local
// counter. The new watch is appended to `watches`, after which
// UpdateProtection() is invoked for the same range.
int AddWatchRegion(EAddr startAddr, EAddr endAddr, WR watchFor, WatchType type, WatchCallback callback, u64 userData)
{
	static int nextWatchID = 0;
	const int assignedID = nextWatchID++;

	Watch entry;
	entry.ID        = assignedID;
	entry.type      = type;
	entry.watchFor  = watchFor;
	entry.startAddr = startAddr;
	entry.endAddr   = endAddr;
	entry.callback  = callback;
	entry.userData  = userData;
	watches.push_back(entry);

	UpdateProtection(startAddr, endAddr);
	return assignedID;
}
// Reports an access at `address` to every registered watch whose mask matches
// `action` and whose range [startAddr, endAddr) contains the address.
// Matching watches have their callback invoked in registration order.
// NOTE(review): `action` is a WR value but is masked against the WatchType
// field `type` rather than `watchFor` - confirm this is intended.
void Notify(EAddr address, WR action)
{
	for (std::vector<Watch>::iterator watch = watches.begin(); watch != watches.end(); ++watch)
	{
		// Skip watches that are not interested in this kind of access.
		if (!(action & watch->type))
			continue;

		// Skip watches whose range does not contain the address.
		if (!(address >= watch->startAddr && address < watch->endAddr))
			continue;

		//Alright!
		watch->callback(address, Access32 /*TODO*/, action, watch->ID);
	}
}
// Unfinished wrapper around a reserved block of host virtual address space
// that is meant to hold the emulated memory regions.
// In its current state Init() and Shutdown() do nothing, and no visible code
// assigns any of the members, so the getters return whatever virtualBase
// happens to contain.
class MemSpace
{
// Region table (never assigned in the visible code).
MemSpaceEntry *entries;
// Lowest emulated address, highest emulated address and their difference.
u64 emulatedBottom;
u64 emulatedTop;
u64 emulatedSize;
// Host base address of the reserved space.
void *virtualBase;
public:
// Intended to compute the emulated address bounds from `e[0..count)` and
// reserve host address space. The whole body is commented out, so calling
// this has no effect.
// NOTE(review): the disabled code refers to `emulatedBase` and `base`, which
// are not members of this class - it would need fixing before being enabled.
void Init(const MemSpaceEntry *e, int count)
{
/*
//first pass: figure out minimum address, and total amount of allocated memory
emulatedBase = 0xFFFFFFFFFFFFFFFFL;
emulatedTop = 0;
u64 mappedTotal = 0;
for (int i=0; i<count; i++)
{
if (e[i].emulatedBase < emulatedBase)
emulatedBase = e[i].emulatedBase;
if (e[i].emulatedBase+e[i].emulatedSize > emulatedTop)
emulatedTop = e[i].emulatedBase+e[i].emulatedSize;
if (e[i].flags & MEMSPACE_MIRROR_FIRST_PART)
{
mappedTotal += e[i].emulatedSize;
}
}
emulatedSize = emulatedTop - emulatedBase;
// The above stuff is not used atm - we just grab 4G
//second pass: grab 4G of virtual address space
virtualBase = VirtualAlloc(0, 0x100000000L, MEM_RESERVE, PAGE_READWRITE);
//also grab a bunch of virtual memory while we're at it
//Release the 4G space!
//Let's hope no weirdo thread klomps in here and grabs it
VirtualFree(base, 0, MEM_RELEASE);
for (int i=0; i<count; i++)
{
if (e[i].flags & MEMSPACE_MIRROR_FIRST_PART)
{
}
}
//TODO: fill all empty parts of the address space with no-access virtualalloc space
*/
}
// Returns virtualBase as an integer.
u64 GetVirtualBaseAddr() {return (u64)virtualBase;}
// Returns virtualBase as a pointer.
void *GetVirtualBase() {return virtualBase;}
// Counterpart of Init(); currently empty.
void Shutdown()
{
}
};
// ======
// From here on is the code in this file that actually works and is active.
LONG NTAPI Handler(PEXCEPTION_POINTERS pPtrs)
{
@ -299,9 +125,79 @@ void InstallExceptionHandler()
namespace EMM {
#if 0
//
// backtrace useful function
//
// Prints the current call stack to stdout, prefixed by `msg`.
// At most 100 frames are captured. If the symbol table cannot be allocated,
// only the header line with the frame count is printed.
void print_trace(const char * msg)
{
	void *array[100];
	char **strings;
	size_t size;
	size_t i;

	// backtrace() reports the number of frames as an int.
	const int captured = backtrace(array, 100);
	size = captured > 0 ? (size_t)captured : 0;
	strings = backtrace_symbols(array, captured);

	// size is a size_t, so it needs the unsigned %zu conversion.
	printf("%s Obtained %zu stack frames.\n", msg, size);

	// backtrace_symbols() returns NULL when it cannot allocate its result.
	if (!strings)
		return;

	for (i = 0; i < size; i++)
		printf("--> %s\n", strings[i]);

	// The array returned by backtrace_symbols() is a single malloc'd block.
	free(strings);
}
// SIGSEGV handler sketch for the JIT's memory backpatching: when a fault at a
// JIT-generated instruction is reported, the faulting host address is turned
// into an emulated address and handed to Jit64::BackPatch().
// This code sits inside an `#if 0` region and is not compiled.
// NOTE(review): as written it would not compile -
//   * the second parameter is declared as `int siginfo_t *info`;
//   * raw_context is cast to `ucontext_t` but stored in a `ucontext_t *`;
//   * `badAddress` and `accessType` are used but never declared here
//     (badAddress presumably should be derived from fault_memory_ptr).
void sigsegv_handler(int signal, int siginfo_t *info, void *raw_context)
{
if (signal != SIGSEGV)
{
// We are not interested in other signals - handle it as usual.
return;
}
ucontext_t *context = (ucontext_t)raw_context;
int si_code = info->si_code;
// Only faults with code SEGV_MAPERR are handled; anything else is ignored.
if (si_code != SEGV_MAPERR)
{
// Huh? Return.
return;
}
mcontext_t *ctx = &context->uc_mcontext;
// Address whose access faulted, and address of the faulting instruction.
void *fault_memory_ptr = (void *)info->si_addr;
// NOTE(review): `mc_rip` is a platform-specific mcontext field name - confirm
// it matches the target OS.
void *fault_instruction_ptr = (void *)ctx->mc_rip;
if (!Jit64::IsInJitCode(fault_instruction_ptr)) {
// Let's not prevent debugging.
return;
}
u64 memspaceBottom = (u64)Memory::base;
// Note: this only raises an alert; execution continues into the backpatch below.
if (badAddress < memspaceBottom) {
PanicAlert("Exception handler - access below memory space. %08x%08x",
badAddress >> 32, badAddress);
}
// Offset from Memory::base, truncated to 32 bits, is the emulated address.
u32 emAddress = (u32)(badAddress - memspaceBottom);
// Backpatch time.
Jit64::BackPatch(fault_instruction_ptr, accessType, emAddress);
}
#endif
void InstallExceptionHandler()
{
/*
#ifdef _M_IX86
PanicAlert("InstallExceptionHandler called, but this platform does not yet support it.");
return;
#endif
#if 0
sighandler_t old_signal_handler = signal(SIGSEGV , sigsegv_handler);
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = sigsegv_handler;
sa.sa_flags = SA_SIGINFO;
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, NULL);
#endif
/*
* signal(xyz);
*/
}

View File

@ -69,4 +69,3 @@ void WriteHandler32(EAddr address, u32 value);
void WriteHandler64(EAddr address, u64 value);
#endif

View File

@ -24,6 +24,7 @@
#include "../PPCAnalyst.h"
#include "JitCache.h"
#include "x64Emitter.h"
namespace Jit64
{
@ -85,6 +86,7 @@ namespace Jit64
void FlushRegCaches();
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset);
void addx(UGeckoInstruction inst);
void orx(UGeckoInstruction inst);

View File

@ -14,6 +14,8 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "ABI.h"
#include "x64Emitter.h"
#include "../../HW/Memmap.h"

View File

@ -1,3 +1,20 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <string>
#include "Common.h"
@ -36,6 +53,10 @@ void BackPatchError(const std::string &text, u8 *codePtr, u32 emAddress) {
return;
}
// This generates some fairly heavy trampolines, but:
// 1) It's really necessary. We don't know anything about the context.
// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be
// that many of them in a typical program/game.
void BackPatch(u8 *codePtr, int accessType, u32 emAddress)
{
if (!IsInJitCode(codePtr))
@ -48,6 +69,10 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress)
if (!DisassembleMov(codePtr, info, accessType)) {
BackPatchError("BackPatch - failed to disassemble MOV instruction", codePtr, emAddress);
}
if (info.isMemoryWrite) {
BackPatchError("BackPatch - determined that MOV is write, not yet supported and should have been caught before",
codePtr, emAddress);
}
if (info.operandSize != 4) {
BackPatchError(StringFromFormat("BackPatch - no support for operand size %i", info.operandSize), codePtr, emAddress);
}
@ -70,19 +95,10 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress)
u8 *trampoline = trampolineCodePtr;
SetCodePtr(trampolineCodePtr);
// * Save all volatile regs
PUSH(RCX);
PUSH(RDX);
PUSH(RSI);
PUSH(RDI);
PUSH(R8);
PUSH(R9);
PUSH(R10);
PUSH(R11);
//TODO: Also preserve XMM0-3?
SUB(64, R(RSP), Imm8(0x20));
ABI_PushAllCallerSavedRegsAndAdjustStack();
// * Set up stack frame.
// * Call ReadMemory32
//LEA(32, ECX, MDisp((X64Reg)addrReg, info.displacement));
//LEA(32, ABI_PARAM1, MDisp((X64Reg)addrReg, info.displacement));
MOV(32, R(ABI_PARAM1), R((X64Reg)addrReg));
if (info.displacement) {
ADD(32, R(ABI_PARAM1), Imm32(info.displacement));
@ -91,7 +107,8 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress)
//case 1:
// CALL((void *)&Memory::Read_U8);
// break;
case 4:
case 4:
// THIS FUNCTION CANNOT TOUCH FLOATING POINT REGISTERS.
CALL((void *)&Memory::Read_U32);
break;
default:
@ -99,15 +116,7 @@ void BackPatch(u8 *codePtr, int accessType, u32 emAddress)
break;
}
// * Tear down stack frame.
ADD(64, R(RSP), Imm8(0x20));
POP(R11);
POP(R10);
POP(R9);
POP(R8);
POP(RDI);
POP(RSI);
POP(RDX);
POP(RCX);
ABI_PopAllCallerSavedRegsAndAdjustStack();
MOV(32, R(dataReg), R(EAX));
RET();
trampolineCodePtr = GetWritableCodePtr();

View File

@ -1,3 +1,20 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _JITBACKPATCH_H
#define _JITBACKPATCH_H

View File

@ -17,6 +17,7 @@
#include <map>
#include "Common.h"
#include "../../Core.h"
#include "MemoryUtil.h"
#include "../../HW/Memmap.h"
@ -78,7 +79,7 @@ namespace Jit64
jo.optimizeStack = true;
jo.enableBlocklink = true; // Speed boost, but not 100% safe
#ifdef _M_X64
jo.enableFastMem = true;
jo.enableFastMem = Core::GetStartupParameter().bUseFastMem;
#else
jo.enableFastMem = false;
#endif

View File

@ -28,6 +28,14 @@
// The branches are known good, or at least reasonably good.
// No need for a disable-mechanism.
// If defined, clears CR0 at blr and bl-s. If the assumption that
// flags never carry over between functions holds, then the task for
// an optimizer becomes much easier.
// #define ACID_TEST
// Zelda and many more games seem to pass the Acid Test.
using namespace Gen;
namespace Jit64
{
@ -70,11 +78,16 @@ namespace Jit64
destination = SignExt26(inst.LI << 2);
else
destination = js.compilerPC + SignExt26(inst.LI << 2);
#ifdef ACID_TEST
if (inst.LK)
AND(32, M(&CR), Imm32(~(0xFF000000)));
#endif
WriteExit(destination, 0);
} //else we were merged with the next block, we only need the link above, if that
}
else {
PanicAlert("bx not last instruction of block"); // this should not happen atm
// TODO: investigate the good old method of merging blocks here.
PanicAlert("bx not last instruction of block"); // this should not happen
}
}
@ -171,18 +184,21 @@ namespace Jit64
if((inst.BO & 16) == 0)
{
PanicAlert("Bizarro bcctrx %08x, not supported.", inst.hex);
_assert_msg_(DYNA_REC, 0, "Bizarro bcctrx");
/*
fastway = false;
MOV(32, M(&PC), Imm32(js.compilerPC+4));
MOV(32, R(EAX), M(&CR));
XOR(32, R(ECX), R(ECX));
AND(32, R(EAX), Imm32(0x80000000>>inst.BI));
AND(32, R(EAX), Imm32(0x80000000 >> inst.BI));
CCFlags branch;
if(inst.BO & 8)
branch = CC_NZ;
else
branch = CC_Z;
*/
// TODO(ector): Why is this commented out?
//SETcc(branch, R(ECX));
// check for EBX
@ -205,11 +221,17 @@ namespace Jit64
if (inst.hex == 0x4e800020)
{
//CDynaRegCache::Flush();
// This below line can be used to prove that blr "eats flags" in practice.
// This observation will let us do a lot of fun observations.
#ifdef ACID_TEST
AND(32, M(&CR), Imm32(~(0xFF000000)));
#endif
MOV(32, R(EAX), M(&LR));
MOV(32, M(&PC), R(EAX));
WriteExitDestInEAX(0);
return;
}
// Call interpreter
Default(inst);
MOV(32, R(EAX), M(&NPC));
WriteExitDestInEAX(0);

View File

@ -139,7 +139,7 @@ namespace Jit64
}
//Still here? Do regular path.
#if defined(_M_X64) && defined(_WIN32)
#if defined(_M_X64)
if (accessSize == 8 || accessSize == 16 || !jo.enableFastMem) {
#else
if (true) {
@ -173,154 +173,6 @@ namespace Jit64
gpr.UnlockAll();
}
// JIT for lfs: loads a 32-bit float from memory at rA + SIMM_16, converts it to
// double and duplicates it into both halves of the destination FPR.
// Falls back to Default() when the rA field is 0.
void lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	const int fd = inst.RD;
	const int ra = inst.RA;
	if (ra == 0)
	{
		Default(inst);
		return;
	}
	const s32 disp = (s32)(s16)inst.SIMM_16;

	gpr.Flush(FLUSH_VOLATILE);
	// NOTE(review): fd is an FPR index but is also passed to the GPR lock here -
	// confirm this is intended.
	gpr.Lock(fd, ra);
	MOV(32, R(ABI_PARAM1), gpr.R(ra));
#ifdef _M_X64
	if (!jo.noAssumeFPLoadFromMem)
	{
		// Direct load through the RBX-based memory pointer, then byte swap.
		MOV(32, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, disp));
//#else
//		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::GetMainRAMPtr() + (u32)offset));
//#endif
		BSWAP(32, EAX);
	}
	else
#endif
	{
		SafeLoadRegToEAX(ABI_PARAM1, 32, disp);
	}

	// Bounce the raw bits through temp32 to get them into an XMM register.
	MOV(32, M(&temp32), R(EAX));
	fpr.Lock(fd);
	fpr.LoadToX64(fd, false);
	CVTSS2SD(fpr.RX(fd), M(&temp32));
	MOVDDUP(fpr.RX(fd), fpr.R(fd));

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for lfd: loads a 64-bit double from memory at rA + SIMM_16 through the
// RBX-based memory pointer, byte swaps it and duplicates it into both halves
// of the destination FPR. Falls back to Default() when the rA field is 0
// (and always on 32-bit builds via DISABLE_32BIT).
void lfd(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	const int fd = inst.RD;
	const int ra = inst.RA;
	if (ra == 0)
	{
		Default(inst);
		return;
	}
	const s32 disp = (s32)(s16)inst.SIMM_16;

	gpr.Lock(ra);
	MOV(32, R(ABI_PARAM1), gpr.R(ra));
	MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, disp));
	BSWAP(64, EAX);

	// Bounce the swapped bits through temp64 to get them into an XMM register.
	MOV(64, M(&temp64), R(EAX));
	fpr.Lock(fd);
	fpr.LoadToX64(fd, false);
	MOVSD(fpr.RX(fd), M(&temp64));
	MOVDDUP(fpr.RX(fd), fpr.R(fd));

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for stfd: stores the low double of the source FPR to memory at
// rA + SIMM_16 through the RBX-based memory pointer, byte swapped.
// Falls back to Default() when the rA field is 0 (and always on 32-bit
// builds via DISABLE_32BIT).
void stfd(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	const int fs = inst.RS;
	const int ra = inst.RA;
	if (ra == 0)
	{
		Default(inst);
		return;
	}
	const s32 disp = (s32)(s16)inst.SIMM_16;

	gpr.Lock(ra);
	fpr.Lock(fs);
	fpr.LoadToX64(fs, true, false);

	// Bounce the value through temp64 to move it from XMM to a general register.
	MOVSD(M(&temp64), fpr.RX(fs));
	MOV(32, R(ABI_PARAM1), gpr.R(ra));
	MOV(64, R(EAX), M(&temp64));
	BSWAP(64, EAX);
	MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, disp), R(EAX));

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for stfs: converts the source FPR to single precision and stores it at
// rA + SIMM_16.
// Only the non-update form with a non-zero rA field is compiled; every other
// case (and every case on 32-bit builds, via DISABLE_32BIT) goes to Default().
void stfs(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	bool update = inst.OPCD & 1;
	int s = inst.RS;
	int a = inst.RA;
	s32 offset = (s32)(s16)inst.SIMM_16;

	if (!a || update)
	{
		Default(inst);
		return;
	}

	gpr.Flush(FLUSH_VOLATILE);
	gpr.Lock(a);
	fpr.Lock(s);

	// ABI_PARAM2 = effective address. No write-back to rA is needed because
	// this path is only taken for the non-update form.
	MOV(32, R(ABI_PARAM2), gpr.R(a));
	if (offset)
		ADD(32, R(ABI_PARAM2), Imm32((u32)offset));

	// ABI_PARAM1 = value as raw single-precision bits, bounced through temp32.
	CVTSD2SS(XMM0, fpr.R(s));
	MOVSS(M(&temp32), XMM0);
	MOV(32, R(ABI_PARAM1), M(&temp32));

	// Addresses with any bit of 0x0C000000 set go through Memory::Write_U32;
	// all others are byte swapped and written directly via the RBX-based pointer.
	TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
	FixupBranch argh = J_CC(CC_NZ);
	BSWAP(32, ABI_PARAM1);
	MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
	FixupBranch arg2 = J();
	SetJumpTarget(argh);
	CALL((void *)&Memory::Write_U32);
	SetJumpTarget(arg2);

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for lfsx: loads a 32-bit float from memory at rB (+ rA when the rA field
// is non-zero) through the RBX-based memory pointer, converts it to double and
// duplicates it into both halves of the destination FPR.
// Always falls back to Default() on 32-bit builds via DISABLE_32BIT.
void lfsx(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	const int fd = inst.RS;
	const int ra = inst.RA;
	const int rb = inst.RB;

	fpr.Lock(fd);
	fpr.LoadToX64(fd, false, true);

	// EAX = effective address, then the byte-swapped 32-bit value at it.
	MOV(32, R(EAX), gpr.R(rb));
	if (ra != 0)
		ADD(32, R(EAX), gpr.R(ra));
	MOV(32, R(EAX), MComplex(RBX, EAX, SCALE_1, 0));
	BSWAP(32, EAX);

	// Bounce the raw bits through temp32 to get them into an XMM register.
	MOV(32, M(&temp32), R(EAX));
	CVTSS2SD(XMM0, M(&temp32));
	MOVDDUP(fpr.R(fd).GetSimpleReg(), R(XMM0));

	fpr.UnlockAll();
}
// Zero cache line.
void dcbz(UGeckoInstruction inst)
{

View File

@ -0,0 +1,204 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
// Should give a very noticeable speed boost to paired single heavy code.
#include "Common.h"
#include "../PowerPC.h"
#include "../../Core.h"
#include "../../HW/GPFifo.h"
#include "../../HW/CommandProcessor.h"
#include "../../HW/PixelEngine.h"
#include "../../HW/Memmap.h"
#include "../PPCTables.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "Jit.h"
#include "JitCache.h"
#include "JitAsm.h"
#include "JitRegCache.h"
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
#ifdef _M_IX86
#define DISABLE_32BIT Default(inst); return;
#else
#define DISABLE_32BIT ;
#endif
namespace Jit64
{
static u64 GC_ALIGNED16(temp64);
static u32 GC_ALIGNED16(temp32);
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
// and pshufb could help a lot.
// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.
// JIT for lfs: loads a 32-bit float from memory at rA + SIMM_16, converts it to
// double and duplicates it into both halves of the destination FPR.
// Falls back to Default() when the rA field is 0.
void lfs(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	const int fd = inst.RD;
	const int ra = inst.RA;
	if (ra == 0)
	{
		Default(inst);
		return;
	}
	const s32 disp = (s32)(s16)inst.SIMM_16;

	gpr.Flush(FLUSH_VOLATILE);
	// NOTE(review): fd is an FPR index but is also passed to the GPR lock here -
	// confirm this is intended.
	gpr.Lock(fd, ra);
	MOV(32, R(ABI_PARAM1), gpr.R(ra));
#ifdef _M_X64
	if (!jo.noAssumeFPLoadFromMem)
	{
		// Direct load through the RBX-based memory pointer, then byte swap.
		MOV(32, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, disp));
//#else
//		MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::GetMainRAMPtr() + (u32)offset));
//#endif
		BSWAP(32, EAX);
	}
	else
#endif
	{
		SafeLoadRegToEAX(ABI_PARAM1, 32, disp);
	}

	// Bounce the raw bits through temp32 to get them into an XMM register.
	MOV(32, M(&temp32), R(EAX));
	fpr.Lock(fd);
	fpr.LoadToX64(fd, false);
	CVTSS2SD(fpr.RX(fd), M(&temp32));
	MOVDDUP(fpr.RX(fd), fpr.R(fd));

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for lfd: loads a 64-bit double from memory at rA + SIMM_16 through the
// RBX-based memory pointer, byte swaps it and duplicates it into both halves
// of the destination FPR. Falls back to Default() when the rA field is 0
// (and always on 32-bit builds via DISABLE_32BIT).
void lfd(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	const int fd = inst.RD;
	const int ra = inst.RA;
	if (ra == 0)
	{
		Default(inst);
		return;
	}
	const s32 disp = (s32)(s16)inst.SIMM_16;

	gpr.Lock(ra);
	MOV(32, R(ABI_PARAM1), gpr.R(ra));
	MOV(64, R(EAX), MComplex(RBX, ABI_PARAM1, SCALE_1, disp));
	BSWAP(64, EAX);

	// Bounce the swapped bits through temp64 to get them into an XMM register.
	MOV(64, M(&temp64), R(EAX));
	fpr.Lock(fd);
	fpr.LoadToX64(fd, false);
	MOVSD(fpr.RX(fd), M(&temp64));
	MOVDDUP(fpr.RX(fd), fpr.R(fd));

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for stfd: stores the low double of the source FPR to memory at
// rA + SIMM_16 through the RBX-based memory pointer, byte swapped.
// Falls back to Default() when the rA field is 0 (and always on 32-bit
// builds via DISABLE_32BIT).
void stfd(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	const int fs = inst.RS;
	const int ra = inst.RA;
	if (ra == 0)
	{
		Default(inst);
		return;
	}
	const s32 disp = (s32)(s16)inst.SIMM_16;

	gpr.Lock(ra);
	fpr.Lock(fs);
	fpr.LoadToX64(fs, true, false);

	// Bounce the value through temp64 to move it from XMM to a general register.
	MOVSD(M(&temp64), fpr.RX(fs));
	MOV(32, R(ABI_PARAM1), gpr.R(ra));
	MOV(64, R(EAX), M(&temp64));
	BSWAP(64, EAX);
	MOV(64, MComplex(RBX, ABI_PARAM1, SCALE_1, disp), R(EAX));

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for stfs: converts the source FPR to single precision and stores it at
// rA + SIMM_16.
// Only the non-update form with a non-zero rA field is compiled; every other
// case (and every case on 32-bit builds, via DISABLE_32BIT) goes to Default().
void stfs(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	bool update = inst.OPCD & 1;
	int s = inst.RS;
	int a = inst.RA;
	s32 offset = (s32)(s16)inst.SIMM_16;

	if (!a || update)
	{
		Default(inst);
		return;
	}

	gpr.Flush(FLUSH_VOLATILE);
	gpr.Lock(a);
	fpr.Lock(s);

	// ABI_PARAM2 = effective address. No write-back to rA is needed because
	// this path is only taken for the non-update form.
	MOV(32, R(ABI_PARAM2), gpr.R(a));
	if (offset)
		ADD(32, R(ABI_PARAM2), Imm32((u32)offset));

	// ABI_PARAM1 = value as raw single-precision bits, bounced through temp32.
	CVTSD2SS(XMM0, fpr.R(s));
	MOVSS(M(&temp32), XMM0);
	MOV(32, R(ABI_PARAM1), M(&temp32));

	// Addresses with any bit of 0x0C000000 set go through Memory::Write_U32;
	// all others are byte swapped and written directly via the RBX-based pointer.
	TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
	FixupBranch argh = J_CC(CC_NZ);
	BSWAP(32, ABI_PARAM1);
	MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
	FixupBranch arg2 = J();
	SetJumpTarget(argh);
	CALL((void *)&Memory::Write_U32);
	SetJumpTarget(arg2);

	gpr.UnlockAll();
	fpr.UnlockAll();
}
// JIT for lfsx: loads a 32-bit float from memory at rB (+ rA when the rA field
// is non-zero) through the RBX-based memory pointer, converts it to double and
// duplicates it into both halves of the destination FPR.
// Always falls back to Default() on 32-bit builds via DISABLE_32BIT.
void lfsx(UGeckoInstruction inst)
{
	INSTRUCTION_START;
	DISABLE_32BIT;
	const int fd = inst.RS;
	const int ra = inst.RA;
	const int rb = inst.RB;

	fpr.Lock(fd);
	fpr.LoadToX64(fd, false, true);

	// EAX = effective address, then the byte-swapped 32-bit value at it.
	MOV(32, R(EAX), gpr.R(rb));
	if (ra != 0)
		ADD(32, R(EAX), gpr.R(ra));
	MOV(32, R(EAX), MComplex(RBX, EAX, SCALE_1, 0));
	BSWAP(32, EAX);

	// Bounce the raw bits through temp32 to get them into an XMM register.
	MOV(32, M(&temp32), R(EAX));
	CVTSS2SD(XMM0, M(&temp32));
	MOVDDUP(fpr.R(fd).GetSimpleReg(), R(XMM0));

	fpr.UnlockAll();
}
} // namespace

View File

@ -35,8 +35,8 @@
#include "JitAsm.h"
#include "JitRegCache.h"
// #define INSTRUCTION_START
#define INSTRUCTION_START Default(inst); return;
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
#ifdef _M_IX86
#define DISABLE_32BIT Default(inst); return;
@ -56,7 +56,7 @@ void WriteDual32(u64 value, u32 address)
Memory::Write_U32((u32)value, address + 4);
}
static const double GC_ALIGNED16(m_quantizeTableD[]) =
const double GC_ALIGNED16(m_quantizeTableD[]) =
{
(1 << 0), (1 << 1), (1 << 2), (1 << 3),
(1 << 4), (1 << 5), (1 << 6), (1 << 7),
@ -76,7 +76,7 @@ static const double GC_ALIGNED16(m_quantizeTableD[]) =
1.0 / (1 << 4), 1.0 / (1 << 3), 1.0 / (1 << 2), 1.0 / (1 << 1),
};
static const double GC_ALIGNED16(m_dequantizeTableD[]) =
const double GC_ALIGNED16(m_dequantizeTableD[]) =
{
1.0 / (1 << 0), 1.0 / (1 << 1), 1.0 / (1 << 2), 1.0 / (1 << 3),
1.0 / (1 << 4), 1.0 / (1 << 5), 1.0 / (1 << 6), 1.0 / (1 << 7),
@ -101,7 +101,6 @@ static const double GC_ALIGNED16(m_dequantizeTableD[]) =
void psq_st(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
{
Default(inst);
@ -124,6 +123,7 @@ void psq_st(UGeckoInstruction inst)
if (stType == QUANTIZE_FLOAT)
{
DISABLE_32BIT;
gpr.Flush(FLUSH_VOLATILE);
gpr.Lock(a);
fpr.Lock(s);
@ -151,7 +151,10 @@ void psq_st(UGeckoInstruction inst)
}
else if (stType == QUANTIZE_U8)
{
gpr.Flush(FLUSH_VOLATILE);
gpr.FlushR(ABI_PARAM1);
gpr.FlushR(ABI_PARAM2);
gpr.LockX(ABI_PARAM1);
gpr.LockX(ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
if (update)
@ -172,17 +175,22 @@ void psq_st(UGeckoInstruction inst)
#ifdef _M_X64
MOV(16, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
#else
BSWAP(32, ABI_PARAM1);
SHR(32, R(ABI_PARAM1), Imm8(16));
CALL(&Memory::Write_U16);
MOV(32, R(EAX), R(ABI_PARAM2));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(16, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
#endif
if (update)
MOV(32, gpr.R(a), R(ABI_PARAM2));
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
else if (stType == QUANTIZE_S16)
{
gpr.FlushR(ABI_PARAM1);
gpr.FlushR(ABI_PARAM2);
gpr.LockX(ABI_PARAM1);
gpr.LockX(ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
if (update)
@ -200,15 +208,16 @@ void psq_st(UGeckoInstruction inst)
PACKSSDW(XMM0, R(XMM0));
MOVD_xmm(M(&temp64), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp64));
#ifdef _M_X64
BSWAP(32, ABI_PARAM1);
#ifdef _M_X64
MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
#else
BSWAP(32, ABI_PARAM1);
PUSH(32, R(ABI_PARAM1));
CALL(&Memory::Write_U32);
MOV(32, R(EAX), R(ABI_PARAM2));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, MDisp(EAX, (u32)Memory::base), R(ABI_PARAM1));
#endif
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
else {
@ -223,7 +232,6 @@ void psq_st(UGeckoInstruction inst)
void psq_l(UGeckoInstruction inst)
{
INSTRUCTION_START;
DISABLE_32BIT;
if (js.blockSetsQuantizers || !Core::GetStartupParameter().bOptimizeQuantizers)
{
Default(inst);
@ -241,11 +249,10 @@ void psq_l(UGeckoInstruction inst)
return;
}
int offset = inst.SIMM_12;
//INT3();
switch (ldType) {
#ifdef _M_X64
case QUANTIZE_FLOAT:
{
#ifdef _M_X64
gpr.LoadToX64(inst.RA);
MOV(64, R(RAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
BSWAP(64, RAX);
@ -253,17 +260,42 @@ void psq_l(UGeckoInstruction inst)
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
SHUFPD(r, R(r),1);
SHUFPD(r, R(r), 1);
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
break;
#else
gpr.FlushR(ECX);
gpr.LockX(ECX);
gpr.LoadToX64(inst.RA);
// This can probably be optimized somewhat.
LEA(32, ECX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base));
BSWAP(32, RAX);
MOV(32, M(&psTemp[0]), R(RAX));
MOV(32, R(EAX), MDisp(ECX, (u32)Memory::base + 4));
BSWAP(32, RAX);
MOV(32, M(((float *)&psTemp[0]) + 1), R(RAX));
fpr.LoadToX64(inst.RS, false);
X64Reg r = fpr.R(inst.RS).GetSimpleReg();
CVTPS2PD(r, M(&psTemp[0]));
if (update)
ADD(32, gpr.R(inst.RA), Imm32(offset));
gpr.UnlockAllX();
break;
#endif
}
case QUANTIZE_U8:
{
gpr.LoadToX64(inst.RA);
XOR(32, R(EAX), R(EAX));
MOV(16, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#ifdef _M_X64
MOVZX(32, 16, EAX, MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else
LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, 16, EAX, MDisp(EAX, (u32)Memory::base));
#endif
MOV(32, M(&temp64), R(EAX));
MOVD_xmm(XMM0, M(&temp64));
// SSE4 optimization opportunity here.
@ -279,11 +311,16 @@ void psq_l(UGeckoInstruction inst)
ADD(32, gpr.R(inst.RA), Imm32(offset));
}
break;
case QUANTIZE_S16:
{
gpr.LoadToX64(inst.RA);
#ifdef _M_X64
MOV(32, R(EAX), MComplex(RBX, gpr.R(inst.RA).GetSimpleReg(), 1, offset));
#else
LEA(32, EAX, MDisp(gpr.R(inst.RA).GetSimpleReg(), offset));
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base));
#endif
BSWAP(32, EAX);
MOV(32, M(&temp64), R(EAX));
//INT3();
@ -308,12 +345,11 @@ void psq_l(UGeckoInstruction inst)
MOV(32, R(ECX), Imm32((u32)&m_dequantizeTableD));
MOVDDUP(r, MComplex(RCX, EAX, 8, 0));
*/
#endif
default:
// 4 0
// 6 0 //power tennis
// 5 0
//PanicAlert("ld:%i %i", ldType, (int)inst.W);
// PanicAlert("ld:%i %i", ldType, (int)inst.W);
Default(inst);
return;
}

View File

@ -292,8 +292,10 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
{
st.isFirstBlockOfFunction = true;
}
gpa.any = true;
fpa.any = false;
enum Todo
{
JustCopy = 0, Flatten = 1, Nothing = 2
@ -307,7 +309,6 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
if (iter != functions.end())
{
SFunction &f = iter->second;
if (f.flags & FFLAG_LEAF)
{
//no reason to flatten
@ -394,12 +395,14 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
return 0;
}
else
{
return 0;
}
// Do analysis of the code, look for dependencies etc
int numSystemInstructions = 0;
for (int i=0; i<32; i++)
for (int i = 0; i < 32; i++)
{
gpa.firstRead[i] = -1;
gpa.firstWrite[i] = -1;
@ -408,7 +411,7 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
}
gpa.any = true;
for (size_t i=0; i<realsize; i++)
for (size_t i = 0; i < realsize; i++)
{
UGeckoInstruction inst = code[i].inst;
if (PPCTables::UsesFPU(inst))
@ -416,7 +419,7 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
fpa.any = true;
}
GekkoOPInfo *opinfo = GetOpInfo(code[i].inst);
_assert_msg_(GEKKO,opinfo!=0,"Invalid Op - Error scanning %08x op %08x",address+i*4,inst);
_assert_msg_(GEKKO, opinfo != 0, "Invalid Op - Error scanning %08x op %08x",address+i*4,inst);
int flags = opinfo->flags;
if (flags & FL_TIMER)
@ -424,7 +427,7 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
// Does the instruction output CR0?
if (flags & FL_RC_BIT)
code[i].outputCR0 = inst.hex&1; //todo fix
code[i].outputCR0 = inst.hex & 1; //todo fix
else if ((flags & FL_SET_CRn) && inst.CRFD == 0)
code[i].outputCR0 = true;
else
@ -432,18 +435,18 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
// Does the instruction output CR1?
if (flags & FL_RC_BIT_F)
code[i].outputCR1 = inst.hex&1; //todo fix
code[i].outputCR1 = inst.hex & 1; //todo fix
else if ((flags & FL_SET_CRn) && inst.CRFD == 1)
code[i].outputCR1 = true;
else
code[i].outputCR1 = (flags & FL_SET_CR1) ? true : false;
for (int j=0; j<3; j++)
for (int j = 0; j < 3; j++)
{
code[i].fregsIn[j] = -1;
code[i].regsIn[j] = -1;
}
for (int j=0; j<2; j++)
for (int j = 0; j < 2; j++)
code[i].regsOut[j] = -1;
code[i].fregOut=-1;
@ -485,14 +488,21 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
break;
case OPTYPE_LOADFP:
break;
case OPTYPE_BRANCH:
if (code[i].inst.hex == 0x4e800020)
{
// For analysis purposes, we can assume that blr eats flags.
code[i].outputCR0 = true;
code[i].outputCR1 = true;
}
break;
case OPTYPE_SYSTEM:
case OPTYPE_SYSTEMFP:
numSystemInstructions++;
break;
}
for (int j=0; j<numIn; j++)
for (int j = 0; j < numIn; j++)
{
int r = code[i].regsIn[j];
if (gpa.firstRead[r] == -1)
@ -501,7 +511,7 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
gpa.numReads[r]++;
}
for (int j=0; j<numOut; j++)
for (int j = 0; j < numOut; j++)
{
int r = code[i].regsOut[j];
if (gpa.firstWrite[r] == -1)
@ -516,7 +526,7 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
bool wantsCR0 = true;
bool wantsCR1 = true;
bool wantsPS1 = true;
for (int i=realsize-1; i; i--)
for (int i = realsize - 1; i; i--)
{
if (code[i].outputCR0)
wantsCR0 = false;
@ -527,12 +537,13 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
wantsCR0 |= code[i].wantsCR0;
wantsCR1 |= code[i].wantsCR1;
wantsPS1 |= code[i].wantsPS1;
code[i].wantsCR0 = wantsCR0;
code[i].wantsCR1 = wantsCR1;
code[i].wantsPS1 = wantsPS1;
}
// Time for code shuffling, taking into account the above dependency analysis.
bool successful_shuffle = false;
//Move compares
// Try to push compares as close as possible to the following branch
// this way we can do neat stuff like combining compare and branch
@ -557,8 +568,10 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
else merge!
}
*/
*/
if (successful_shuffle) {
// Disasm before and after, display side by side
}
// Decide what regs to potentially regcache
return code;
}
@ -566,7 +579,7 @@ PPCAnalyst::CodeOp *PPCAnalyst::Flatten(u32 address, u32 &realsize, BlockStats &
// Adds the function to the list, unless it's already there
PPCAnalyst::SFunction *PPCAnalyst::AddFunction(u32 startAddr)
{
if (startAddr<0x80000010)
if (startAddr < 0x80000010)
return 0;
XFuncMap::iterator iter = functions.find(startAddr);
if (iter != functions.end())
@ -643,9 +656,7 @@ void PPCAnalyst::FindFunctionsAfterBLR()
if (!f)
break;
else
{
location += f->size * 4;
}
}
else
break;
@ -656,7 +667,7 @@ void PPCAnalyst::FindFunctionsAfterBLR()
void PPCAnalyst::FindFunctions(u32 startAddr, u32 endAddr)
{
//Step 1: Find all functions
FindFunctionsFromBranches(startAddr,endAddr);
FindFunctionsFromBranches(startAddr, endAddr);
LOG(HLE,"Memory scan done. Found %i functions.",functions.size());
@ -861,30 +872,29 @@ bool PPCAnalyst::SaveFuncDB(const TCHAR *filename)
bool PPCAnalyst::LoadFuncDB(const TCHAR *filename)
{
FILE *f = fopen(filename,"rb");
FILE *f = fopen(filename, "rb");
if (!f)
{
LOG(HLE,"Database load failed");
LOG(HLE, "Database load failed");
return false;
}
u32 fcount=0;
fread(&fcount,4,1,f);
for (size_t i=0; i<fcount; i++)
u32 fcount = 0;
fread(&fcount, 4, 1, f);
for (size_t i = 0; i < fcount; i++)
{
FuncDesc temp;
memset(&temp, 0, sizeof(temp));
fread(&temp,sizeof(temp),1,f);
fread(&temp, sizeof(temp), 1, f);
SDBFunc f;
f.name = temp.name;
f.size = temp.size;
database[temp.checkSum] = f;
}
fclose(f);
UseFuncDB();
LOG(HLE,"Database load successful");
LOG(HLE, "Database load successful");
return true;
}
@ -966,7 +976,7 @@ void PPCAnalyst::PrintCallers(u32 funcAddr)
void PPCAnalyst::GetAllFuncs(functionGetterCallback callback)
{
XFuncMap::iterator iter = functions.begin();
while(iter!=functions.end())
while (iter != functions.end())
{
callback(&(iter->second));
iter++;

View File

@ -39,6 +39,7 @@
#include "Debugger/PPCDebugInterface.h"
#include "Debugger/Debugger_SymbolMap.h"
#include "PowerPC/PPCAnalyst.h"
#include "Core.h"
#include "LogManager.h"
@ -64,6 +65,8 @@ BEGIN_EVENT_TABLE(CCodeWindow, wxFrame)
EVT_MENU(IDM_REGISTERWINDOW, CCodeWindow::OnToggleRegisterWindow)
EVT_MENU(IDM_BREAKPOINTWINDOW, CCodeWindow::OnToggleBreakPointWindow)
EVT_MENU(IDM_MEMORYWINDOW, CCodeWindow::OnToggleMemoryWindow)
EVT_MENU(IDM_SCANFUNCTIONS, CCodeWindow::OnSymbolsMenu)
// toolbar
EVT_MENU(IDM_DEBUG_GO, CCodeWindow::OnCodeStep)
EVT_MENU(IDM_STEP, CCodeWindow::OnCodeStep)
@ -195,14 +198,14 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam
wxMenuBar* pMenuBar = new wxMenuBar(wxMB_DOCKABLE);
{
wxMenu* pDebugMenu = new wxMenu;
wxMenuItem* interpreter = pDebugMenu->Append(IDM_INTERPRETER, _T("&Interpreter"), wxEmptyString, wxITEM_CHECK);
wxMenu* pCoreMenu = new wxMenu;
wxMenuItem* interpreter = pCoreMenu->Append(IDM_INTERPRETER, _T("&Interpreter"), wxEmptyString, wxITEM_CHECK);
interpreter->Check(!_LocalCoreStartupParameter.bUseDynarec);
// wxMenuItem* dualcore = pDebugMenu->Append(IDM_DUALCORE, _T("&DualCore"), wxEmptyString, wxITEM_CHECK);
// dualcore->Check(_LocalCoreStartupParameter.bUseDualCore);
pMenuBar->Append(pDebugMenu, _T("&Core Startup"));
pMenuBar->Append(pCoreMenu, _T("&Core Startup"));
}
{
@ -225,6 +228,11 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam
pMenuBar->Append(pDebugDialogs, _T("&Views"));
}
{
wxMenu *pSymbolsMenu = new wxMenu;
pSymbolsMenu->Append(IDM_SCANFUNCTIONS, _T("&Scan for functions"));
pMenuBar->Append(pSymbolsMenu, _T("&Symbols"));
}
SetMenuBar(pMenuBar);
}
@ -246,6 +254,23 @@ void CCodeWindow::JumpToAddress(u32 _Address)
codeview->Center(_Address);
}
// Handler for the entries of the "Symbols" menu (currently only
// IDM_SCANFUNCTIONS). Does nothing while the core reports CORE_UNINITIALIZED.
void CCodeWindow::OnSymbolsMenu(wxCommandEvent& event)
{
	// TODO: disable menu items instead :P
	const bool coreUninitialized = (Core::GetState() == Core::CORE_UNINITIALIZED);
	if (coreUninitialized)
		return;

	const int menuId = event.GetId();
	if (menuId == IDM_SCANFUNCTIONS)
	{
		// Scan the fixed address range for functions, load the signature
		// database, then hand the results on via GetFromAnalyzer() and
		// NotifyMapLoaded() (presumably refreshing the debugger's symbol
		// views - confirm against those functions).
		PPCAnalyst::FindFunctions(0x80003100, 0x80400000);
		PPCAnalyst::LoadFuncDB("data/totaldb.dsy");
		Debugger::GetFromAnalyzer();
		NotifyMapLoaded();
	}
}
void CCodeWindow::OnCodeStep(wxCommandEvent& event)
{
@ -537,6 +562,7 @@ void CCodeWindow::OnToggleMemoryWindow(wxCommandEvent& event)
}
}
}
void CCodeWindow::OnHostMessage(wxCommandEvent& event)
{
switch (event.GetId())
@ -582,13 +608,13 @@ void CCodeWindow::PopulateToolbar(wxToolBar* toolBar)
h = m_Bitmaps[Toolbar_DebugGo].GetHeight();
toolBar->SetToolBitmapSize(wxSize(w, h));
toolBar->AddTool(IDM_DEBUG_GO, _T("Play"), m_Bitmaps[Toolbar_DebugGo], _T("Delete the selected BreakPoint or MemoryCheck"));
toolBar->AddTool(IDM_STEP, _T("Step"), m_Bitmaps[Toolbar_Step], _T("Add BreakPoint..."));
toolBar->AddTool(IDM_STEPOVER, _T("Step Over"), m_Bitmaps[Toolbar_StepOver], _T("Add BreakPoint..."));
toolBar->AddTool(IDM_SKIP, _T("Skip"), m_Bitmaps[Toolbar_Skip], _T("Add BreakPoint..."));
toolBar->AddTool(IDM_DEBUG_GO, _T("Play"), m_Bitmaps[Toolbar_DebugGo]);
toolBar->AddTool(IDM_STEP, _T("Step"), m_Bitmaps[Toolbar_Step]);
toolBar->AddTool(IDM_STEPOVER, _T("Step Over"), m_Bitmaps[Toolbar_StepOver]);
toolBar->AddTool(IDM_SKIP, _T("Skip"), m_Bitmaps[Toolbar_Skip]);
toolBar->AddSeparator();
toolBar->AddTool(IDM_GOTOPC, _T("Goto PC"), m_Bitmaps[Toolbar_GotoPC], _T("Add BreakPoint..."));
toolBar->AddTool(IDM_SETPC, _T("Set PC"), m_Bitmaps[Toolbar_SetPC], _T("Add BreakPoint..."));
toolBar->AddTool(IDM_GOTOPC, _T("Goto PC"), m_Bitmaps[Toolbar_GotoPC]);
toolBar->AddTool(IDM_SETPC, _T("Set PC"), m_Bitmaps[Toolbar_SetPC]);
toolBar->AddSeparator();
toolBar->AddControl(new wxTextCtrl(toolBar, IDM_ADDRBOX, _T("")));

View File

@ -78,6 +78,7 @@ class CCodeWindow
IDM_REGISTERWINDOW,
IDM_BREAKPOINTWINDOW,
IDM_MEMORYWINDOW,
IDM_SCANFUNCTIONS,
};
enum
@ -120,6 +121,7 @@ class CCodeWindow
void OnToggleLogWindow(wxCommandEvent& event);
void OnToggleMemoryWindow(wxCommandEvent& event);
void OnHostMessage(wxCommandEvent& event);
void OnSymbolsMenu(wxCommandEvent& event);
void CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParameter);

View File

@ -63,9 +63,12 @@ bool BootCore(const std::string& _rFilename)
StartUp.bRunCompareServer = false;
StartUp.bEnableDebugging = g_pCodeWindow ? true : false; // RUNNING_DEBUG
std::string BaseDataPath;
#ifdef _WIN32
#ifdef _WIN32
StartUp.hInstance = wxGetInstance();
#endif
#ifdef _M_X64
StartUp.bUseFastMem = true;
#endif
#endif
StartUp.AutoSetup(SCoreStartupParameter::BOOT_DEFAULT);

View File

@ -90,6 +90,7 @@ EVT_MENU(IDM_CONFIG_PAD_PLUGIN, CFrame::OnPluginPAD)
EVT_MENU(IDM_BROWSE, CFrame::OnBrowse)
EVT_MENU(IDM_TOGGLE_FULLSCREEN, CFrame::OnToggleFullscreen)
EVT_MENU(IDM_TOGGLE_DUALCORE, CFrame::OnToggleDualCore)
EVT_MENU(IDM_TOGGLE_THROTTLE, CFrame::OnToggleThrottle)
EVT_HOST_COMMAND(wxID_ANY, CFrame::OnHostMessage)
END_EVENT_TABLE()
@ -230,6 +231,12 @@ CFrame::CreateMenu()
pEmulationMenu->Append(pItem);
pItem->Check(SConfig::GetInstance().m_LocalCoreStartupParameter.bUseDualCore);
}
{
// throttling
wxMenuItem* pItem = new wxMenuItem(pEmulationMenu, IDM_TOGGLE_THROTTLE, _T("&Speed throttle"), wxEmptyString, wxITEM_CHECK);
pEmulationMenu->Append(pItem);
pItem->Check(SConfig::GetInstance().m_LocalCoreStartupParameter.bThrottle);
}
m_pMenuBar->Append(pEmulationMenu, _T("&Emulation"));
}
@ -518,7 +525,6 @@ CFrame::OnHostMessage(wxCommandEvent& event)
break;
case IDM_BOOTING_STARTED:
if (m_pBootProcessDialog == NULL)
{
/* m_pBootProcessDialog = new wxProgressDialog
@ -537,7 +543,6 @@ CFrame::OnHostMessage(wxCommandEvent& event)
break;
case IDM_BOOTING_ENDED:
if (m_pBootProcessDialog != NULL)
{
// m_pBootProcessDialog->Destroy();
@ -547,7 +552,6 @@ CFrame::OnHostMessage(wxCommandEvent& event)
break;
case IDM_UPDATESTATUSBAR:
if (m_pStatusBar != NULL)
{
m_pStatusBar->SetStatusText(event.GetString());
@ -557,24 +561,26 @@ CFrame::OnHostMessage(wxCommandEvent& event)
}
void
CFrame::OnToggleFullscreen(wxCommandEvent& WXUNUSED (event))
// Menu handler bound to IDM_TOGGLE_FULLSCREEN in the event table.
// The event argument is unused.
void CFrame::OnToggleFullscreen(wxCommandEvent& WXUNUSED (event))
{
// Always requests full-screen: the argument is a constant true, so this
// handler never leaves full-screen mode by itself.
// NOTE(review): leaving full-screen looks like it is handled in OnKeyDown
// (Alt+Return / Escape) - confirm.
ShowFullScreen(true);
// Refresh the GUI state after the mode change.
UpdateGUI();
}
void
CFrame::OnToggleDualCore(wxCommandEvent& WXUNUSED (event))
// Menu handler: inverts the bUseDualCore flag in the local core startup
// parameters and saves the settings. The event argument is unused.
void CFrame::OnToggleDualCore(wxCommandEvent& WXUNUSED (event))
{
	SConfig& config = SConfig::GetInstance();
	config.m_LocalCoreStartupParameter.bUseDualCore = !config.m_LocalCoreStartupParameter.bUseDualCore;
	config.SaveSettings();
}
// Menu handler: inverts the bThrottle flag in the local core startup
// parameters and saves the settings. The event argument is unused.
void CFrame::OnToggleThrottle(wxCommandEvent& WXUNUSED (event))
{
	SConfig& config = SConfig::GetInstance();
	config.m_LocalCoreStartupParameter.bThrottle = !config.m_LocalCoreStartupParameter.bThrottle;
	config.SaveSettings();
}
void
CFrame::OnKeyDown(wxKeyEvent& event)
void CFrame::OnKeyDown(wxKeyEvent& event)
{
if (((event.GetKeyCode() == WXK_RETURN) && (event.GetModifiers() == wxMOD_ALT)) ||
(event.GetKeyCode() == WXK_ESCAPE))
@ -589,8 +595,7 @@ CFrame::OnKeyDown(wxKeyEvent& event)
}
void
CFrame::UpdateGUI()
void CFrame::UpdateGUI()
{
// buttons
{

View File

@ -66,6 +66,7 @@ class CFrame
void OnBrowse(wxCommandEvent& event);
void OnToggleFullscreen(wxCommandEvent& event);
void OnToggleDualCore(wxCommandEvent& event);
void OnToggleThrottle(wxCommandEvent& event);
void OnKeyDown(wxKeyEvent& event);
void OnHostMessage(wxCommandEvent& event);

View File

@ -33,6 +33,7 @@ enum
IDM_CONFIG_PAD_PLUGIN,
IDM_TOGGLE_FULLSCREEN,
IDM_TOGGLE_DUALCORE,
IDM_TOGGLE_THROTTLE,
IDM_NOTIFYMAPLOADED,
IDM_UPDATELOGDISPLAY,
IDM_UPDATEDISASMDIALOG,