Yet another bunch of optimizations and cleanup.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@191 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2008-08-13 20:48:27 +00:00
parent 8900114c2b
commit 2b8a85ac5a
21 changed files with 457 additions and 256 deletions

View File

@ -896,6 +896,14 @@
RelativePath=".\Src\PowerPC\Jit64\Jit_SystemRegisters.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\Jit64\Jit_Util.cpp"
>
</File>
<File
RelativePath=".\Src\PowerPC\Jit64\Jit_Util.h"
>
</File>
<File
RelativePath=".\Src\PowerPC\Jit64\JitAsm.cpp"
>
@ -1162,6 +1170,10 @@
RelativePath=".\Src\PatchEngine.h"
>
</File>
<File
RelativePath=".\Src\SConscript"
>
</File>
<File
RelativePath=".\Src\stdafx.cpp"
>

View File

@ -329,7 +329,7 @@ void Write32(const u32 _Data, const u32 _Address)
void GatherPipeBursted()
{
// we arn't linked, so we don't care about gather pipe data
// if we aren't linked, we don't care about gather pipe data
if (!fifo.bFF_GPLinkEnable)
return;

View File

@ -73,7 +73,7 @@ void CheckGatherPipe()
// increase the CPUWritePointer
CPeripheralInterface::Fifo_CPUWritePointer += GATHER_PIPE_SIZE;
if (CPeripheralInterface::Fifo_CPUWritePointer > CPeripheralInterface::Fifo_CPUEnd)
_assert_msg_(DYNA_REC, 0, "ARGH");
_assert_msg_(DYNA_REC, 0, "Fifo_CPUWritePointer out of bounds");
if (CPeripheralInterface::Fifo_CPUWritePointer >= CPeripheralInterface::Fifo_CPUEnd)
CPeripheralInterface::Fifo_CPUWritePointer = CPeripheralInterface::Fifo_CPUBase;
@ -94,7 +94,7 @@ void Write16(const u16 _iValue, const u32 _iAddress)
{
// LOG(GPFIFO, "GPFIFO #%x: 0x%04x",CPeripheralInterface::Fifo_CPUWritePointer+m_gatherPipeCount, _iValue);
*(u16*)(&m_gatherPipe[m_gatherPipeCount]) = Common::swap16(_iValue);
m_gatherPipeCount+=2;
m_gatherPipeCount += 2;
CheckGatherPipe();
}
@ -105,7 +105,7 @@ void Write32(const u32 _iValue, const u32 _iAddress)
// LOG(GPFIFO, "GPFIFO #%x: 0x%08x / %f",CPeripheralInterface::Fifo_CPUWritePointer+m_gatherPipeCount, _iValue, floatvalue);
#endif
*(u32*)(&m_gatherPipe[m_gatherPipeCount]) = Common::swap32(_iValue);
m_gatherPipeCount+=4;
m_gatherPipeCount += 4;
CheckGatherPipe();
}

View File

@ -878,7 +878,7 @@ u8 *GetPointer(const u32 _Address)
}
bool IsRAMAddress(const u32 addr)
bool IsRAMAddress(const u32 addr, bool allow_locked_cache)
{
switch ((addr >> 24) & 0xFC) {
case 0x00:
@ -896,7 +896,7 @@ bool IsRAMAddress(const u32 addr)
else
return false;
case 0xE0:
if (addr - 0xE0000000 < L1_CACHE_SIZE)
if (allow_locked_cache && addr - 0xE0000000 < L1_CACHE_SIZE)
return true;
else
return false;

View File

@ -77,7 +77,7 @@ namespace Memory
void InitHWMemFuncsWii();
u32 Read_Instruction(const u32 _Address);
bool IsRAMAddress(const u32 addr);
bool IsRAMAddress(const u32 addr, bool allow_locked_cache = false);
writeFn32 GetHWWriteFun32(const u32 _Address);
inline u8* GetCachePtr() {return m_pL1Cache;}

View File

@ -20,12 +20,14 @@
#include "Common.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "Thunk.h"
#include "../../HLE/HLE.h"
#include "../../CoreTiming.h"
#include "../PowerPC.h"
#include "../PPCTables.h"
#include "../PPCAnalyst.h"
#include "../../HW/Memmap.h"
#include "../../HW/GPFifo.h"
#include "Jit.h"
#include "JitAsm.h"
#include "JitCache.h"
@ -294,8 +296,15 @@ namespace Jit64
been_here[PC] = 1;
}
// Emits the end-of-block gather pipe check.
// Called by the exit writers before they emit the code that leaves the block.
// Nothing is emitted unless the gather pipe optimization is enabled and the
// block being compiled still has FIFO bytes accounted in js.fifoBytesThisBlock.
void Cleanup()
{
	if (!jo.optimizeGatherPipe)
		return;
	if (js.fifoBytesThisBlock <= 0)
		return;

	CALL((void *)&GPFifo::CheckGatherPipe);
}
void WriteExit(u32 destination, int exit_num)
{
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
//If nobody has taken care of this yet (this can be removed when all branches are done)
@ -321,6 +330,7 @@ namespace Jit64
void WriteExitDestInEAX(int exit_num)
{
MOV(32, M(&PC), R(EAX));
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(Asm::dispatcher, true);
}
@ -328,12 +338,14 @@ namespace Jit64
void WriteRfiExitDestInEAX()
{
MOV(32, M(&PC), R(EAX));
Cleanup();
SUB(32, M(&CoreTiming::downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
JMP(Asm::testExceptions, true);
}
void WriteExceptionExit(u32 exception)
{
Cleanup();
OR(32, M(&PowerPC::ppcState.Exceptions), Imm32(exception));
MOV(32, M(&PC), Imm32(js.compilerPC + 4));
JMP(Asm::testExceptions, true);
@ -396,6 +408,11 @@ namespace Jit64
// Default(ops[i].inst);
gpr.SanityCheck();
fpr.SanityCheck();
if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32)
{
js.fifoBytesThisBlock -= 32;
CALL(ProtectFunction((void *)&GPFifo::CheckGatherPipe, 0));
}
}
js.compilerPC += 4;

View File

@ -69,6 +69,7 @@ namespace Jit64
bool enableBlocklink;
bool fpAccurateFlags;
bool enableFastMem;
bool optimizeGatherPipe;
};
extern JitState js;
@ -84,11 +85,6 @@ namespace Jit64
void HLEFunction(UGeckoInstruction _inst);
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);
void addx(UGeckoInstruction inst);
void orx(UGeckoInstruction inst);
void andx(UGeckoInstruction inst);

View File

@ -29,6 +29,7 @@
#include "JitCache.h"
#include "../../HW/CPUCompare.h"
#include "../../HW/GPFifo.h"
#include "../../Core.h"
using namespace Gen;
@ -36,6 +37,7 @@ int blocksExecuted;
namespace Jit64
{
namespace Asm
{
const u8 *enterCode;
@ -47,6 +49,11 @@ const u8 *dispatcherNoCheck;
const u8 *dispatcherPcInEAX;
const u8 *computeRc;
const u8 *fifoDirectWrite8;
const u8 *fifoDirectWrite16;
const u8 *fifoDirectWrite32;
const u8 *fifoDirectWriteFloat;
static bool blockMode = false; //doesn't work as true!
bool compareEnabled = false;
@ -73,6 +80,8 @@ static bool enableStatistics = false;
// At this offset - 4, there is an int specifying the block number.
void GenerateCommon();
#ifdef _M_IX86
void Generate()
{
@ -167,36 +176,7 @@ void Generate()
POP(EBP);
RET();
computeRc = AlignCode16();
AND(32, M(&CR), Imm32(0x0FFFFFFF));
CMP(32, R(EAX), Imm8(0));
FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G);
OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0
RET();
SetJumpTarget(pGreater);
OR(32, M(&CR), Imm32(0x40000000)); // _x86Reg > 0
RET();
SetJumpTarget(pLesser);
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
RET();
// Fast write routines - special case the most common hardware write
// TODO: use this.
// Even in x86, the param values will be in the right registers.
/*
const u8 *fastMemWrite8 = AlignCode16();
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
FixupBranch skip_fast_write = J_CC(CC_NE, false);
MOV(32, EAX, M(&m_gatherPipeCount));
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
ADD(32, 1, M(&m_gatherPipeCount));
RET();
SetJumpTarget(skip_fast_write);
CALL((void *)&Memory::Write_U8);*/
GenerateCommon();
}
#elif defined(_M_X64)
@ -271,7 +251,7 @@ void Generate()
CALL((void *)&CoreTiming::Advance);
testExceptions = GetCodePtr();
TEST(32,M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF));
TEST(32, M(&PowerPC::ppcState.Exceptions), Imm32(0xFFFFFFFF));
FixupBranch skipExceptions = J_CC(CC_Z);
MOV(32, R(EAX), M(&PC));
MOV(32, M(&NPC), R(EAX));
@ -287,12 +267,59 @@ void Generate()
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
GenerateCommon();
}
#endif
// Emits a small callable routine (terminated by RET) that appends one value
// of `size` bits to GPFifo::m_gatherPipe and advances
// GPFifo::m_gatherPipeCount by size / 8 bytes. GenerateCommon() instantiates
// it for size 8, 16 and 32.
//
// Contract of the emitted routine, as far as the emitted sequence shows:
//  - input value is expected in ABI_PARAM1
//  - EAX is overwritten and ABI_PARAM1 is modified by the BSWAP
//  - ESI (and EDX for the non-32-bit variants) are pushed and popped again
//  - it does not call GPFifo::CheckGatherPipe; the caller has to account for
//    the written bytes itself.
// NOTE(review): the gather pipe address is loaded as a 32-bit immediate, so
// on a 64-bit host this assumes the buffer lives below 4 GB - confirm.
void GenFifoWrite(int size)
{
// Assume value in ABI_PARAM1
// Preserve the scratch registers used below.
PUSH(ESI);
if (size != 32)
PUSH(EDX);
// Swap the value's byte order before storing it (the C++ write paths in
// GPFifo store Common::swap16/swap32 of the value as well).
BSWAP(size, ABI_PARAM1);
// EAX = base of the gather pipe buffer, ESI = current fill count in bytes.
MOV(32, R(EAX), Imm32((u32)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
if (size != 32) {
// Narrow stores go through EDX - presumably because ABI_PARAM1 is not
// guaranteed to have an 8/16-bit encodable form on every target; confirm.
MOV(32, R(EDX), R(ABI_PARAM1));
MOV(size, MComplex(RAX, RSI, 1, 0), R(EDX));
} else {
MOV(size, MComplex(RAX, RSI, 1, 0), R(ABI_PARAM1));
}
// Advance the fill count by the number of bytes written (size is in bits).
ADD(32, R(ESI), Imm8(size >> 3));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
// Restore in reverse order of the pushes.
if (size != 32)
POP(EDX);
POP(ESI);
RET();
}
// Scratch memory slot used by the routine emitted below to move the raw bits
// of a single-precision float from XMM0 into a general purpose register.
static int temp32;
// Emits a callable routine (terminated by RET) that appends the 32-bit float
// held in XMM0 to GPFifo::m_gatherPipe and advances
// GPFifo::m_gatherPipeCount by 4.
//
// The emitted routine overwrites EAX and the static temp32 slot; ESI and EDX
// are pushed on entry and popped before returning. It does not call
// GPFifo::CheckGatherPipe.
// NOTE(review): like GenFifoWrite, the buffer address is loaded as a 32-bit
// immediate - confirm this is valid on 64-bit hosts.
void GenFifoFloatWrite()
{
// Assume value in XMM0
PUSH(ESI);
PUSH(EDX);
// Bounce the float through memory to get its bit pattern into EDX.
MOVSS(M(&temp32), XMM0);
MOV(32, R(EDX), M(&temp32));
// Swap the byte order before storing, as the integer write routines do.
BSWAP(32, EDX);
// EAX = base of the gather pipe buffer, ESI = current fill count in bytes.
MOV(32, R(EAX), Imm32((u32)GPFifo::m_gatherPipe));
MOV(32, R(ESI), M(&GPFifo::m_gatherPipeCount));
MOV(32, MComplex(RAX, RSI, 1, 0), R(EDX));
// Four bytes were written.
ADD(32, R(ESI), Imm8(4));
MOV(32, M(&GPFifo::m_gatherPipeCount), R(ESI));
POP(EDX);
POP(ESI);
RET();
}
void GenerateCommon()
{
computeRc = AlignCode16();
AND(32, M(&CR), Imm32(0x0FFFFFFF));
CMP(32, R(EAX), Imm8(0));
FixupBranch pLesser = J_CC(CC_L);
FixupBranch pGreater = J_CC(CC_G);
OR(32, M(&CR), Imm32(0x20000000)); // _x86Reg == 0
RET();
SetJumpTarget(pGreater);
@ -302,68 +329,30 @@ void Generate()
OR(32, M(&CR), Imm32(0x80000000)); // _x86Reg < 0
RET();
/*
const u8 *end = GetCodePtr();
u8 *xDis = new u8[65536];
memset(xDis,0,65536);
disassembler x64disasm;
x64disasm.set_syntax_intel();
u64 disasmPtr = (u64)enterCode;
int size = end-enterCode;
char *sptr = (char*)xDis;
while ((u8*)disasmPtr < end)
{
disasmPtr += x64disasm.disasm64(disasmPtr, disasmPtr, (u8*)disasmPtr, sptr);
sptr += strlen(sptr);
*sptr++ = 13;
*sptr++ = 10;
}
MessageBox(0,(char*)xDis,"yo",0);
delete [] xDis; */
fifoDirectWrite8 = AlignCode4();
GenFifoWrite(8);
fifoDirectWrite16 = AlignCode4();
GenFifoWrite(16);
fifoDirectWrite32 = AlignCode4();
GenFifoWrite(32);
fifoDirectWriteFloat = AlignCode4();
GenFifoFloatWrite();
// Fast write routines - special case the most common hardware write
// TODO: use this.
// Even in x86, the param values will be in the right registers.
/*
RUNTIME_FUNCTION func;
func.BeginAddress = 0;
func.EndAddress = (u32)(GetCodePtr() - enterCode);
func.UnwindData = 0;
RtlAddFunctionTable(&func, 1, (ULONGLONG)enterCode);*/
/*
//we only want to do this once
PUSH(RBX);
PUSH(RSI);
PUSH(RDI);
PUSH(R12);
PUSH(R13);
PUSH(R14);
PUSH(R15);
//TODO: Also preserve XMM0-3?
SUB(64, R(RSP), Imm8(0x20));
MOV(32, R(R15), M(&Memory::base));
MOV(32, M(&PowerPC::ppcState.pc), R(R14));
//Landing pad for drec space
ADD(64, R(RSP), Imm8(0x20));
POP(R15);
POP(R14);
POP(R13);
POP(R12);
POP(RDI);
POP(RSI);
POP(RBX);
RET();*/
}
#endif
}
const u8 *fastMemWrite8 = AlignCode16();
CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
FixupBranch skip_fast_write = J_CC(CC_NE, false);
MOV(32, EAX, M(&m_gatherPipeCount));
MOV(8, MDisp(EAX, (u32)&m_gatherPipe), ABI_PARAM1);
ADD(32, 1, M(&m_gatherPipeCount));
RET();
SetJumpTarget(skip_fast_write);
CALL((void *)&Memory::Write_U8);*/
}
} // namespace Asm
} // namespace Jit64

View File

@ -34,6 +34,11 @@ namespace Jit64
extern const u8 *dispatchPcInEAX;
extern const u8 *doTiming;
extern const u8 *fifoDirectWrite8;
extern const u8 *fifoDirectWrite16;
extern const u8 *fifoDirectWrite32;
extern const u8 *fifoDirectWriteFloat;
extern bool compareEnabled;
void Generate();
}

View File

@ -85,6 +85,7 @@ namespace Jit64
#endif
jo.assumeFPLoadFromMem = true;
jo.fpAccurateFlags = true;
jo.optimizeGatherPipe = true;
codeCache = (u8*)AllocateExecutableMemory(CODE_SIZE);
genFunctions = (u8*)AllocateExecutableMemory(GEN_SIZE);
@ -260,7 +261,7 @@ namespace Jit64
}
int GetCodeSize() {
return GetCodePtr() - codeCache;
return (int)(GetCodePtr() - codeCache);
}
//Block linker
@ -369,6 +370,7 @@ namespace Jit64
void ClearCache()
{
Core::DisplayMessage("Cleared code cache.", 3000);
// Is destroying the blocks really necessary?
for (int i = 0; i < numBlocks; i++) {
DestroyBlock(i, false);

View File

@ -556,13 +556,23 @@ namespace Jit64
int s = inst.RS;
if (gpr.R(a).IsImm() || gpr.R(s).IsImm())
{
if (gpr.R(s).IsImm())
{
if (gpr.R(s).offset == 0 && !inst.Rc) {
// This is pretty common for some reason
gpr.LoadToX64(a, false);
XOR(32, gpr.R(a), gpr.R(a));
return;
}
// This might also be worth doing.
}
Default(inst);
return;
}
if (a != s)
{
gpr.Lock(a,s);
gpr.Lock(a, s);
gpr.LoadToX64(a, false);
MOV(32, gpr.R(a), gpr.R(s));
}

View File

@ -35,6 +35,7 @@
#include "JitCache.h"
#include "JitAsm.h"
#include "JitRegCache.h"
#include "Jit_Util.h"
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
@ -47,77 +48,10 @@
namespace Jit64
{
static u64 GC_ALIGNED16(temp64);
static u32 GC_ALIGNED16(temp32);
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
#else
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
#endif
if (accessSize == 32)
{
BSWAP(32, EAX);
}
else if (accessSize == 16)
{
BSWAP(32, EAX);
SHR(32, R(EAX), Imm8(16));
}
if (signExtend && accessSize < 32) {
MOVSX(32, accessSize, EAX, R(EAX));
}
namespace {
u64 GC_ALIGNED16(temp64);
u32 GC_ALIGNED16(temp32);
}
void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
{
if (offset)
ADD(32, R(reg), Imm32((u32)offset));
TEST(32, R(reg), Imm32(0x0C000000));
FixupBranch argh = J_CC(CC_NZ);
UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
FixupBranch arg2 = J();
SetJumpTarget(argh);
switch (accessSize)
{
case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
}
SetJumpTarget(arg2);
}
void UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
{
if (accessSize != 32) {
PanicAlert("UnsafeWriteRegToReg can't handle %i byte accesses", accessSize);
}
BSWAP(32, reg_value);
#ifdef _M_IX86
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#else
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#endif
}
// Destroys both arg registers
void SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
{
if (offset)
ADD(32, R(reg_addr), Imm32(offset));
TEST(32, R(reg_addr), Imm32(0x0C000000));
FixupBranch unsafe_addr = J_CC(CC_NZ);
UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
FixupBranch skip_call = J();
SetJumpTarget(unsafe_addr);
ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2);
SetJumpTarget(skip_call);
}
void lbzx(UGeckoInstruction inst)
{
INSTRUCTION_START;
@ -272,73 +206,58 @@ namespace Jit64
case 38: accessSize = 8; break; //stb
default: _assert_msg_(DYNA_REC, 0, "AWETKLJASDLKF"); return;
}
/*
if (gpr.R(a).IsImm() && !update)
{
u32 addr = (u32)gpr.R(a).offset;
addr += offset;
//YAY!
//Now do something smart
if ((addr & 0xFFFFF000) == 0xCC008000)
if ((addr & 0xFFFFF000) == 0xCC008000 && jo.optimizeGatherPipe)
{
//MessageBox(0,"FIFO",0,0);
//Do a direct I/O write
#ifdef _M_X64
MOV(32, R(EDX), Imm32((u32)gpr.R(a).offset));
MOV(32, R(ECX), gpr.R(s));
#elif _M_IX86
PUSH(32, Imm32((u32)gpr.R(a).offset));
PUSH(32, gpr.R(s));
#endif
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(s));
// INT3();
switch (accessSize)
{
case 8: CALL((void *)&GPFifo::FastWrite8); break;
case 16: CALL((void *)&GPFifo::FastWrite16); break;
case 32: CALL((void *)&GPFifo::FastWrite32); break;
// No need to protect these, they don't touch any state
case 8: CALL((void *)Asm::fifoDirectWrite8); break;
case 16: CALL((void *)Asm::fifoDirectWrite16); break;
case 32: CALL((void *)Asm::fifoDirectWrite32); break;
}
js.fifoBytesThisBlock += accessSize >> 3;
if (js.fifoBytesThisBlock > 32)
{
js.fifoBytesThisBlock -= 32;
CALL((void *)&GPFifo::CheckGatherPipe);
}
#ifdef _M_IX86
ADD(32, R(ESP), Imm8(8));
#endif
gpr.UnlockAllX();
return;
}
else if ((addr>>24) == 0xCC && accessSize == 32) //Other I/O
else if (Memory::IsRAMAddress(addr) && accessSize == 32)
{
#ifdef _M_X64
MOV(32, R(EDX), Imm32((u32)gpr.R(a).offset));
MOV(32, R(ECX), gpr.R(s));
#elif _M_IX86
PUSH(32, Imm32((u32)gpr.R(a).offset));
PUSH(32, gpr.R(s));
#endif
CALL((void *)Memory::GetHWWriteFun32(addr));
#ifdef _M_IX86
ADD(32, R(ESP), Imm8(8));
#endif
MOV(accessSize, R(EAX), gpr.R(s));
BSWAP(accessSize, EAX);
WriteToConstRamAddress(accessSize, R(EAX), addr);
return;
// PanicAlert("yum yum");
// This may be quite beneficial.
}
// Other IO not worth the trouble.
}
// Optimized stack access?
if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack) //Zelda does not like this
{
//Stack access
MOV(32, R(ECX), gpr.R(a));
gpr.FlushLockX(ABI_PARAM1);
MOV(32, R(ABI_PARAM1), gpr.R(a));
MOV(32, R(EAX), gpr.R(s));
BSWAP(32, EAX);
#ifdef _M_X64
MOV(accessSize, MComplex(RBX, ECX, SCALE_1, (u32)offset), R(EAX));
MOV(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), R(EAX));
#elif _M_IX86
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(ECX, (u32)Memory::base + (u32)offset), R(EAX));
MOV(accessSize, MDisp(ABI_PARAM1, (u32)Memory::base + (u32)offset), R(EAX));
#endif
if (update)
ADD(32, gpr.R(a), Imm32(offset));
gpr.UnlockAllX();
return;
}
*/
//Still here? Do regular path.
gpr.Lock(s, a);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
@ -394,8 +313,8 @@ namespace Jit64
/*
/// BUGGY
//return _inst.RA ? (m_GPR[_inst.RA] + _inst.SIMM_16) : _inst.SIMM_16;
gpr.Flush(FLUSH_ALL);
gpr.LockX(ECX, EDX, ESI);
gpr.FlushLockX(ECX, EDX);
gpr.FlushLockX(ESI);
//INT3();
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
if (inst.RA)

View File

@ -35,6 +35,7 @@
#include "JitCache.h"
#include "JitAsm.h"
#include "JitRegCache.h"
#include "Jit_Util.h"
// #define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
@ -55,9 +56,10 @@ const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10,
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
const u8 GC_ALIGNED16(bswapShuffle2x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8};
static u64 GC_ALIGNED16(temp64);
static u32 GC_ALIGNED16(temp32);
namespace {
u64 GC_ALIGNED16(temp64);
u32 GC_ALIGNED16(temp32);
}
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
// and pshufb could help a lot.
// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.
@ -178,29 +180,50 @@ void stfs(UGeckoInstruction inst)
int s = inst.RS;
int a = inst.RA;
s32 offset = (s32)(s16)inst.SIMM_16;
if (a && !update)
{
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
MOV(32, R(ABI_PARAM2), gpr.R(a));
ADD(32, R(ABI_PARAM2), Imm32(offset));
if (update && offset)
{
MOV(32, gpr.R(a), R(ABI_PARAM2));
}
CVTSD2SS(XMM0, fpr.R(s));
MOVSS(M(&temp32), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp32));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}
else
{
if (!a || update) {
Default(inst);
return;
}
if (gpr.R(a).IsImm())
{
u32 addr = gpr.R(a).offset + offset;
if (Memory::IsRAMAddress(addr))
{
if (cpu_info.bSSSE3) {
CVTSD2SS(XMM0, fpr.R(s));
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
WriteFloatToConstRamAddress(XMM0, addr);
return;
}
}
else if (addr == 0xCC008000)
{
// Float directly to write gather pipe! Fun!
CVTSD2SS(XMM0, fpr.R(s));
CALL((void*)Asm::fifoDirectWriteFloat);
// TODO
js.fifoBytesThisBlock += 4;
return;
}
}
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
MOV(32, R(ABI_PARAM2), gpr.R(a));
ADD(32, R(ABI_PARAM2), Imm32(offset));
if (update && offset)
{
MOV(32, gpr.R(a), R(ABI_PARAM2));
}
CVTSD2SS(XMM0, fpr.R(s));
MOVSS(M(&temp32), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp32));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
}

View File

@ -108,21 +108,67 @@ void psq_st(UGeckoInstruction inst)
Default(inst);
return;
}
if (!inst.RA)
{
// This really should never happen. Unless we change this to also support stwux
Default(inst);
return;
}
const UGQR gqr(rSPR(SPR_GQR0 + inst.I));
const EQuantizeType stType = static_cast<EQuantizeType>(gqr.ST_TYPE);
int stScale = gqr.ST_SCALE;
bool update = inst.OPCD == 61;
if (!inst.RA || inst.W)
{
// PanicAlert(inst.RA ? "W" : "inst");
Default(inst);
return;
}
int offset = inst.SIMM_12;
int a = inst.RA;
int s = inst.RS; // Fp numbers
if (inst.W) {
// PanicAlert("W=1: stType %i stScale %i update %i", (int)stType, (int)stScale, (int)update);
// It's fairly common that games write stuff to the pipe using this. Then, it's pretty much only
// floats so that's what we'll work on.
switch (stType)
{
case QUANTIZE_FLOAT:
{
if (gpr.R(a).IsImm())
{
PanicAlert("Imm: %08x", gpr.R(a).offset);
}
DISABLE_32BIT;
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.Lock(a);
fpr.Lock(s);
if (update)
gpr.LoadToX64(a, true, true);
MOV(32, R(ABI_PARAM2), gpr.R(a));
if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
if (update && offset)
MOV(32, gpr.R(a), R(ABI_PARAM2));
CVTSD2SS(XMM0, fpr.R(s));
MOVD_xmm(M(&temp64), XMM0);
MOV(32, R(ABI_PARAM1), M(&temp64));
FixupBranch argh = J_CC(CC_NZ);
BSWAP(32, ABI_PARAM1);
MOV(32, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
FixupBranch skip_call = J();
SetJumpTarget(argh);
CALL(ProtectFunction((void *)&Memory::Write_U32, 2));
SetJumpTarget(skip_call);
gpr.UnlockAll();
gpr.UnlockAllX();
fpr.UnlockAll();
return;
}
default:
Default(inst);
return;
}
return;
}
if (stType == QUANTIZE_FLOAT)
{
DISABLE_32BIT;

View File

@ -43,9 +43,9 @@
namespace Jit64
{
static const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
static const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
static const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
const u64 GC_ALIGNED16(psSignBits[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
const u64 GC_ALIGNED16(psAbsMask[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
const double GC_ALIGNED16(psOneOne[2]) = {1.0, 1.0};
void ps_sign(UGeckoInstruction inst)
{

View File

@ -0,0 +1,127 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "Common.h"
#include "Thunk.h"
#include "../PowerPC.h"
#include "../../Core.h"
#include "../../HW/GPFifo.h"
#include "../../HW/CommandProcessor.h"
#include "../../HW/PixelEngine.h"
#include "../../HW/Memmap.h"
#include "../PPCTables.h"
#include "x64Emitter.h"
#include "ABI.h"
#include "Jit.h"
#include "JitCache.h"
#include "JitAsm.h"
#include "JitRegCache.h"
namespace Jit64
{
// Emits an unchecked load of `accessSize` bits (8, 16 or 32) from emulated
// memory into reg_value, followed by the byte swap needed for 16/32-bit values.
//
//   reg_addr   - register holding the emulated address. On 32-bit hosts it is
//                masked in place with Memory::MEMVIEW32_MASK, i.e. clobbered.
//   reg_value  - destination register; receives the zero-extended (or, with
//                signExtend, sign-extended) value.
//   offset     - constant displacement added to the address.
//   signExtend - sign-extend 8/16-bit results to 32 bits.
//
// "Unsafe" because no address check is emitted; the caller must know that the
// address can be accessed directly (see SafeLoadRegToEAX for the checked form).
// On 64-bit hosts RBX is used as the base register - presumably it holds the
// memory base pointer; confirm against the dispatcher setup.
//
// The post-processing used to be hard-coded to EAX, so a destination other
// than EAX got an unswapped value while EAX was trashed. It now operates on
// reg_value; callers passing EAX get the same code as before.
// NOTE(review): on 32-bit hosts an 8-bit sign extension requires reg_value to
// have a byte-addressable low part (EAX/ECX/EDX/EBX).
void UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
{
#ifdef _M_IX86
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
	MOVZX(32, accessSize, reg_value, MDisp(reg_addr, (u32)Memory::base + offset));
#else
	MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
#endif
	if (accessSize == 32)
	{
		BSWAP(32, reg_value);
	}
	else if (accessSize == 16)
	{
		// Swap all four bytes, then move the swapped halfword back down.
		BSWAP(32, reg_value);
		SHR(32, R(reg_value), Imm8(16));
	}
	if (signExtend && accessSize < 32) {
		MOVSX(32, accessSize, reg_value, R(reg_value));
	}
}
// Emits a checked load of `accessSize` bits (8, 16 or 32) from the emulated
// address in `reg` (+ offset) with the result left in EAX.
//
//   reg        - register holding the emulated address; modified in place
//                when offset is non-zero.
//   offset     - constant displacement, added to reg before the check.
//   signExtend - sign-extend 8/16-bit results to 32 bits.
//
// Addresses with any bit of 0x0C000000 set take the slow path through
// Memory::Read_U8/U16/U32; everything else uses the direct load emitted by
// UnsafeLoadRegToReg.
//
// The slow path used to ignore signExtend: the Read_U* helpers return unsigned
// values, so a sign-extending load gave different results depending on which
// path the address took. The extension is now applied after the call as well.
void SafeLoadRegToEAX(X64Reg reg, int accessSize, s32 offset, bool signExtend)
{
	if (offset)
		ADD(32, R(reg), Imm32((u32)offset));
	TEST(32, R(reg), Imm32(0x0C000000));
	FixupBranch slow_path = J_CC(CC_NZ);

	// Fast path: direct access.
	UnsafeLoadRegToReg(reg, EAX, accessSize, 0, signExtend);
	FixupBranch done = J();

	// Slow path: go through the memory handlers.
	SetJumpTarget(slow_path);
	switch (accessSize)
	{
	case 32: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U32, 1), reg); break;
	case 16: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U16, 1), reg); break;
	case 8: ABI_CallFunctionR(ProtectFunction((void *)&Memory::Read_U8, 1), reg); break;
	}
	if (signExtend && accessSize < 32) {
		// Only the low accessSize bits of the returned value are meaningful.
		MOVSX(32, accessSize, EAX, R(EAX));
	}
	SetJumpTarget(done);
}
// Emits an unchecked 32-bit store of reg_value to the emulated address in
// reg_addr (+ offset).
//
//   reg_value  - value to store; byte-swapped in place, i.e. clobbered.
//   reg_addr   - emulated address; masked in place on 32-bit hosts.
//   accessSize - access width in bits. Only 32 is supported; any other value
//                raises a PanicAlert at code generation time (emission still
//                continues afterwards, as before).
//   offset     - constant displacement added to the address.
//
// The alert text used to call accessSize a byte count although it is a bit
// count (8/16/32); the unit in the message is corrected.
void UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
{
	if (accessSize != 32) {
		PanicAlert("UnsafeWriteRegToReg can't handle %i bit accesses", accessSize);
	}
	BSWAP(32, reg_value);
#ifdef _M_IX86
	AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
	MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
#else
	MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
#endif
}
// Emits a checked store of reg_value to the emulated address in reg_addr
// (+ offset). Destroys both arg registers.
//
// Addresses with any bit of 0x0C000000 set are written through
// Memory::Write_U32; everything else uses the direct store emitted by
// UnsafeWriteRegToReg.
// NOTE(review): the slow path always calls Write_U32 and the fast path only
// supports 32-bit accesses, so accessSize values other than 32 are not handled.
//
// The slow path used to pass the fixed ABI_PARAM1/ABI_PARAM2 registers to the
// call helper, which was only right when the caller happened to pass exactly
// those. It now forwards the registers it was given, the same way
// SafeLoadRegToEAX forwards its address register. Callers that pass
// ABI_PARAM1/ABI_PARAM2 get the same code as before.
void SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int accessSize, s32 offset)
{
	if (offset)
		ADD(32, R(reg_addr), Imm32(offset));
	TEST(32, R(reg_addr), Imm32(0x0C000000));
	FixupBranch unsafe_addr = J_CC(CC_NZ);
	UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, 0);
	FixupBranch skip_call = J();
	SetJumpTarget(unsafe_addr);
	// reg_value has not been byte-swapped on this path; the swap is part of the fast path only.
	ABI_CallFunctionRR(ProtectFunction((void *)&Memory::Write_U32, 2), reg_value, reg_addr);
	SetJumpTarget(skip_call);
}
// Emits a store of `arg` (accessSize bits wide) to an emulated address that is
// known at compile time. No byte swap is emitted; the caller provides the
// value in the byte order it wants stored.
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address)
{
#ifdef _M_X64
	// Displacement relative to RBX, with the top two address bits dropped.
	const u32 ram_offset = address & 0x3FFFFFFF;
	MOV(accessSize, MDisp(RBX, ram_offset), arg);
#else
	// Absolute host address inside the 32-bit memory view.
	void *const host_ptr = (void*)(Memory::base + (address & Memory::MEMVIEW32_MASK));
	MOV(accessSize, M(host_ptr), arg);
#endif
}
// Emits a MOVSS store of the low single-precision lane of xmm_reg to an
// emulated address that is known at compile time. No byte swap is emitted.
// The 64-bit variant loads the address into RAX first, so RAX is overwritten.
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
{
#ifdef _M_X64
	MOV(32, R(RAX), Imm32(address));
	MOVSS(MComplex(RBX, RAX, 1, 0), xmm_reg);
#else
	// Absolute host address inside the 32-bit memory view.
	const u32 host_addr = (u32)Memory::base + (address & Memory::MEMVIEW32_MASK);
	MOVSS(M((void*)host_addr), xmm_reg);
#endif
}
} // namespace

View File

@ -0,0 +1,33 @@
// Copyright (C) 2003-2008 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// Include guard: this header declares default arguments, which must not be
// seen twice in one translation unit.
#ifndef JIT64_JIT_UTIL_H
#define JIT64_JIT_UTIL_H

#include "x64Emitter.h"

namespace Jit64
{

// Memory Load/Store
//
// All helpers below emit host code at the current emitter position.
// accessSize is given in bits.

// Unchecked load from the emulated address in reg_addr (+ offset) into reg_value.
void UnsafeLoadRegToReg(Gen::X64Reg reg_addr, Gen::X64Reg reg_value, int accessSize, s32 offset = 0, bool signExtend = false);

// Unchecked store of reg_value to the emulated address in reg_addr (+ offset).
void UnsafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset = 0);

// Checked load into EAX; falls back to the Memory::Read_U* handlers for
// addresses that cannot be accessed directly.
void SafeLoadRegToEAX(Gen::X64Reg reg, int accessSize, s32 offset, bool signExtend = false);

// Checked store; falls back to Memory::Write_U32 for addresses that cannot be
// accessed directly. Destroys both argument registers.
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset);

// Stores to an emulated address that is known at compile time.
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);

} // namespace

#endif // JIT64_JIT_UTIL_H

View File

@ -76,6 +76,7 @@ files = ["Console.cpp",
"PowerPC/Jit64/Jit_LoadStore.cpp",
"PowerPC/Jit64/Jit_LoadStoreFloating.cpp",
"PowerPC/Jit64/Jit_SystemRegisters.cpp",
"PowerPC/Jit64/Jit_Util.cpp",
"HLE/HLE.cpp",
"HLE/HLE_Misc.cpp",
"HLE/HLE_OS.cpp",

View File

@ -40,6 +40,8 @@
#include "Debugger/PPCDebugInterface.h"
#include "Debugger/Debugger_SymbolMap.h"
#include "PowerPC/PPCAnalyst.h"
#include "PowerPC/Jit64/Jit.h"
#include "PowerPC/Jit64/JitCache.h"
#include "Core.h"
#include "LogManager.h"
@ -235,6 +237,13 @@ void CCodeWindow::CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParam
pSymbolsMenu->Append(IDM_SCANFUNCTIONS, _T("&Scan for functions"));
pMenuBar->Append(pSymbolsMenu, _T("&Symbols"));
}
{
wxMenu *pJitMenu = new wxMenu;
pJitMenu->Append(IDM_CLEARCODECACHE, _T("&Clear code cache"));
pMenuBar->Append(pJitMenu, _T("&JIT"));
}
SetMenuBar(pMenuBar);
}
@ -256,6 +265,16 @@ void CCodeWindow::JumpToAddress(u32 _Address)
codeview->Center(_Address);
}
// Handles the entries of the "JIT" menu. The only command so far,
// IDM_CLEARCODECACHE, clears the JIT code cache; other ids are ignored.
void CCodeWindow::OnJitMenu(wxCommandEvent& event)
{
	const int menu_id = event.GetId();
	if (menu_id == IDM_CLEARCODECACHE)
	{
		Jit64::ClearCache();
	}
}
void CCodeWindow::OnSymbolsMenu(wxCommandEvent& event)
{
if (Core::GetState() == Core::CORE_UNINITIALIZED)

View File

@ -81,6 +81,7 @@ class CCodeWindow
IDM_SCANFUNCTIONS,
IDM_LOADMAPFILE,
IDM_SAVEMAPFILE,
IDM_CLEARCODECACHE,
};
enum
@ -125,6 +126,7 @@ class CCodeWindow
void OnToggleMemoryWindow(wxCommandEvent& event);
void OnHostMessage(wxCommandEvent& event);
void OnSymbolsMenu(wxCommandEvent& event);
void OnJitMenu(wxCommandEvent& event);
void CreateMenu(const SCoreStartupParameter& _LocalCoreStartupParameter);

View File

@ -147,8 +147,8 @@ bool OpenGL_Create(SVideoInitialize &_VideoInitialize, int _iwidth, int _iheight
nBackbufferHeight = _theight;
// change later
s_nTargetWidth = 640<<g_AAx;
s_nTargetHeight = 480<<g_AAy;
s_nTargetWidth = 640 << g_AAx;
s_nTargetHeight = 480 << g_AAy;
g_VideoInitialize.pPeekMessages = &Callback_PeekMessages;
g_VideoInitialize.pUpdateFPSDisplay = &UpdateFPSDisplay;